dune-istl 2.2.0: repartition.hh
00001 #ifndef DUNE_REPARTITION_HH 00002 #define DUNE_REPARTITION_HH 00003 00004 #include <cassert> 00005 #include <map> 00006 #include <utility> 00007 00008 #if HAVE_PARMETIS 00009 #include <parmetis.h> 00010 #endif 00011 00012 #include <dune/common/timer.hh> 00013 #include <dune/common/enumset.hh> 00014 #include <dune/common/mpitraits.hh> 00015 #include <dune/common/stdstreams.hh> 00016 #include <dune/common/parallel/communicator.hh> 00017 #include <dune/common/parallel/indexset.hh> 00018 #include <dune/common/parallel/indicessyncer.hh> 00019 #include <dune/common/parallel/remoteindices.hh> 00020 00021 #include <dune/istl/owneroverlapcopy.hh> 00022 #include <dune/istl/paamg/graph.hh> 00023 00032 namespace Dune 00033 { 00034 #if HAVE_MPI 00035 00048 template<class G, class T1, class T2> 00049 void fillIndexSetHoles(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm) 00050 { 00051 typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet; 00052 typedef typename IndexSet::LocalIndex::Attribute Attribute; 00053 00054 IndexSet& indexSet = oocomm.indexSet(); 00055 const typename Dune::OwnerOverlapCopyCommunication<T1,T2>::GlobalLookupIndexSet& lookup =oocomm.globalLookup(); 00056 00057 // The type of the const vertex iterator. 00058 typedef typename G::ConstVertexIterator VertexIterator; 00059 00060 00061 std::size_t sum=0, needed = graph.noVertices()-indexSet.size(); 00062 std::vector<std::size_t> neededall(oocomm.communicator().size(), 0); 00063 00064 MPI_Allgather(&needed, 1, MPITraits<std::size_t>::getType() , &(neededall[0]), 1, MPITraits<std::size_t>::getType(), oocomm.communicator()); 00065 for(int i=0; i<oocomm.communicator().size(); ++i) 00066 sum=sum+neededall[i]; // MAke this for generic 00067 00068 if(sum==0) 00069 // Nothing to do 00070 return; 00071 00072 //Compute Maximum Global Index 00073 T1 maxgi=0; 00074 typedef typename IndexSet::const_iterator Iterator; 00075 Iterator end; 00076 end = indexSet.end(); 00077 for(Iterator it = indexSet.begin(); it != end; ++it) 00078 maxgi=std::max(maxgi,it->global()); 00079 00080 //Process p creates global indices consecutively 00081 //starting atmaxgi+\sum_{i=1}^p neededall[i] 00082 // All created indices are owned by the process 00083 maxgi=oocomm.communicator().max(maxgi); 00084 ++maxgi;//Sart with the next free index. 
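// Illustration (hypothetical numbers, for exposition only): with three processes,
// a current maximum global index of 10 and neededall = {2, 0, 3}, the loop below
// makes rank 0 create the new global indices 11 and 12, rank 1 none, and rank 2
// the indices 13, 14 and 15. Every rank starts at maxgi+1 plus the number of
// indices needed by all lower ranks, so the new indices are unique and consecutive.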
00085 00086 for(int i=0; i<oocomm.communicator().rank(); ++i) 00087 maxgi=maxgi+neededall[i]; // TODO: make this more generic 00088 00089 // Store the global index information for repairing the remote index information 00090 std::map<int,SLList<std::pair<T1,Attribute> > > globalIndices; 00091 storeGlobalIndicesOfRemoteIndices(globalIndices, oocomm.remoteIndices(), indexSet); 00092 indexSet.beginResize(); 00093 00094 for(VertexIterator vertex = graph.begin(), vend=graph.end(); vertex != vend; ++vertex){ 00095 const typename IndexSet::IndexPair* pair=lookup.pair(*vertex); 00096 if(pair==0){ 00097 // No index yet, add new one 00098 indexSet.add(maxgi, typename IndexSet::LocalIndex(*vertex, OwnerOverlapCopyAttributeSet::owner, false)); 00099 ++maxgi; 00100 } 00101 } 00102 00103 indexSet.endResize(); 00104 00105 repairLocalIndexPointers(globalIndices, oocomm.remoteIndices(), indexSet); 00106 00107 oocomm.freeGlobalLookup(); 00108 oocomm.buildGlobalLookup(); 00109 #ifdef DEBUG_REPART 00110 std::cout<<"Holes are filled!"<<std::endl; 00111 std::cout<<oocomm.communicator().rank()<<": "<<oocomm.indexSet()<<std::endl; 00112 #endif 00113 } 00114 00115 namespace 00116 { 00117 00118 class ParmetisDuneIndexMap 00119 { 00120 public: 00121 template<class Graph, class OOComm> 00122 ParmetisDuneIndexMap(const Graph& graph, const OOComm& com); 00123 int toParmetis(int i) const 00124 { 00125 return duneToParmetis[i]; 00126 } 00127 int toLocalParmetis(int i) const 00128 { 00129 return duneToParmetis[i]-base_; 00130 } 00131 int operator[](int i) const 00132 { 00133 return duneToParmetis[i]; 00134 } 00135 int toDune(int i) const 00136 { 00137 return parmetisToDune[i]; 00138 } 00139 std::vector<int>::size_type numOfOwnVtx() const 00140 { 00141 return parmetisToDune.size(); 00142 } 00143 int* vtxDist() 00144 { 00145 return &vtxDist_[0]; 00146 } 00147 int globalOwnerVertices; 00148 private: 00149 int base_; 00150 std::vector<int> duneToParmetis; 00151 std::vector<int> parmetisToDune; 00152 // range of vertices for processor i: vtxdist[i] to vtxdist[i+1] (parmetis global) 00153 std::vector<int> vtxDist_; 00154 }; 00155 00156 template<class G, class OOComm> 00157 ParmetisDuneIndexMap::ParmetisDuneIndexMap(const G& graph, const OOComm& oocomm) 00158 : duneToParmetis(graph.noVertices(), -1), vtxDist_(oocomm.communicator().size()+1) 00159 { 00160 int npes=oocomm.communicator().size(), mype=oocomm.communicator().rank(); 00161 00162 typedef typename OOComm::ParallelIndexSet::const_iterator Iterator; 00163 typedef typename OOComm::OwnerSet OwnerSet; 00164 00165 int numOfOwnVtx=0; 00166 Iterator end = oocomm.indexSet().end(); 00167 for(Iterator index = oocomm.indexSet().begin(); index != end; ++index) { 00168 if (OwnerSet::contains(index->local().attribute())) { 00169 numOfOwnVtx++; 00170 } 00171 } 00172 parmetisToDune.resize(numOfOwnVtx); 00173 std::vector<int> globalNumOfVtx(npes); 00174 // make this number available to all processes 00175 MPI_Allgather(&numOfOwnVtx, 1, MPI_INT, &(globalNumOfVtx[0]), 1, MPI_INT, oocomm.communicator()); 00176 00177 int base=0; 00178 vtxDist_[0] = 0; 00179 for(int i=0; i<npes; i++) { 00180 if (i<mype) { 00181 base += globalNumOfVtx[i]; 00182 } 00183 vtxDist_[i+1] = vtxDist_[i] + globalNumOfVtx[i]; 00184 } 00185 globalOwnerVertices=vtxDist_[npes]; 00186 base_=base; 00187 00188 // The type of the const vertex iterator. 
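// Illustration (hypothetical numbers, for exposition only): with npes = 3 and
// globalNumOfVtx = {4, 2, 5} the loop above yields vtxDist_ = {0, 4, 6, 11}.
// On rank 1, base_ becomes 4, so its owner vertices get the ParMETIS global
// numbers 4 and 5, while rank 0 gets 0..3 and rank 2 gets 6..10;
// globalOwnerVertices is 11, the total number of owner vertices.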
00189 typedef typename G::ConstVertexIterator VertexIterator; 00190 #ifdef DEBUG_REPART 00191 std::cout << oocomm.communicator().rank()<<" vtxDist: "; 00192 for(int i=0; i<= npes;++i) 00193 std::cout << vtxDist_[i]<<" "; 00194 std::cout<<std::endl; 00195 #endif 00196 00197 // Traverse the graph and assign a new consecutive number/index 00198 // starting by "base" to all owner vertices. 00199 // The new index is used as the ParMETIS global index and is 00200 // stored in the vector "duneToParmetis" 00201 VertexIterator vend = graph.end(); 00202 for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex) { 00203 const typename OOComm::ParallelIndexSet::IndexPair* index=oocomm.globalLookup().pair(*vertex); 00204 assert(index); 00205 if (OwnerSet::contains(index->local().attribute())) { 00206 // assign and count the index 00207 parmetisToDune[base-base_]=index->local(); 00208 duneToParmetis[index->local()] = base++; 00209 } 00210 } 00211 00212 // At this point, every process knows the ParMETIS global index 00213 // of it's owner vertices. The next step is to get the 00214 // ParMETIS global index of the overlap vertices from the 00215 // associated processes. To do this, the Dune::Interface class 00216 // is used. 00217 #ifdef DEBUG_REPART 00218 std::cout <<oocomm.communicator().rank()<<": before "; 00219 for(std::size_t i=0; i<duneToParmetis.size(); ++i) 00220 std::cout<<duneToParmetis[i]<<" "; 00221 std::cout<<std::endl; 00222 #endif 00223 oocomm.copyOwnerToAll(duneToParmetis,duneToParmetis); 00224 #ifdef DEBUG_REPART 00225 std::cout <<oocomm.communicator().rank()<<": after "; 00226 for(std::size_t i=0; i<duneToParmetis.size(); ++i) 00227 std::cout<<duneToParmetis[i]<<" "; 00228 std::cout<<std::endl; 00229 #endif 00230 } 00231 } 00232 00233 struct RedistributeInterface 00234 : public Interface 00235 { 00236 void setCommunicator(MPI_Comm comm) 00237 { 00238 communicator_=comm; 00239 } 00240 template<class Flags,class IS> 00241 void buildSendInterface(const std::vector<int>& toPart, const IS& idxset) 00242 { 00243 typedef std::vector<int>::const_iterator Iter; 00244 std::map<int,int> sizes; 00245 00246 typedef typename IS::const_iterator IIter; 00247 for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i) 00248 if(Flags::contains(i->local().attribute())) 00249 ++sizes[toPart[i->local()]]; 00250 00251 // Allocate the necessary space 00252 typedef std::map<int,int>::const_iterator MIter; 00253 for(MIter i=sizes.begin(), end=sizes.end(); i!=end; ++i) 00254 interfaces()[i->first].first.reserve(i->second); 00255 00256 //Insert the interface information 00257 typedef typename IS::const_iterator IIter; 00258 for(IIter i=idxset.begin(), end=idxset.end();i!=end; ++i) 00259 if(Flags::contains(i->local().attribute())) 00260 interfaces()[toPart[i->local()]].first.add(i->local()); 00261 } 00262 00263 void reserveSpaceForReceiveInterface(int proc, int size) 00264 { 00265 interfaces()[proc].second.reserve(size); 00266 } 00267 void addReceiveIndex(int proc, std::size_t idx) 00268 { 00269 interfaces()[proc].second.add(idx); 00270 } 00271 template<typename TG> 00272 void buildReceiveInterface(std::vector<std::pair<TG,int> >& indices) 00273 { 00274 typedef typename std::vector<std::pair<TG,int> >::const_iterator VIter; 00275 std::size_t i=0; 00276 for(VIter idx=indices.begin(); idx!= indices.end(); ++idx){ 00277 interfaces()[idx->second].second.add(i++); 00278 } 00279 } 00280 00281 ~RedistributeInterface() 00282 { 00283 } 00284 00285 }; 00286 00287 namespace 00288 { 00298 template<class GI> 00299 void 
createSendBuf(std::vector<GI>& ownerVec, std::set<GI>& overlapVec, std::set<int>& neighbors, char *sendBuf, int buffersize, MPI_Comm comm) { 00300 // Pack owner vertices 00301 std::size_t s=ownerVec.size(); 00302 int pos=0; 00303 if(s==0) 00304 ownerVec.resize(1); // otherwise would read beyond the memory bound 00305 MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm); 00306 MPI_Pack(&(ownerVec[0]), s, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm); 00307 s = overlapVec.size(); 00308 MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm); 00309 typedef typename std::set<GI>::iterator Iter; 00310 for(Iter i=overlapVec.begin(), end= overlapVec.end(); i != end; ++i) 00311 MPI_Pack(const_cast<GI*>(&(*i)), 1, MPITraits<GI>::getType(), sendBuf, buffersize, &pos, comm); 00312 00313 s=neighbors.size(); 00314 MPI_Pack(&s, 1, MPITraits<std::size_t>::getType(), sendBuf, buffersize, &pos, comm); 00315 typedef typename std::set<int>::iterator IIter; 00316 00317 for(IIter i=neighbors.begin(), end= neighbors.end(); i != end; ++i) 00318 MPI_Pack(const_cast<int*>(&(*i)), 1, MPI_INT, sendBuf, buffersize, &pos, comm); 00319 } 00328 template<class GI> 00329 void saveRecvBuf(char *recvBuf, int bufferSize, std::vector<std::pair<GI,int> >& ownerVec, 00330 std::set<GI>& overlapVec, std::set<int>& neighbors, RedistributeInterface& inf, int from, MPI_Comm comm) { 00331 std::size_t size; 00332 int pos=0; 00333 // unpack owner vertices 00334 MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm); 00335 inf.reserveSpaceForReceiveInterface(from, size); 00336 ownerVec.reserve(ownerVec.size()+size); 00337 for(;size!=0;--size){ 00338 GI gi; 00339 MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm); 00340 ownerVec.push_back(std::make_pair(gi,from)); 00341 } 00342 // unpack overlap vertices 00343 MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm); 00344 typename std::set<GI>::iterator ipos = overlapVec.begin(); 00345 Dune::dverb << "unpacking "<<size<<" overlap"<<std::endl; 00346 for(;size!=0;--size){ 00347 GI gi; 00348 MPI_Unpack(recvBuf, bufferSize, &pos, &gi, 1, MPITraits<GI>::getType(), comm); 00349 ipos=overlapVec.insert(ipos, gi); 00350 } 00351 //unpack neighbors 00352 MPI_Unpack(recvBuf, bufferSize, &pos, &size, 1, MPITraits<std::size_t>::getType(), comm); 00353 Dune::dverb << "unpacking "<<size<<" neighbors"<<std::endl; 00354 typename std::set<int>::iterator npos = neighbors.begin(); 00355 for(;size!=0;--size){ 00356 int n; 00357 MPI_Unpack(recvBuf, bufferSize, &pos, &n, 1, MPI_INT, comm); 00358 npos=neighbors.insert(npos, n); 00359 } 00360 } 00361 00375 template<typename T> 00376 void getDomain(const MPI_Comm& comm, T *part, int numOfOwnVtx, int nparts, int *myDomain, int domainMapping[]) { 00377 int npes, mype; 00378 MPI_Comm_size(comm, &npes); 00379 MPI_Comm_rank(comm, &mype); 00380 MPI_Status status; 00381 00382 *myDomain = -1; 00383 int i=0; 00384 int j=0; 00385 00386 int domain[nparts]; 00387 int assigned[npes]; 00388 // init 00389 for (i=0; i<nparts; i++) { 00390 domainMapping[i] = -1; 00391 domain[i] = 0; 00392 } 00393 for (i=0; i<npes; i++) { 00394 assigned[i] = -0; 00395 } 00396 // count the occurance of domains 00397 for (i=0; i<numOfOwnVtx; i++) { 00398 domain[part[i]]++; 00399 } 00400 00401 int *domainMatrix = new int[npes * nparts]; 00402 // init 00403 for(i=0; i<npes*nparts; i++) { 00404 domainMatrix[i]=-1; 00405 } 00406 00407 // 
init buffer with the own domain 00408 int *buf = new int[nparts]; 00409 for (i=0; i<nparts; i++) { 00410 buf[i] = domain[i]; 00411 domainMatrix[mype*nparts+i] = domain[i]; 00412 } 00413 int pe=0; 00414 int src = (mype-1+npes)%npes; 00415 int dest = (mype+1)%npes; 00416 // ring communication, we need n-1 communications for n processors 00417 for (i=0; i<npes-1; i++) { 00418 MPI_Sendrecv_replace(buf, nparts, MPI_INT, dest, 0, src, 0, comm, &status); 00419 // pe is the process of the actual received buffer 00420 pe = ((mype-1-i)+npes)%npes; 00421 for(j=0; j<nparts; j++) { 00422 // save the values to the domain matrix 00423 domainMatrix[pe*nparts+j] = buf[j]; 00424 } 00425 } 00426 delete[] buf; 00427 00428 // Start the domain calculation. 00429 // The process which contains the maximum number of vertices of a 00430 // particular domain is selected to choose it's favorate domain 00431 int maxOccurance = 0; 00432 pe = -1; 00433 for(i=0; i<nparts; i++) { 00434 for(j=0; j<npes; j++) { 00435 // process has no domain assigned 00436 if (assigned[j]==0) { 00437 if (maxOccurance < domainMatrix[j*nparts+i]) { 00438 maxOccurance = domainMatrix[j*nparts+i]; 00439 pe = j; 00440 } 00441 } 00442 00443 } 00444 if (pe!=-1) { 00445 // process got a domain, ... 00446 domainMapping[i] = pe; 00447 // ...mark as assigned 00448 assigned[pe] = 1; 00449 if (pe==mype) { 00450 *myDomain = i; 00451 } 00452 pe = -1; 00453 } 00454 maxOccurance = 0; 00455 } 00456 00457 delete[] domainMatrix; 00458 00459 } 00460 00461 struct SortFirst 00462 { 00463 template<class T> 00464 bool operator()(const T& t1, const T& t2) const 00465 { 00466 return t1<t2; 00467 } 00468 }; 00469 00470 00481 template<class GI> 00482 void mergeVec(std::vector<std::pair<GI, int> >& ownerVec, std::set<GI>& overlapSet) { 00483 00484 typedef typename std::vector<std::pair<GI,int> >::const_iterator VIter; 00485 #ifdef DEBUG_REPART 00486 // Safty check for duplicates. 00487 if(ownerVec.size()>0) 00488 { 00489 VIter old=ownerVec.begin(); 00490 for(VIter i=old+1, end=ownerVec.end(); i != end; old=i++) 00491 { 00492 if(i->first==old->first) 00493 { 00494 std::cerr<<"Value at indes"<<old-ownerVec.begin()<<" is the same as at index " 00495 <<i-ownerVec.begin()<<" ["<<old->first<<","<<old->second<<"]==[" 00496 <<i->first<<","<<i->second<<"]"<<std::endl; 00497 throw "Huch!"; 00498 } 00499 } 00500 } 00501 00502 #endif 00503 00504 typedef typename std::set<GI>::iterator SIter; 00505 VIter v=ownerVec.begin(), vend=ownerVec.end(); 00506 for(SIter s=overlapSet.begin(), send=overlapSet.end(); s!=send;) 00507 { 00508 while(v!=vend && v->first<*s) ++v; 00509 if(v!=vend && v->first==*s){ 00510 // Move to the next element before erasing 00511 // thus s stays valid! 
00512 SIter tmp=s; 00513 ++s; 00514 overlapSet.erase(tmp); 00515 }else 00516 ++s; 00517 } 00518 } 00519 00520 00534 template<class OwnerSet, class Graph, class IS, class GI> 00535 void getNeighbor(const Graph& g, std::vector<int>& part, 00536 typename Graph::VertexDescriptor vtx, const IS& indexSet, 00537 int toPe, std::set<GI>& neighbor, std::set<int>& neighborProcs) { 00538 typedef typename Graph::ConstEdgeIterator Iter; 00539 for(Iter edge=g.beginEdges(vtx), end=g.endEdges(vtx); edge!=end; ++edge) 00540 { 00541 const typename IS::IndexPair* pindex = indexSet.pair(edge.target()); 00542 assert(pindex); 00543 if(part[pindex->local()]!=toPe || !OwnerSet::contains(pindex->local().attribute())) 00544 { 00545 // is sent to another process and therefore becomes overlap 00546 neighbor.insert(pindex->global()); 00547 neighborProcs.insert(part[pindex->local()]); 00548 } 00549 } 00550 } 00551 00552 template<class T, class I> 00553 void my_push_back(std::vector<T>& ownerVec, const I& index, int proc) 00554 { 00555 ownerVec.push_back(index); 00556 } 00557 00558 template<class T, class I> 00559 void my_push_back(std::vector<std::pair<T,int> >& ownerVec, const I& index, int proc) 00560 { 00561 ownerVec.push_back(std::make_pair(index,proc)); 00562 } 00563 template<class T> 00564 void reserve(std::vector<T>&, RedistributeInterface&, int) 00565 { 00566 } 00567 template<class T> 00568 void reserve(std::vector<std::pair<T,int> >& ownerVec, RedistributeInterface& redist, int proc) 00569 { 00570 redist.reserveSpaceForReceiveInterface(proc, ownerVec.size()); 00571 } 00572 00573 00591 template<class OwnerSet, class G, class IS, class T, class GI> 00592 void getOwnerOverlapVec(const G& graph, std::vector<int>& part, IS& indexSet, 00593 int myPe, int toPe, std::vector<T>& ownerVec, std::set<GI>& overlapSet, 00594 RedistributeInterface& redist, std::set<int>& neighborProcs) { 00595 00596 //typedef typename IndexSet::const_iterator Iterator; 00597 typedef typename IS::const_iterator Iterator; 00598 for(Iterator index = indexSet.begin(); index != indexSet.end(); ++index) { 00599 // Only Process owner vertices, the others are not in the parmetis graph. 
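// (Every owner vertex assigned to toPe is appended to ownerVec; getNeighbor
// additionally records, as prospective overlap, the global indices of its
// neighbours that are assigned to a different part or not owned here, together
// with the parts they belong to.)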
00600 if(OwnerSet::contains(index->local().attribute())) 00601 { 00602 if(part[index->local()]==toPe) 00603 { 00604 getNeighbor<OwnerSet>(graph, part, index->local(), indexSet, 00605 toPe, overlapSet, neighborProcs); 00606 my_push_back(ownerVec, index->global(), toPe); 00607 } 00608 } 00609 } 00610 reserve(ownerVec, redist, toPe); 00611 00612 } 00613 00614 00621 template<class F, class IS> 00622 inline bool isOwner(IS& indexSet, int index) { 00623 00624 const typename IS::IndexPair* pindex=indexSet.pair(index); 00625 00626 assert(pindex); 00627 return F::contains(pindex->local().attribute()); 00628 } 00629 00630 00631 class BaseEdgeFunctor 00632 { 00633 public: 00634 BaseEdgeFunctor(int* adj,const ParmetisDuneIndexMap& data) 00635 :i_(), adj_(adj), data_(data) 00636 {} 00637 00638 template<class T> 00639 void operator()(const T& edge) 00640 { 00641 // Get the egde weight 00642 // const Weight& weight=edge.weight(); 00643 adj_[i_] = data_.toParmetis(edge.target()); 00644 i_++; 00645 } 00646 std::size_t index() 00647 { 00648 return i_; 00649 } 00650 00651 private: 00652 std::size_t i_; 00653 int* adj_; 00654 const ParmetisDuneIndexMap& data_; 00655 }; 00656 00657 template<typename G> 00658 struct EdgeFunctor 00659 : public BaseEdgeFunctor 00660 { 00661 EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s) 00662 : BaseEdgeFunctor(adj, data) 00663 {} 00664 00665 int* getWeights() 00666 { 00667 return NULL; 00668 } 00669 void free(){} 00670 }; 00671 00672 template<class G, class V, class E, class VM, class EM> 00673 class EdgeFunctor<Dune::Amg::PropertiesGraph<G,V,E,VM,EM> > 00674 : public BaseEdgeFunctor 00675 { 00676 public: 00677 EdgeFunctor(int* adj, const ParmetisDuneIndexMap& data, std::size_t s) 00678 :BaseEdgeFunctor(adj, data) 00679 { 00680 weight_=new int[s]; 00681 } 00682 00683 template<class T> 00684 void operator()(const T& edge) 00685 { 00686 weight_[index()]=edge.properties().depends()?3:1; 00687 BaseEdgeFunctor::operator()(edge); 00688 } 00689 int* getWeights() 00690 { 00691 return weight_; 00692 } 00693 void free(){ 00694 if(weight_!=0){ 00695 delete weight_; 00696 weight_=0; 00697 } 00698 } 00699 private: 00700 int* weight_; 00701 }; 00702 00703 00704 00718 template<class F, class G, class IS, class EW> 00719 void getAdjArrays(G& graph, IS& indexSet, int *xadj, 00720 EW& ew) 00721 { 00722 int j=0; 00723 00724 // The type of the const vertex iterator. 00725 typedef typename G::ConstVertexIterator VertexIterator; 00726 //typedef typename IndexSet::const_iterator Iterator; 00727 typedef typename IS::const_iterator Iterator; 00728 00729 VertexIterator vend = graph.end(); 00730 Iterator end; 00731 00732 for(VertexIterator vertex = graph.begin(); vertex != vend; ++vertex){ 00733 if (isOwner<F>(indexSet,*vertex)) { 00734 // The type of const edge iterator. 
00735 typedef typename G::ConstEdgeIterator EdgeIterator; 00736 EdgeIterator eend = vertex.end(); 00737 xadj[j] = ew.index(); 00738 j++; 00739 for(EdgeIterator edge = vertex.begin(); edge != eend; ++edge){ 00740 ew(edge); 00741 } 00742 } 00743 } 00744 xadj[j] = ew.index(); 00745 } 00746 }// end anonymous namespace 00747 00748 template<class G, class T1, class T2> 00749 bool buildCommunication(const G& graph, std::vector<int>& realparts, 00750 Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, 00751 Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm, 00752 RedistributeInterface& redistInf, 00753 bool verbose=false); 00754 #if HAVE_PARMETIS 00755 extern "C"{ 00756 void METIS_PartGraphKway(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, 00757 idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, 00758 int *options, int *edgecut, idxtype *part); 00759 00760 void METIS_PartGraphRecursive(int *nvtxs, idxtype *xadj, idxtype *adjncy, idxtype *vwgt, 00761 idxtype *adjwgt, int *wgtflag, int *numflag, int *nparts, 00762 int *options, int *edgecut, idxtype *part); 00763 } 00764 #endif 00765 00766 template<class S, class T> 00767 inline void print_carray(S& os, T* array, std::size_t l) 00768 { 00769 for(T *cur=array, *end=array+l; cur!=end; ++cur) 00770 os<<*cur<<" "; 00771 } 00772 #if !HAVE_PARMETIS 00773 typedef std::size_t idxtype; 00774 #endif 00775 00776 inline bool isValidGraph(std::size_t noVtx, std::size_t gnoVtx, idxtype noEdges, idxtype* xadj, 00777 idxtype* adjncy, bool checkSymmetry) 00778 { 00779 bool correct=true; 00780 00781 for(idxtype vtx=0; vtx<(idxtype)noVtx; ++vtx){ 00782 if(xadj[vtx]>noEdges||xadj[vtx]<0){ 00783 std::cerr <<"Check graph: xadj["<<vtx<<"]="<<xadj[vtx]<<" (>" 00784 <<noEdges<<") out of range!"<<std::endl; 00785 correct=false; 00786 } 00787 if(xadj[vtx+1]>noEdges||xadj[vtx+1]<0){ 00788 std::cerr <<"Check graph: xadj["<<vtx+1<<"]="<<xadj[vtx+1]<<" (>" 00789 <<noEdges<<") out of range!"<<std::endl; 00790 correct=false; 00791 } 00792 // Check numbers in adjncy 00793 for(idxtype i=xadj[vtx]; i< xadj[vtx+1];++i){ 00794 if(adjncy[i]<0||((std::size_t)adjncy[i])>gnoVtx){ 00795 std::cerr<<" Edge "<<adjncy[i]<<" out of range ["<<0<<","<<noVtx<<")" 00796 <<std::endl; 00797 correct=false; 00798 } 00799 } 00800 if(checkSymmetry){ 00801 for(idxtype i=xadj[vtx]; i< xadj[vtx+1];++i){ 00802 idxtype target=adjncy[i]; 00803 // search for symmetric edge 00804 int found=0; 00805 for(idxtype j=xadj[target]; j< xadj[target+1];++j) 00806 if(adjncy[j]==vtx) 00807 found++; 00808 if(found!=1){ 00809 std::cerr<<"Edge ("<<target<<","<<vtx<<") "<<i<<" time"<<std::endl; 00810 correct=false; 00811 } 00812 } 00813 } 00814 } 00815 return correct; 00816 } 00817 00818 template<class M, class T1, class T2> 00819 bool commGraphRepartition(const M& mat, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts, 00820 Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm, 00821 RedistributeInterface& redistInf, 00822 bool verbose=false) 00823 { 00824 if(verbose && oocomm.communicator().rank()==0) 00825 std::cout<<"Repartitioning from "<<oocomm.communicator().size() 00826 <<" to "<<nparts<<" parts"<<std::endl; 00827 Timer time; 00828 int rank = oocomm.communicator().rank(); 00829 #if !HAVE_PARMETIS 00830 int* part = new int[1]; 00831 part[0]=0; 00832 #else 00833 idxtype* part = new idxtype[1]; // where all our data moves to 00834 00835 if(nparts>1){ 00836 00837 part[0]=rank; 00838 00839 { // sublock for automatic memory deletion 00840 00841 // Build the graph of the communication scheme 
and create an appropriate indexset. 00842 // calculate the neighbour vertices 00843 int noNeighbours = oocomm.remoteIndices().neighbours(); 00844 typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::RemoteIndices RemoteIndices; 00845 typedef typename RemoteIndices::const_iterator 00846 NeighbourIterator; 00847 00848 for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end(); 00849 ++n) 00850 if(n->first==rank){ 00851 //do not include ourselves. 00852 --noNeighbours; 00853 break; 00854 } 00855 00856 // A parmetis graph representing the communication graph. 00857 // The diagonal entries are the number of nodes on the process. 00858 // The offdiagonal entries are the number of edges leading to other processes. 00859 00860 idxtype *xadj=new idxtype[2], *vwgt = 0; 00861 idxtype *vtxdist=new idxtype[oocomm.communicator().size()+1]; 00862 idxtype * adjncy=new idxtype[noNeighbours], *adjwgt = 0; 00863 00864 // each process has exactly one vertex! 00865 for(int i=0; i<oocomm.communicator().size(); ++i) 00866 vtxdist[i]=i; 00867 vtxdist[oocomm.communicator().size()]=oocomm.communicator().size(); 00868 00869 xadj[0]=0; 00870 xadj[1]=noNeighbours; 00871 00872 // count edges to other processor 00873 // a vector mapping the index to the owner 00874 // std::vector<int> owner(mat.N(), oocomm.communicator().rank()); 00875 // for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end(); 00876 // ++n) 00877 // { 00878 // if(n->first!=oocomm.communicator().rank()){ 00879 // typedef typename RemoteIndices::RemoteIndexList RIList; 00880 // const RIList& rlist = *(n->second.first); 00881 // typedef typename RIList::const_iterator LIter; 00882 // for(LIter entry=rlist.begin(); entry!=rlist.end(); ++entry){ 00883 // if(entry->attribute()==OwnerOverlapCopyAttributeSet::owner) 00884 // owner[entry->localIndexPair().local()] = n->first; 00885 // } 00886 // } 00887 // } 00888 00889 // std::map<int,idxtype> edgecount; // edges to other processors 00890 // typedef typename M::ConstRowIterator RIter; 00891 // typedef typename M::ConstColIterator CIter; 00892 00893 // // calculate edge count 00894 // for(RIter row=mat.begin(), endr=mat.end(); row != endr; ++row) 00895 // if(owner[row.index()]==OwnerOverlapCopyAttributeSet::owner) 00896 // for(CIter entry= row->begin(), ende = row->end(); entry != ende; ++entry) 00897 // ++edgecount[owner[entry.index()]]; 00898 00899 // setup edge and weight pattern 00900 typedef typename RemoteIndices::const_iterator NeighbourIterator; 00901 typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet IndexSet; 00902 typedef typename IndexSet::LocalIndex LocalIndex; 00903 00904 idxtype* adjp=adjncy; 00905 00906 #ifdef USE_WEIGHTS 00907 vwgt = new idxtype[1]; 00908 vwgt[0]= mat.N(); // weight is numer of rows TODO: Should actually be the nonzeros. 
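// Illustration (hypothetical ranks, for exposition only): on rank 2 with
// neighbouring ranks {0, 3} these arrays describe a graph with one vertex per
// process: vtxdist = {0, 1, 2, ..., P}, xadj = {0, 2} and adjncy = {0, 3}
// (filled by the loop below), so the partitioner redistributes whole processes
// rather than individual matrix rows.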
00909 00910 adjwgt = new idxtype[noNeighbours]; 00911 idxtype* adjwp=adjwgt; 00912 #endif 00913 00914 for(NeighbourIterator n= oocomm.remoteIndices().begin(); n != oocomm.remoteIndices().end(); 00915 ++n) 00916 if(n->first != rank){ 00917 *adjp=n->first; 00918 ++adjp; 00919 #ifdef USE_WEIGHTS 00920 *adjwp=1;//edgecount[n->first]; 00921 ++adjwp; 00922 #endif 00923 } 00924 assert(isValidGraph(vtxdist[rank+1]-vtxdist[rank], 00925 vtxdist[oocomm.communicator().size()], 00926 noNeighbours, xadj, adjncy, false)); 00927 00928 int wgtflag=0, numflag=0, edgecut; 00929 #ifdef USE_WEIGHTS 00930 wgtflag=3; 00931 #endif 00932 float *tpwgts = new float[nparts]; 00933 for(int i=0; i<nparts; ++i) 00934 tpwgts[i]=1.0/nparts; 00935 int options[5] ={ 0,1,15,0,0}; 00936 MPI_Comm comm=oocomm.communicator(); 00937 00938 Dune::dinfo<<rank<<" vtxdist: "; 00939 print_carray(Dune::dinfo, vtxdist, oocomm.communicator().size()+1); 00940 Dune::dinfo<<std::endl<<rank<<" xadj: "; 00941 print_carray(Dune::dinfo, xadj, 2); 00942 Dune::dinfo<<std::endl<<rank<<" adjncy: "; 00943 print_carray(Dune::dinfo, adjncy, noNeighbours); 00944 00945 #ifdef USE_WEIGHTS 00946 Dune::dinfo<<std::endl<<rank<<" vwgt: "; 00947 print_carray(Dune::dinfo, vwgt, 1); 00948 Dune::dinfo<<std::endl<<rank<<" adwgt: "; 00949 print_carray(Dune::dinfo, adjwgt, noNeighbours); 00950 #endif 00951 Dune::dinfo<<std::endl; 00952 oocomm.communicator().barrier(); 00953 if(verbose && oocomm.communicator().rank()==0) 00954 std::cout<<"Creating comm graph took "<<time.elapsed()<<std::endl; 00955 time.reset(); 00956 00957 #ifdef PARALLEL_PARTITION 00958 float ubvec = 1.15; 00959 int ncon=1; 00960 00961 //======================================================= 00962 // ParMETIS_V3_PartKway 00963 //======================================================= 00964 ParMETIS_V3_PartKway(vtxdist, xadj, adjncy, 00965 vwgt, adjwgt, &wgtflag, 00966 &numflag, &ncon, &nparts, tpwgts, &ubvec, options, &edgecut, part, 00967 &comm); 00968 if(verbose && oocomm.communicator().rank()==0) 00969 std::cout<<"ParMETIS took "<<time.elapsed()<<std::endl; 00970 time.reset(); 00971 #else 00972 Timer time1; 00973 std::size_t gnoedges=0; 00974 int* noedges = 0; 00975 noedges = new int[oocomm.communicator().size()]; 00976 Dune::dverb<<"noNeighbours: "<<noNeighbours<<std::endl; 00977 // gather number of edges for each vertex. 
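// (Each rank contributes exactly one vertex, so noedges[i] is simply the number
// of neighbour ranks of rank i. The Allgather/Allgatherv calls below assemble
// the complete communication graph on every rank; METIS then partitions it
// redundantly on each rank and only rank 0's result is scattered back.)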
00978 MPI_Allgather(&noNeighbours,1,MPI_INT,noedges,1, MPI_INT,oocomm.communicator()); 00979 00980 if(verbose && oocomm.communicator().rank()==0) 00981 std::cout<<"Gathering noedges took "<<time1.elapsed()<<std::endl; 00982 time1.reset(); 00983 00984 int noVertices = vtxdist[oocomm.communicator().size()]; 00985 idxtype *gxadj = 0; 00986 idxtype *gvwgt = 0; 00987 idxtype *gadjncy = 0; 00988 idxtype *gadjwgt = 0; 00989 idxtype *gpart = 0; 00990 int* displ = 0; 00991 int* noxs = 0; 00992 int* xdispl = 0; // displacement for xadj 00993 int* novs = 0; 00994 int* vdispl=0; // real vertex displacement 00995 std::size_t localNoVtx=vtxdist[rank+1]-vtxdist[rank]; 00996 std::size_t gxadjlen = vtxdist[oocomm.communicator().size()]-vtxdist[0]+oocomm.communicator().size(); 00997 00998 { 00999 Dune::dinfo<<"noedges: "; 01000 print_carray(Dune::dinfo, noedges, oocomm.communicator().size()); 01001 Dune::dinfo<<std::endl; 01002 displ = new int[oocomm.communicator().size()]; 01003 xdispl = new int[oocomm.communicator().size()]; 01004 noxs = new int[oocomm.communicator().size()]; 01005 vdispl = new int[oocomm.communicator().size()]; 01006 novs = new int[oocomm.communicator().size()]; 01007 01008 for(int i=0; i < oocomm.communicator().size(); ++i){ 01009 noxs[i]=vtxdist[i+1]-vtxdist[i]+1; 01010 novs[i]=vtxdist[i+1]-vtxdist[i]; 01011 } 01012 01013 idxtype *so= vtxdist; 01014 int offset = 0; 01015 for(int *xcurr = xdispl, *vcurr = vdispl, *end=vdispl+oocomm.communicator().size(); 01016 vcurr!=end; ++vcurr, ++xcurr, ++so, ++offset){ 01017 *vcurr = *so; 01018 *xcurr = offset + *so; 01019 } 01020 01021 int *pdispl =displ; 01022 int cdispl = 0; 01023 *pdispl = 0; 01024 for(int *curr=noedges, *end=noedges+oocomm.communicator().size()-1; 01025 curr!=end; ++curr){ 01026 ++pdispl; // next displacement 01027 cdispl += *curr; // next value 01028 *pdispl = cdispl; 01029 } 01030 Dune::dinfo<<"displ: "; 01031 print_carray(Dune::dinfo, displ, oocomm.communicator().size()); 01032 Dune::dinfo<<std::endl; 01033 01034 // calculate global number of edges 01035 // It is bigger than the actual one as we habe size-1 additional end entries 01036 for(int *curr=noedges, *end=noedges+oocomm.communicator().size(); 01037 curr!=end; ++curr) 01038 gnoedges += *curr; 01039 01040 // alocate gobal graph 01041 Dune::dinfo<<"gxadjlen: "<<gxadjlen<<" noVertices: "<<noVertices 01042 <<" gnoedges: "<<gnoedges<<std::endl; 01043 gxadj = new idxtype[gxadjlen]; 01044 gpart = new idxtype[noVertices]; 01045 #ifdef USE_WEIGHTS 01046 gvwgt = new idxtype[noVertices]; 01047 gadjwgt = new idxtype[gnoedges]; 01048 #endif 01049 gadjncy = new idxtype[gnoedges]; 01050 } 01051 01052 if(verbose && oocomm.communicator().rank()==0) 01053 std::cout<<"Preparing global graph took "<<time1.elapsed()<<std::endl; 01054 time1.reset(); 01055 // Communicate data 01056 01057 MPI_Allgatherv(xadj,2,MPITraits<idxtype>::getType(), 01058 gxadj,noxs,xdispl,MPITraits<idxtype>::getType(), 01059 comm); 01060 MPI_Allgatherv(adjncy,noNeighbours,MPITraits<idxtype>::getType(), 01061 gadjncy,noedges,displ,MPITraits<idxtype>::getType(), 01062 comm); 01063 #ifdef USE_WEIGHTS 01064 MPI_Allgatherv(adjwgt,noNeighbours,MPITraits<idxtype>::getType(), 01065 gadjwgt,noedges,displ,MPITraits<idxtype>::getType(), 01066 comm); 01067 MPI_Allgatherv(vwgt,localNoVtx,MPITraits<idxtype>::getType(), 01068 gvwgt,novs,vdispl,MPITraits<idxtype>::getType(), 01069 comm); 01070 #endif 01071 if(verbose && oocomm.communicator().rank()==0) 01072 std::cout<<"Gathering global graph data took 
"<<time1.elapsed()<<std::endl; 01073 time1.reset(); 01074 01075 { 01076 // create the real gxadj array 01077 // i.e. shift entries and add displacements. 01078 01079 print_carray(Dune::dinfo, gxadj, gxadjlen); 01080 01081 int offset = 0; 01082 idxtype increment = vtxdist[1]; 01083 idxtype *start=gxadj+1; 01084 for(int i=1; i<oocomm.communicator().size(); ++i){ 01085 offset+=1; 01086 int lprev = vtxdist[i]-vtxdist[i-1]; 01087 int l = vtxdist[i+1]-vtxdist[i]; 01088 start+=lprev; 01089 assert((start+l+offset)-gxadj<=static_cast<idxtype>(gxadjlen)); 01090 increment = *(start-1); 01091 std::transform(start+offset, start+l+offset, start, std::bind2nd(std::plus<idxtype>(), increment)); 01092 } 01093 Dune::dinfo<<std::endl<<"shifted xadj:"; 01094 print_carray(Dune::dinfo, gxadj, noVertices+1); 01095 Dune::dinfo<<std::endl<<" gadjncy: "; 01096 print_carray(Dune::dinfo, gadjncy, gnoedges); 01097 #ifdef USE_WEIGHTS 01098 Dune::dinfo<<std::endl<<" gvwgt: "; 01099 print_carray(Dune::dinfo, gvwgt, noVertices); 01100 Dune::dinfo<<std::endl<<"adjwgt: "; 01101 print_carray(Dune::dinfo, gadjwgt, gnoedges); 01102 Dune::dinfo<<std::endl; 01103 #endif 01104 // everything should be fine now!!! 01105 if(verbose && oocomm.communicator().rank()==0) 01106 std::cout<<"Postprocesing global graph data took "<<time1.elapsed()<<std::endl; 01107 time1.reset(); 01108 #ifndef NDEBUG 01109 assert(isValidGraph(noVertices, noVertices, gnoedges, 01110 gxadj, gadjncy, true)); 01111 #endif 01112 01113 if(verbose && oocomm.communicator().rank()==0) 01114 std::cout<<"Creating grah one 1 process took "<<time.elapsed()<<std::endl; 01115 time.reset(); 01116 options[0]=0; options[1]=1; options[2]=1; options[3]=3; options[4]=3; 01117 // Call metis 01118 METIS_PartGraphRecursive(&noVertices, gxadj, gadjncy, gvwgt, gadjwgt, &wgtflag, 01119 &numflag, &nparts, options, &edgecut, gpart); 01120 01121 if(verbose && oocomm.communicator().rank()==0) 01122 std::cout<<"METIS took "<<time.elapsed()<<std::endl; 01123 time.reset(); 01124 01125 Dune::dinfo<<std::endl<<"part:"; 01126 print_carray(Dune::dinfo, gpart, noVertices); 01127 01128 delete[] gxadj; 01129 delete[] gadjncy; 01130 #ifdef USE_WEIGHTS 01131 delete[] gvwgt; 01132 delete[] gadjwgt; 01133 #endif 01134 } 01135 // Scatter result 01136 MPI_Scatter(gpart, 1, MPITraits<idxtype>::getType(), part, 1, 01137 MPITraits<idxtype>::getType(), 0, comm); 01138 01139 { 01140 // release remaining memory 01141 delete[] gpart; 01142 delete[] noedges; 01143 delete[] displ; 01144 } 01145 01146 01147 #endif 01148 delete[] xadj; 01149 delete[] vtxdist; 01150 delete[] adjncy; 01151 #ifdef USE_WEIGHTS 01152 delete[] vwgt; 01153 delete[] adjwgt; 01154 #endif 01155 delete[] tpwgts; 01156 } 01157 }else{ 01158 part[0]=0; 01159 } 01160 #endif 01161 Dune::dinfo<<" repart "<<rank <<" -> "<< part[0]<<std::endl; 01162 01163 std::vector<int> realpart(mat.N(), part[0]); 01164 delete[] part; 01165 01166 oocomm.copyOwnerToAll(realpart, realpart); 01167 01168 if(verbose && oocomm.communicator().rank()==0) 01169 std::cout<<"Scattering repartitioning took "<<time.elapsed()<<std::endl; 01170 time.reset(); 01171 01172 01173 oocomm.buildGlobalLookup(mat.N()); 01174 Dune::Amg::MatrixGraph<M> graph(const_cast<M&>(mat)); 01175 fillIndexSetHoles(graph, oocomm); 01176 if(verbose && oocomm.communicator().rank()==0) 01177 std::cout<<"Filling index set took "<<time.elapsed()<<std::endl; 01178 time.reset(); 01179 01180 if(verbose){ 01181 int noNeighbours=oocomm.remoteIndices().neighbours(); 01182 noNeighbours = 
oocomm.communicator().sum(noNeighbours) 01183 / oocomm.communicator().size(); 01184 if(oocomm.communicator().rank()==0) 01185 std::cout<<"Average no neighbours was "<<noNeighbours<<std::endl; 01186 } 01187 bool ret = buildCommunication(graph, realpart, oocomm, outcomm, redistInf, 01188 verbose); 01189 if(verbose && oocomm.communicator().rank()==0) 01190 std::cout<<"Building index sets took "<<time.elapsed()<<std::endl; 01191 time.reset(); 01192 01193 01194 return ret; 01195 01196 } 01197 01212 template<class G, class T1, class T2> 01213 bool graphRepartition(const G& graph, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, int nparts, 01214 Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm, 01215 RedistributeInterface& redistInf, 01216 bool verbose=false) 01217 { 01218 Timer time; 01219 01220 MPI_Comm comm=oocomm.communicator(); 01221 oocomm.buildGlobalLookup(graph.noVertices()); 01222 fillIndexSetHoles(graph, oocomm); 01223 01224 if(verbose && oocomm.communicator().rank()==0) 01225 std::cout<<"Filling holes took "<<time.elapsed()<<std::endl; 01226 time.reset(); 01227 01228 // simple precondition checks 01229 01230 #ifdef PERF_REPART 01231 // Profiling variables 01232 double t1=0.0, t2=0.0, t3=0.0, t4=0.0, tSum=0.0; 01233 #endif 01234 01235 01236 // MPI variables 01237 int mype = oocomm.communicator().rank(); 01238 01239 assert(nparts<=oocomm.communicator().size()); 01240 01241 typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2>::ParallelIndexSet::GlobalIndex GI; 01242 typedef std::vector<GI> GlobalVector; 01243 int myDomain; 01244 01245 // 01246 // 1) Prepare the required parameters for using ParMETIS 01247 // Especially the arrays that represent the graph must be 01248 // generated by the DUNE Graph and IndexSet input variables. 01249 // These are the arrays: 01250 // - vtxdist 01251 // - xadj 01252 // - adjncy 01253 // 01254 // 01255 #ifdef PERF_REPART 01256 // reset timer for step 1) 01257 t1=MPI_Wtime(); 01258 #endif 01259 01260 01261 typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm; 01262 typedef typename OOComm::OwnerSet OwnerSet; 01263 01264 // Create the vtxdist array and parmetisVtxMapping. 01265 // Global communications are necessary 01266 // The parmetis global identifiers for the owner vertices. 01267 ParmetisDuneIndexMap indexMap(graph,oocomm); 01268 #if HAVE_PARMETIS 01269 idxtype *part = new idxtype[indexMap.numOfOwnVtx()]; 01270 #else 01271 std::size_t *part = new std::size_t[indexMap.numOfOwnVtx()]; 01272 #endif 01273 for(std::size_t i=0; i < indexMap.numOfOwnVtx(); ++i) 01274 part[i]=mype; 01275 01276 #if !HAVE_PARMETIS 01277 if(oocomm.communicator().rank()==0 && nparts>1) 01278 std::cerr<<"ParMETIS not activated. 
Will repartition to 1 domain instead of requested " 01279 <<nparts<<" domains."<<std::endl; 01280 nparts=1; // No parmetis available, fallback to agglomerating to 1 process 01281 typedef std::size_t idxtype; 01282 01283 #else 01284 01285 if(nparts>1){ 01286 // Create the xadj and adjncy arrays 01287 idxtype *xadj = new idxtype[indexMap.numOfOwnVtx()+1]; 01288 idxtype *adjncy = new idxtype[graph.noEdges()]; 01289 EdgeFunctor<G> ef(adjncy, indexMap, graph.noEdges()); 01290 getAdjArrays<OwnerSet>(graph, oocomm.globalLookup(), xadj, ef); 01291 01292 // 01293 // 2) Call ParMETIS 01294 // 01295 // 01296 int numflag=0, wgtflag=0, options[3], edgecut=0, ncon=1; 01297 float *tpwgts = NULL; 01298 float ubvec[1]; 01299 options[0] = 0; // 0=default, 1=options are defined in [1]+[2] 01300 #ifdef DEBUG_REPART 01301 options[1] = 3; // show info: 0=no message 01302 #else 01303 options[1] = 0; // show info: 0=no message 01304 #endif 01305 options[2] = 1; // random number seed, default is 15 01306 wgtflag = (ef.getWeights()!=NULL)?1:0; 01307 numflag = 0; 01308 edgecut = 0; 01309 ncon=1; 01310 ubvec[0]=1.05; // recommended by ParMETIS 01311 01312 #ifdef DEBUG_REPART 01313 if (mype == 0) { 01314 std::cout<<std::endl; 01315 std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {" 01316 <<options[1]<<" "<<options[2]<<"}, Ncon: " 01317 <<ncon<<", Nparts: "<<nparts<<std::endl; 01318 } 01319 #endif 01320 #ifdef PERF_REPART 01321 // stop the time for step 1) 01322 t1=MPI_Wtime()-t1; 01323 // reset timer for step 2) 01324 t2=MPI_Wtime(); 01325 #endif 01326 01327 if(verbose){ 01328 oocomm.communicator().barrier(); 01329 if(oocomm.communicator().rank()==0) 01330 std::cout<<"Preparing for parmetis took "<<time.elapsed()<<std::endl; 01331 } 01332 time.reset(); 01333 01334 //======================================================= 01335 // ParMETIS_V3_PartKway 01336 //======================================================= 01337 ParMETIS_V3_PartKway(indexMap.vtxDist(), xadj, adjncy, 01338 NULL, ef.getWeights(), &wgtflag, 01339 &numflag, &ncon, &nparts, tpwgts, ubvec, options, &edgecut, part, &const_cast<MPI_Comm&>(comm)); 01340 01341 01342 delete[] xadj; 01343 delete[] adjncy; 01344 ef.free(); 01345 01346 #ifdef DEBUG_REPART 01347 if (mype == 0) { 01348 std::cout<<std::endl; 01349 std::cout<<"ParMETIS_V3_PartKway reported a cut of "<<edgecut<<std::endl; 01350 std::cout<<std::endl; 01351 } 01352 std::cout<<mype<<": PARMETIS-Result: "; 01353 for(int i=0; i < indexMap.vtxDist()[mype+1]-indexMap.vtxDist()[mype]; ++i) { 01354 std::cout<<part[i]<<" "; 01355 } 01356 std::cout<<std::endl; 01357 std::cout<<"Testing ParMETIS_V3_PartKway with options[1-2] = {" 01358 <<options[1]<<" "<<options[2]<<"}, Ncon: " 01359 <<ncon<<", Nparts: "<<nparts<<std::endl; 01360 #endif 01361 #ifdef PERF_REPART 01362 // stop the time for step 2) 01363 t2=MPI_Wtime()-t2; 01364 // reset timer for step 3) 01365 t3=MPI_Wtime(); 01366 #endif 01367 01368 01369 if(verbose){ 01370 oocomm.communicator().barrier(); 01371 if(oocomm.communicator().rank()==0) 01372 std::cout<<"Parmetis took "<<time.elapsed()<<std::endl; 01373 } 01374 time.reset(); 01375 }else 01376 #endif 01377 { 01378 // Everything goes to process 0! 
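// (This branch is taken when nparts is 1, in particular after the fallback
// above when ParMETIS is not available, so every owner vertex is assigned to
// domain 0.)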
01379 for(std::size_t i=0; i<indexMap.numOfOwnVtx();++i) 01380 part[i]=0; 01381 } 01382 01383 01384 // 01385 // 3) Find a optimal domain based on the ParMETIS repatitioning 01386 // result 01387 // 01388 01389 int domainMapping[nparts]; 01390 if(nparts>1) 01391 getDomain(comm, part, indexMap.numOfOwnVtx(), nparts, &myDomain, domainMapping); 01392 else 01393 domainMapping[0]=0; 01394 01395 #ifdef DEBUG_REPART 01396 std::cout<<mype<<": myDomain: "<<myDomain<<std::endl; 01397 std::cout<<mype<<": DomainMapping: "; 01398 for(int j=0; j<nparts; j++) { 01399 std::cout<<" do: "<<j<<" pe: "<<domainMapping[j]<<" "; 01400 } 01401 std::cout<<std::endl; 01402 #endif 01403 01404 // Make a domain mapping for the indexset and translate 01405 //domain number to real process number 01406 // domainMapping is the one of parmetis, that is without 01407 // the overlap/copy vertices 01408 std::vector<int> setPartition(oocomm.indexSet().size(), -1); 01409 01410 typedef typename OOComm::ParallelIndexSet::const_iterator Iterator; 01411 std::size_t i=0; // parmetis index 01412 for(Iterator index = oocomm.indexSet().begin(); index != oocomm.indexSet().end(); ++index) 01413 if(OwnerSet::contains(index->local().attribute())){ 01414 setPartition[index->local()]=domainMapping[part[i++]]; 01415 } 01416 01417 delete[] part; 01418 oocomm.copyOwnerToAll(setPartition, setPartition); 01419 // communication only needed for ALU 01420 // (ghosts with same global id as owners on the same process) 01421 if (oocomm.getSolverCategory() == 01422 static_cast<int>(SolverCategory::nonoverlapping)) 01423 oocomm.copyCopyToAll(setPartition, setPartition); 01424 bool ret = buildCommunication(graph, setPartition, oocomm, outcomm, redistInf, 01425 verbose); 01426 if(verbose){ 01427 oocomm.communicator().barrier(); 01428 if(oocomm.communicator().rank()==0) 01429 std::cout<<"Creating indexsets took "<<time.elapsed()<<std::endl; 01430 } 01431 return ret; 01432 } 01433 01434 01435 01436 template<class G, class T1, class T2> 01437 bool buildCommunication(const G& graph, 01438 std::vector<int>& setPartition, Dune::OwnerOverlapCopyCommunication<T1,T2>& oocomm, 01439 Dune::OwnerOverlapCopyCommunication<T1,T2>*& outcomm, 01440 RedistributeInterface& redistInf, 01441 bool verbose) 01442 { 01443 typedef typename Dune::OwnerOverlapCopyCommunication<T1,T2> OOComm; 01444 typedef typename OOComm::OwnerSet OwnerSet; 01445 01446 Timer time; 01447 01448 // Build the send interface 01449 redistInf.buildSendInterface<OwnerSet>(setPartition, oocomm.indexSet()); 01450 01451 #ifdef PERF_REPART 01452 // stop the time for step 3) 01453 t3=MPI_Wtime()-t3; 01454 // reset timer for step 4) 01455 t4=MPI_Wtime(); 01456 #endif 01457 01458 01459 // 01460 // 4) Create the output IndexSet and RemoteIndices 01461 // 4.1) Determine the "send to" and "receive from" relation 01462 // according to the new partition using a MPI ring 01463 // communication. 01464 // 01465 // 4.2) Depends on the "send to" and "receive from" vector, 01466 // the processes will exchange the vertices each other 01467 // 01468 // 4.3) Create the IndexSet, RemoteIndices and the new MPI 01469 // communicator 01470 // 01471 01472 // 01473 // 4.1) Let's start... 01474 // 01475 int npes = oocomm.communicator().size(); 01476 int *sendTo = 0; 01477 int noSendTo = 0; 01478 std::set<int> recvFrom; 01479 01480 // the max number of vertices is stored in the sendTo buffer, 01481 // not the number of vertices to send! 
Because the max number of Vtx 01482 // is used as the fixed buffer size by the MPI send/receive calls 01483 01484 typedef typename std::vector<int>::const_iterator VIter; 01485 int mype = oocomm.communicator().rank(); 01486 01487 { 01488 std::set<int> tsendTo; 01489 for(VIter i=setPartition.begin(), iend = setPartition.end(); i!=iend; ++i) 01490 tsendTo.insert(*i); 01491 01492 noSendTo = tsendTo.size(); 01493 sendTo = new int[noSendTo]; 01494 typedef std::set<int>::const_iterator iterator; 01495 int idx=0; 01496 for(iterator i=tsendTo.begin(); i != tsendTo.end(); ++i, ++idx) 01497 sendTo[idx]=*i; 01498 } 01499 01500 // 01501 int* gnoSend= new int[oocomm.communicator().size()]; 01502 int* gsendToDispl = new int[oocomm.communicator().size()+1]; 01503 01504 MPI_Allgather(&noSendTo, 1, MPI_INT, gnoSend, 1, 01505 MPI_INT, oocomm.communicator()); 01506 01507 // calculate total receive message size 01508 int totalNoRecv = 0; 01509 for(int i=0; i<npes; ++i) 01510 totalNoRecv += gnoSend[i]; 01511 01512 int *gsendTo = new int[totalNoRecv]; 01513 01514 // calculate displacement for allgatherv 01515 gsendToDispl[0]=0; 01516 for(int i=0; i<npes; ++i) 01517 gsendToDispl[i+1]=gsendToDispl[i]+gnoSend[i]; 01518 01519 // gather the data 01520 MPI_Allgatherv(sendTo, noSendTo, MPI_INT, gsendTo, gnoSend, gsendToDispl, 01521 MPI_INT, oocomm.communicator()); 01522 01523 // Extract from which processes we will receive data 01524 for(int proc=0; proc < npes; ++proc) 01525 for(int i=gsendToDispl[proc]; i < gsendToDispl[proc+1]; ++i) 01526 if(gsendTo[i]==mype) 01527 recvFrom.insert(proc); 01528 01529 bool existentOnNextLevel = recvFrom.size()>0; 01530 01531 // Delete memory 01532 delete[] gnoSend; 01533 delete[] gsendToDispl; 01534 delete[] gsendTo; 01535 01536 01537 #ifdef DEBUG_REPART 01538 if(recvFrom.size()){ 01539 std::cout<<mype<<": recvFrom: "; 01540 typedef typename std::set<int>::const_iterator siter; 01541 for(siter i=recvFrom.begin(); i!= recvFrom.end(); ++i) { 01542 std::cout<<*i<<" "; 01543 } 01544 } 01545 01546 std::cout<<std::endl<<std::endl; 01547 std::cout<<mype<<": sendTo: "; 01548 for(int i=0; i<noSendTo; i++) { 01549 std::cout<<sendTo[i]<<" "; 01550 } 01551 std::cout<<std::endl<<std::endl; 01552 #endif 01553 01554 if(verbose) 01555 if(oocomm.communicator().rank()==0) 01556 std::cout<<" Communicating the receive information took "<< 01557 time.elapsed()<<std::endl; 01558 time.reset(); 01559 01560 // 01561 // 4.2) Start the communication 01562 // 01563 01564 // Get all the owner and overlap vertices for myself ans save 01565 // it in the vectors myOwnerVec and myOverlapVec. 01566 // The received vertices from the other processes are simple 01567 // added to these vector. 
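// Layout of each packed message exchanged below (see createSendBuf/saveRecvBuf):
//   [#owner : std::size_t][owner global indices : GI ...]
//   [#overlap : std::size_t][overlap global indices : GI ...]
//   [#neighbour ranks : std::size_t][neighbour ranks : int ...]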
01568 // 01569 01570 01571 typedef typename OOComm::ParallelIndexSet::GlobalIndex GI; 01572 typedef std::vector<GI> GlobalVector; 01573 std::vector<std::pair<GI,int> > myOwnerVec; 01574 std::set<GI> myOverlapSet; 01575 GlobalVector sendOwnerVec; 01576 std::set<GI> sendOverlapSet; 01577 std::set<int> myNeighbors; 01578 01579 // getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(), 01580 // mype, mype, myOwnerVec, myOverlapSet, redistInf, myNeighbors); 01581 01582 char **sendBuffers=new char*[noSendTo]; 01583 MPI_Request *requests = new MPI_Request[noSendTo]; 01584 01585 // Create all messages to be sent 01586 for(int i=0; i < noSendTo; ++i){ 01587 // clear the vector for sending 01588 sendOwnerVec.clear(); 01589 sendOverlapSet.clear(); 01590 // get all owner and overlap vertices for process j and save these 01591 // in the vectors sendOwnerVec and sendOverlapSet 01592 std::set<int> neighbors; 01593 getOwnerOverlapVec<OwnerSet>(graph, setPartition, oocomm.globalLookup(), 01594 mype, sendTo[i], sendOwnerVec, sendOverlapSet, redistInf, 01595 neighbors); 01596 // +2, we need 2 integer more for the length of each part 01597 // (owner/overlap) of the array 01598 int buffersize=0; 01599 int tsize; 01600 MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &buffersize); 01601 MPI_Pack_size(sendOwnerVec.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize); 01602 buffersize +=tsize; 01603 MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize); 01604 buffersize +=tsize; 01605 MPI_Pack_size(sendOverlapSet.size(), MPITraits<GI>::getType(), oocomm.communicator(), &tsize); 01606 buffersize += tsize; 01607 MPI_Pack_size(1, MPITraits<std::size_t>::getType(), oocomm.communicator(), &tsize); 01608 buffersize += tsize; 01609 MPI_Pack_size(neighbors.size(), MPI_INT, oocomm.communicator(), &tsize); 01610 buffersize += tsize; 01611 01612 sendBuffers[i] = new char[buffersize]; 01613 01614 #ifdef DEBUG_REPART 01615 std::cout<<mype<<" sending "<<sendOwnerVec.size()<<" owner and "<< 01616 sendOverlapSet.size()<<" overlap to "<<sendTo[i]<<" buffersize="<<buffersize<<std::endl; 01617 #endif 01618 createSendBuf(sendOwnerVec, sendOverlapSet, neighbors, sendBuffers[i], buffersize, oocomm.communicator()); 01619 MPI_Issend(sendBuffers[i], buffersize, MPI_PACKED, sendTo[i], 99, oocomm.communicator(), requests+i); 01620 } 01621 01622 if(verbose){ 01623 oocomm.communicator().barrier(); 01624 if(oocomm.communicator().rank()==0) 01625 std::cout<<" Creating sends took "<< 01626 time.elapsed()<<std::endl; 01627 } 01628 time.reset(); 01629 01630 // Receive Messages 01631 int noRecv = recvFrom.size(); 01632 int oldbuffersize=0; 01633 char* recvBuf = 0; 01634 while(noRecv>0){ 01635 // probe for an incoming message 01636 MPI_Status stat; 01637 MPI_Probe(MPI_ANY_SOURCE, 99, oocomm.communicator(), &stat); 01638 int buffersize; 01639 MPI_Get_count(&stat, MPI_PACKED, &buffersize); 01640 01641 if(oldbuffersize<buffersize){ 01642 // buffer too small, reallocate 01643 delete[] recvBuf; 01644 recvBuf = new char[buffersize]; 01645 oldbuffersize = buffersize; 01646 } 01647 MPI_Recv(recvBuf, buffersize, MPI_PACKED, stat.MPI_SOURCE, 99, oocomm.communicator(), &stat); 01648 saveRecvBuf(recvBuf, buffersize, myOwnerVec, myOverlapSet, myNeighbors, redistInf, 01649 stat.MPI_SOURCE, oocomm.communicator()); 01650 --noRecv; 01651 } 01652 01653 if(recvBuf) 01654 delete[] recvBuf; 01655 01656 time.reset(); 01657 // Wait for sending messages to complete 01658 MPI_Status 
*statuses = new MPI_Status[noSendTo]; 01659 int send = MPI_Waitall(noSendTo, requests, statuses); 01660 01661 // check for errors 01662 if(send==MPI_ERR_IN_STATUS){ 01663 std::cerr<<mype<<": Error in sending :"<<std::endl; 01664 // Search for the error 01665 for(int i=0; i< noSendTo; i++) 01666 if(statuses[i].MPI_ERROR!=MPI_SUCCESS){ 01667 char message[300]; 01668 int messageLength; 01669 MPI_Error_string(statuses[i].MPI_ERROR, message, &messageLength); 01670 std::cerr<<" source="<<statuses[i].MPI_SOURCE<<" message: "; 01671 for(int i=0; i< messageLength; i++) 01672 std::cout<<message[i]; 01673 } 01674 std::cerr<<std::endl; 01675 } 01676 01677 if(verbose){ 01678 oocomm.communicator().barrier(); 01679 if(oocomm.communicator().rank()==0) 01680 std::cout<<" Receiving and saving took "<< 01681 time.elapsed()<<std::endl; 01682 } 01683 time.reset(); 01684 01685 for(int i=0; i < noSendTo; ++i) 01686 delete[] sendBuffers[i]; 01687 01688 delete[] sendBuffers; 01689 delete[] statuses; 01690 delete[] requests; 01691 01692 redistInf.setCommunicator(oocomm.communicator()); 01693 01694 // 01695 // 4.2) Create the IndexSet etc. 01696 // 01697 01698 // build the new outputIndexSet 01699 01700 01701 int color=0; 01702 01703 if (!existentOnNextLevel) { 01704 // this process is not used anymore 01705 color= MPI_UNDEFINED; 01706 } 01707 MPI_Comm outputComm; 01708 01709 MPI_Comm_split(oocomm.communicator(), color, oocomm.communicator().rank(), &outputComm); 01710 outcomm = new OOComm(outputComm,oocomm.getSolverCategory(),true); 01711 01712 // translate neighbor ranks. 01713 int newrank=outcomm->communicator().rank(); 01714 int *newranks=new int[oocomm.communicator().size()]; 01715 std::vector<int> tneighbors; 01716 tneighbors.reserve(myNeighbors.size()); 01717 01718 typename OOComm::ParallelIndexSet& outputIndexSet = outcomm->indexSet(); 01719 01720 MPI_Allgather(&newrank, 1, MPI_INT, newranks, 1, 01721 MPI_INT, oocomm.communicator()); 01722 typedef typename std::set<int>::const_iterator IIter; 01723 01724 #ifdef DEBUG_REPART 01725 std::cout<<oocomm.communicator().rank()<<" "; 01726 for(IIter i=myNeighbors.begin(), end=myNeighbors.end(); 01727 i!=end; ++i){ 01728 assert(newranks[*i]>=0); 01729 std::cout<<*i<<"->"<<newranks[*i]<<" "; 01730 tneighbors.push_back(newranks[*i]); 01731 } 01732 std::cout<<std::endl; 01733 #else 01734 for(IIter i=myNeighbors.begin(), end=myNeighbors.end(); 01735 i!=end; ++i){ 01736 tneighbors.push_back(newranks[*i]); 01737 } 01738 #endif 01739 delete[] newranks; 01740 myNeighbors.clear(); 01741 01742 if(verbose){ 01743 oocomm.communicator().barrier(); 01744 if(oocomm.communicator().rank()==0) 01745 std::cout<<" Calculating new neighbours ("<<tneighbors.size()<<") took "<< 01746 time.elapsed()<<std::endl; 01747 } 01748 time.reset(); 01749 01750 01751 outputIndexSet.beginResize(); 01752 // 1) add the owner vertices 01753 // Sort the owners 01754 std::sort(myOwnerVec.begin(), myOwnerVec.end(), SortFirst()); 01755 // The owners are sorted according to there global index 01756 // Therefore the entries of ownerVec are the same as the 01757 // ones in the resulting index set. 
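// In the loop below the i-th smallest received global index gets local index i,
// and position i is registered with redistInf for the rank it was received
// from. Since buildSendInterface visits the (sorted) index set of the old
// partition in the same ascending global order, the send and receive positions
// of every process pair refer to the same sequence of indices.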
01758 typedef typename OOComm::ParallelIndexSet::LocalIndex LocalIndex; 01759 typedef typename std::vector<std::pair<GI,int> >::const_iterator VPIter; 01760 int i=0; 01761 for(VPIter g=myOwnerVec.begin(), end =myOwnerVec.end(); g!=end; ++g, ++i ) { 01762 outputIndexSet.add(g->first,LocalIndex(i, OwnerOverlapCopyAttributeSet::owner, true)); 01763 redistInf.addReceiveIndex(g->second, i); 01764 } 01765 01766 if(verbose){ 01767 oocomm.communicator().barrier(); 01768 if(oocomm.communicator().rank()==0) 01769 std::cout<<" Adding owner indices took "<< 01770 time.elapsed()<<std::endl; 01771 } 01772 time.reset(); 01773 01774 01775 // After all the vertices are received, the vectors must 01776 // be "merged" together to create the final vectors. 01777 // Because some vertices that are sent as overlap could now 01778 // already included as owner vertiecs in the new partition 01779 mergeVec(myOwnerVec, myOverlapSet); 01780 01781 // Trick to free memory 01782 myOwnerVec.clear(); 01783 myOwnerVec.swap(myOwnerVec); 01784 01785 if(verbose){ 01786 oocomm.communicator().barrier(); 01787 if(oocomm.communicator().rank()==0) 01788 std::cout<<" Merging indices took "<< 01789 time.elapsed()<<std::endl; 01790 } 01791 time.reset(); 01792 01793 01794 // 2) add the overlap vertices 01795 typedef typename std::set<GI>::const_iterator SIter; 01796 for(SIter g=myOverlapSet.begin(), end=myOverlapSet.end(); g!=end; ++g, i++) { 01797 outputIndexSet.add(*g,LocalIndex(i, OwnerOverlapCopyAttributeSet::copy, true)); 01798 } 01799 myOverlapSet.clear(); 01800 outputIndexSet.endResize(); 01801 01802 #ifdef DUNE_ISTL_WITH_CHECKING 01803 int numOfOwnVtx =0; 01804 typedef typename OOComm::ParallelIndexSet::const_iterator Iterator; 01805 Iterator end = outputIndexSet.end(); 01806 for(Iterator index = outputIndexSet.begin(); index != end; ++index) { 01807 if (OwnerSet::contains(index->local().attribute())) { 01808 numOfOwnVtx++; 01809 } 01810 } 01811 numOfOwnVtx = oocomm.communicator().sum(numOfOwnVtx); 01812 // if(numOfOwnVtx!=indexMap.globalOwnerVertices) 01813 // { 01814 // std::cerr<<numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones!"<<std::endl; 01815 // DUNE_THROW(ISTLError, numOfOwnVtx<<"!="<<indexMap.globalOwnerVertices<<" owners missing or additional ones" 01816 // <<" during repartitioning."); 01817 // } 01818 Iterator index=outputIndexSet.begin(); 01819 if(index!=end){ 01820 ++index; 01821 for(Iterator old = outputIndexSet.begin(); index != end; old=index++) { 01822 if(old->global()>index->global()) 01823 DUNE_THROW(ISTLError, "Index set's globalindex not sorted correctly"); 01824 } 01825 } 01826 #endif 01827 if(verbose){ 01828 oocomm.communicator().barrier(); 01829 if(oocomm.communicator().rank()==0) 01830 std::cout<<" Adding overlap indices took "<< 01831 time.elapsed()<<std::endl; 01832 } 01833 time.reset(); 01834 01835 01836 if(color != MPI_UNDEFINED){ 01837 outcomm->remoteIndices().setNeighbours(tneighbors); 01838 outcomm->remoteIndices().template rebuild<true>(); 01839 01840 } 01841 01842 // release the memory 01843 delete[] sendTo; 01844 01845 if(verbose){ 01846 oocomm.communicator().barrier(); 01847 if(oocomm.communicator().rank()==0) 01848 std::cout<<" Storing indexsets took "<< 01849 time.elapsed()<<std::endl; 01850 } 01851 01852 #ifdef PERF_REPART 01853 // stop the time for step 4) and print the results 01854 t4=MPI_Wtime()-t4; 01855 tSum = t1 + t2 + t3 + t4; 01856 std::cout<<std::endl 01857 <<mype<<": WTime for step 1): "<<t1 01858 <<" 2): "<<t2 01859 <<" 3): "<<t3 01860 <<" 
4): "<<t4 01861 <<" total: "<<tSum 01862 <<std::endl; 01863 #endif 01864 01865 return color!=MPI_UNDEFINED; 01866 01867 } 01868 #else 01869 template<class G, class P,class T1, class T2, class R> 01870 bool graphRepartition(const G& graph, P& oocomm, int nparts, 01871 P*& outcomm, 01872 R& redistInf, 01873 bool v=false) 01874 { 01875 if(nparts!=oocomm.size()) 01876 DUNE_THROW(NotImplemented, "only available for MPI programs"); 01877 } 01878 01879 01880 template<class G, class P,class T1, class T2, class R> 01881 bool commGraphRepartition(const G& graph, P& oocomm, int nparts, 01882 P*& outcomm, 01883 R& redistInf, 01884 bool v=false) 01885 { 01886 if(nparts!=oocomm.size()) 01887 DUNE_THROW(NotImplemented, "only available for MPI programs"); 01888 } 01889 #endif // HAVE_MPI 01890 } // end of namespace Dune 01891 #endif