NamdCentLB Class Reference

#include <NamdCentLB.h>

List of all members.

Public Member Functions

 NamdCentLB (const CkLBOptions &opt)
 NamdCentLB (CkMigrateMessage *)
CLBMigrateMsg * Strategy (LDStats *stats)

Detailed Description

Definition at line 52 of file NamdCentLB.h.


Constructor & Destructor Documentation

NamdCentLB::NamdCentLB ( const CkLBOptions &  opt  ) 

Definition at line 51 of file NamdCentLB.C.

// Options constructor: forwards `opt` to the CentralLB base class and leaves
// the three work arrays (processorArray, patchArray, computeArray) null.
// Strategy() allocates them on demand when it finds them null.
00051                                             : CentralLB(opt)
00052 {
00053   //  if (CkMyPe()==0)
00054   //   CkPrintf("[%d] NamdCentLB created\n",CkMyPe());
00055   processorArray = 0;
00056   patchArray = 0;
00057   computeArray = 0;
00058 }

NamdCentLB::NamdCentLB ( CkMigrateMessage *  msg  ) 

Migratable Object Constructor.

Definition at line 45 of file NamdCentLB.C.

// Migration constructor: forwards the migrate message to the CentralLB base
// class and, like the options constructor, starts with the three work arrays
// null so that Strategy() allocates them on first use.
00045                                            : CentralLB(msg) {
00046   processorArray = 0;
00047   patchArray = 0;
00048   computeArray = 0;
00049 } 


Member Function Documentation

CLBMigrateMsg * NamdCentLB::Strategy ( LDStats *  stats  ) 

Definition at line 88 of file NamdCentLB.C.

References averageLoad, processorInfo::backgroundLoad, cpuloads, endi(), computeInfo::handle, InfoRecord::Id, iINFO(), iout, SimParameters::LCPOOn, LdbIdField(), SimParameters::ldbRelativeGrainsize, LDBSTRAT_COMPREHENSIVE, LDBSTRAT_DEFAULT, LDBSTRAT_OLD, LDBSTRAT_REFINEONLY, SimParameters::ldbStrategy, InfoRecord::load, load, NAMD_die(), ComputeMap::numComputes(), ComputeMap::numPartitions(), PatchMap::numPatches(), numPatches, Node::Object(), ComputeMap::Object(), PatchMap::Object(), computeInfo::oldProcessor, patchInfo::processor, computeInfo::processor, ComputeMap::setNewNode(), ComputeMap::setNewNumPartitions(), Node::simParameters, and simParams.

// Strategy(stats): one load-balancing pass.
//
//   1. Allocates the processor/patch/compute work arrays if they are null and
//      fills them through buildData(stats).
//   2. Computes and prints the average compute load and the average load per
//      available processor.
//   3. When step() == 1, only decides how many partitions each compute should
//      be split into (no compute is moved); otherwise runs the placement
//      algorithm selected by simParams->ldbStrategy.
//   4. Collects every compute whose processor differs from oldProcessor into
//      a CLBMigrateMsg, records the new node in the ComputeMap, copies the
//      per-processor loads into cpuloads, frees the work arrays and returns
//      the message.
//
// Parameter: stats - load statistics; only nprocs() is read directly here,
//                    the rest is consumed by buildData().
// Returns:   a newly allocated CLBMigrateMsg with n_moves/moves filled in.
00089 {
00090   //  CkPrintf("LDB: All statistics received at %f, %f\n",
00091   //  CmiTimer(),CmiWallTimer());
00092 
00093   int numProcessors = stats->nprocs();
00094   int numPatches = PatchMap::Object()->numPatches();
00095   ComputeMap *computeMap = ComputeMap::Object();
00096   const int numComputes = computeMap->numComputes();
00097   const SimParameters* simParams = Node::Object()->simParameters;
00098 
00099   // these sizes should never change
  // The arrays are deleted and nulled again at the end of this function, so
  // in practice they are reallocated on every call.
00100   if ( ! processorArray ) processorArray = new processorInfo[numProcessors];
00101   if ( ! patchArray ) patchArray = new patchInfo[numPatches];
00102   if ( ! computeArray ) computeArray = new computeInfo[numComputes];
00103 
  // buildData() is defined elsewhere; its return value is used below as the
  // number of valid (moveable) entries in computeArray.
00104   int nMoveableComputes = buildData(stats);
00105 
  // NOTE(review): LDB_DEBUG turns on both macros, so the #if/#elif pairs
  // below always take the DUMP_LDBDATA branch in that configuration; the
  // LOAD_LDBDATA branch is reached only when it is defined on its own.
00106 #if LDB_DEBUG
00107 #define DUMP_LDBDATA 1
00108 #define LOAD_LDBDATA 1
00109 #endif
00110 
00111 #if DUMP_LDBDATA 
00112   dumpDataASCII("ldbd_before", numProcessors, numPatches, nMoveableComputes);
00113 #elif LOAD_LDBDATA
00114   loadDataASCII("ldbd_before.5", numProcessors, numPatches, nMoveableComputes);
00115   // CkExit();
00116 #endif
00117 
  // Load statistics. Both averages stay 0 when there are no moveable computes.
00118   double averageLoad = 0.;
00119   double avgCompute = 0.;
00120   if ( nMoveableComputes ) {
00121    int i;
00122    double total = 0.;
00123    double maxCompute = 0.;
00124    int maxi = 0;
   // Sum the compute loads and remember the index of the heaviest compute.
00125    for (i=0; i<nMoveableComputes; i++) {
00126       double load = computeArray[i].load;
00127       total += load;
00128       if ( load > maxCompute ) { maxCompute = load;  maxi = i; }
00129    }
00130    avgCompute = total / nMoveableComputes;
00131 
   // `total` keeps accumulating: the background load of every available
   // processor is added on top of the compute loads, so averageLoad is
   // (compute load + background load) per available processor.
   // P holds the same value as numProcessors above.
00132     int P = stats->nprocs();
00133    int numPesAvailable = 0;
00134    for (i=0; i<P; i++) {
00135       if (processorArray[i].available) {
00136         ++numPesAvailable;
00137         total += processorArray[i].backgroundLoad;
00138       }
00139    }
   // NAMD_die presumably does not return; otherwise the division below would
   // be by zero -- TODO confirm.
00140    if (numPesAvailable == 0)
00141      NAMD_die("No processors available for load balancing!\n");
00142 
00143    averageLoad = total/numPesAvailable;
   // NOTE(review): the percentages divide by averageLoad, which is 0 if every
   // measured load is 0 -- confirm that cannot happen or is harmless here.
00144    CkPrintf("LDB: Largest compute %d load %f is %.1f%% of average load %f\n",
00145             LdbIdField(computeArray[maxi].handle.id, 0),
00146             maxCompute, 100. * maxCompute / averageLoad, averageLoad);
00147    CkPrintf("LDB: Average compute %f is %.1f%% of average load %f\n",
00148             avgCompute, 100. * avgCompute / averageLoad, averageLoad);
00149   }
00150 
00151   if ( step() == 1 ) {
00152     // compute splitting only
00153     // partitions are stored as char but mostly limited by
00154     // high load noise at low outer-loop iteration counts
00155     int maxParts = 10;
00156 #ifdef NAMD_CUDA
00157 //split LCPO compute very small, else CUDA compute is delayed
00158     if (simParams->LCPOOn) {
00159       maxParts = 20;
00160     }
00161 #endif
00162     int totalAddedParts = 0;
    // Target load per partition: a tenth of the average processor load, but
    // at least twice the average compute load; a positive
    // ldbRelativeGrainsize overrides both with averageLoad * grainsize.
00163     double maxCompute = averageLoad / 10.;
00164     if ( maxCompute < 2. * avgCompute ) maxCompute = 2. * avgCompute;
00165     if ( simParams->ldbRelativeGrainsize > 0. ) {
00166       maxCompute = averageLoad * simParams->ldbRelativeGrainsize;
00167     }
00168     CkPrintf("LDB: Partitioning computes with target load %f\n", maxCompute);
00169     double maxUnsplit = 0.;
00170     for (int i=0; i<nMoveableComputes; i++) {
      // Keep every compute on its current processor during this step, so the
      // migration loop further down finds nothing to move.
00171       computeArray[i].processor = computeArray[i].oldProcessor;
00172       const int cid = LdbIdField(computeArray[i].handle.id, 0);
00173       const double load = computeArray[i].load;
      // Computes reporting 0 partitions are skipped; only the largest such
      // load is tracked for the "unpartitionable" report below.
00174       if ( computeMap->numPartitions(cid) == 0 ) {
00175         if ( load > maxUnsplit ) maxUnsplit = load;
00176         continue;
00177       }
      // Partition count is ceil(load / target), clamped to [1, maxParts].
      // NOTE(review): if maxCompute is 0 the quotient is inf/NaN before the
      // int conversion -- confirm maxCompute is always positive here.
00178       int nparts = (int) ceil(load / maxCompute);
00179       if ( nparts > maxParts ) nparts = maxParts;
00180       if ( nparts < 1 ) nparts = 1;
      // The leading `0 &&` disables this per-compute trace.
00181       if ( 0 && nparts > 1 ) {
00182         CkPrintf("LDB: Partitioning compute %d with load %f by %d\n",
00183                   cid, load, nparts);
00184       }
00185       computeMap->setNewNumPartitions(cid,nparts);
00186       totalAddedParts += nparts - 1;
00187     }
00188     CkPrintf("LDB: Increased migratable compute count from %d to %d\n",
00189               nMoveableComputes,nMoveableComputes+totalAddedParts);
00190     CkPrintf("LDB: Largest unpartitionable compute is %f\n", maxUnsplit);
  // Any other step: choose the placement algorithm by ldbStrategy. The
  // DEFAULT and OLD strategies use one algorithm while step() < 4 and switch
  // to a refinement algorithm afterwards. If ldbStrategy matches none of the
  // four constants, no algorithm runs.
00191   } else if (simParams->ldbStrategy == LDBSTRAT_DEFAULT) { // default
00192     if (step() < 4)
00193       TorusLB(computeArray, patchArray, processorArray,
00194                   nMoveableComputes, numPatches, numProcessors);
00195     else
00196       RefineTorusLB(computeArray, patchArray, processorArray,
00197                   nMoveableComputes, numPatches, numProcessors, 1);
00198   } else if (simParams->ldbStrategy == LDBSTRAT_COMPREHENSIVE) {
00199     TorusLB(computeArray, patchArray, processorArray,
00200                   nMoveableComputes, numPatches, numProcessors);
00201   } else if (simParams->ldbStrategy == LDBSTRAT_REFINEONLY) {
00202     RefineTorusLB(computeArray, patchArray, processorArray,
00203                   nMoveableComputes, numPatches, numProcessors, 1);
00204   } else if (simParams->ldbStrategy == LDBSTRAT_OLD) {
00205     if (step() < 4)
00206       Alg7(computeArray, patchArray, processorArray,
00207                   nMoveableComputes, numPatches, numProcessors);
00208     else
00209       RefineOnly(computeArray, patchArray, processorArray, 
00210                   nMoveableComputes, numPatches, numProcessors);
00211   }
00212 
  // Debug-only: sum the hop count between each patch's processor and every
  // processor in that patch's proxiesOn set, then print the total.
00213 #if LDB_DEBUG && USE_TOPOMAP
00214   TopoManager tmgr;
00215   int pe1, pe2, pe3, hops=0;
00216   /* This is double counting the hops
00217   for(int i=0; i<nMoveableComputes; i++)
00218   {
00219     pe1 = computeArray[i].processor;
00220     pe2 = patchArray[computeArray[i].patch1].processor;
00221     pe3 = patchArray[computeArray[i].patch2].processor;
00222     hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00223     if(computeArray[i].patch1 != computeArray[i].patch2)
00224       hops += tmgr.getHopsBetweenRanks(pe1, pe3);  
00225   }*/
00226   for (int i=0; i<numPatches; i++)  {
00227     //int num = patchArray[i].proxiesOn.numElements();
00228     pe1 = patchArray[i].processor;
00229     Iterator nextProc;
00230     processorInfo *p = (processorInfo *)patchArray[i].proxiesOn.iterator((Iterator *)&nextProc);
00231     while (p) {
00232       pe2 = p->Id;
00233       hops += tmgr.getHopsBetweenRanks(pe1, pe2);
00234       p = (processorInfo *)patchArray[i].proxiesOn.next((Iterator*)&nextProc);
00235     }
00236   }
00237   CkPrintf("Load Balancing: Number of Hops: %d\n", hops);
00238 #endif
00239 
  // Post-balancing dump; both branches call dumpDataASCII and differ only in
  // the file name.
00240 #if DUMP_LDBDATA
00241   dumpDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00242 #elif LOAD_LDBDATA
00243   dumpDataASCII("ldbd_after.5", numProcessors, numPatches, nMoveableComputes);
00244   // loadDataASCII("ldbd_after", numProcessors, numPatches, nMoveableComputes);
00245   // CkExit();
00246 #endif
00247 
00248   // For error checking:
00249   // Count up computes, to see if somebody doesn't have any computes
00250   int i;
00251 #if 0
00252   int* computeCount = new int[numProcessors];
00253   for(i=0; i<numProcessors; i++)
00254     computeCount[i]=0;
00255   for(i=0; i<nMoveableComputes; i++)
00256     computeCount[computeArray[i].processor]++;
00257   for(i=0; i<numProcessors; i++) {
00258     if (computeCount[i]==0)
00259       iout << iINFO <<"Warning: Processor " << i 
00260            << " has NO moveable computes.\n" << endi;
00261   }
00262   delete [] computeCount;
00263 #endif
00264   
  // Collect one heap-allocated MigrateInfo per compute whose assigned
  // processor changed; each is copied into the message and deleted below.
00265   CkVec<MigrateInfo *> migrateInfo;
00266   for(i=0;i<nMoveableComputes;i++) {
00267     if (computeArray[i].processor != computeArray[i].oldProcessor) {
00268       //      CkPrintf("[%d] Obj %d migrating from %d to %d\n",
00269       //               CkMyPe(),computeArray[i].handle.id.id[0],
00270       //               computeArray[i].processor,computeArray[i].oldProcessor);
00271       MigrateInfo *migrateMe = new MigrateInfo;
00272       migrateMe->obj = computeArray[i].handle;
00273       migrateMe->from_pe = computeArray[i].oldProcessor;
00274       migrateMe->to_pe = computeArray[i].processor;
00275       migrateInfo.insertAtEnd(migrateMe);
00276 
00277       // sneak in updates to ComputeMap
00278       computeMap->setNewNode(LdbIdField(computeArray[i].handle.id, 0),
00279                                 computeArray[i].processor);
00280     }
00281   }
00282   
00283   int migrate_count=migrateInfo.length();
00284   // CkPrintf("NamdCentLB migrating %d elements\n",migrate_count);
  // The placement-new arguments are interpreted by CLBMigrateMsg's allocator
  // (declared elsewhere); the first is the number of moves -- TODO confirm
  // the meaning of the remaining three.
00285   CLBMigrateMsg* msg = new(migrate_count,CkNumPes(),CkNumPes(),0) CLBMigrateMsg;
00286 
  // Copy each record into the message, then free it and clear the slot.
00287   msg->n_moves = migrate_count;
00288   for(i=0; i < migrate_count; i++) {
00289     MigrateInfo* item = migrateInfo[i];
00290     msg->moves[i] = *item;
00291     delete item;
00292     migrateInfo[i] = 0;
00293   }
00294 
  // Publish the per-processor loads through cpuloads (declared outside this
  // listing) before the work arrays are released.
00295   for (i=0; i<numProcessors; i++) {
00296     cpuloads[i] = processorArray[i].load;
00297   }
00298 
  // Release the work arrays and null the pointers so that the next call
  // allocates fresh ones.
00299   delete [] processorArray;
00300   delete [] patchArray;
00301   delete [] computeArray;
00302 
00303   processorArray = NULL;
00304   patchArray = NULL;
00305   computeArray = NULL;
00306   
00307   return msg;
// NOTE(review): the `;` after the closing brace is a redundant empty
// declaration.
00308 };


The documentation for this class was generated from the following files: NamdCentLB.h and NamdCentLB.C.

Generated on 21 Sep 2020 for NAMD by  doxygen 1.6.1