WorkDistrib.C File Reference

#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
struct  pe_sortop_bit_reversed
struct  pe_sortop_coord_x
struct  pe_sortop_coord_y
class  PatchMapMsg
struct  nodesort
struct  TopoManagerWrapper
struct  TopoManagerWrapper::pe_sortop_topo
struct  patch_sortop_curve_a
struct  patch_sortop_curve_b
struct  patch_sortop_curve_c

Defines

#define MIN_DEBUG_LEVEL   2
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }

Functions

static void build_ordering (void *)
void topo_getargs (char **argv)
static int compare_bit_reversed (int a, int b)
static bool less_than_bit_reversed (int a, int b)
void cuda_initialize ()
void mic_initialize ()
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)

Variables

__thread DeviceCUDAdeviceCUDA
static int randtopo
static int eventMachineProgress

Detailed Description

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.


Define Documentation

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
#define MIN_DEBUG_LEVEL   2

Definition at line 61 of file WorkDistrib.C.


Function Documentation

static void build_ordering ( void *   )  [static]

Definition at line 86 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

00086                                    {
00087   WorkDistrib::buildNodeAwarePeOrdering();
00088 }

static int compare_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 120 of file WorkDistrib.C.

00120                                               {
00121   int d = a ^ b;
00122   int c = 1;
00123   if ( d ) while ( ! (d & c) ) {
00124     c = c << 1;
00125   }
00126   return (a & c) - (b & c);
00127 }

void cuda_initialize (  ) 

Definition at line 20 of file DeviceCUDA.C.

References deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

00020                        {
00021         deviceCUDA = new DeviceCUDA();
00022         deviceCUDA->initialize();
00023 }

static bool less_than_bit_reversed ( int  a,
int  b 
) [static]

Definition at line 129 of file WorkDistrib.C.

00129                                                  {
00130   int d = a ^ b;
00131   int c = 1;
00132   if ( d ) while ( ! (d & c) ) {
00133     c = c << 1;
00134   }
00135   return d && (b & c);
00136 }

void mic_initialize (  ) 
static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition coord,
int *  result,
int  ydim 
) [static]

Definition at line 268 of file WorkDistrib.C.

References sort.

Referenced by WorkDistrib::sortPmePes().

00272     {
00273   int x_len = x_end - x_begin;
00274   int y_len = y_end - y_begin;
00275   if ( x_len == 1 && y_len == 1 ) {
00276     // done, now put this pe in the right place
00277     if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
00278       coord[*pe_begin].x, coord[*pe_begin].y);
00279     result[x_begin*ydim + y_begin] = *pe_begin;
00280     return;
00281   }
00282   int *pe_end = pe_begin + x_len * y_len;
00283   if ( x_len >= y_len ) {
00284     std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
00285     int x_split = x_begin + x_len / 2;
00286     int* pe_split = pe_begin + (x_split - x_begin) * y_len;
00287     //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
00288     recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
00289     recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
00290   } else {
00291     std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
00292     int y_split = y_begin + y_len / 2;
00293     int* pe_split = pe_begin + (y_split - y_begin) * x_len;
00294     //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
00295     recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
00296     recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
00297   }
00298 }

static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper tmgr 
) [static]

Definition at line 1972 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), SimParameters::disableTopology, Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, sort, TopoManagerWrapper::sortAndSplit(), and SimParameters::verboseTopology.

01979     {
01980 
01981   SimParameters *simParams = Node::Object()->simParameters;
01982   PatchMap *patchMap = PatchMap::Object();
01983   int *patches = patch_begin;
01984   int npatches = patch_end - patch_begin;
01985   int *nodes = node_begin;
01986   int nnodes = node_end - node_begin;
01987 
01988   // assign patch loads
01989   double totalRawLoad = 0;
01990   for ( int i=0; i<npatches; ++i ) {
01991     int pid=patches[i];
01992 #ifdef MEM_OPT_VERSION
01993     double load = patchMap->numAtoms(pid) + 10;      
01994 #else
01995     double load = patchMap->patch(pid)->getNumAtoms() + 10;
01996 #endif
01997     patchLoads[pid] = load;
01998     sortedLoads[i] = load;
01999     totalRawLoad += load;
02000   }
02001   std::sort(sortedLoads,sortedLoads+npatches);
02002 
02003   // limit maxPatchLoad to adjusted average load per node
02004   double sumLoad = 0;
02005   double maxPatchLoad = 1;
02006   for ( int i=0; i<npatches; ++i ) {
02007     double load = sortedLoads[i];
02008     double total = sumLoad + (npatches-i) * load;
02009     if ( nnodes * load > total ) break;
02010     sumLoad += load;
02011     maxPatchLoad = load;
02012   }
02013   double totalLoad = 0;
02014   for ( int i=0; i<npatches; ++i ) {
02015     int pid=patches[i];
02016     if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
02017     totalLoad += patchLoads[pid];
02018   }
02019   if ( nnodes * maxPatchLoad > totalLoad )
02020     NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
02021 
02022   int a_len, b_len, c_len;
02023   int a_min, b_min, c_min;
02024   { // find dimensions
02025     a_min = patchMap->index_a(patches[0]);
02026     b_min = patchMap->index_b(patches[0]);
02027     c_min = patchMap->index_c(patches[0]);
02028     int a_max = a_min;
02029     int b_max = b_min;
02030     int c_max = c_min;
02031     for ( int i=1; i<npatches; ++i ) {
02032       int a = patchMap->index_a(patches[i]);
02033       int b = patchMap->index_b(patches[i]);
02034       int c = patchMap->index_c(patches[i]);
02035       if ( a < a_min ) a_min = a;
02036       if ( b < b_min ) b_min = b;
02037       if ( c < c_min ) c_min = c;
02038       if ( a > a_max ) a_max = a;
02039       if ( b > b_max ) b_max = b;
02040       if ( c > c_max ) c_max = c;
02041     }
02042     a_len = a_max - a_min;
02043     b_len = b_max - b_min;
02044     c_len = c_max - c_min;
02045   }
02046 
02047   int *node_split = node_begin;
02048 
02049   if ( simParams->disableTopology ) ; else
02050   if ( a_len >= b_len && a_len >= c_len ) {
02051     node_split = tmgr.sortAndSplit(node_begin,node_end,0);
02052   } else if ( b_len >= a_len && b_len >= c_len ) {
02053     node_split = tmgr.sortAndSplit(node_begin,node_end,1);
02054   } else if ( c_len >= a_len && c_len >= b_len ) {
02055     node_split = tmgr.sortAndSplit(node_begin,node_end,2);
02056   }
02057 
02058   if ( node_split == node_begin ) {  // unable to split torus
02059     // make sure physical nodes are together
02060     std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
02061     // find physical node boundary to split on
02062     int i_split = 0;
02063     for ( int i=0; i<nnodes; ++i ) {
02064       if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
02065         int mid = (nnodes+1)/2;
02066         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02067         else break;
02068       }
02069     }
02070     node_split = node_begin + i_split;
02071   }
02072 
02073   bool final_patch_sort = false;
02074 
02075   if ( node_split == node_begin ) {  // all on same physical node
02076     if ( ( simParams->verboseTopology ) &&
02077         nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
02078       int crds[3];
02079       tmgr.coords(*node_begin, crds);
02080       CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02081                CmiPhysicalNodeID(*node_begin), *node_begin,
02082                CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
02083                a_min, b_min, c_min, npatches,
02084                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02085     }
02086 
02087     // final sort along a to minimize pme message count
02088     final_patch_sort = true;
02089 
02090     // find node (process) boundary to split on
02091     int i_split = 0;
02092     for ( int i=0; i<nnodes; ++i ) {
02093       if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
02094         int mid = (nnodes+1)/2;
02095         if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
02096         else break;
02097       }
02098     }
02099     node_split = node_begin + i_split;
02100   }
02101 
02102   if ( node_split == node_begin ) {  // all on same node (process)
02103     if ( ( simParams->verboseTopology ) &&
02104         nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
02105       int crds[3];
02106       tmgr.coords(*node_begin, crds);
02107       CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
02108                CmiNodeOf(*node_begin), *node_begin, npatches,
02109                a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
02110     }
02111 
02112     // no natural divisions so just split at midpoint
02113     node_split = node_begin + nnodes/2;
02114   }
02115 
02116   if ( nnodes == 1 ) {  // down to a single pe
02117     // assign all patches
02118     int *node = node_begin;
02119     sumLoad = 0;
02120     for ( int i=0; i < npatches; ++i ) {
02121       int pid = patches[i];
02122       assignedNode[pid] = *node;
02123       sumLoad += patchLoads[pid];
02124       if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
02125                 i, *node,
02126                 patchMap->index_a(pid),
02127                 patchMap->index_b(pid),
02128                 patchMap->index_c(pid),
02129                 patchLoads[pid], sumLoad);
02130     }
02131 
02132     return;
02133   }
02134 
02135   if ( final_patch_sort ) {
02136     // final sort along a to minimize pme message count
02137     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02138   } else if ( a_len >= b_len && a_len >= c_len ) {
02139     if ( 0 ) CkPrintf("sort a\n");
02140     std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
02141   } else if ( b_len >= a_len && b_len >= c_len ) {
02142     if ( 0 ) CkPrintf("sort b\n");
02143     std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
02144   } else if ( c_len >= a_len && c_len >= b_len ) {
02145     if ( 0 ) CkPrintf("sort c\n");
02146     std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
02147   }
02148 
02149   int *patch_split;
02150   { // walk through patches in sorted order
02151     int *node = node_begin;
02152     sumLoad = 0;
02153     for ( patch_split = patch_begin;
02154           patch_split != patch_end && node != node_split;
02155           ++patch_split ) {
02156       sumLoad += patchLoads[*patch_split];
02157       double targetLoad = totalLoad *
02158         ((double)(node-node_begin+1) / (double)nnodes);
02159       if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
02160                 patch_split - patch_begin, *node,
02161                 patchMap->index_a(*patch_split),
02162                 patchMap->index_b(*patch_split),
02163                 patchMap->index_c(*patch_split),
02164                 sumLoad, targetLoad);
02165       double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
02166       if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
02167     }
02168     double targetLoad = totalLoad *
02169       ((double)(node_split-node_begin) / (double)nnodes);
02170     if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
02171               node_split-node_begin, nnodes,
02172               patch_split-patch_begin, npatches,
02173               sumLoad, targetLoad);
02174   }
02175 
02176   // recurse
02177   recursive_bisect_with_curve(
02178     patch_begin, patch_split, node_begin, node_split,
02179     patchLoads, sortedLoads, assignedNode, tmgr);
02180   recursive_bisect_with_curve(
02181     patch_split, patch_end, node_split, node_end,
02182     patchLoads, sortedLoads, assignedNode, tmgr);
02183 }

void topo_getargs ( char **  argv  ) 

Definition at line 90 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

00090                                {
00091   randtopo = CmiGetArgFlag(argv, "+randtopo");
00092   if ( CkMyPe() >= CkNumPes() ) return;
00093   CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
00094 }


Variable Documentation

Definition at line 18 of file DeviceCUDA.C.

int eventMachineProgress [static]

Definition at line 96 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

int randtopo [static]

Definition at line 84 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().


Generated on 19 Nov 2019 for NAMD by  doxygen 1.6.1