NAMD
Classes | Macros | Functions | Variables
WorkDistrib.C File Reference
#include <stdio.h>
#include "InfoStream.h"
#include "Communicate.h"
#include "ProcessorPrivate.h"
#include "BOCgroup.h"
#include "WorkDistrib.decl.h"
#include "WorkDistrib.h"
#include "Lattice.h"
#include "ComputeMsmMsa.h"
#include "main.decl.h"
#include "main.h"
#include "Node.h"
#include "PatchMgr.h"
#include "PatchMap.inl"
#include "NamdTypes.h"
#include "PDB.h"
#include "SimParameters.h"
#include "Molecule.h"
#include "NamdOneTools.h"
#include "Compute.h"
#include "ComputeMap.h"
#include "RecBisection.h"
#include "Random.h"
#include "varsizemsg.h"
#include "ProxyMgr.h"
#include "Priorities.h"
#include "SortAtoms.h"
#include <algorithm>
#include "TopoManager.h"
#include "ComputePmeCUDAMgr.h"
#include "DeviceCUDA.h"
#include "Debug.h"
#include "WorkDistrib.def.h"

Go to the source code of this file.

Classes

class  ComputeMapChangeMsg
 
struct  pe_sortop_bit_reversed
 
struct  pe_sortop_coord_x
 
struct  pe_sortop_coord_y
 
class  PatchMapMsg
 
struct  nodesort
 
struct  TopoManagerWrapper
 
struct  TopoManagerWrapper::pe_sortop_topo
 
struct  patch_sortop_curve_a
 
struct  patch_sortop_curve_b
 
struct  patch_sortop_curve_c
 

Macros

#define MIN_DEBUG_LEVEL   2
 
#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
 

Functions

static void build_ordering (void *)
 
void topo_getargs (char **argv)
 
static int compare_bit_reversed (int a, int b)
 
static bool less_than_bit_reversed (int a, int b)
 
void cuda_initialize ()
 
void mic_initialize ()
 
static void recursive_bisect_coord (int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
 
static void recursive_bisect_with_curve (int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
 

Variables

__thread DeviceCUDAdeviceCUDA
 
static int randtopo
 
static int eventMachineProgress
 

Detailed Description

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved. Currently, WorkDistrib generates the layout of the Patches, directs the construction and distribution of Computes and associates Computes with Patches.

Definition in file WorkDistrib.C.

Macro Definition Documentation

#define MACHINE_PROGRESS   { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
#define MIN_DEBUG_LEVEL   2

Definition at line 61 of file WorkDistrib.C.

Function Documentation

static void build_ordering ( void )
static

Definition at line 86 of file WorkDistrib.C.

References WorkDistrib::buildNodeAwarePeOrdering().

Referenced by topo_getargs().

86  {
88 }
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:176
static int compare_bit_reversed ( int  a,
int  b 
)
static

Definition at line 120 of file WorkDistrib.C.

120  {
121  int d = a ^ b;
122  int c = 1;
123  if ( d ) while ( ! (d & c) ) {
124  c = c << 1;
125  }
126  return (a & c) - (b & c);
127 }
void cuda_initialize ( )

Definition at line 20 of file DeviceCUDA.C.

References deviceCUDA, and DeviceCUDA::initialize().

Referenced by WorkDistrib::peOrderingReady().

20  {
21  deviceCUDA = new DeviceCUDA();
23 }
void initialize()
Definition: DeviceCUDA.C:85
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
static bool less_than_bit_reversed ( int  a,
int  b 
)
static

Definition at line 129 of file WorkDistrib.C.

129  {
130  int d = a ^ b;
131  int c = 1;
132  if ( d ) while ( ! (d & c) ) {
133  c = c << 1;
134  }
135  return d && (b & c);
136 }
void mic_initialize ( )
static void recursive_bisect_coord ( int  x_begin,
int  x_end,
int  y_begin,
int  y_end,
int *  pe_begin,
ScaledPosition coord,
int *  result,
int  ydim 
)
static

Definition at line 268 of file WorkDistrib.C.

References sort, x, and y.

Referenced by WorkDistrib::sortPmePes().

272  {
273  int x_len = x_end - x_begin;
274  int y_len = y_end - y_begin;
275  if ( x_len == 1 && y_len == 1 ) {
276  // done, now put this pe in the right place
277  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
278  coord[*pe_begin].x, coord[*pe_begin].y);
279  result[x_begin*ydim + y_begin] = *pe_begin;
280  return;
281  }
282  int *pe_end = pe_begin + x_len * y_len;
283  if ( x_len >= y_len ) {
284  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
285  int x_split = x_begin + x_len / 2;
286  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
287  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
288  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
289  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
290  } else {
291  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
292  int y_split = y_begin + y_len / 2;
293  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
294  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
295  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
296  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
297  }
298 }
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:268
BlockRadixSort::TempStorage sort
gridSize y
gridSize x
static void recursive_bisect_with_curve ( int *  patch_begin,
int *  patch_end,
int *  node_begin,
int *  node_end,
double *  patchLoads,
double *  sortedLoads,
int *  assignedNode,
TopoManagerWrapper tmgr 
)
static

Definition at line 1972 of file WorkDistrib.C.

References TopoManagerWrapper::coords(), SimParameters::disableTopology, Patch::getNumAtoms(), PatchMap::index_a(), PatchMap::index_b(), PatchMap::index_c(), load, NAMD_bug(), PatchMap::Object(), Node::Object(), PatchMap::patch(), Node::simParameters, sort, TopoManagerWrapper::sortAndSplit(), and SimParameters::verboseTopology.

1979  {
1980 
1982  PatchMap *patchMap = PatchMap::Object();
1983  int *patches = patch_begin;
1984  int npatches = patch_end - patch_begin;
1985  int *nodes = node_begin;
1986  int nnodes = node_end - node_begin;
1987 
1988  // assign patch loads
1989  double totalRawLoad = 0;
1990  for ( int i=0; i<npatches; ++i ) {
1991  int pid=patches[i];
1992 #ifdef MEM_OPT_VERSION
1993  double load = patchMap->numAtoms(pid) + 10;
1994 #else
1995  double load = patchMap->patch(pid)->getNumAtoms() + 10;
1996 #endif
1997  patchLoads[pid] = load;
1998  sortedLoads[i] = load;
1999  totalRawLoad += load;
2000  }
2001  std::sort(sortedLoads,sortedLoads+npatches);
2002 
2003  // limit maxPatchLoad to adjusted average load per node
2004  double sumLoad = 0;
2005  double maxPatchLoad = 1;
2006  for ( int i=0; i<npatches; ++i ) {
2007  double load = sortedLoads[i];
2008  double total = sumLoad + (npatches-i) * load;
2009  if ( nnodes * load > total ) break;
2010  sumLoad += load;
2011  maxPatchLoad = load;
2012  }
2013  double totalLoad = 0;
2014  for ( int i=0; i<npatches; ++i ) {
2015  int pid=patches[i];
2016  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2017  totalLoad += patchLoads[pid];
2018  }
2019  if ( nnodes * maxPatchLoad > totalLoad )
2020  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2021 
2022  int a_len, b_len, c_len;
2023  int a_min, b_min, c_min;
2024  { // find dimensions
2025  a_min = patchMap->index_a(patches[0]);
2026  b_min = patchMap->index_b(patches[0]);
2027  c_min = patchMap->index_c(patches[0]);
2028  int a_max = a_min;
2029  int b_max = b_min;
2030  int c_max = c_min;
2031  for ( int i=1; i<npatches; ++i ) {
2032  int a = patchMap->index_a(patches[i]);
2033  int b = patchMap->index_b(patches[i]);
2034  int c = patchMap->index_c(patches[i]);
2035  if ( a < a_min ) a_min = a;
2036  if ( b < b_min ) b_min = b;
2037  if ( c < c_min ) c_min = c;
2038  if ( a > a_max ) a_max = a;
2039  if ( b > b_max ) b_max = b;
2040  if ( c > c_max ) c_max = c;
2041  }
2042  a_len = a_max - a_min;
2043  b_len = b_max - b_min;
2044  c_len = c_max - c_min;
2045  }
2046 
2047  int *node_split = node_begin;
2048 
2049  if ( simParams->disableTopology ) ; else
2050  if ( a_len >= b_len && a_len >= c_len ) {
2051  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2052  } else if ( b_len >= a_len && b_len >= c_len ) {
2053  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2054  } else if ( c_len >= a_len && c_len >= b_len ) {
2055  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2056  }
2057 
2058  if ( node_split == node_begin ) { // unable to split torus
2059  // make sure physical nodes are together
2060  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2061  // find physical node boundary to split on
2062  int i_split = 0;
2063  for ( int i=0; i<nnodes; ++i ) {
2064  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2065  int mid = (nnodes+1)/2;
2066  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2067  else break;
2068  }
2069  }
2070  node_split = node_begin + i_split;
2071  }
2072 
2073  bool final_patch_sort = false;
2074 
2075  if ( node_split == node_begin ) { // all on same physical node
2076  if ( ( simParams->verboseTopology ) &&
2077  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2078  int crds[3];
2079  tmgr.coords(*node_begin, crds);
2080  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2081  CmiPhysicalNodeID(*node_begin), *node_begin,
2082  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2083  a_min, b_min, c_min, npatches,
2084  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2085  }
2086 
2087  // final sort along a to minimize pme message count
2088  final_patch_sort = true;
2089 
2090  // find node (process) boundary to split on
2091  int i_split = 0;
2092  for ( int i=0; i<nnodes; ++i ) {
2093  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2094  int mid = (nnodes+1)/2;
2095  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2096  else break;
2097  }
2098  }
2099  node_split = node_begin + i_split;
2100  }
2101 
2102  if ( node_split == node_begin ) { // all on same node (process)
2103  if ( ( simParams->verboseTopology ) &&
2104  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2105  int crds[3];
2106  tmgr.coords(*node_begin, crds);
2107  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2108  CmiNodeOf(*node_begin), *node_begin, npatches,
2109  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2110  }
2111 
2112  // no natural divisions so just split at midpoint
2113  node_split = node_begin + nnodes/2;
2114  }
2115 
2116  if ( nnodes == 1 ) { // down to a single pe
2117  // assign all patches
2118  int *node = node_begin;
2119  sumLoad = 0;
2120  for ( int i=0; i < npatches; ++i ) {
2121  int pid = patches[i];
2122  assignedNode[pid] = *node;
2123  sumLoad += patchLoads[pid];
2124  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2125  i, *node,
2126  patchMap->index_a(pid),
2127  patchMap->index_b(pid),
2128  patchMap->index_c(pid),
2129  patchLoads[pid], sumLoad);
2130  }
2131 
2132  return;
2133  }
2134 
2135  if ( final_patch_sort ) {
2136  // final sort along a to minimize pme message count
2137  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2138  } else if ( a_len >= b_len && a_len >= c_len ) {
2139  if ( 0 ) CkPrintf("sort a\n");
2140  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2141  } else if ( b_len >= a_len && b_len >= c_len ) {
2142  if ( 0 ) CkPrintf("sort b\n");
2143  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2144  } else if ( c_len >= a_len && c_len >= b_len ) {
2145  if ( 0 ) CkPrintf("sort c\n");
2146  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2147  }
2148 
2149  int *patch_split;
2150  { // walk through patches in sorted order
2151  int *node = node_begin;
2152  sumLoad = 0;
2153  for ( patch_split = patch_begin;
2154  patch_split != patch_end && node != node_split;
2155  ++patch_split ) {
2156  sumLoad += patchLoads[*patch_split];
2157  double targetLoad = totalLoad *
2158  ((double)(node-node_begin+1) / (double)nnodes);
2159  if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
2160  patch_split - patch_begin, *node,
2161  patchMap->index_a(*patch_split),
2162  patchMap->index_b(*patch_split),
2163  patchMap->index_c(*patch_split),
2164  sumLoad, targetLoad);
2165  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2166  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2167  }
2168  double targetLoad = totalLoad *
2169  ((double)(node_split-node_begin) / (double)nnodes);
2170  if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
2171  node_split-node_begin, nnodes,
2172  patch_split-patch_begin, npatches,
2173  sumLoad, targetLoad);
2174  }
2175 
2176  // recurse
2178  patch_begin, patch_split, node_begin, node_split,
2179  patchLoads, sortedLoads, assignedNode, tmgr);
2181  patch_split, patch_end, node_split, node_end,
2182  patchLoads, sortedLoads, assignedNode, tmgr);
2183 }
static Node * Object()
Definition: Node.h:86
BlockLoad::TempStorage load
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:1972
static PatchMap * Object()
Definition: PatchMap.h:27
SimParameters * simParameters
Definition: Node.h:178
int index_a(int pid) const
Definition: PatchMap.h:86
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
void NAMD_bug(const char *err_msg)
Definition: common.C:123
int index_b(int pid) const
Definition: PatchMap.h:87
int index_c(int pid) const
Definition: PatchMap.h:88
BlockRadixSort::TempStorage sort
#define simParams
Definition: Output.C:127
int getNumAtoms()
Definition: Patch.h:105
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1821
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1856
void topo_getargs ( char **  argv)

Definition at line 90 of file WorkDistrib.C.

References build_ordering(), and randtopo.

Referenced by all_init().

90  {
91  randtopo = CmiGetArgFlag(argv, "+randtopo");
92  if ( CkMyPe() >= CkNumPes() ) return;
93  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
94 }
static int randtopo
Definition: WorkDistrib.C:84
static void build_ordering(void *)
Definition: WorkDistrib.C:86

Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

int eventMachineProgress
static

Definition at line 96 of file WorkDistrib.C.

Referenced by WorkDistrib::WorkDistrib().

int randtopo
static

Definition at line 84 of file WorkDistrib.C.

Referenced by WorkDistrib::buildNodeAwarePeOrdering(), and topo_getargs().