NAMD — WorkDistrib.C (source listing extracted from the generated Doxygen documentation page; residual line-number artifacts from that page remain throughout the listing below, and some hyperlinked lines were lost in extraction).
1 
7 /*****************************************************************************
8  * $Source: /home/cvs/namd/cvsroot/namd2/src/WorkDistrib.C,v $
9  * $Author: jim $
10  * $Date: 2017/03/30 20:06:17 $
11  * $Revision: 1.1291 $
12  *****************************************************************************/
13 
20 #include <stdio.h>
21 
22 #include "InfoStream.h"
23 #include "Communicate.h"
24 #include "ProcessorPrivate.h"
25 #include "BOCgroup.h"
26 #include "WorkDistrib.decl.h"
27 #include "WorkDistrib.h"
28 #include "Lattice.h"
29 #include "ComputeMsmMsa.h" // needed for MsmMsaData definition
30 #include "main.decl.h"
31 #include "main.h"
32 #include "Node.h"
33 #include "PatchMgr.h"
34 #include "PatchMap.inl"
35 #include "NamdTypes.h"
36 #include "PDB.h"
37 #include "SimParameters.h"
38 #include "Molecule.h"
39 #include "NamdOneTools.h"
40 #include "Compute.h"
41 #include "ComputeMap.h"
42 #include "RecBisection.h"
43 #include "Random.h"
44 #include "varsizemsg.h"
45 #include "ProxyMgr.h"
46 #include "Priorities.h"
47 #include "SortAtoms.h"
48 #include <algorithm>
49 #include "TopoManager.h"
50 #include "ComputePmeCUDAMgr.h"
51 
52 #include "DeviceCUDA.h"
53 #ifdef NAMD_CUDA
54 #ifdef WIN32
55 #define __thread __declspec(thread)
56 #endif
57 extern __thread DeviceCUDA *deviceCUDA;
58 #endif
59 
60 //#define DEBUGM
61 #define MIN_DEBUG_LEVEL 2
62 #include "Debug.h"
63 #ifdef MEM_OPT_VERSION
64 extern int isOutputProcessor(int);
65 #endif
// Varsize Charm++ message used to broadcast compute-map reassignments
// to all WorkDistrib group members (see recvComputeMapChanges).
// NOTE(review): the documentation extraction dropped some member
// declarations here (doxygen lines 70-73); only the newNodes array
// survives.  Confirm against the repository copy before editing.
class ComputeMapChangeMsg : public CMessage_ComputeMapChangeMsg
{
public:

 int *newNodes;  // varsize array: new PE assignment per compute

// VARSIZE_DECL(ComputeMapChangeMsg);
};
77 
78 /*
79 VARSIZE_MSG(ComputeMapChangeMsg,
80  VARSIZE_ARRAY(newNodes);
81 )
82 */
83 
84 static int randtopo;
85 
// Callback registered with CcdCallOnCondition once topology information
// becomes available (see topo_getargs below).
// NOTE(review): the extraction dropped this function's body line
// (doxygen line 87) -- it is not actually empty in the repository copy;
// confirm before editing.
static void build_ordering(void *) {
}
89 
90 void topo_getargs(char **argv) {
91  randtopo = CmiGetArgFlag(argv, "+randtopo");
92  if ( CkMyPe() >= CkNumPes() ) return;
93  CcdCallOnCondition(CcdTOPOLOGY_AVAIL, (CcdVoidFn)build_ordering, (void*)0);
94 }
95 
97 
98 //======================================================================
99 // Public functions
100 //----------------------------------------------------------------------
// Body of the WorkDistrib group constructor.
// NOTE(review): the signature line was dropped by the documentation
// extraction (doxygen line 101); this is WorkDistrib::WorkDistrib() --
// confirm against the repository copy.
{
 // Register this group in the per-PE BOC table so other chares can find it.
 CkpvAccess(BOCclass_group).workDistrib = thisgroup;
 patchMapArrived = false;
 computeMapArrived = false;

#if CMK_SMP
#define MACHINE_PROGRESS
#else
// Non-SMP builds must drive the communication engine by hand; the trace
// event lets the progress calls show up in projections output.
#define MACHINE_PROGRESS { traceUserEvent(eventMachineProgress); CmiMachineProgressImpl(); }
 if ( CkMyNodeSize() > 1 ) NAMD_bug("CkMyNodeSize() > 1 for non-smp build");
 eventMachineProgress = traceRegisterUserEvent("CmiMachineProgressImpl",233);
#endif
}
115 
116 //----------------------------------------------------------------------
118 { }
119 
// Compare a and b as if their bit patterns were reversed (the least
// significant bit is treated as most significant).  Returns a negative,
// zero, or positive value like strcmp.  The lowest-order bit in which
// the two values differ decides the ordering.
static int compare_bit_reversed(int a, int b) {
  int diff = a ^ b;
  int mask = 1;
  if ( diff ) {
    // Advance mask to the lowest set bit of diff.
    while ( (diff & mask) == 0 ) mask <<= 1;
  }
  return (a & mask) - (b & mask);
}
128 
// Strict "less than" under bit-reversed ordering: true iff a precedes b
// when values are compared starting from the least-significant bit.
static bool less_than_bit_reversed(int a, int b) {
  int diff = a ^ b;
  if ( diff == 0 ) return false;  // equal values are never less-than
  int mask = 1;
  while ( (diff & mask) == 0 ) mask <<= 1;
  // The values differ at `mask`; a is smaller exactly when b owns that bit.
  return (b & mask) != 0;
}
137 
 // Comparison functor: orders PEs primarily by bit-reversed rank within
 // their node, then by bit-reversed rank of the node within its physical
 // node, then by bit-reversed physical node id.
 // NOTE(review): the documentation extraction dropped the enclosing
 // struct header (pe_sortop_bit_reversed, holding rankInPhysOfNode) and
 // the head of the second comparison statement -- the line below that
 // begins with "rankInPhysOfNode[" is the tail of
 // "c = compare_bit_reversed(...)".  Do not edit without the repository copy.
 inline bool operator() (int a, int b) const {
 int c = compare_bit_reversed(CmiRankOf(a),CmiRankOf(b));
 if ( c < 0 ) return true;
 if ( c > 0 ) return false;
 rankInPhysOfNode[CmiNodeOf(a)],rankInPhysOfNode[CmiNodeOf(b)]);
 if ( c < 0 ) return true;
 if ( c > 0 ) return false;
 c = compare_bit_reversed(CmiPhysicalNodeID(a),CmiPhysicalNodeID(b));
 return ( c < 0 );
 }
};
153 
159 
160 #ifdef NAMD_CUDA
161 extern void cuda_initialize();
162 #endif
163 
164 void mic_initialize();
165 
 // Tail of WorkDistrib::peOrderingReady() -- the signature line was
 // dropped by the documentation extraction (doxygen line 166).  Runs
 // once the PE ordering has been built; initializes accelerator
 // runtimes that depend on the final ordering.
 //CkPrintf("WorkDistrib::peOrderingReady on %d\n", CkMyPe());
#ifdef NAMD_CUDA
 cuda_initialize();
#endif
#ifdef NAMD_MIC
 mic_initialize();
#endif
}
175 
 // Body of WorkDistrib::buildNodeAwarePeOrdering() (name grounded in the
 // commented CkPrintf below; the signature line was dropped by the
 // documentation extraction).  Builds, once per process, the global
 // "compact" and "diffuse" PE orderings used to place work across
 // physical nodes.  Guarded by CmiMemLock so only one thread runs it.
 CmiMemLock();
 if ( ! peOrderingInit ) {
 //CkPrintf("WorkDistrib::buildNodeAwarePeOrdering on pe %d\n", CkMyPe());

 const int numPhys = CmiNumPhysicalNodes();
 const int numNode = CmiNumNodes();
 const int numPe = CmiNumPes();
 ResizeArray<int> numNodeInPhys(numPhys);
 ResizeArray<int> rankInPhysOfNode(numNode);

 peDiffuseOrdering = new int[numPe];
 peDiffuseOrderingIndex = new int[numPe];
 peCompactOrdering = new int[numPe];
 peCompactOrderingIndex = new int[numPe];

 // Compact ordering: all PEs of each physical node listed consecutively.
 int k = 0;
 for ( int ph=0; ph<numPhys; ++ph ) {
 int *pes, npes;
 CmiGetPesOnPhysicalNode(ph, &pes, &npes);
 for ( int i=0; i<npes; ++i, ++k ) {
 peCompactOrdering[k] = pes[i];
 }
 numNodeInPhys[ph] = 0;
 // Record each (logical) node's rank within its physical node.
 for ( int i=0, j=0; i<npes; i += CmiNodeSize(CmiNodeOf(pes[i])), ++j ) {
 rankInPhysOfNode[CmiNodeOf(pes[i])] = j;
 numNodeInPhys[ph] += 1;
 }
 }

 // Optional +randtopo: shuffle physical nodes 2..numPhys-1 with a fixed
 // seed so every PE computes the same permutation.
 if ( randtopo && numPhys > 2 ) {
 if ( ! CkMyNode() ) {
 iout << iWARN << "RANDOMIZING PHYSICAL NODE ORDERING\n" << endi;
 }
 ResizeArray<int> randPhysOrder(numPhys);
 for ( int j=0; j<numPhys; ++j ) {
 randPhysOrder[j] = j;
 }
 Random(314159265).reorder(randPhysOrder.begin()+2, numPhys-2);
 for ( int j=0, k=0; j<numPhys; ++j ) {
 const int ph = randPhysOrder[j];
 int *pes, npes;
 CmiGetPesOnPhysicalNode(ph, &pes, &npes);
 for ( int i=0; i<npes; ++i, ++k ) {
 peCompactOrdering[k] = pes[i];
 }
 }
 }

 for ( int i=0; i<numPe; ++i ) {
 peDiffuseOrdering[i] = i;
 }
 // NOTE(review): the head of the std::sort call over peDiffuseOrdering
 // was dropped by the extraction (doxygen line 229); the next line is
 // its trailing argument.
 pe_sortop_bit_reversed(rankInPhysOfNode.begin()));

 // NOTE(review): the loop body filling the *OrderingIndex arrays was
 // dropped by the extraction (doxygen lines 233-234).
 for ( int i=0; i<numPe; ++i ) {
 }

 // Disabled debug dump of the orderings (argument lines also lost in
 // extraction, doxygen lines 239-242).
 if ( 0 && CmiMyNode() == 0 ) for ( int i=0; i<numPe; ++i ) {
 CkPrintf("order %5d %5d %5d %5d %5d\n", i,
 }

 peOrderingInit = 1;
 }
 CmiMemUnlock();
 peOrderingReady();

}
251 
 // Orders PEs by the x coordinate of their associated position.
 // NOTE(review): the enclosing struct header (pe_sortop_coord_x, with
 // its ScaledPosition *spos member) was dropped by the extraction.
 inline bool operator() (int a, int b) const {
 return ( spos[a].x < spos[b].x );
 }
};
259 
 // Orders PEs by the y coordinate of their associated position.
 // NOTE(review): the enclosing struct header (pe_sortop_coord_y, with
 // its ScaledPosition *spos member) was dropped by the extraction.
 inline bool operator() (int a, int b) const {
 return ( spos[a].y < spos[b].y );
 }
};
267 
269  int x_begin, int x_end, int y_begin, int y_end,
270  int *pe_begin, ScaledPosition *coord,
271  int *result, int ydim
272  ) {
273  int x_len = x_end - x_begin;
274  int y_len = y_end - y_begin;
275  if ( x_len == 1 && y_len == 1 ) {
276  // done, now put this pe in the right place
277  if ( 0 ) CkPrintf("pme %5d %5d on pe %5d at %f %f\n", x_begin, y_begin, *pe_begin,
278  coord[*pe_begin].x, coord[*pe_begin].y);
279  result[x_begin*ydim + y_begin] = *pe_begin;
280  return;
281  }
282  int *pe_end = pe_begin + x_len * y_len;
283  if ( x_len >= y_len ) {
284  std::sort(pe_begin, pe_end, pe_sortop_coord_x(coord));
285  int x_split = x_begin + x_len / 2;
286  int* pe_split = pe_begin + (x_split - x_begin) * y_len;
287  //CkPrintf("x_split %5d %5d %5d\n", x_begin, x_split, x_end);
288  recursive_bisect_coord(x_begin, x_split, y_begin, y_end, pe_begin, coord, result, ydim);
289  recursive_bisect_coord(x_split, x_end, y_begin, y_end, pe_split, coord, result, ydim);
290  } else {
291  std::sort(pe_begin, pe_end, pe_sortop_coord_y(coord));
292  int y_split = y_begin + y_len / 2;
293  int* pe_split = pe_begin + (y_split - y_begin) * x_len;
294  //CkPrintf("y_split %5d %5d %5d\n", y_begin, y_split, y_end);
295  recursive_bisect_coord(x_begin, x_end, y_begin, y_split, pe_begin, coord, result, ydim);
296  recursive_bisect_coord(x_begin, x_end, y_split, y_end, pe_split, coord, result, ydim);
297  }
298 }
299 
300 void WorkDistrib::sortPmePes(int *pmepes, int xdim, int ydim) {
301  int numpes = CkNumPes();
302  ResizeArray<int> count(numpes);
303  ResizeArray<ScaledPosition> sumPos(numpes);
304  ResizeArray<ScaledPosition> avgPos(numpes);
305  for ( int i=0; i<numpes; ++i ) {
306  count[i] = 0;
307  sumPos[i] = 0;
308  avgPos[i] = 0;
309  }
310  PatchMap *patchMap = PatchMap::Object();
311  for ( int i=0, npatches=patchMap->numPatches(); i<npatches; ++i ) {
312  int pe = patchMap->node(i);
313  count[pe] += 1;
314  sumPos[pe] += patchMap->center(i);
315  }
316  const int npmepes = xdim*ydim;
317  ResizeArray<int> sortpes(npmepes);
318  for ( int i=0; i<npmepes; ++i ) {
319  int pe = sortpes[i] = pmepes[i];
320  int cnt = count[pe];
321  ScaledPosition sum = sumPos[pe];
322  if ( cnt == 0 ) {
323  // average over node
324  int node = CkNodeOf(pe);
325  int nsize = CkNodeSize(node);
326  int pe2 = CkNodeFirst(node);
327  for ( int j=0; j<nsize; ++j, ++pe2 ) {
328  cnt += count[pe2];
329  sum += sumPos[pe2];
330  }
331  }
332  if ( cnt == 0 ) {
333  // average over physical node
334  int node = CmiPhysicalNodeID(pe);
335  int nsize, *nlist;
336  CmiGetPesOnPhysicalNode(node, &nlist, &nsize);
337  for ( int j=0; j<nsize; ++j ) {
338  int pe2 = nlist[j];
339  cnt += count[pe2];
340  sum += sumPos[pe2];
341  }
342  }
343  if ( cnt ) {
344  avgPos[pe] = sum / cnt;
345  }
346  }
347  recursive_bisect_coord(0, xdim, 0, ydim, sortpes.begin(), avgPos.begin(), pmepes, ydim);
348 }
349 
350 
351 //----------------------------------------------------------------------
352 void WorkDistrib::saveComputeMapChanges(int ep, CkGroupID chareID)
353 {
354  saveComputeMapReturnEP = ep;
355  saveComputeMapReturnChareID = chareID;
356 
357  ComputeMapChangeMsg *mapMsg = new (0, 0, 0) ComputeMapChangeMsg;
358  CProxy_WorkDistrib(thisgroup).recvComputeMapChanges(mapMsg);
359 
360 /*
361  // store the latest compute map
362  SimParameters *simParams = Node::Object()->simParameters;
363  if (simParams->storeComputeMap) {
364  computeMap->saveComputeMap(simParams->computeMapFilename);
365  CkPrintf("ComputeMap has been stored in %s.\n", simParams->computeMapFilename);
366  }
367 */
368 }
369 
 // Body of WorkDistrib::recvComputeMapChanges() (name grounded in the
 // NAMD_bug strings below; the signature line, taking a
 // ComputeMapChangeMsg*, was dropped by the documentation extraction).
 // PE 0 streams its updated compute map to all other nodes; rank-0 PEs
 // of the other nodes receive and apply it, then everyone joins a
 // reduction that triggers doneSaveComputeMap().
 delete msg;

 ComputeMap *computeMap = ComputeMap::Object();

 int i;
 int nc = computeMap->numComputes();

 if ( ! CkMyPe() ) { // send
 // CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
 MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
 // Stream layout: nc, nc newNode ints, nc, nc newNumPartitions chars, nc.
 // The repeated counts act as consistency checks on the receive side.
 msg->put(nc);
 for (i=0; i<nc; i++) {
 int data = computeMap->newNode(i);
 msg->put(data);
 }
 msg->put(nc);
 for (i=0; i<nc; i++) {
 char data = computeMap->newNumPartitions(i);
 msg->put(data);
 }
 msg->put(nc);
 msg->end();
 delete msg;
 // CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
 } else if ( ! CkMyRank() ) { // receive
 // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
 MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
 msg->get(i);
 if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 1 failed\n");
 for (i=0; i<nc; i++) {
 int data;
 msg->get(data);
 computeMap->setNewNode(i,data);
 }
 msg->get(i);
 if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 2 failed\n");
 for (i=0; i<nc; i++) {
 char data;
 msg->get(data);
 computeMap->setNewNumPartitions(i,data);
 }
 msg->get(i);
 if ( i != nc ) NAMD_bug("WorkDistrib::recvComputeMapChanges check 3 failed\n");
 delete msg;
 // if ( CkMyNode() == 1 ) CkPrintf("At %f on %d done WorkDistrib::recvComputeMapChanges %d\n", CmiWallTimer(), CkMyPe(), nc);
 }

 // All PEs contribute; the reduction fires doneSaveComputeMap on PE 0.
 CkCallback cb(CkIndex_WorkDistrib::doneSaveComputeMap(NULL), 0, thisgroup);
 contribute(0, NULL, CkReduction::random, cb);
}
422 
423 void WorkDistrib::doneSaveComputeMap(CkReductionMsg *msg) {
424  delete msg;
425 
426  CkSendMsgBranch(saveComputeMapReturnEP, CkAllocMsg(0,0,0), 0, saveComputeMapReturnChareID);
427 }
428 
429 #ifdef MEM_OPT_VERSION
430 //All basic info already exists for each atom inside the FullAtomList because
431 //it is loaded when reading the binary per-atom file. This function will fill
432 //the info regarding transform, nonbondedGroupSize etc. Refer to
433 //WorkDistrib::createAtomLists
//All basic info already exists for each atom inside the FullAtomList because
//it is loaded when reading the binary per-atom file. This function fills in
//per-patch derived data: periodic-image transforms relative to the patch
//center, partition, Langevin damping parameters, and group-fixed flags.
void WorkDistrib::fillAtomListForOnePatch(int pid, FullAtomList &alist){
 PatchMap *patchMap = PatchMap::Object();

 // Geometric center of patch pid in scaled (lattice) coordinates.
 ScaledPosition center(0.5*(patchMap->min_a(pid)+patchMap->max_a(pid)),
 0.5*(patchMap->min_b(pid)+patchMap->max_b(pid)),
 0.5*(patchMap->min_c(pid)+patchMap->max_c(pid)));

 int n = alist.size();
 FullAtom *a = alist.begin();
/*
 //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei
//Modifications for alchemical fep
 Bool alchFepOn = params->alchFepOn;
 Bool alchThermIntOn = params->alchThermIntOn;
//fepe
 Bool lesOn = params->lesOn;
 Bool pairInteractionOn = params->pairInteractionOn;

 Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);
*/
 // NOTE(review): the declaration of `params` (doxygen line 454, a
 // SimParameters* obtained from the Node object) was dropped by the
 // documentation extraction -- confirm against the repository copy.
 const Lattice lattice = params->lattice;
 Transform mother_transform;
 for(int j=0; j < n; j++)
 {
 int aid = a[j].id;
 a[j].nonbondedGroupSize = 0; // must be set based on coordinates

 // a[j].fixedPosition = a[j].position; ParallelIOMgr stores ref coord here.

 if ( a[j].migrationGroupSize ) {
 // Migration-group parent: wrap the whole group to the nearest image
 // of the patch center, using the group's average position when the
 // migration group spans several hydrogen groups.
 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
 Position pos = a[j].position;
 int mgs = a[j].migrationGroupSize;
 int c = 1;

 for ( int k=a[j].hydrogenGroupSize; k<mgs;
 k+=a[j+k].hydrogenGroupSize ) {
 pos += a[j+k].position;
 ++c;
 }

 pos *= 1./c;
 mother_transform = a[j].transform; // should be 0,0,0
 pos = lattice.nearest(pos,center,&mother_transform);
 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
 a[j].transform = mother_transform;
 } else {
 a[j].position = lattice.nearest(a[j].position, center, &(a[j].transform));
 mother_transform = a[j].transform;
 }
 } else {
 // Group member: reuse the transform computed for its parent.
 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
 a[j].transform = mother_transform;
 }

/*
 //Those options are not supported in MEM_OPT_VERSIOn -Chao Mei
//Modifications for alchemical fep
 if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
 a[j].partition = molecule->get_fep_type(aid);
 }
 else {
 a[j].partition = 0;
 }
//fepe
*/
 a[j].partition = 0;

 //set langevinParams based on atom status
 if(params->langevinOn) {
 BigReal bval = params->langevinDamping;
 if(!params->langevinHydrogen &&
 ((a[j].status & HydrogenAtom)!=0)) {
 bval = 0;
 }else if ((a[j].status & LonepairAtom)!=0) {
 bval = 0;
 }else if ((a[j].status & DrudeAtom)!=0) {
 bval = params->langevinDamping;
 }
 a[j].langevinParam = bval;
 }

 }

 // A hydrogen group is "group fixed" only if every atom in it is fixed.
 int size, allfixed;
 for(int j=0; j < n; j+=size) {
 size = a[j].hydrogenGroupSize;
 if ( ! size ) {
 NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
 }
 allfixed = 1;
 for (int k = 0; k < size; ++k ) {
 allfixed = ( allfixed && (a[j+k].atomFixed) );
 }
 for (int k = 0; k < size; ++k ) {
 a[j+k].groupFixed = allfixed ? 1 : 0;
 }
 }

 // Optional per-patch summary line for diagnostics.
 if ( params->outputPatchDetails ) {
 int patchId = pid;
 int numAtomsInPatch = n;
 int numFixedAtomsInPatch = 0;
 int numAtomsInFixedGroupsInPatch = 0;
 for(int j=0; j < n; j++) {
 numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
 numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
 }
 iout << "PATCH_DETAILS:"
 << " on proc " << CkMyPe()
 << " patch " << patchId
 << " atoms " << numAtomsInPatch
 << " fixed_atoms " << numFixedAtomsInPatch
 << " fixed_groups " << numAtomsInFixedGroupsInPatch
 << "\n" << endi;
 }

}
553 
// Assign random thermal velocities at temperature Temp to every atom in
// inAtoms.  Each Cartesian component is drawn as the sum of 12 uniform
// deviates minus 6 (approximately a zero-mean, unit-variance Gaussian),
// scaled by sqrt(kB*T/m).  Massless atoms get zero velocity.
void WorkDistrib::random_velocities_parallel(BigReal Temp,InputAtomList &inAtoms)
{
 int i, j; // Loop counter
 BigReal kbT; // Boltzmann constant * Temp
 BigReal randnum; // Random number from -6.0 to 6.0
 BigReal kbToverM; // sqrt(Kb*Temp/Mass)
 // NOTE(review): the declaration of `simParams` (doxygen line 560, a
 // SimParameters* obtained from the Node object) was dropped by the
 // documentation extraction -- confirm against the repository copy.
 Bool lesOn = simParams->lesOn;
 Random vel_random(simParams->randomSeed);
 int lesReduceTemp = lesOn && simParams->lesReduceTemp;
 BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;

 kbT = Temp*BOLTZMANN;
 int count=0;
 int totalAtoms = inAtoms.size();
 for(i=0;i<totalAtoms;i++)
 {
 Real atomMs=inAtoms[i].mass;

 if (atomMs <= 0.) {
 kbToverM = 0.;
 } else {
 /*
 * lesOn is not supported in MEM_OPT_VERSION, so the original assignment
 * is simplified. --Chao Mei
 */
 //kbToverM = sqrt(kbT *
 //( lesOn && structure->get_fep_type(aid) ? tempFactor : 1.0 ) /
 // atomMs );
 kbToverM = sqrt(kbT * 1.0 / atomMs);
 }
 // x component: sum of 12 uniforms, shifted to zero mean.
 for (randnum=0.0, j=0; j<12; j++)
 {
 randnum += vel_random.uniform();
 }

 randnum -= 6.0;

 inAtoms[i].velocity.x = randnum*kbToverM;

 // y component.
 for (randnum=0.0, j=0; j<12; j++)
 {
 randnum += vel_random.uniform();
 }

 randnum -= 6.0;

 inAtoms[i].velocity.y = randnum*kbToverM;

 // z component.
 for (randnum=0.0, j=0; j<12; j++)
 {
 randnum += vel_random.uniform();
 }

 randnum -= 6.0;

 inAtoms[i].velocity.z = randnum*kbToverM;
 }
}
613 #endif
614 
615 //----------------------------------------------------------------------
616 // This should only be called on node 0.
617 //----------------------------------------------------------------------
// Body of WorkDistrib::createAtomLists(const char *basename) -- the
// signature line (doxygen line 618) was dropped by the documentation
// extraction; the name is grounded in the createAtomLists(basename)
// call in reinitAtoms() below.  Loads coordinates and velocities
// (from basename files or the configured sources), assigns every
// hydrogen/migration group to a patch, fills per-atom data, and returns
// one FullAtomList per patch.  Runs only on node 0; caller owns the
// returned array.
{
 int i;
 StringList *current; // Pointer used to retrieve configuration items
 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
 Node *node = nd.ckLocalBranch();
 PatchMap *patchMap = PatchMap::Object();
 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
 PatchMgr *patchMgr = pm.ckLocalBranch();
 SimParameters *params = node->simParameters;
 Molecule *molecule = node->molecule;
 PDB *pdb = node->pdb;

 int numPatches = patchMap->numPatches();
 int numAtoms = pdb->num_atoms();

 Vector *positions = new Position[numAtoms];
 Vector *velocities = new Velocity[numAtoms];

 // Load coordinates/velocities either from <basename>.coor/.vel files
 // or from the main PDB plus configured velocity sources.
 if ( basename ) {
 if ( params->binaryOutput ) {
 read_binary_file((std::string(basename)+".coor").c_str(), positions, numAtoms);
 read_binary_file((std::string(basename)+".vel").c_str(), velocities, numAtoms);
 } else {
 PDB coorpdb((std::string(basename)+".coor").c_str());
 if ( coorpdb.num_atoms() != numAtoms ) {
 NAMD_die("Incorrect atom count in coordinate pdb file");
 }
 coorpdb.get_all_positions(positions);
 velocities_from_PDB((std::string(basename)+".vel").c_str(), velocities, numAtoms);
 }
 } else {
 pdb->get_all_positions(positions);

 if ( params->initialTemp < 0.0 ) {
 Bool binvels=FALSE;

 // Reading the velocities from a PDB
 current = node->configList->find("velocities");

 if (current == NULL) {
 current = node->configList->find("binvelocities");
 binvels = TRUE;
 }

 if (!binvels) {
 velocities_from_PDB(current->data, velocities, numAtoms);
 }
 else {
 velocities_from_binfile(current->data, velocities, numAtoms);
 }
 }
 else {
 // Random velocities for a given temperature
 random_velocities(params->initialTemp, molecule, velocities, numAtoms);
 }
 }

 // If COMMotion == no, remove center of mass motion
 if (!(params->comMove)) {
 remove_com_motion(velocities, molecule, numAtoms);
 }

 // NOTE(review): the allocation of the per-patch `atoms` array (doxygen
 // line 681) was dropped by the extraction -- `atoms` is indexed and
 // returned below, so its declaration must be restored from the
 // repository copy.

 const Lattice lattice = params->lattice;

 if ( params->staticAtomAssignment ) {
 // Static assignment: sort migration-group parents spatially and cut
 // the sorted list into contiguous per-patch ranges.
 FullAtomList sortAtoms;
 for ( i=0; i < numAtoms; i++ ) {
 HydrogenGroupID &h = molecule->hydrogenGroup[i];
 if ( ! h.isMP ) continue;
 FullAtom a;
 a.id = i;
 // NOTE(review): a field assignment here (doxygen line 692) was
 // dropped by the extraction.
 a.position = positions[h.atomID];
 sortAtoms.add(a);
 }
 int *order = new int[sortAtoms.size()];
 for ( i=0; i < sortAtoms.size(); i++ ) {
 order[i] = i;
 }
 int *breaks = new int[numPatches];
 sortAtomsForPatches(order,breaks,sortAtoms.begin(),
 sortAtoms.size(),numAtoms,
 patchMap->gridsize_c(),
 patchMap->gridsize_b(),
 patchMap->gridsize_a());

 i = 0;
 for ( int pid = 0; pid < numPatches; ++pid ) {
 int iend = breaks[pid];
 for ( ; i<iend; ++i ) {
 FullAtom &sa = sortAtoms[order[i]];
 int mgs = sa.migrationGroupSize;
/*
CkPrintf("patch %d (%d %d %d) has group %d atom %d size %d at %.2f %.2f %.2f\n",
 pid, patchMap->index_a(pid), patchMap->index_b(pid),
 patchMap->index_c(pid), order[i], sa.id, mgs,
 sa.position.x, sa.position.y, sa.position.z);
*/
 // Copy every atom of this migration group into the patch's list.
 for ( int k=0; k<mgs; ++k ) {
 HydrogenGroupID &h = molecule->hydrogenGroup[sa.id + k];
 int aid = h.atomID;
 FullAtom a;
 a.id = aid;
 a.position = positions[aid];
 a.velocity = velocities[aid];
 a.vdwType = molecule->atomvdwtype(aid);
 a.status = molecule->getAtoms()[aid].status;
 a.langevinParam = molecule->langevin_param(aid);
 a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
 // NOTE(review): one field assignment here (doxygen line 731) was
 // dropped by the extraction.
 if(params->rigidBonds != RIGID_NONE) {
 a.rigidBondLength = molecule->rigid_bond_length(aid);
 }else{
 a.rigidBondLength = 0.0;
 }
 atoms[pid].add(a);
 }
 }
CkPrintf("patch %d (%d %d %d) has %d atoms\n",
 pid, patchMap->index_a(pid), patchMap->index_b(pid),
 patchMap->index_c(pid), atoms[pid].size());
 }
 delete [] order;
 delete [] breaks;
 } else
 {
 // split atoms into patches based on migration group and position
 int aid, pid=0;
 for(i=0; i < numAtoms; i++)
 {
 // Assign atoms to patches without splitting hydrogen groups.
 // We know that the hydrogenGroup array is sorted with group parents
 // listed first. Thus, only change the pid if an atom is a group parent.
 HydrogenGroupID &h = molecule->hydrogenGroup[i];
 aid = h.atomID;
 FullAtom a;
 a.id = aid;
 a.position = positions[aid];
 a.velocity = velocities[aid];
 a.vdwType = molecule->atomvdwtype(aid);
 a.status = molecule->getAtoms()[aid].status;
 a.langevinParam = molecule->langevin_param(aid);
 a.hydrogenGroupSize = h.isGP ? h.atomsInGroup : 0;
 // NOTE(review): one field assignment here (doxygen line 765) was
 // dropped by the extraction.
 if(params->rigidBonds != RIGID_NONE) {
 a.rigidBondLength = molecule->rigid_bond_length(aid);
 }else{
 a.rigidBondLength = 0.0;
 }
 if (h.isMP) {
 pid = patchMap->assignToPatch(positions[aid],lattice);
 } // else: don't change pid
 atoms[pid].add(a);
 }
 }

 delete [] positions;
 delete [] velocities;

 // Second pass: per-patch post-processing (image wrapping, masses,
 // charges, partitions, fixed-group flags, optional diagnostics).
 for(i=0; i < numPatches; i++)
 {
 ScaledPosition center(0.5*(patchMap->min_a(i)+patchMap->max_a(i)),
 0.5*(patchMap->min_b(i)+patchMap->max_b(i)),
 0.5*(patchMap->min_c(i)+patchMap->max_c(i)));

 int n = atoms[i].size();
 FullAtom *a = atoms[i].begin();
 int j;
//Modifications for alchemical fep
 Bool alchOn = params->alchOn;
//fepe
 Bool lesOn = params->lesOn;

 Bool pairInteractionOn = params->pairInteractionOn;

 Bool pressureProfileTypes = (params->pressureProfileAtomTypes > 1);

 Transform mother_transform;
 for(j=0; j < n; j++)
 {
 int aid = a[j].id;

 a[j].nonbondedGroupSize = 0; // must be set based on coordinates

 a[j].atomFixed = molecule->is_atom_fixed(aid) ? 1 : 0;
 a[j].fixedPosition = a[j].position;

 if ( a[j].migrationGroupSize ) {
 // Migration-group parent: wrap the group (by its average position
 // when it spans several hydrogen groups) to the nearest image of
 // the patch center.
 if ( a[j].migrationGroupSize != a[j].hydrogenGroupSize ) {
 Position pos = a[j].position;
 int mgs = a[j].migrationGroupSize;
 int c = 1;
 for ( int k=a[j].hydrogenGroupSize; k<mgs;
 k+=a[j+k].hydrogenGroupSize ) {
 pos += a[j+k].position;
 ++c;
 }
 pos *= 1./c;
 mother_transform = a[j].transform; // should be 0,0,0
 pos = lattice.nearest(pos,center,&mother_transform);
 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
 a[j].transform = mother_transform;
 } else {
 a[j].position = lattice.nearest(
 a[j].position, center, &(a[j].transform));
 mother_transform = a[j].transform;
 }
 } else {
 // Group member: reuse the parent's transform.
 a[j].position = lattice.apply_transform(a[j].position,mother_transform);
 a[j].transform = mother_transform;
 }

 a[j].mass = molecule->atommass(aid);
 // Using double precision division for reciprocal mass.
 a[j].recipMass = ( a[j].mass > 0 ? (1. / a[j].mass) : 0 );
 a[j].charge = molecule->atomcharge(aid);

//Modifications for alchemical fep
 if ( alchOn || lesOn || pairInteractionOn || pressureProfileTypes) {
 a[j].partition = molecule->get_fep_type(aid);
 }
 else {
 a[j].partition = 0;
 }
//fepe

 }

 // A hydrogen group is "group fixed" only if every atom in it is fixed.
 int size, allfixed, k;
 for(j=0; j < n; j+=size) {
 size = a[j].hydrogenGroupSize;
 if ( ! size ) {
 NAMD_bug("Mother atom with hydrogenGroupSize of 0!");
 }
 allfixed = 1;
 for ( k = 0; k < size; ++k ) {
 allfixed = ( allfixed && (a[j+k].atomFixed) );
 }
 for ( k = 0; k < size; ++k ) {
 a[j+k].groupFixed = allfixed ? 1 : 0;
 }
 }

 if ( params->outputPatchDetails ) {
 int patchId = i;
 int numAtomsInPatch = n;
 int numFixedAtomsInPatch = 0;
 int numAtomsInFixedGroupsInPatch = 0;
 for(j=0; j < n; j++) {
 numFixedAtomsInPatch += ( a[j].atomFixed ? 1 : 0 );
 numAtomsInFixedGroupsInPatch += ( a[j].groupFixed ? 1 : 0 );
 }
 iout << "PATCH_DETAILS:"
 << " patch " << patchId
 << " atoms " << numAtomsInPatch
 << " fixed_atoms " << numFixedAtomsInPatch
 << " fixed_groups " << numAtomsInFixedGroupsInPatch
 << "\n" << endi;
 }
 }

 return atoms;

}
885 
886 //----------------------------------------------------------------------
887 // This should only be called on node 0.
888 //----------------------------------------------------------------------
// Function body whose signature (doxygen line 889) was dropped by the
// documentation extraction -- presumably WorkDistrib's home-patch
// creation routine; confirm the name against the repository copy.
// Builds the per-patch atom lists, reports the largest patch, and
// creates a home patch for every patch id on this node.
{
 int i;
 PatchMap *patchMap = PatchMap::Object();
 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
 PatchMgr *patchMgr = pm.ckLocalBranch();

 int numPatches = patchMap->numPatches();

 // NOTE(review): the statement obtaining the `atoms` array (doxygen
 // line 898, used and deleted below) was dropped by the extraction.

#ifdef MEM_OPT_VERSION
/* CProxy_Node nd(CkpvAccess(BOCclass_group).node);
 Node *node = nd.ckLocalBranch();
 node->molecule->delEachAtomSigs();
 node->molecule->delMassChargeSpace();
*/
#endif

 // Report the most heavily populated patch for load-balance diagnostics.
 int maxAtoms = -1;
 int maxPatch = -1;
 for(i=0; i < numPatches; i++) {
 int numAtoms = atoms[i].size();
 if ( numAtoms > maxAtoms ) { maxAtoms = numAtoms; maxPatch = i; }
 }
 iout << iINFO << "LARGEST PATCH (" << maxPatch <<
 ") HAS " << maxAtoms << " ATOMS\n" << endi;

 for(i=0; i < numPatches; i++)
 {
 if ( ! ( i % 100 ) )
 {
 DebugM(3,"Created " << i << " patches so far.\n");
 }

 patchMgr->createHomePatch(i,atoms[i]);
 }

 delete [] atoms;
}
929 
 // Function body whose signature (doxygen line 930) was dropped by the
 // documentation extraction -- presumably WorkDistrib's patch
 // distribution routine; confirm the name against the repository copy.
 // Moves every patch that the PatchMap assigns to another node off of
 // this node, then flushes the move messages.
 // ref BOC
 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
 Node *node = nd.ckLocalBranch();
 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
 PatchMgr *patchMgr = pm.ckLocalBranch();
 // ref singleton
 PatchMap *patchMap = PatchMap::Object();

 // Move patches to the proper node
 for(int i=0;i < patchMap->numPatches(); i++)
 {
 if (patchMap->node(i) != node->myid() )
 {
 DebugM(3,"patchMgr->movePatch("
 << i << "," << patchMap->node(i) << ")\n");
 patchMgr->movePatch(i,patchMap->node(i));
 }
 }
 patchMgr->sendMovePatches();
}
951 
952 void WorkDistrib::reinitAtoms(const char *basename) {
953 
954  PatchMap *patchMap = PatchMap::Object();
955  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
956  PatchMgr *patchMgr = pm.ckLocalBranch();
957 
958  int numPatches = patchMap->numPatches();
959 
960  FullAtomList *atoms = createAtomLists(basename);
961 
962  for(int i=0; i < numPatches; i++) {
963  patchMgr->sendAtoms(i,atoms[i]);
964  }
965 
966  delete [] atoms;
967 
968 }
969 
970 
971 //----------------------------------------------------------------------
972 
// Message carrying a packed PatchMap between PEs (see savePatchMap).
// NOTE(review): the varsize payload member declaration (doxygen line
// 975; a char array named patchMapData, referenced below) was dropped
// by the documentation extraction -- confirm against the repository copy.
class PatchMapMsg : public CMessage_PatchMapMsg {
  public:
};
977 
// Function body whose signature (doxygen line 978) was dropped by the
// documentation extraction -- presumably the routine that packs the
// PatchMap on PE 0 and ships it to the rest of the machine via
// savePatchMap; confirm the name against the repository copy.
{
 // Single-PE runs need no distribution.
 if ( CkNumPes() == 1 ) {
 patchMapArrived = true;
 return;
 }

 //Automatically enable spanning tree
 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
 Node *node = nd.ckLocalBranch();
 SimParameters *params = node->simParameters;
 if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
#ifdef NODEAWARE_PROXY_SPANNINGTREE
 || CkNumPes() > CkNumNodes()
 ) && ( CkNumNodes() > 1
#endif
 ) && params->isSendSpanningTreeUnset() )
 // NOTE(review): the controlled statement of the if above (doxygen
 // line 995) was dropped by the extraction.

#ifdef NODEAWARE_PROXY_SPANNINGTREE
 if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
 && params->isRecvSpanningTreeUnset() )
 // NOTE(review): the controlled statement of this if (doxygen line
 // 1000) was also dropped by the extraction.
#endif

 int size = PatchMap::Object()->packSize();

 PatchMapMsg *mapMsg = new (size, 0) PatchMapMsg;

 PatchMap::Object()->pack(mapMsg->patchMapData, size);

 // Start the distribution tree at PE 0 (see savePatchMap).
 CProxy_WorkDistrib workProxy(thisgroup);
 workProxy[0].savePatchMap(mapMsg);
}
1012 
// savePatchMap() is called when the map message is received.  It
// unpacks the PatchMap locally, forwards the message rank-to-rank
// within the node, and relays it down a binary tree of nodes.
// NOTE(review): the signature line (doxygen line 1014, taking a
// PatchMapMsg*) was dropped by the documentation extraction.
{
 // Use a resend to forward messages before processing. Otherwise the
 // map distribution is slow on many CPUs. We need to use a tree
 // rather than a broadcast because some implementations of broadcast
 // generate a copy of the message on the sender for each recipient.
 // This is because MPI doesn't allow re-use of an outstanding buffer.

 if ( CkMyRank() ) patchMapArrived = true;

 if ( patchMapArrived && CkMyPe() ) {
 // NOTE(review): the statement unpacking the map data (doxygen line
 // 1025) was dropped by the extraction.

 //Automatically enable spanning tree
 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
 Node *node = nd.ckLocalBranch();
 SimParameters *params = node->simParameters;
 if( ( PatchMap::Object()->numPatches() <= CkNumPes()/4
#ifdef NODEAWARE_PROXY_SPANNINGTREE
 || CkNumPes() > CkNumNodes()
 ) && ( CkNumNodes() > 1
#endif
 ) && params->isSendSpanningTreeUnset() )
 // NOTE(review): the controlled statement of the if above (doxygen
 // line 1037) was dropped by the extraction.

#ifdef NODEAWARE_PROXY_SPANNINGTREE
 if ( CkNumPes() > CkNumNodes() && CkNumNodes() > 1
 && params->isRecvSpanningTreeUnset() )
 // NOTE(review): the controlled statement of this if (doxygen line
 // 1042) was also dropped by the extraction.
#endif
 }

 // Forward the message to the next rank on this node, or free it at the
 // last rank.
 if ( patchMapArrived ) {
 if ( CkMyRank() + 1 < CkNodeSize(CkMyNode()) ) {
 ((CProxy_WorkDistrib(thisgroup))[CkMyPe()+1]).savePatchMap(msg);
 } else {
 delete msg;
 }
 return;
 }

 patchMapArrived = true;

 // Locate this node's position in the implicit binary distribution tree.
 int self = CkMyNode();
 int range_begin = 0;
 int range_end = CkNumNodes();
 while ( self != range_begin ) {
 ++range_begin;
 int split = range_begin + ( range_end - range_begin ) / 2;
 if ( self < split ) { range_end = split; }
 else { range_begin = split; }
 }
 int send_near = self + 1;
 int send_far = send_near + ( range_end - send_near ) / 2;

 int pids[3];
 int npid = 0;
 if ( send_far < range_end ) pids[npid++] = CkNodeFirst(send_far);
 if ( send_near < send_far ) pids[npid++] = CkNodeFirst(send_near);
 pids[npid++] = CkMyPe(); // always send the message to ourselves
 CProxy_WorkDistrib(thisgroup).savePatchMap(msg,npid,pids);
}
1076 
1077 
// Function body whose signature (doxygen line 1078) was dropped by the
// documentation extraction -- presumably the routine distributing the
// ComputeMap from PE 0 to the rank-0 PE of every other node; confirm
// the name against the repository copy.
{
 // Only one PE per node participates.
 if ( CkMyRank() ) return;

 if ( CkNumNodes() == 1 ) {
 computeMapArrived = true;
 // NOTE(review): a statement here (doxygen line 1084) was dropped by
 // the extraction.
 return;
 }

 if ( ! CkMyPe() ) { // send
 MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, COMPUTEMAPTAG, BUFSIZE);
 ComputeMap::Object()->pack(msg);
 msg->end();
 delete msg;
 } else if ( ! CkMyRank() ) { // receive
 MIStream *msg = CkpvAccess(comm)->newInputStream(0, COMPUTEMAPTAG);
 ComputeMap::Object()->unpack(msg);
 delete msg;
 }

 computeMapArrived = true;
}
1102 
1103 
1104 //----------------------------------------------------------------------
// Choose the patch-grid decomposition: compute the bounding box of the
// atoms, decide the twoAway[XYZ] doubling of the grid, and build the
// patch grid via PatchMap::sizeGrid()/makePatches().
// NOTE(review): the signature line (doxygen line 1105) was stripped by the
// export -- presumably WorkDistrib::patchMapInit; confirm against source.
1106 {
1107  PatchMap *patchMap = PatchMap::Object();
1108  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1109  Node *node = nd.ckLocalBranch();
1110  SimParameters *params = node->simParameters;
1111  Lattice lattice = params->lattice;
1112 
1113  BigReal patchSize = params->patchDimension;
1114 
1115 #ifndef MEM_OPT_VERSION
1116  const int totalAtoms = node->pdb->num_atoms();
1117 #else
1118  const int totalAtoms = node->molecule->numAtoms;
1119 #endif
1120 
1121  ScaledPosition xmin, xmax;
1122 
 // cap the patch count so each patch keeps at least minAtomsPerPatch atoms
1123  double maxNumPatches = 1.e9; // need to adjust fractional values
1124  if ( params->minAtomsPerPatch > 0 )
1125  maxNumPatches = totalAtoms / params->minAtomsPerPatch;
1126 
1127  DebugM(3,"Mapping patches\n");
 // fully periodic cell: the lattice itself bounds the grid, extremes unused
1128  if ( lattice.a_p() && lattice.b_p() && lattice.c_p() ) {
1129  xmin = 0.; xmax = 0.;
1130  }
1131  else if ( params->FMAOn || params->MSMOn || params->FMMOn ) {
1132  // Need to use full box for FMA to match NAMD 1.X results.
1133 #if 0
1134  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r());
1135  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r());
1136  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r());
1137 #endif
1138  node->pdb->find_extremes(lattice);
1139  node->pdb->get_extremes(xmin, xmax);
1140 #if 0
1141  printf("+++ center=%.4f %.4f %.4f\n",
1142  lattice.origin().x, lattice.origin().y, lattice.origin().z);
1143  printf("+++ xmin=%.4f xmax=%.4f\n", xmin.x, xmax.x);
1144  printf("+++ ymin=%.4f ymax=%.4f\n", xmin.y, xmax.y);
1145  printf("+++ zmin=%.4f zmax=%.4f\n", xmin.z, xmax.z);
1146 #endif
1147  // Otherwise, this allows a small number of stray atoms.
1148  }
1149  else {
1150 #if 0
1151  node->pdb->find_extremes(&(xmin.x),&(xmax.x),lattice.a_r(),0.9);
1152  node->pdb->find_extremes(&(xmin.y),&(xmax.y),lattice.b_r(),0.9);
1153  node->pdb->find_extremes(&(xmin.z),&(xmax.z),lattice.c_r(),0.9);
1154 #endif
1155  node->pdb->find_extremes(lattice, 1.0);
1156  node->pdb->get_extremes(xmin, xmax);
1157  iout << iINFO << "ORIGINAL ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
 // shrink the box to the densest fraction, excluding up to ~10000 (and at
 // most 10%) outlying atoms so a few strays don't inflate the grid
1158  double frac = ( (double)totalAtoms - 10000. ) / (double)totalAtoms;
1159  if ( frac < 0.9 ) { frac = 0.9; }
1160  node->pdb->find_extremes(lattice, frac);
1161  node->pdb->get_extremes(xmin, xmax);
1162  iout << iINFO << "ADJUSTED ATOMS MINMAX IS " << xmin << " " << xmax << "\n" << endi;
1163  }
1164 
1165 #if 0
1166  BigReal origin_shift;
1167  origin_shift = lattice.a_r() * lattice.origin();
1168  xmin.x -= origin_shift;
1169  xmax.x -= origin_shift;
1170  origin_shift = lattice.b_r() * lattice.origin();
1171  xmin.y -= origin_shift;
1172  xmax.y -= origin_shift;
1173  origin_shift = lattice.c_r() * lattice.origin();
1174  xmin.z -= origin_shift;
1175  xmax.z -= origin_shift;
1176 #endif
1177 
1178  // SimParameters default is -1 for unset
1179  int twoAwayX = params->twoAwayX;
1180  int twoAwayY = params->twoAwayY;
1181  int twoAwayZ = params->twoAwayZ;
1182 
1183  // SASA implementation is not compatible with twoAway patches
1184  if (params->LCPOOn && patchSize < 32.4) {
1185  if ( twoAwayX > 0 || twoAwayY > 0 || twoAwayZ > 0 ) {
1186  iout << iWARN << "Ignoring twoAway[XYZ] due to LCPO SASA implementation.\n" << endi;
1187  }
1188  twoAwayX = twoAwayY = twoAwayZ = 0;
1189  }
1190 
1191  // if you think you know what you're doing go right ahead
 // an explicit twoAway request overrides the minAtomsPerPatch cap
1192  if ( twoAwayX > 0 ) maxNumPatches = 1.e9;
1193  if ( twoAwayY > 0 ) maxNumPatches = 1.e9;
1194  if ( twoAwayZ > 0 ) maxNumPatches = 1.e9;
1195  if ( params->maxPatches > 0 ) {
1196  maxNumPatches = params->maxPatches;
1197  iout << iINFO << "LIMITING NUMBER OF PATCHES TO " <<
1198  maxNumPatches << "\n" << endi;
1199  }
1200 
1201  int numpes = CkNumPes();
1202  SimParameters *simparam = Node::Object()->simParameters;
 // mapping can be simulated for a different (virtual) pe count
1203  if(simparam->simulateInitialMapping) {
1204  numpes = simparam->simulatedPEs;
1205  delete [] patchMap->nPatchesOnNode;
1206  patchMap->nPatchesOnNode = new int[numpes];
1207  memset(patchMap->nPatchesOnNode, 0, numpes*sizeof(int));
1208  }
1209 
1210 #if defined(NAMD_CUDA) || defined(NAMD_MIC)
1211  // for CUDA be sure there are more patches than pes
1212 
 // progressively enable twoAway splitting (X, then Y, then Z) until the
 // grid has at least one patch per pe
1213  int numPatches = patchMap->sizeGrid(
1214  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1215  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1216  if ( numPatches < numpes && twoAwayX < 0 ) {
1217  twoAwayX = 1;
1218  numPatches = patchMap->sizeGrid(
1219  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1220  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1221  }
1222  if ( numPatches < numpes && twoAwayY < 0 ) {
1223  twoAwayY = 1;
1224  numPatches = patchMap->sizeGrid(
1225  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1226  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1227  }
1228  if ( numPatches < numpes && twoAwayZ < 0 ) {
1229  twoAwayZ = 1;
1230  numPatches = patchMap->sizeGrid(
1231  xmin,xmax,lattice,patchSize,maxNumPatches,params->staticAtomAssignment,
1232  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1233  }
1234  if ( numPatches < numpes ) {
1235  #if defined(NAMD_MIC)
1236  NAMD_die("MIC-enabled NAMD requires at least one patch per thread.");
1237  #endif
1238  }
 // if slightly more patches than pes, try shrinking to an exact multiple
1239  if ( numPatches % numpes && numPatches <= 1.4 * numpes ) {
1240  int exactFit = numPatches - numPatches % numpes;
1241  int newNumPatches = patchMap->sizeGrid(
1242  xmin,xmax,lattice,patchSize,exactFit,params->staticAtomAssignment,
1243  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1244  if ( newNumPatches == exactFit ) {
1245  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1246  maxNumPatches = exactFit;
1247  }
1248  }
1249 
1250  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1251  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1252  twoAwayX>0 ? 2 : 1, twoAwayY>0 ? 2 : 1, twoAwayZ>0 ? 2 : 1);
1253 
1254 #else
1255 
 // CPU path: reserve pes 0/1 and (MEM_OPT) output pes from patch hosting
1256  int availPes = numpes;
1257  if ( params->noPatchesOnZero && numpes > 1 ) {
1258  availPes -= 1;
1259  if(params->noPatchesOnOne && numpes > 2)
1260  availPes -= 1;
1261  }
1262 #ifdef MEM_OPT_VERSION
1263  if(params->noPatchesOnOutputPEs && numpes - params->numoutputprocs >2)
1264  {
1265  availPes -= params->numoutputprocs;
 // avoid double-counting pes already excluded above
1266  if ( params->noPatchesOnZero && numpes > 1 && isOutputProcessor(0)){
1267  availPes++;
1268  }
1269  if ( params->noPatchesOnOne && numpes > 2 && isOutputProcessor(1)){
1270  availPes++;
1271  }
1272  }
1273 #endif
1274 
 // start with full twoAway splitting, then progressively disable
 // (Z, then Y, then X) while the grid is too large for the pe count
1275  int numPatches = patchMap->sizeGrid(
1276  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1277  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1278  if ( ( numPatches > (0.3*availPes) || numPatches > maxNumPatches
1279  ) && twoAwayZ < 0 ) {
1280  twoAwayZ = 0;
1281  numPatches = patchMap->sizeGrid(
1282  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1283  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1284  }
1285  if ( ( numPatches > (0.6*availPes) || numPatches > maxNumPatches
1286  ) && twoAwayY < 0 ) {
1287  twoAwayY = 0;
1288  numPatches = patchMap->sizeGrid(
1289  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1290  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1291  }
1292  if ( ( numPatches > availPes || numPatches > maxNumPatches
1293  ) && twoAwayX < 0 ) {
1294  twoAwayX = 0;
1295  numPatches = patchMap->sizeGrid(
1296  xmin,xmax,lattice,patchSize,1.e9,params->staticAtomAssignment,
1297  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1298  }
 // slightly oversubscribed: try one patch per available pe
1299  if ( numPatches > availPes && numPatches <= (1.4*availPes) && availPes <= maxNumPatches ) {
1300  int newNumPatches = patchMap->sizeGrid(
1301  xmin,xmax,lattice,patchSize,availPes,params->staticAtomAssignment,
1302  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1303  if ( newNumPatches <= availPes && numPatches <= (1.4*newNumPatches) ) {
1304  iout << iINFO << "REDUCING NUMBER OF PATCHES TO IMPROVE LOAD BALANCE\n" << endi;
1305  maxNumPatches = availPes;
1306  }
1307  }
1308 
1309  patchMap->makePatches(xmin,xmax,lattice,patchSize,maxNumPatches,
1310  params->staticAtomAssignment, params->replicaUniformPatchGrids, params->LCPOOn,
1311  twoAwayX ? 2 : 1, twoAwayY ? 2 : 1, twoAwayZ ? 2 : 1);
1312 
1313 #endif
1314 
1315 }
1316 
1317 
1318 //----------------------------------------------------------------------
// Assign every patch to a node (topology-aware partitioner on Blue Gene
// when applicable, otherwise the space-filling-curve mapping), then
// verify that the per-node atom counts sum to the molecule's total.
// NOTE(review): the signature line (doxygen line 1319) was stripped by the
// export -- presumably WorkDistrib::assignNodeToPatch (see NAMD_die below).
1320 {
1321  PatchMap *patchMap = PatchMap::Object();
1322  int nNodes = Node::Object()->numNodes();
1323  SimParameters *simparam = Node::Object()->simParameters;
1324  if(simparam->simulateInitialMapping) {
1325  nNodes = simparam->simulatedPEs;
1326  }
1327 
1328 #if (CMK_BLUEGENEP | CMK_BLUEGENEL) && USE_TOPOMAP
 // try the torus-grid recursive bisection first; fall through to the
 // space-filling curve if the partitioner declines
1329  TopoManager tmgr;
1330  int numPes = tmgr.getDimNX() * tmgr.getDimNY() * tmgr.getDimNZ();
1331  if (numPes > patchMap->numPatches() && (assignPatchesTopoGridRecBisection() > 0)) {
1332  CkPrintf ("Blue Gene/L topology partitioner finished successfully \n");
1333  }
1334  else
1335 #endif
1336  assignPatchesSpaceFillingCurve();
1337 
 // tally atoms per node as a consistency check on the assignment
1338  int *nAtoms = new int[nNodes];
1339  int numAtoms=0;
1340  int i;
1341  for(i=0; i < nNodes; i++)
1342  nAtoms[i] = 0;
1343 
1344  for(i=0; i < patchMap->numPatches(); i++)
1345  {
1346  // iout << iINFO << "Patch " << i << " has "
1347  // << patchMap->patch(i)->getNumAtoms() << " atoms and "
1348  // << patchMap->patch(i)->getNumAtoms() *
1349  // patchMap->patch(i)->getNumAtoms()
1350  // << " pairs.\n" << endi;
1351 #ifdef MEM_OPT_VERSION
1352  numAtoms += patchMap->numAtoms(i);
1353  nAtoms[patchMap->node(i)] += patchMap->numAtoms(i);
1354 #else
1355  if (patchMap->patch(i)) {
1356  numAtoms += patchMap->patch(i)->getNumAtoms();
1357  nAtoms[patchMap->node(i)] += patchMap->patch(i)->getNumAtoms();
1358  }
1359 #endif
1360  }
1361 
 // a mismatch means patches were lost or double-counted -- fatal
1362  if ( numAtoms != Node::Object()->molecule->numAtoms ) {
1363  for(i=0; i < nNodes; i++)
1364  iout << iINFO << nAtoms[i] << " atoms assigned to node " << i << "\n" << endi;
1365  iout << iINFO << "Assigned " << numAtoms << " atoms but expected " << Node::Object()->molecule->numAtoms << "\n" << endi;
1366  NAMD_die("Incorrect atom count in WorkDistrib::assignNodeToPatch\n");
1367  }
1368 
1369  delete [] nAtoms;
1370 
1371  // PatchMap::Object()->printPatchMap();
1372 }
1373 
1374 //----------------------------------------------------------------------
1375 // void WorkDistrib::assignPatchesSlices()
1376 // {
1377 // int pid;
1378 // int assignedNode = 0;
1379 // PatchMap *patchMap = PatchMap::Object();
1380 // Node *node = CLocalBranch(Node, CkpvAccess(BOCclass_group).node);
1381 
1382 // int *numAtoms = new int[node->numNodes()];
1383 // for (int i=0; i<node->numNodes(); i++) {
1384 // numAtoms[i] = 0;
1385 // }
1386 
1387 // // Assign patch to node with least atoms assigned.
1388 // for(pid=0; pid < patchMap->numPatches(); pid++) {
1389 // assignedNode = 0;
1390 // for (i=1; i < node->numNodes(); i++) {
1391 // if (numAtoms[i] < numAtoms[assignedNode]) assignedNode = i;
1392 // }
1393 // patchMap->assignNode(pid, assignedNode);
1394 // numAtoms[assignedNode] += patchMap->patch(pid)->getNumAtoms();
1395 
1396 // /*
1397 // iout << iINFO << "Patch (" << pid << ") has "
1398 // << patchMap->patch(pid)->getNumAtoms()
1399 // << " atoms: Assigned to Node(" << assignedNode << ")\n"
1400 // << endi;
1401 // */
1402 // }
1403 
1404 // delete[] numAtoms;
1405 // }
1406 
1407 //----------------------------------------------------------------------
1408 void WorkDistrib::assignPatchesToLowestLoadNode()
1409 {
1410  int pid;
1411  int assignedNode = 0;
1412  PatchMap *patchMap = PatchMap::Object();
1413  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1414  Node *node = nd.ckLocalBranch();
1415  SimParameters *simParams = node->simParameters;
1416  int ncpus = node->numNodes();
1417  if(simParams->simulateInitialMapping) {
1418  ncpus = simParams->simulatedPEs;
1419  }
1420 
1421  int *load = new int[ncpus];
1422  int *assignedNodes = new int[patchMap->numPatches()];
1423  for (int i=0; i<ncpus; i++) {
1424  load[i] = 0;
1425  }
1426  CkPrintf("assignPatchesToLowestLoadNode\n");
1427  int defaultNode = 0;
1428  if ( simParams->noPatchesOnZero && ncpus > 1 ){
1429  defaultNode = 1;
1430  if( simParams->noPatchesOnOne && ncpus > 2)
1431  defaultNode = 2;
1432  }
1433  // Assign patch to node with least atoms assigned.
1434  for(pid=0; pid < patchMap->numPatches(); pid++) {
1435  assignedNode = defaultNode;
1436  for (int i=assignedNode + 1; i < ncpus; i++) {
1437  if (load[i] < load[assignedNode]) assignedNode = i;
1438  }
1439  assignedNodes[pid] = assignedNode;
1440 #ifdef MEM_OPT_VERSION
1441  load[assignedNode] += patchMap->numAtoms(pid) + 1;
1442 #else
1443  load[assignedNode] += patchMap->patch(pid)->getNumAtoms() + 1;
1444 #endif
1445  }
1446 
1447  delete[] load;
1448  sortNodesAndAssign(assignedNodes);
1449  delete[] assignedNodes;
1450 }
1451 
1452 //----------------------------------------------------------------------
1453 void WorkDistrib::assignPatchesBitReversal()
1454 {
1455  int pid;
1456  PatchMap *patchMap = PatchMap::Object();
1457  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1458  Node *node = nd.ckLocalBranch();
1459  SimParameters *simparam = node->simParameters;
1460 
1461  int ncpus = node->numNodes();
1462  if(simparam->simulateInitialMapping) {
1463  ncpus = simparam->simulatedPEs;
1464  }
1465  int npatches = patchMap->numPatches();
1466  if ( ncpus <= npatches )
1467  NAMD_bug("WorkDistrib::assignPatchesBitReversal called improperly");
1468 
1469  SortableResizeArray<int> seq(ncpus);
1470  // avoid using node 0 (reverse of 0 is 0 so start at 1)
1471  for ( int i = 1; i < ncpus; ++i ) {
1472  seq[i-1] = peDiffuseOrdering[i];
1473  }
1474 
1475  // extract and sort patch locations
1476  sortNodesAndAssign(seq.begin());
1477  if ( ncpus > 2*npatches ) sortNodesAndAssign(seq.begin()+npatches, 1);
1478 }
1479 
1480 //----------------------------------------------------------------------
// Orders processors by the mean patch-grid position (a, then b, then c)
// of the patches assigned to them, so that node order can be matched to
// grid order in sortNodesAndAssign().
// FIX(review): the member declaration `int npatches;` was missing from
// this listing (doxygen export dropped the hyperlinked line) although
// every method divides by npatches -- restored here.
struct nodesort {
  int node;      // pe/node id (-1 until assigned)
  int a_total;   // sum of patch grid index_a over this node's patches
  int b_total;   // sum of patch grid index_b
  int c_total;   // sum of patch grid index_c
  int npatches;  // number of patches assigned to this node
  nodesort() : node(-1),a_total(0),b_total(0),c_total(0),npatches(0) { ; }
  // equal when the mean grid coordinates match on all three axes
  int operator==(const nodesort &o) const {
    float a1 = ((float)a_total)/((float)npatches);
    float a2 = ((float)o.a_total)/((float)o.npatches);
    float b1 = ((float)b_total)/((float)npatches);
    float b2 = ((float)o.b_total)/((float)o.npatches);
    float c1 = ((float)c_total)/((float)npatches);
    float c2 = ((float)o.c_total)/((float)o.npatches);
    return ((a1 == a2) && (b1 == b2) && (c1 == c2));
  }
  // lexicographic order on mean (a, b, c) grid coordinates
  int operator<(const nodesort &o) const {
    float a1 = ((float)a_total)/((float)npatches);
    float a2 = ((float)o.a_total)/((float)o.npatches);
    float b1 = ((float)b_total)/((float)npatches);
    float b2 = ((float)o.b_total)/((float)o.npatches);
    float c1 = ((float)c_total)/((float)npatches);
    float c2 = ((float)o.c_total)/((float)o.npatches);
    return ( (a1 < a2) || ((a1 == a2) && (b1 < b2)) ||
        ((a1 == a2) && (b1 == b2) && (c1 < c2)) );
  }
};
1508 
// Map the per-patch node choices in assignedNode[] onto the compact pe
// ordering: nodes are sorted by the mean grid position of their patches
// (via nodesort) and then renamed so that grid order follows
// peCompactOrdering.  Writes the result into the PatchMap.
1509 void WorkDistrib::sortNodesAndAssign(int *assignedNode, int baseNodes) {
1510  // if baseNodes is zero (default) then set both nodes and basenodes
1511  // if baseNodes is nonzero then this is a second call to set basenodes only
1512  int i, pid;
1513  PatchMap *patchMap = PatchMap::Object();
1514  int npatches = patchMap->numPatches();
1515  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1516  Node *node = nd.ckLocalBranch();
1517  int nnodes = node->numNodes();
1518  SimParameters *simparam = node->simParameters;
1519  if(simparam->simulateInitialMapping) {
1520  nnodes = simparam->simulatedPEs;
1521  }
1522 
 // accumulate per-node patch counts and grid-coordinate sums
1523  ResizeArray<nodesort> allnodes(nnodes);
1524  for ( i=0; i < nnodes; ++i ) {
1525  allnodes[i].node = i;
1526  }
1527  for ( pid=0; pid<npatches; ++pid ) {
1528  // iout << pid << " " << assignedNode[pid] << "\n" << endi;
1529  allnodes[assignedNode[pid]].npatches++;
1530  allnodes[assignedNode[pid]].a_total += patchMap->index_a(pid);
1531  allnodes[assignedNode[pid]].b_total += patchMap->index_b(pid);
1532  allnodes[assignedNode[pid]].c_total += patchMap->index_c(pid);
1533  }
 // sort only the nodes that actually received patches, by mean position
1534  SortableResizeArray<nodesort> usednodes(nnodes);
1535  usednodes.resize(0);
1536  for ( i=0; i < nnodes; ++i ) {
1537  if ( allnodes[i].npatches ) usednodes.add(allnodes[i]);
1538  }
1539  usednodes.sort();
 // rename: the i2-th node in grid order gets the i2-th pe (in compact
 // ordering) that was used -- NOTE(review): the gate tests
 // allnodes[pe].npatches while the write targets usednodes[i2].node;
 // subtle but intentional-looking; verify against upstream before changing
1540  int i2 = 0;
1541  for ( i=0; i < nnodes; ++i ) {
1542  int pe = peCompactOrdering[i];
1543  if ( allnodes[pe].npatches ) allnodes[usednodes[i2++].node].node = pe;
1544  }
1545 
 // commit the (renamed) assignments into the PatchMap
1546  for ( pid=0; pid<npatches; ++pid ) {
1547  // iout << pid << " " << allnodes[assignedNode[pid]].node << "\n" << endi;
1548  if ( ! baseNodes ) {
1549  patchMap->assignNode(pid, allnodes[assignedNode[pid]].node);
1550  }
1551  patchMap->assignBaseNode(pid, allnodes[assignedNode[pid]].node);
1552  }
1553 }
1554 
1555 //----------------------------------------------------------------------
1556 void WorkDistrib::assignPatchesRoundRobin()
1557 {
1558  int pid;
1559  PatchMap *patchMap = PatchMap::Object();
1560  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1561  Node *node = nd.ckLocalBranch();
1562  SimParameters *simparam = node->simParameters;
1563  int ncpus = node->numNodes();
1564  if(simparam->simulateInitialMapping) {
1565  ncpus = simparam->simulatedPEs;
1566  }
1567  int *assignedNode = new int[patchMap->numPatches()];
1568 
1569  for(pid=0; pid < patchMap->numPatches(); pid++) {
1570  assignedNode[pid] = pid % ncpus;
1571  }
1572 
1573  sortNodesAndAssign(assignedNode);
1574  delete [] assignedNode;
1575 }
1576 
1577 //----------------------------------------------------------------------
1578 void WorkDistrib::assignPatchesRecursiveBisection()
1579 {
1580  PatchMap *patchMap = PatchMap::Object();
1581  int *assignedNode = new int[patchMap->numPatches()];
1582  SimParameters *simParams = Node::Object()->simParameters;
1583  int numNodes = Node::Object()->numNodes();
1584  if(simParams->simulateInitialMapping) {
1585  numNodes = simParams->simulatedPEs;
1586  }
1587 
1588  int usedNodes = numNodes;
1589  int unusedNodes = 0;
1590  CkPrintf("assignPatchesRecursiveBisection\n");
1591  if ( simParams->noPatchesOnZero && numNodes > 1 ){
1592  usedNodes -= 1;
1593  if(simParams->noPatchesOnOne && numNodes > 2)
1594  usedNodes -= 1;
1595  }
1596  unusedNodes = numNodes - usedNodes;
1597  RecBisection recBisec(usedNodes,PatchMap::Object());
1598  if ( recBisec.partition(assignedNode) ) {
1599  if ( unusedNodes !=0 ) {
1600  for ( int i=0; i<patchMap->numPatches(); ++i ) {
1601  assignedNode[i] += unusedNodes;
1602  }
1603  }
1604  sortNodesAndAssign(assignedNode);
1605  delete [] assignedNode;
1606  } else {
1607  //free the array here since a same array will be allocated
1608  //in assignPatchesToLowestLoadNode function, thus reducting
1609  //temporary memory usage
1610  delete [] assignedNode;
1611 
1612  iout << iWARN
1613  << "WorkDistrib: Recursive bisection fails, "
1614  << "invoking space-filling curve algorithm\n";
1615  assignPatchesSpaceFillingCurve();
1616  }
1617 }
1618 
1619 // class to re-order dimensions in decreasing size
1621  TopoManager tmgr;
1625  int fixpe(int pe) { // compensate for lame fallback topology information
1626  return CmiGetFirstPeOnPhysicalNode(CmiPhysicalNodeID(pe));
1627  }
1629 #if CMK_BLUEGENEQ
1630  int na=tmgr.getDimNA();
1631  int nb=tmgr.getDimNB();
1632  int nc=tmgr.getDimNC();
1633  int nd=tmgr.getDimND();
1634  int ne=tmgr.getDimNE();
1635 #else
1636  int na=tmgr.getDimNX();
1637  int nb=tmgr.getDimNY();
1638  int nc=tmgr.getDimNZ();
1639  int nd=1;
1640  int ne=1;
1641 #endif
1642  ResizeArray<int> a_flags(na);
1643  ResizeArray<int> b_flags(nb);
1644  ResizeArray<int> c_flags(nc);
1645  ResizeArray<int> d_flags(nd);
1646  ResizeArray<int> e_flags(ne);
1647  for ( int i=0; i<na; ++i ) { a_flags[i] = 0; }
1648  for ( int i=0; i<nb; ++i ) { b_flags[i] = 0; }
1649  for ( int i=0; i<nc; ++i ) { c_flags[i] = 0; }
1650  for ( int i=0; i<nd; ++i ) { d_flags[i] = 0; }
1651  for ( int i=0; i<ne; ++i ) { e_flags[i] = 0; }
1652  int npes = CkNumPes();
1653  for ( int pe=0; pe<npes; ++pe ) {
1654  int a,b,c,d,e,t;
1655 #if CMK_BLUEGENEQ
1656  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1657 #else
1658  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1659  d=0; e=0;
1660 #endif
1661  if ( a < 0 || a >= na ) NAMD_bug("inconsistent torus topology!");
1662  if ( b < 0 || b >= nb ) NAMD_bug("inconsistent torus topology!");
1663  if ( c < 0 || c >= nc ) NAMD_bug("inconsistent torus topology!");
1664  if ( d < 0 || d >= nd ) NAMD_bug("inconsistent torus topology!");
1665  if ( e < 0 || e >= ne ) NAMD_bug("inconsistent torus topology!");
1666  a_flags[a] = 1;
1667  b_flags[b] = 1;
1668  c_flags[c] = 1;
1669  d_flags[d] = 1;
1670  e_flags[e] = 1;
1671  }
1672  iout << iINFO << "TORUS A SIZE " << na << " USING";
1673  for ( int i=0; i<na; ++i ) { if ( a_flags[i] ) iout << " " << i; }
1674  iout << "\n" << endi;
1675  iout << iINFO << "TORUS B SIZE " << nb << " USING";
1676  for ( int i=0; i<nb; ++i ) { if ( b_flags[i] ) iout << " " << i; }
1677  iout << "\n" << endi;
1678  iout << iINFO << "TORUS C SIZE " << nc << " USING";
1679  for ( int i=0; i<nc; ++i ) { if ( c_flags[i] ) iout << " " << i; }
1680  iout << "\n" << endi;
1681 #if CMK_BLUEGENEQ
1682  iout << iINFO << "TORUS D SIZE " << nd << " USING";
1683  for ( int i=0; i<nd; ++i ) { if ( d_flags[i] ) iout << " " << i; }
1684  iout << "\n" << endi;
1685  iout << iINFO << "TORUS E SIZE " << ne << " USING";
1686  for ( int i=0; i<ne; ++i ) { if ( e_flags[i] ) iout << " " << i; }
1687  iout << "\n" << endi;
1688 #endif
1689  // find most compact representation of our subset
1690  a_rot = b_rot = c_rot = d_rot = e_rot = 0;
1691  a_mod = na; b_mod = nb; c_mod = nc; d_mod = nd; e_mod = ne;
1692 #if CMK_BLUEGENEQ
1693  if ( tmgr.absA(na) == 0 ) // torus
1694 #else
1695  if ( tmgr.absX(na) == 0 ) // torus
1696 #endif
1697  for ( int i=0, gaplen=0, gapstart=0; i<2*na; ++i ) {
1698  if ( a_flags[i%na] ) gapstart = i+1;
1699  else if ( i - gapstart >= gaplen ) {
1700  a_rot = 2*na-i-1; gaplen = i - gapstart;
1701  }
1702  }
1703 #if CMK_BLUEGENEQ
1704  if ( tmgr.absB(nb) == 0 ) // torus
1705 #else
1706  if ( tmgr.absY(nb) == 0 ) // torus
1707 #endif
1708  for ( int i=0, gaplen=0, gapstart=0; i<2*nb; ++i ) {
1709  if ( b_flags[i%nb] ) gapstart = i+1;
1710  else if ( i - gapstart >= gaplen ) {
1711  b_rot = 2*nb-i-1; gaplen = i - gapstart;
1712  }
1713  }
1714 #if CMK_BLUEGENEQ
1715  if ( tmgr.absC(nc) == 0 ) // torus
1716 #else
1717  if ( tmgr.absZ(nc) == 0 ) // torus
1718 #endif
1719  for ( int i=0, gaplen=0, gapstart=0; i<2*nc; ++i ) {
1720  if ( c_flags[i%nc] ) gapstart = i+1;
1721  else if ( i - gapstart >= gaplen ) {
1722  c_rot = 2*nc-i-1; gaplen = i - gapstart;
1723  }
1724  }
1725 #if CMK_BLUEGENEQ
1726  if ( tmgr.absD(nd) == 0 ) // torus
1727  for ( int i=0, gaplen=0, gapstart=0; i<2*nd; ++i ) {
1728  if ( d_flags[i%nd] ) gapstart = i+1;
1729  else if ( i - gapstart >= gaplen ) {
1730  d_rot = 2*nd-i-1; gaplen = i - gapstart;
1731  }
1732  }
1733  if ( tmgr.absE(ne) == 0 ) // torus
1734  for ( int i=0, gaplen=0, gapstart=0; i<2*ne; ++i ) {
1735  if ( e_flags[i%ne] ) gapstart = i+1;
1736  else if ( i - gapstart >= gaplen ) {
1737  e_rot = 2*ne-i-1; gaplen = i - gapstart;
1738  }
1739  }
1740 #endif
1741  // order dimensions by length
1742  int a_min=na, a_max=-1;
1743  int b_min=nb, b_max=-1;
1744  int c_min=nc, c_max=-1;
1745  int d_min=nd, d_max=-1;
1746  int e_min=ne, e_max=-1;
1747  for ( int pe=0; pe<npes; ++pe ) {
1748  int a,b,c,d,e,t;
1749 #if CMK_BLUEGENEQ
1750  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1751 #else
1752  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1753  d=0; e=0;
1754 #endif
1755  a = (a+a_rot)%a_mod;
1756  b = (b+b_rot)%b_mod;
1757  c = (c+c_rot)%c_mod;
1758  d = (d+d_rot)%d_mod;
1759  e = (e+e_rot)%e_mod;
1760  if ( a < a_min ) a_min = a;
1761  if ( b < b_min ) b_min = b;
1762  if ( c < c_min ) c_min = c;
1763  if ( d < d_min ) d_min = d;
1764  if ( e < e_min ) e_min = e;
1765  if ( a > a_max ) a_max = a;
1766  if ( b > b_max ) b_max = b;
1767  if ( c > c_max ) c_max = c;
1768  if ( d > d_max ) d_max = d;
1769  if ( e > e_max ) e_max = e;
1770  }
1771  int a_len = a_max - a_min + 1;
1772  int b_len = b_max - b_min + 1;
1773  int c_len = c_max - c_min + 1;
1774  int d_len = d_max - d_min + 1;
1775  int e_len = e_max - e_min + 1;
1776  int lensort[5];
1777  lensort[0] = (a_len << 3) + 0;
1778  lensort[1] = (b_len << 3) + 1;
1779  lensort[2] = (c_len << 3) + 2;
1780  lensort[3] = (d_len << 3) + 3;
1781  lensort[4] = (e_len << 3) + 4;
1782  // CkPrintf("TopoManagerWrapper lensort before %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1783  std::sort(lensort, lensort+5);
1784  // CkPrintf("TopoManagerWrapper lensort after %d %d %d %d %d\n", lensort[0] & 7, lensort[1] & 7, lensort[2] & 7, lensort[3] & 7, lensort[4] & 7);
1785  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 0 ) a_dim = 4-i; }
1786  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 1 ) b_dim = 4-i; }
1787  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 2 ) c_dim = 4-i; }
1788  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 3 ) d_dim = 4-i; }
1789  for ( int i=0; i<5; ++i ) { if ( (lensort[i] & 7) == 4 ) e_dim = 4-i; }
1790 #if 0
1791  if ( a_len >= b_len && a_len >= c_len ) {
1792  a_dim = 0;
1793  if ( b_len >= c_len ) {
1794  b_dim = 1; c_dim = 2;
1795  } else {
1796  b_dim = 2; c_dim = 1;
1797  }
1798  } else if ( b_len >= a_len && b_len >= c_len ) {
1799  b_dim = 0;
1800  if ( a_len >= c_len ) {
1801  a_dim = 1; c_dim = 2;
1802  } else {
1803  a_dim = 2; c_dim = 1;
1804  }
1805  } else { // c is longest
1806  c_dim = 0;
1807  if ( a_len >= b_len ) {
1808  a_dim = 1; b_dim = 2;
1809  } else {
1810  a_dim = 2; b_dim = 1;
1811  }
1812  }
1813 #endif
1814  iout << iINFO << "TORUS MINIMAL MESH SIZE IS " << a_len << " BY " << b_len << " BY " << c_len
1815 #if CMK_BLUEGENEQ
1816  << " BY " << d_len << " BY " << e_len
1817 #endif
1818  << "\n" << endi;
1819  // CkPrintf("TopoManagerWrapper dims %d %d %d %d %d\n", a_dim, b_dim, c_dim, d_dim, e_dim);
1820  }
1821  void coords(int pe, int *crds) {
1822  int a,b,c,d,e,t;
1823 #if CMK_BLUEGENEQ
1824  tmgr.rankToCoordinates(fixpe(pe),a,b,c,d,e,t);
1825 #else
1826  tmgr.rankToCoordinates(fixpe(pe),a,b,c,t);
1827  d=0; e=0;
1828 #endif
1829  if ( a_dim < 3 ) crds[a_dim] = (a+a_rot)%a_mod;
1830  if ( b_dim < 3 ) crds[b_dim] = (b+b_rot)%b_mod;
1831  if ( c_dim < 3 ) crds[c_dim] = (c+c_rot)%c_mod;
1832  if ( d_dim < 3 ) crds[d_dim] = (d+d_rot)%d_mod;
1833  if ( e_dim < 3 ) crds[e_dim] = (e+e_rot)%e_mod;
1834  }
1835  int coord(int pe, int dim) {
1836  int crds[3];
1837  coords(pe,crds);
1838  return crds[dim];
1839  }
 // Comparator ordering pes lexicographically by their remapped torus
 // coordinates in the priority given by sortdims[], breaking ties with
 // the compact pe ordering index for determinism.
 // NOTE(review): the struct header and remaining members (doxygen lines
 // 1840-1843) were stripped by the export.
1842  const int *sortdims;
1844  bool operator() (int pe1, int pe2) const {
1845  int crds1[3], crds2[3];
1846  tmgr.coords(pe1,crds1);
1847  tmgr.coords(pe2,crds2);
1848  for ( int i=0; i<3; ++i ) {
1849  int d = sortdims[i];
1850  if ( crds1[d] != crds2[d] ) return ( crds1[d] < crds2[d] );
1851  }
 // tie-break so the sort is a strict total order across identical coords
1852  const int *index = WorkDistrib::peCompactOrderingIndex;
1853  return ( index[pe1] < index[pe2] );
1854  }
1855  };
 // Sort the pe range [node_begin,node_end) by torus coordinates with
 // splitdim as primary key, then return a split point near the middle
 // that falls on a physical-node boundary.  Returns node_begin when the
 // range is empty or cannot be split.
1856  int* sortAndSplit(int *node_begin, int *node_end, int splitdim) {
1857  if ( node_begin == node_end ) return node_begin;
 // find the coordinate extents of the pes in this range
1858  int tmins[3], tmaxs[3], tlens[3], sortdims[3];
1859  coords(*node_begin, tmins);
1860  coords(*node_begin, tmaxs);
1861  for ( int *peitr = node_begin; peitr != node_end; ++peitr ) {
1862  int tvals[3];
1863  coords(*peitr, tvals);
1864  for ( int i=0; i<3; ++i ) {
1865  if ( tvals[i] < tmins[i] ) tmins[i] = tvals[i];
1866  if ( tvals[i] > tmaxs[i] ) tmaxs[i] = tvals[i];
1867  }
1868  }
1869  for ( int i=0; i<3; ++i ) {
1870  tlens[i] = tmaxs[i] - tmins[i];
1871  }
 // sort by splitdim first, then by the longer of the two other dims
1872  sortdims[0] = splitdim;
1873  for ( int i=0, j=0; i<3; ++i ) {
1874  if ( i != splitdim ) sortdims[++j] = i;
1875  }
1876  if ( tlens[sortdims[1]] < tlens[sortdims[2]] ) {
1877  int tmp = sortdims[1];
1878  sortdims[1] = sortdims[2];
1879  sortdims[2] = tmp;
1880  }
1881  std::sort(node_begin,node_end,pe_sortop_topo(*this,sortdims));
1882  int *nodes = node_begin;
1883  int nnodes = node_end - node_begin;
1884  int i_split = 0;
1885 #if 0
1886  int c_split = coord(nodes[0],splitdim);
1887  for ( int i=0; i<nnodes; ++i ) {
1888  if ( coord(nodes[i],splitdim) != c_split ) {
1889  int mid = (nnodes+1)/2;
1890  if ( abs(i-mid) < abs(i_split-mid) ) {
1891  i_split = i;
1892  c_split = coord(i,splitdim);
1893  }
1894  else break;
1895  }
1896  }
1897 #endif
 // choose the physical-node boundary closest to the midpoint of the
 // sorted range so both halves keep whole physical nodes
1898  for ( int i=0; i<nnodes; ++i ) {
1899  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
1900  int mid = (nnodes+1)/2;
1901  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
1902  else break;
1903  }
1904  }
1905  return ( node_begin + i_split );
1906  }
1907 };
1908 
 // Comparator producing a serpentine (boustrophedon) ordering of patches
 // along grid axis a: direction along b flips on odd a rows, and along c
 // on odd (a+b) parity, so consecutive patches stay spatially adjacent.
 // NOTE(review): the struct header and pmap member (doxygen lines
 // 1909-1911) were stripped by the export.
1912  inline bool operator() (int p1, int p2) const {
1913  int a1 = pmap->index_a(p1);
1914  int a2 = pmap->index_a(p2);
1915  if ( a1 < a2 ) return true;
1916  if ( a1 > a2 ) return false;
 // odd rows traverse b backwards
1917  int dir = ( (a1 & 1) ? -1 : 1 );
1918  int b1 = pmap->index_b(p1);
1919  int b2 = pmap->index_b(p2);
1920  if ( b1 * dir < b2 * dir ) return true;
1921  if ( b1 * dir > b2 * dir ) return false;
1922  dir *= ( (b1 & 1) ? -1 : 1 );
1923  int c1 = pmap->index_c(p1);
1924  int c2 = pmap->index_c(p2);
1925  if ( c1 * dir < c2 * dir ) return true;
1926  return false;
1927  }
1928 };
1929 
 // Serpentine patch ordering with grid axis b as the primary key
 // (companion of the curve_a comparator above, axes a and b swapped).
 // NOTE(review): the struct header and pmap member were stripped by the
 // export.
1932  inline bool operator() (int p1, int p2) const {
1933  int a1 = pmap->index_b(p1);
1934  int a2 = pmap->index_b(p2);
1935  if ( a1 < a2 ) return true;
1936  if ( a1 > a2 ) return false;
1937  int dir = ( (a1 & 1) ? -1 : 1 );
1938  int b1 = pmap->index_a(p1);
1939  int b2 = pmap->index_a(p2);
1940  if ( b1 * dir < b2 * dir ) return true;
1941  if ( b1 * dir > b2 * dir ) return false;
1942  dir *= ( (b1 & 1) ? -1 : 1 );
1943  int c1 = pmap->index_c(p1);
1944  int c2 = pmap->index_c(p2);
1945  if ( c1 * dir < c2 * dir ) return true;
1946  return false;
1947  }
1948 };
1950 
 // Serpentine patch ordering with grid axis c as the primary key
 // (companion of the curve_a/curve_b comparators above).
 // NOTE(review): the struct header and pmap member were stripped by the
 // export.
1953  inline bool operator() (int p1, int p2) const {
1954  int a1 = pmap->index_c(p1);
1955  int a2 = pmap->index_c(p2);
1956  if ( a1 < a2 ) return true;
1957  if ( a1 > a2 ) return false;
1958  int dir = ( (a1 & 1) ? -1 : 1 );
1959  int b1 = pmap->index_a(p1);
1960  int b2 = pmap->index_a(p2);
1961  if ( b1 * dir < b2 * dir ) return true;
1962  if ( b1 * dir > b2 * dir ) return false;
1963  dir *= ( (b1 & 1) ? -1 : 1 );
1964  int c1 = pmap->index_b(p1);
1965  int c2 = pmap->index_b(p2);
1966  if ( c1 * dir < c2 * dir ) return true;
1967  return false;
1968  }
1969 };
1971 
// Recursive spatial bisection: splits the patch set and the PE set in two and
// recurses until one PE remains, assigning patches (assignedNode[pid] = pe) so
// that per-PE load is balanced along a space-filling curve.  The function name
// line (orig. 1972) was dropped by the doxygen listing; the NAMD_bug message
// below identifies it as recursive_bisect_with_curve.  The trailing recursive
// calls (orig. 2177/2180) also lost their callee name in this listing.
1973  int *patch_begin, int *patch_end,
1974  int *node_begin, int *node_end,
1975  double *patchLoads,
1976  double *sortedLoads,
1977  int *assignedNode,
1978  TopoManagerWrapper &tmgr
1979  ) {
1980 
1981  SimParameters *simParams = Node::Object()->simParameters;
1982  PatchMap *patchMap = PatchMap::Object();
1983  int *patches = patch_begin;
1984  int npatches = patch_end - patch_begin;
1985  int *nodes = node_begin;
1986  int nnodes = node_end - node_begin;
1987 
// Per-patch load estimate: atom count plus a constant 10 overhead per patch.
1988  // assign patch loads
1989  double totalRawLoad = 0;
1990  for ( int i=0; i<npatches; ++i ) {
1991  int pid=patches[i];
1992 #ifdef MEM_OPT_VERSION
1993  double load = patchMap->numAtoms(pid) + 10;
1994 #else
1995  double load = patchMap->patch(pid)->getNumAtoms() + 10;
1996 #endif
1997  patchLoads[pid] = load;
1998  sortedLoads[i] = load;
1999  totalRawLoad += load;
2000  }
2001  std::sort(sortedLoads,sortedLoads+npatches);
2002 
// Clamp outlier patch loads so one huge patch cannot dominate a PE's target.
2003  // limit maxPatchLoad to adjusted average load per node
2004  double sumLoad = 0;
2005  double maxPatchLoad = 1;
2006  for ( int i=0; i<npatches; ++i ) {
2007  double load = sortedLoads[i];
2008  double total = sumLoad + (npatches-i) * load;
2009  if ( nnodes * load > total ) break;
2010  sumLoad += load;
2011  maxPatchLoad = load;
2012  }
2013  double totalLoad = 0;
2014  for ( int i=0; i<npatches; ++i ) {
2015  int pid=patches[i];
2016  if ( patchLoads[pid] > maxPatchLoad ) patchLoads[pid] = maxPatchLoad;
2017  totalLoad += patchLoads[pid];
2018  }
2019  if ( nnodes * maxPatchLoad > totalLoad )
2020  NAMD_bug("algorithm failure in WorkDistrib recursive_bisect_with_curve()");
2021 
// Bounding box of this patch subset in (a,b,c) grid indices; the longest
// dimension chooses both the torus split axis and the curve variant below.
2022  int a_len, b_len, c_len;
2023  int a_min, b_min, c_min;
2024  { // find dimensions
2025  a_min = patchMap->index_a(patches[0]);
2026  b_min = patchMap->index_b(patches[0]);
2027  c_min = patchMap->index_c(patches[0]);
2028  int a_max = a_min;
2029  int b_max = b_min;
2030  int c_max = c_min;
2031  for ( int i=1; i<npatches; ++i ) {
2032  int a = patchMap->index_a(patches[i]);
2033  int b = patchMap->index_b(patches[i]);
2034  int c = patchMap->index_c(patches[i]);
2035  if ( a < a_min ) a_min = a;
2036  if ( b < b_min ) b_min = b;
2037  if ( c < c_min ) c_min = c;
2038  if ( a > a_max ) a_max = a;
2039  if ( b > b_max ) b_max = b;
2040  if ( c > c_max ) c_max = c;
2041  }
2042  a_len = a_max - a_min;
2043  b_len = b_max - b_min;
2044  c_len = c_max - c_min;
2045  }
2046 
2047  int *node_split = node_begin;
2048 
// Try to split the PE list on torus topology along the longest patch axis.
2049  if ( simParams->disableTopology ) ; else
2050  if ( a_len >= b_len && a_len >= c_len ) {
2051  node_split = tmgr.sortAndSplit(node_begin,node_end,0);
2052  } else if ( b_len >= a_len && b_len >= c_len ) {
2053  node_split = tmgr.sortAndSplit(node_begin,node_end,1);
2054  } else if ( c_len >= a_len && c_len >= b_len ) {
2055  node_split = tmgr.sortAndSplit(node_begin,node_end,2);
2056  }
2057 
// Fallback 1: split at a physical-node boundary nearest the midpoint.
2058  if ( node_split == node_begin ) { // unable to split torus
2059  // make sure physical nodes are together
2060  std::sort(node_begin, node_end, WorkDistrib::pe_sortop_compact());
2061  // find physical node boundary to split on
2062  int i_split = 0;
2063  for ( int i=0; i<nnodes; ++i ) {
2064  if ( ! CmiPeOnSamePhysicalNode(nodes[i_split],nodes[i]) ) {
2065  int mid = (nnodes+1)/2;
2066  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2067  else break;
2068  }
2069  }
2070  node_split = node_begin + i_split;
2071  }
2072 
2073  bool final_patch_sort = false;
2074 
// Fallback 2: all PEs share a physical node; split at a process boundary.
2075  if ( node_split == node_begin ) { // all on same physical node
2076  if ( ( simParams->verboseTopology ) &&
2077  nnodes == CmiNumPesOnPhysicalNode(CmiPhysicalNodeID(*node_begin)) ) {
2078  int crds[3];
2079  tmgr.coords(*node_begin, crds);
2080  CkPrintf("WorkDistrib: physnode %5d pe %5d node %5d at %5d %5d %5d from %5d %5d %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2081  CmiPhysicalNodeID(*node_begin), *node_begin,
2082  CkNodeOf(*node_begin), crds[0], crds[1], crds[2],
2083  a_min, b_min, c_min, npatches,
2084  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2085  }
2086 
2087  // final sort along a to minimize pme message count
2088  final_patch_sort = true;
2089 
2090  // find node (process) boundary to split on
2091  int i_split = 0;
2092  for ( int i=0; i<nnodes; ++i ) {
2093  if ( CmiNodeOf(nodes[i_split]) != CmiNodeOf(nodes[i]) ) {
2094  int mid = (nnodes+1)/2;
2095  if ( abs(i-mid) < abs(i_split-mid) ) i_split = i;
2096  else break;
2097  }
2098  }
2099  node_split = node_begin + i_split;
2100  }
2101 
// Fallback 3: all PEs in one process; split the PE list at its midpoint.
2102  if ( node_split == node_begin ) { // all on same node (process)
2103  if ( ( simParams->verboseTopology ) &&
2104  nnodes == CmiNodeSize(CmiNodeOf(*node_begin)) ) {
2105  int crds[3];
2106  tmgr.coords(*node_begin, crds);
2107  CkPrintf("WorkDistrib: node %5d pe %5d has %5d patches %5d x %5d x %5d load %7f pes %5d\n",
2108  CmiNodeOf(*node_begin), *node_begin, npatches,
2109  a_len+1, b_len+1, c_len+1, totalRawLoad, nnodes);
2110  }
2111 
2112  // no natural divisions so just split at midpoint
2113  node_split = node_begin + nnodes/2;
2114  }
2115 
// Base case: a single PE takes every remaining patch.
2116  if ( nnodes == 1 ) { // down to a single pe
2117  // assign all patches
2118  int *node = node_begin;
2119  sumLoad = 0;
2120  for ( int i=0; i < npatches; ++i ) {
2121  int pid = patches[i];
2122  assignedNode[pid] = *node;
2123  sumLoad += patchLoads[pid];
2124  if ( 0 ) CkPrintf("assign %5d node %5d patch %5d %5d %5d load %7f total %7f\n",
2125  i, *node,
2126  patchMap->index_a(pid),
2127  patchMap->index_b(pid),
2128  patchMap->index_c(pid),
2129  patchLoads[pid], sumLoad);
2130  }
2131 
2132  return;
2133  }
2134 
// Order patches along the serpentine curve matching the chosen split axis.
2135  if ( final_patch_sort ) {
2136  // final sort along a to minimize pme message count
2137  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2138  } else if ( a_len >= b_len && a_len >= c_len ) {
2139  if ( 0 ) CkPrintf("sort a\n");
2140  std::sort(patch_begin,patch_end,patch_sortop_curve_a(patchMap));
2141  } else if ( b_len >= a_len && b_len >= c_len ) {
2142  if ( 0 ) CkPrintf("sort b\n");
2143  std::sort(patch_begin,patch_end,patch_sortop_curve_b(patchMap));
2144  } else if ( c_len >= a_len && c_len >= b_len ) {
2145  if ( 0 ) CkPrintf("sort c\n");
2146  std::sort(patch_begin,patch_end,patch_sortop_curve_c(patchMap));
2147  }
2148 
// Walk the sorted patches, advancing the PE cursor when cumulative load
// crosses the proportional target, to find the patch split point.
2149  int *patch_split;
2150  { // walk through patches in sorted order
2151  int *node = node_begin;
2152  sumLoad = 0;
2153  for ( patch_split = patch_begin;
2154  patch_split != patch_end && node != node_split;
2155  ++patch_split ) {
2156  sumLoad += patchLoads[*patch_split];
2157  double targetLoad = totalLoad *
2158  ((double)(node-node_begin+1) / (double)nnodes);
2159  if ( 0 ) CkPrintf("test %5d node %5d patch %5d %5d %5d load %7f target %7f\n",
2160  patch_split - patch_begin, *node,
2161  patchMap->index_a(*patch_split),
2162  patchMap->index_b(*patch_split),
2163  patchMap->index_c(*patch_split),
2164  sumLoad, targetLoad);
2165  double extra = ( patch_split+1 != patch_end ? 0.5 * patchLoads[*(patch_split+1)] : 0 );
2166  if ( node+1 < node_end && sumLoad + extra >= targetLoad ) { ++node; }
2167  }
2168  double targetLoad = totalLoad *
2169  ((double)(node_split-node_begin) / (double)nnodes);
2170  if ( 0 ) CkPrintf("split node %5d/%5d patch %5d/%5d load %7f target %7f\n",
2171  node_split-node_begin, nnodes,
2172  patch_split-patch_begin, npatches,
2173  sumLoad, targetLoad);
2174  }
2175 
// Recurse on both halves (callee name stripped by the listing; per the
// surrounding code this is recursive_bisect_with_curve itself).
2176  // recurse
2178  patch_begin, patch_split, node_begin, node_split,
2179  patchLoads, sortedLoads, assignedNode, tmgr);
2181  patch_split, patch_end, node_split, node_end,
2182  patchLoads, sortedLoads, assignedNode, tmgr);
2183 }
2184 
2185 //----------------------------------------------------------------------
// Top-level driver: builds the candidate PE list (honoring the
// noPatchesOnZero/One/OutputPEs exclusions), then delegates patch->PE
// assignment to the recursive space-filling-curve bisection, and finally
// records node and base-node assignments in the PatchMap.
2186 void WorkDistrib::assignPatchesSpaceFillingCurve()
2187 {
2188  TopoManagerWrapper tmgr;
2189  PatchMap *patchMap = PatchMap::Object();
2190  const int numPatches = patchMap->numPatches();
2191  int *assignedNode = new int[numPatches];
2192  ResizeArray<double> patchLoads(numPatches);
2193  SortableResizeArray<double> sortedLoads(numPatches);
2194  int numNodes = Node::Object()->numNodes();
2195  SimParameters *simParams = Node::Object()->simParameters;
2196  if(simParams->simulateInitialMapping) {
// NAMD_die aborts, so the assignment below is unreachable dead code —
// apparently kept as a placeholder for when this mode becomes supported.
2197  NAMD_die("simulateInitialMapping not supported by assignPatchesSpaceFillingCurve()");
2198  numNodes = simParams->simulatedPEs;
2199  }
2200 
// Identity ordering of patch IDs; the recursion sorts this in place.
2201  ResizeArray<int> patchOrdering(numPatches);
2202  for ( int i=0; i<numPatches; ++i ) {
2203  patchOrdering[i] = i;
2204  }
2205 
// Build the list of PEs eligible to host patches.
2206  ResizeArray<int> nodeOrdering(numNodes);
2207  nodeOrdering.resize(0);
2208  for ( int i=0; i<numNodes; ++i ) {
2209  int pe = peDiffuseOrdering[(i+1)%numNodes]; // avoid 0 if possible
2210  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
2211  if ( pe == 0 ) continue;
2212  if(simParams->noPatchesOnOne && numNodes > 2) {
2213  if ( pe == 1 ) continue;
2214  }
2215  }
2216 #ifdef MEM_OPT_VERSION
2217  if(simParams->noPatchesOnOutputPEs && numNodes-simParams->numoutputprocs >2) {
2218  if ( isOutputProcessor(pe) ) continue;
2219  }
2220 #endif
2221  nodeOrdering.add(pe);
2222  if ( 0 ) CkPrintf("using pe %5d\n", pe);
2223  }
2224 
// Use at most one PE per patch for patch hosting.
2225  int *node_begin = nodeOrdering.begin();
2226  int *node_end = nodeOrdering.end();
2227  if ( nodeOrdering.size() > numPatches ) {
2228  node_end = node_begin + numPatches;
2229  }
2230  std::sort(node_begin, node_end, pe_sortop_compact());
2231 
// When there are plenty of spare PEs, draw base nodes from the next
// numPatches PEs instead of reusing the patch-hosting PEs.
2232  int *basenode_begin = node_begin;
2233  int *basenode_end = node_end;
2234  if ( nodeOrdering.size() > 2*numPatches ) {
2235  basenode_begin = node_end;
2236  basenode_end = basenode_begin + numPatches;
2237  std::sort(basenode_begin, basenode_end, pe_sortop_compact());
2238  }
2239 
2240  if ( simParams->disableTopology ) {
2241  iout << iWARN << "IGNORING TORUS TOPOLOGY DURING PATCH PLACEMENT\n" << endi;
2242  }
2243 
// NOTE(review): the callee name on the dropped line (orig. 2244) is
// recursive_bisect_with_curve, per the argument list and the helper above.
2245  patchOrdering.begin(), patchOrdering.end(),
2246  node_begin, node_end,
2247  patchLoads.begin(), sortedLoads.begin(), assignedNode, tmgr);
2248 
2249  std::sort(node_begin, node_end, pe_sortop_compact());
2250 
2251  int samenodecount = 0;
2252 
// Commit assignments: each patch's base node is the basenode-list entry at
// the same rank its host PE has in the sorted node list.
2253  for ( int pid=0; pid<numPatches; ++pid ) {
2254  int node = assignedNode[pid];
2255  patchMap->assignNode(pid, node);
2256  int nodeidx = std::lower_bound(node_begin, node_end, node,
2257  pe_sortop_compact()) - node_begin;
2258  int basenode = basenode_begin[nodeidx];
2259  patchMap->assignBaseNode(pid, basenode);
2260  if ( CmiPeOnSamePhysicalNode(node,basenode) ) ++samenodecount;
2261  }
2262 
2263  iout << iINFO << "Placed " << (samenodecount*100./numPatches) << "% of base nodes on same physical node as patch\n" << endi;
2264 
2265  delete [] assignedNode;
2266 }
2267 
2268 //----------------------------------------------------------------------
// Creates the initial ComputeMap entries for every compute type enabled by
// the simulation parameters, dispatching to the mapComputeHomePatches /
// mapComputePatch / mapComputeNode / mapComputeHomeTuples helpers below.
// NOTE(review): the doxygen listing dropped this function's signature line
// (orig. 2269) — presumably "void WorkDistrib::mapComputes(void)" — and the
// second operand of the condition at orig. 2370; confirm against the repo.
2270 {
2271  PatchMap *patchMap = PatchMap::Object();
2272  ComputeMap *computeMap = ComputeMap::Object();
2273  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2274  Node *node = nd.ckLocalBranch();
2275 
2276  DebugM(3,"Mapping computes\n");
2277 
2278  computeMap->allocateCids();
2279 
2280  // Handle full electrostatics
2281  if ( node->simParameters->fullDirectOn )
2282  mapComputeHomePatches(computeFullDirectType);
2283  if ( node->simParameters->FMAOn )
2284 #ifdef DPMTA
2285  mapComputeHomePatches(computeDPMTAType);
2286 #else
2287  NAMD_die("This binary does not include DPMTA (FMA).");
2288 #endif
2289  if ( node->simParameters->PMEOn ) {
2290 #ifdef DPME
2291  if ( node->simParameters->useDPME )
2292  mapComputeHomePatches(computeDPMEType);
2293  else {
2294  mapComputeHomePatches(computePmeType);
2296  mapComputeHomePatches(computeEwaldType);
2297  }
2298 #else
2299 #ifdef NAMD_CUDA
2300  if (node->simParameters->usePMECUDA) {
2301  mapComputePatch(computePmeCUDAType);
2302  } else
2303 #endif
2304  {
2305  mapComputePatch(computePmeType);
2306  }
2308  mapComputeHomePatches(computeEwaldType);
2309 #endif
2310  }
2311 
2312  if ( node->simParameters->globalForcesOn ) {
2313  DebugM(2,"adding ComputeGlobal\n");
2314  mapComputeHomePatches(computeGlobalType);
2315  }
2316 
2317  if ( node->simParameters->extForcesOn )
2318  mapComputeHomePatches(computeExtType);
2319 
2320  if ( node->simParameters->qmForcesOn )
2321  mapComputeHomePatches(computeQMType);
2322 
2323  if ( node->simParameters->GBISserOn )
2324  mapComputeHomePatches(computeGBISserType);
2325 
2326  if ( node->simParameters->MsmSerialOn )
2327  mapComputeHomePatches(computeMsmSerialType);
2328 #ifdef CHARM_HAS_MSA
2329  else if ( node->simParameters->MSMOn )
2330  mapComputeHomePatches(computeMsmMsaType);
2331 #else
2332  else if ( node->simParameters->MSMOn )
2333  mapComputeHomePatches(computeMsmType);
2334 #endif
2335 
2336  if ( node->simParameters->FMMOn )
2337  mapComputeHomePatches(computeFmmType);
2338 
2339 #ifdef NAMD_CUDA
2340 #ifdef BONDED_CUDA
2341  if (node->simParameters->bondedCUDA) {
2342  mapComputeNode(computeBondedCUDAType);
2343  }
2344 #endif
2345 #endif
2346 
2347 #ifdef NAMD_CUDA
2348  if (node->simParameters->useCUDA2) {
2349  mapComputeNode(computeNonbondedCUDA2Type);
2350  } else {
2351  mapComputeNode(computeNonbondedCUDAType);
2352  }
2353  mapComputeHomeTuples(computeExclsType);
2354  mapComputePatch(computeSelfExclsType);
2355 #endif
2356 
2357 #ifdef NAMD_MIC
2358  mapComputeNode(computeNonbondedMICType);
2359 #endif
2360 
2361  mapComputeNonbonded();
2362 
2363  if ( node->simParameters->LCPOOn ) {
2364  mapComputeLCPO();
2365  }
2366 
2367  // If we're doing true pair interactions, no need for bonded terms.
2368  // But if we're doing within-group interactions, we do need them.
// NOTE(review): the remainder of this condition (orig. 2370) was dropped by
// the listing — likely a pairInteractionSelf test; verify against the repo.
2369  if ( !node->simParameters->pairInteractionOn ||
2371  mapComputeHomeTuples(computeBondsType);
2372  mapComputeHomeTuples(computeAnglesType);
2373  mapComputeHomeTuples(computeDihedralsType);
2374  mapComputeHomeTuples(computeImpropersType);
2375  mapComputeHomeTuples(computeCrosstermsType);
2376  mapComputePatch(computeSelfBondsType);
2377  mapComputePatch(computeSelfAnglesType);
2378  mapComputePatch(computeSelfDihedralsType);
2379  mapComputePatch(computeSelfImpropersType);
2380  mapComputePatch(computeSelfCrosstermsType);
2381  }
2382 
2383  if ( node->simParameters->goGroPair ) {
2384  // JLai
2385  mapComputeHomeTuples(computeGromacsPairType);
2386  mapComputePatch(computeSelfGromacsPairType);
2387  // End of JLai
2388  }
2389 
2390  if ( node->simParameters->drudeOn ) {
2391  mapComputeHomeTuples(computeTholeType);
2392  mapComputePatch(computeSelfTholeType);
2393  mapComputeHomeTuples(computeAnisoType);
2394  mapComputePatch(computeSelfAnisoType);
2395  }
2396 
2397  if ( node->simParameters->eFieldOn )
2398  mapComputePatch(computeEFieldType);
2399  /* BEGIN gf */
2400  if ( node->simParameters->mgridforceOn )
2401  mapComputePatch(computeGridForceType);
2402  /* END gf */
2403  if ( node->simParameters->stirOn )
2404  mapComputePatch(computeStirType);
2405  if ( node->simParameters->sphericalBCOn )
2406  mapComputePatch(computeSphericalBCType);
2407  if ( node->simParameters->cylindricalBCOn )
2408  mapComputePatch(computeCylindricalBCType);
2409  if ( node->simParameters->tclBCOn ) {
2410  mapComputeHomePatches(computeTclBCType);
2411  }
2412  if ( node->simParameters->constraintsOn )
2413  mapComputePatch(computeRestraintsType);
2414  if ( node->simParameters->consForceOn )
2415  mapComputePatch(computeConsForceType);
2416  if ( node->simParameters->consTorqueOn )
2417  mapComputePatch(computeConsTorqueType);
2418 
2419  // store the latest compute map
2420  SimParameters *simParams = Node::Object()->simParameters;
2421  if (simParams->storeComputeMap) {
2422  computeMap->saveComputeMap(simParams->computeMapFilename);
2423  }
2424  // override mapping decision
2425  if (simParams->loadComputeMap) {
2426  computeMap->loadComputeMap(simParams->computeMapFilename);
2427  CkPrintf("ComputeMap has been loaded from %s.\n", simParams->computeMapFilename);
2428  }
2429 }
2430 
2431 //----------------------------------------------------------------------
2432 void WorkDistrib::mapComputeHomeTuples(ComputeType type)
2433 {
2434  PatchMap *patchMap = PatchMap::Object();
2435  ComputeMap *computeMap = ComputeMap::Object();
2436  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2437  Node *node = nd.ckLocalBranch();
2438 
2439  int numNodes = node->numNodes();
2440  SimParameters *simparam = node->simParameters;
2441  if(simparam->simulateInitialMapping) {
2442  numNodes = simparam->simulatedPEs;
2443  }
2444 
2445  char *isBaseNode = new char[numNodes];
2446  memset(isBaseNode,0,numNodes*sizeof(char));
2447 
2448  int numPatches = patchMap->numPatches();
2449  for(int j=0; j<numPatches; j++) {
2450  isBaseNode[patchMap->basenode(j)] = 1;
2451  }
2452 
2453  for(int i=0; i<numNodes; i++) {
2454  if ( isBaseNode[i] ) {
2455  computeMap->storeCompute(i,0,type);
2456  }
2457  }
2458 
2459  delete [] isBaseNode;
2460 }
2461 
2462 //----------------------------------------------------------------------
2463 void WorkDistrib::mapComputeHomePatches(ComputeType type)
2464 {
2465  PatchMap *patchMap = PatchMap::Object();
2466  ComputeMap *computeMap = ComputeMap::Object();
2467  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2468  Node *node = nd.ckLocalBranch();
2469 
2470  int numNodes = node->numNodes();
2471  SimParameters *simparam = node->simParameters;
2472  if(simparam->simulateInitialMapping) {
2473  numNodes = simparam->simulatedPEs;
2474  }
2475 
2476  for(int i=0; i<numNodes; i++) {
2477  if ( patchMap->numPatchesOnNode(i) ) {
2478  computeMap->storeCompute(i,0,type);
2479  }
2480  }
2481 }
2482 
2483 //----------------------------------------------------------------------
2484 void WorkDistrib::mapComputePatch(ComputeType type)
2485 {
2486  PatchMap *patchMap = PatchMap::Object();
2487  ComputeMap *computeMap = ComputeMap::Object();
2488 
2489  PatchID i;
2490  ComputeID cid;
2491 
2492  for(i=0; i<patchMap->numPatches(); i++)
2493  {
2494  cid=computeMap->storeCompute(patchMap->node(i),1,type);
2495  computeMap->newPid(cid,i);
2496  patchMap->newCid(i,cid);
2497  }
2498 
2499 }
2500 
2501 //----------------------------------------------------------------------
2502 void WorkDistrib::mapComputeNode(ComputeType type)
2503 {
2504  PatchMap *patchMap = PatchMap::Object();
2505  ComputeMap *computeMap = ComputeMap::Object();
2506 
2507  PatchID i;
2508  ComputeID cid;
2509 
2510  int ncpus = CkNumPes();
2511  SimParameters *simparam = Node::Object()->simParameters;
2512  if(simparam->simulateInitialMapping) {
2513  ncpus = simparam->simulatedPEs;
2514  }
2515 
2516  for(int i=0; i<ncpus; i++) {
2517  computeMap->storeCompute(i,0,type);
2518  }
2519 
2520 }
2521 
2522 //----------------------------------------------------------------------
// Creates the nonbonded compute objects: one self compute per patch (on the
// patch's own PE) and one pair compute per one/two-away neighbor pair (on the
// downstream patch's base node).  The hybrid-balancer partitioning code is
// compiled out with #if 0.
// NOTE(review): the listing dropped the declaration at orig. 2541 (the
// oneAway[] neighbor array used below) and the compute-type token at orig.
// 2590 (presumably computeNonbondedSelfType); verify against the repo.
2523 void WorkDistrib::mapComputeNonbonded(void)
2524 {
2525  // For each patch, create 1 electrostatic object for self-interaction.
2526  // Then create 1 for each 1-away and 2-away neighbor which has a larger
2527  // pid.
2528 
2529  PatchMap *patchMap = PatchMap::Object();
2530  ComputeMap *computeMap = ComputeMap::Object();
2531  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2532  Node *node = nd.ckLocalBranch();
2533  SimParameters *simParams = Node::Object()->simParameters;
2534  int ncpus = CkNumPes();
2535  int nodesize = CkMyNodeSize();
2536  if(simParams->simulateInitialMapping) {
2537  ncpus = simParams->simulatedPEs;
2538  nodesize = simParams->simulatedNodeSize;
2539  }
2540 
2542  PatchID oneAwayDownstream[PatchMap::MaxOneOrTwoAway];
2543  int oneAwayTrans[PatchMap::MaxOneOrTwoAway];
2544 
2545  PatchID i;
2546  ComputeID cid;
2547  int numNeighbors;
2548  int j;
// Scale partition counts down when there are fewer PEs than patches.
2549  double partScaling = 1.0;
2550  if ( ncpus < patchMap->numPatches() ) {
2551  partScaling = ((double)ncpus) / ((double)patchMap->numPatches());
2552  }
2553 
2554  for(i=0; i<patchMap->numPatches(); i++) // do the self
2555  {
2556 
2557  int numPartitions = 1;
2558 #if 0
2559  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2560 #ifdef MEM_OPT_VERSION
2561  int64 numFixed = patchMap->numFixedAtoms(i);
2562  int64 numAtoms = patchMap->numAtoms(i);
2563 #else
2564  int64 numFixed = patchMap->patch(i)->getNumFixedAtoms(); // avoid overflow
2565  int64 numAtoms = patchMap->patch(i)->getNumAtoms();
2566 #endif
2567 
2568  int divide = node->simParameters->numAtomsSelf;
2569  if (divide > 0) {
2570  numPartitions = (int) ( partScaling * ( 0.5 +
2571  (numAtoms*numAtoms-numFixed*numFixed) / (double)(2*divide*divide) ) );
2572  }
2573  if (numPartitions < 1) numPartitions = 1;
2574  if ( numPartitions > node->simParameters->maxSelfPart )
2575  numPartitions = node->simParameters->maxSelfPart;
2576  // self-interaction
2577  DebugM(4,"Mapping " << numPartitions << " ComputeNonbondedSelf objects for patch " << i << "\n");
2578 // iout <<"Self numPartitions = " <<numPartitions <<" numAtoms " <<numAtoms <<std::endl;
2579  }
2580 #endif
2581 
2582  // DMK - NOTE - For MIC builds (i.e. NAMD_MIC is defined), it is assumed that self computes are
2583  // mapped to the PE their associated patch is on. If the code below should change, making that
2584  // untrue, MIC builds should be special cased so that assumption still holds (or the host vs
2585  // device load balancing scheme should be modified). (See the comment in the function
2586  // mic_assignComputes() in ComputeNonbondedMIC.C for more details.)
2587  for(int partition=0; partition < numPartitions; partition++)
2588  {
2589  cid=computeMap->storeCompute(patchMap->node(i),1,
2591  partition,numPartitions);
2592  computeMap->newPid(cid,i);
2593  patchMap->newCid(i,cid);
2594  }
2595  }
2596 
2597  for(int p1=0; p1 <patchMap->numPatches(); p1++) // do the pairs
2598  {
2599  // this only returns half of neighbors, which is what we want
2600  numNeighbors=patchMap->oneOrTwoAwayNeighbors(p1,oneAway,oneAwayDownstream,oneAwayTrans);
2601  for(j=0;j<numNeighbors;j++)
2602  {
2603  int p2 = oneAway[j];
2604  int dsp = oneAwayDownstream[j];
2605 
2606  int numPartitions = 1;
2607 #if 0
2608  if ( simParams->ldBalancer == LDBAL_HYBRID ) {
2609 #ifdef MEM_OPT_VERSION
2610  int64 numAtoms1 = patchMap->numAtoms(p1);
2611  int64 numAtoms2 = patchMap->numAtoms(p2);
2612  int64 numFixed1 = patchMap->numFixedAtoms(p1);
2613  int64 numFixed2 = patchMap->numFixedAtoms(p2);
2614 #else
2615  int64 numAtoms1 = patchMap->patch(p1)->getNumAtoms();
2616  int64 numAtoms2 = patchMap->patch(p2)->getNumAtoms();
2617  int64 numFixed1 = patchMap->patch(p1)->getNumFixedAtoms();
2618  int64 numFixed2 = patchMap->patch(p2)->getNumFixedAtoms();
2619 #endif
2620 
2621 
2622  const int t2 = oneAwayTrans[j];
2623  const int adim = patchMap->gridsize_a();
2624  const int bdim = patchMap->gridsize_b();
2625  const int cdim = patchMap->gridsize_c();
2626  const int nax = patchMap->numaway_a(); // 1 or 2
2627  const int nay = patchMap->numaway_b(); // 1 or 2
2628  const int naz = patchMap->numaway_c(); // 1 or 2
2629  const int ia1 = patchMap->index_a(p1);
2630  const int ia2 = patchMap->index_a(p2) + adim * Lattice::offset_a(t2);
2631  const int ib1 = patchMap->index_b(p1);
2632  const int ib2 = patchMap->index_b(p2) + bdim * Lattice::offset_b(t2);
2633  const int ic1 = patchMap->index_c(p1);
2634  const int ic2 = patchMap->index_c(p2) + cdim * Lattice::offset_c(t2);
2635 
2636  if ( abs(ia2-ia1) > nax ||
2637  abs(ib2-ib1) > nay ||
2638  abs(ic2-ic1) > naz )
2639  NAMD_bug("Bad patch distance in WorkDistrib::mapComputeNonbonded");
2640 
2641  int distance = 3;
2642  if ( ia1 == ia2 ) --distance;
2643  else if ( ia1 == ia2 + nax - 1 ) --distance;
2644  else if ( ia1 + nax - 1 == ia2 ) --distance;
2645  if ( ib1 == ib2 ) --distance;
2646  else if ( ib1 == ib2 + nay - 1 ) --distance;
2647  else if ( ib1 + nay - 1 == ib2 ) --distance;
2648  if ( ic1 == ic2 ) --distance;
2649  else if ( ic1 == ic2 + naz - 1 ) --distance;
2650  else if ( ic1 + naz - 1 == ic2 ) --distance;
2651  int divide = 0;
2652  if ( distance == 0 ) {
2653  divide = node->simParameters->numAtomsSelf2;
2654  } else if (distance == 1) {
2655  divide = node->simParameters->numAtomsPair;
2656  } else {
2657  divide = node->simParameters->numAtomsPair2;
2658  }
2659  if (divide > 0) {
2660  numPartitions = (int) ( partScaling * ( 0.5 +
2661  (numAtoms1*numAtoms2-numFixed1*numFixed2)/(double)(divide*divide) ) );
2662  }
2663  if ( numPartitions < 1 ) numPartitions = 1;
2664  if ( numPartitions > node->simParameters->maxPairPart )
2665  numPartitions = node->simParameters->maxPairPart;
2666 // if ( numPartitions > 1 ) iout << "Mapping " << numPartitions << " ComputeNonbondedPair objects for patches " << p1 << "(" << numAtoms1 << ") and " << p2 << "(" << numAtoms2 << ")\n" << endi;
2667  }
2668 #endif
// Pair computes live on the downstream patch's base node.
2669  for(int partition=0; partition < numPartitions; partition++)
2670  {
2671  cid=computeMap->storeCompute( patchMap->basenode(dsp),
2672  2,computeNonbondedPairType,partition,numPartitions);
2673  computeMap->newPid(cid,p1);
2674  computeMap->newPid(cid,p2,oneAwayTrans[j]);
2675  patchMap->newCid(p1,cid);
2676  patchMap->newCid(p2,cid);
2677  }
2678  }
2679  }
2680 }
2681 
2682 //----------------------------------------------------------------------
// Creates one LCPO compute per patch octet, placed on the patch's own PE,
// registering all patches of the octet with the new compute.
// NOTE(review): the listing dropped the compute-type argument at orig. 2713
// (presumably computeLCPOType) and leaves ncpus/nodesize/simParams unused.
2683 void WorkDistrib::mapComputeLCPO(void) {
2684  //iterate over all needed objects
2685 
2686  PatchMap *patchMap = PatchMap::Object();
2687  ComputeMap *computeMap = ComputeMap::Object();
2688  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
2689  Node *node = nd.ckLocalBranch();
2690  SimParameters *simParams = Node::Object()->simParameters;
2691  int ncpus = CkNumPes();
2692  int nodesize = CkMyNodeSize();
2693  const int maxPatches = 8;
2694 
2695  int numPatchesInOctet;
2696  PatchID patchesInOctet[maxPatches];
2697  int oneAwayTrans[maxPatches];
2698 
2699  //partitioned after 1st timestep
2700  int numPartitions = 1;
2701 
2702  PatchID i;
2703  ComputeID cid;
2704 
2705  // one octet per patch
2706  for(i=0; i<patchMap->numPatches(); i++) {
2707  numPatchesInOctet =
2708  patchMap->getPatchesInOctet(i, patchesInOctet, oneAwayTrans);
2709 
2710  for(int partition=0; partition < numPartitions; partition++) {
2711  cid=computeMap->storeCompute(patchMap->node(i),
2712  numPatchesInOctet,
2714  partition,
2715  numPartitions);
2716  for (int p = 0; p < numPatchesInOctet; p++) {
2717  computeMap->newPid(cid, patchesInOctet[p], oneAwayTrans[p]);
2718  }
2719  for (int p = 0; p < numPatchesInOctet; p++) {
2720  patchMap->newCid(patchesInOctet[p],cid);
2721  }
2722  } // for partitions
2723  } // for patches
2724 } // mapComputeLCPO
2725 
2726 //----------------------------------------------------------------------
// Stamps the compute's LocalWorkMsg with a sequence-based priority and
// enqueues it to the per-type entry method of this PE's WorkDistrib, so the
// Charm++ scheduler runs the work at the right priority.
// NOTE(review): the listing dropped the signature line (orig. 2727) —
// presumably "void WorkDistrib::messageEnqueueWork(Compute *compute)" per the
// NAMD_bug text — and several case-label lines (orig. 2757, 2761, 2773,
// 2777-2778, 2785, 2819, 2856, 2858, 2873, 2876); verify against the repo.
2728  LocalWorkMsg *msg = compute->localWorkMsg;
2729  int seq = compute->sequence();
2730  int gbisPhase = compute->getGBISPhase();
2731 
2732  if ( seq < 0 ) {
2733  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2734  } else {
2735  SET_PRIORITY(msg,seq,compute->priority());
2736  }
2737 
2738  msg->compute = compute; // pointer is valid since send is to local Pe
2739  int type = compute->type();
2740  int cid = compute->cid;
2741 
2742  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2743  switch ( type ) {
2744  case computeExclsType:
2745  case computeSelfExclsType:
2746  wdProxy[CkMyPe()].enqueueExcls(msg);
2747  break;
2748  case computeBondsType:
2749  case computeSelfBondsType:
2750  wdProxy[CkMyPe()].enqueueBonds(msg);
2751  break;
2752  case computeAnglesType:
2753  case computeSelfAnglesType:
2754  wdProxy[CkMyPe()].enqueueAngles(msg);
2755  break;
2756  case computeDihedralsType:
2758  wdProxy[CkMyPe()].enqueueDihedrals(msg);
2759  break;
2760  case computeImpropersType:
2762  wdProxy[CkMyPe()].enqueueImpropers(msg);
2763  break;
2764  case computeTholeType:
2765  case computeSelfTholeType:
2766  wdProxy[CkMyPe()].enqueueThole(msg);
2767  break;
2768  case computeAnisoType:
2769  case computeSelfAnisoType:
2770  wdProxy[CkMyPe()].enqueueAniso(msg);
2771  break;
2772  case computeCrosstermsType:
2774  wdProxy[CkMyPe()].enqueueCrossterms(msg);
2775  break;
2776  // JLai
2779  wdProxy[CkMyPe()].enqueueGromacsPair(msg);
2780  break;
2781  // End of JLai
2782  case computeLCPOType:
2783  wdProxy[CkMyPe()].enqueueLCPO(msg);
2784  break;
// Nonbonded self work: A/B alternates by timestep parity, with a separate
// entry method per GBIS phase.
2786  switch ( seq % 2 ) {
2787  case 0:
2788  //wdProxy[CkMyPe()].enqueueSelfA(msg);
2789  switch ( gbisPhase ) {
2790  case 1:
2791  wdProxy[CkMyPe()].enqueueSelfA1(msg);
2792  break;
2793  case 2:
2794  wdProxy[CkMyPe()].enqueueSelfA2(msg);
2795  break;
2796  case 3:
2797  wdProxy[CkMyPe()].enqueueSelfA3(msg);
2798  break;
2799  }
2800  break;
2801  case 1:
2802  //wdProxy[CkMyPe()].enqueueSelfB(msg);
2803  switch ( gbisPhase ) {
2804  case 1:
2805  wdProxy[CkMyPe()].enqueueSelfB1(msg);
2806  break;
2807  case 2:
2808  wdProxy[CkMyPe()].enqueueSelfB2(msg);
2809  break;
2810  case 3:
2811  wdProxy[CkMyPe()].enqueueSelfB3(msg);
2812  break;
2813  }
2814  break;
2815  default:
2816  NAMD_bug("WorkDistrib::messageEnqueueSelf case statement error!");
2817  }
2818  break;
// Nonbonded pair work: same A/B-by-parity and per-GBIS-phase dispatch.
2820  switch ( seq % 2 ) {
2821  case 0:
2822  //wdProxy[CkMyPe()].enqueueWorkA(msg);
2823  switch ( gbisPhase ) {
2824  case 1:
2825  wdProxy[CkMyPe()].enqueueWorkA1(msg);
2826  break;
2827  case 2:
2828  wdProxy[CkMyPe()].enqueueWorkA2(msg);
2829  break;
2830  case 3:
2831  wdProxy[CkMyPe()].enqueueWorkA3(msg);
2832  break;
2833  }
2834  break;
2835  case 1:
2836  //wdProxy[CkMyPe()].enqueueWorkB(msg);
2837  switch ( gbisPhase ) {
2838  case 1:
2839  wdProxy[CkMyPe()].enqueueWorkB1(msg);
2840  break;
2841  case 2:
2842  wdProxy[CkMyPe()].enqueueWorkB2(msg);
2843  break;
2844  case 3:
2845  wdProxy[CkMyPe()].enqueueWorkB3(msg);
2846  break;
2847  }
2848  break;
2849  case 2:
2850  wdProxy[CkMyPe()].enqueueWorkC(msg);
2851  break;
2852  default:
2853  NAMD_bug("WorkDistrib::messageEnqueueWork case statement error!");
2854  }
2855  break;
2857 #ifdef NAMD_CUDA
2859 // CkPrintf("WorkDistrib[%d]::CUDA seq=%d phase=%d\n", CkMyPe(), seq, gbisPhase);
2860  //wdProxy[CkMyPe()].enqueueCUDA(msg);
2861  switch ( gbisPhase ) {
2862  case 1:
2863  wdProxy[CkMyPe()].enqueueCUDA(msg);
2864  break;
2865  case 2:
2866  wdProxy[CkMyPe()].enqueueCUDAP2(msg);
2867  break;
2868  case 3:
2869  wdProxy[CkMyPe()].enqueueCUDAP3(msg);
2870  break;
2871  }
2872 #else
2874 #endif
2875  break;
2877 #ifdef NAMD_MIC
2878  wdProxy[CkMyPe()].enqueueMIC(msg);
2879 #endif
2880  break;
2881  case computePmeType:
2882  // CkPrintf("PME %d %d %x\n", CkMyPe(), seq, compute->priority());
2883  wdProxy[CkMyPe()].enqueuePme(msg);
2884  break;
2885 #ifdef NAMD_CUDA
2886  case computePmeCUDAType:
2887  wdProxy[CkMyPe()].enqueuePme(msg);
2888  break;
2889 #endif
2890  default:
2891  wdProxy[CkMyPe()].enqueueWork(msg);
2892  }
2893 }
2894 
2895 //----------------------------------------------------------------------
// Prioritizes and enqueues a CUDA finish message on the local PE, selecting
// the finishCUDA/finishCUDAP2/finishCUDAP3 entry method by GBIS phase.
// NOTE(review): the signature line (orig. 2896) was dropped by the listing;
// the #else branch body (orig. 2924) was likewise dropped (presumably a
// NAMD_bug for non-CUDA builds).  The NAMD_bug text below reuses the
// messageEnqueueWork wording.
2897  LocalWorkMsg *msg = compute->localWorkMsg;
2898  int seq = compute->sequence();
2899  int gbisPhase = compute->getGBISPhase();
2900 
2901  if ( seq < 0 ) {
2902  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageEnqueueWork");
2903  } else {
2904  SET_PRIORITY(msg,seq,compute->priority());
2905  }
2906 
2907  msg->compute = compute; // pointer is valid since send is to local Pe
2908  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2909 
2910 #ifdef NAMD_CUDA
2911  //wdProxy[CkMyPe()].finishCUDA(msg);
2912  switch ( gbisPhase ) {
2913  case 1:
2914  wdProxy[CkMyPe()].finishCUDA(msg);
2915  break;
2916  case 2:
2917  wdProxy[CkMyPe()].finishCUDAP2(msg);
2918  break;
2919  case 3:
2920  wdProxy[CkMyPe()].finishCUDAP3(msg);
2921  break;
2922  }
2923 #else
2925 #endif
2926 }
2927 
2928 //----------------------------------------------------------------------
// Prioritizes and enqueues a MIC finish message on the local PE (no-op body
// on non-MIC builds; the #else branch line orig. 2946 was dropped by the
// listing, presumably a NAMD_bug call).
// NOTE(review): the signature line (orig. 2929) was dropped; per the NAMD_bug
// text this is WorkDistrib::messageFinishMIC(Compute *compute).
2930  LocalWorkMsg *msg = compute->localWorkMsg;
2931  int seq = compute->sequence();
2932  int gbisPhase = compute->getGBISPhase();
2933 
2934  if ( seq < 0 ) {
2935  NAMD_bug("compute->sequence() < 0 in WorkDistrib::messageFinishMIC");
2936  } else {
2937  SET_PRIORITY(msg,seq,compute->priority());
2938  }
2939 
2940  msg->compute = compute; // pointer is valid since send is to local Pe
2941  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
2942 
2943 #ifdef NAMD_MIC
2944  wdProxy[CkMyPe()].finishMIC(msg);
2945 #else
2947 #endif
2948 }
2949 
// Run of WorkDistrib::enqueue* entry-method bodies (enqueueExcls, enqueueBonds,
// ..., enqueueCUDA*, enqueueMIC per the dispatch in messageEnqueueWork above).
// Each handler runs the compute's work and then verifies the message was left
// in a recyclable state.  NOTE(review): the doxygen listing dropped every
// signature line in this run (orig. 2950-2951, 2956-2957, ... 3082-3083) and
// most of the "msg->compute->doWork();" lines (visible only at orig. 3006 and
// 3020) because they were hyperlinks; verify each body against the repo.
2952  if ( msg->compute->localWorkMsg != msg )
2953  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2954 }
2955 
2958  if ( msg->compute->localWorkMsg != msg )
2959  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2960 }
2961 
2964  if ( msg->compute->localWorkMsg != msg )
2965  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2966 }
2967 
2970  if ( msg->compute->localWorkMsg != msg )
2971  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2972 }
2973 
2976  if ( msg->compute->localWorkMsg != msg )
2977  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2978 }
2979 
2982  if ( msg->compute->localWorkMsg != msg )
2983  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2984 }
2985 
2988  if ( msg->compute->localWorkMsg != msg )
2989  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2990 }
2991 
2994  if ( msg->compute->localWorkMsg != msg )
2995  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
2996 }
2997 
3000  if ( msg->compute->localWorkMsg != msg )
3001  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3002 }
3003 
3004 // JLai
3006  msg->compute->doWork();
3008  if ( msg->compute->localWorkMsg != msg )
3009  NAMD_bug("\nWorkDistrib LocalWorkMsg recycling failed! Check enqueueGromacsPair from WorkDistrib.C\n");
3010 }
3011 // End of JLai
3012 
3015  if ( msg->compute->localWorkMsg != msg )
3016  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3017 }
3018 
3020  msg->compute->doWork();
3021  if ( msg->compute->localWorkMsg != msg )
3022  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3023 }
3026  if ( msg->compute->localWorkMsg != msg )
3027  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3028 }
3031  if ( msg->compute->localWorkMsg != msg )
3032  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3033 }
3036  if ( msg->compute->localWorkMsg != msg )
3037  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3038 }
3039 
3042  if ( msg->compute->localWorkMsg != msg )
3043  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3044 }
3047  if ( msg->compute->localWorkMsg != msg )
3048  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3049 }
3052  if ( msg->compute->localWorkMsg != msg )
3053  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3054 }
3055 
3058  if ( msg->compute->localWorkMsg != msg )
3059  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3060 }
3063  if ( msg->compute->localWorkMsg != msg )
3064  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3065 }
3068  if ( msg->compute->localWorkMsg != msg )
3069  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3070 }
3071 
3074  if ( msg->compute->localWorkMsg != msg )
3075  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3076 }
3079  if ( msg->compute->localWorkMsg != msg )
3080  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3081 }
3084  if ( msg->compute->localWorkMsg != msg )
3085  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3086 }
3088 
3089 
3092  if ( msg->compute->localWorkMsg != msg )
3093  NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3094 }
3095 
3098  // ComputeNonbondedCUDA *c = msg->compute;
3099  // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
3100  // NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3101 }
3104 }
3107 }
3108 
3110  msg->compute->finishPatch(msg->data);
3111 }
3112 
3115  // ComputeNonbondedCUDA *c = msg->compute;
3116  // if ( c->localWorkMsg != msg && c->localWorkMsg2 != msg )
3117  // NAMD_bug("WorkDistrib LocalWorkMsg recycling failed!");
3118 }
3121 }
3124 }
3125 
3128 }
3131 }
3132 
3133 
3134 //**********************************************************************
3135 //
3136 // FUNCTION velocities_from_PDB
3137 //
3138 // INPUTS:
3139 // v - Array of vectors to populate
3140 // filename - name of the PDB filename to read in
3141 //
3142 // This function reads in a set of initial velocities from a
3143 // PDB file. It places the velocities into the array of Vectors
3144 // passed to it.
3145 //
3146 //***********************************************************************/
3147 
3148 void WorkDistrib::velocities_from_PDB(const char *filename,
3149  Vector *v, int totalAtoms)
3150 {
3151  PDB *v_pdb; // PDB info from velocity PDB
3152  int i;
3153 
3154  // Read the PDB
3155  v_pdb = new PDB(filename);
3156  if ( v_pdb == NULL )
3157  {
3158  NAMD_die("memory allocation failed in Node::velocities_from_PDB");
3159  }
3160 
3161  // Make sure the number of velocities read in matches
3162  // the number of atoms we have
3163  if (v_pdb->num_atoms() != totalAtoms)
3164  {
3165  char err_msg[129];
3166 
3167  sprintf(err_msg, "FOUND %d COORDINATES IN VELOCITY PDB!!",
3168  v_pdb->num_atoms());
3169 
3170  NAMD_die(err_msg);
3171  }
3172 
3173  // Get the entire list of atom info and loop through
3174  // them assigning the velocity vector for each one
3175  v_pdb->get_all_positions(v);
3176 
3177  for (i=0; i<totalAtoms; i++)
3178  {
3179  v[i].x *= PDBVELINVFACTOR;
3180  v[i].y *= PDBVELINVFACTOR;
3181  v[i].z *= PDBVELINVFACTOR;
3182  }
3183 
3184  delete v_pdb;
3185 }
3186 // END OF FUNCTION velocities_from_PDB
3187 
3188 //**********************************************************************
3189 //
3190 // FUNCTION velocities_from_binfile
3191 //
3192 // INPUTS:
3193 // fname - File name to write velocities to
3194 // n - Number of atoms in system
3195 // vels - Array of velocity vectors
3196 //
3197 // This function writes out the velocities in binary format. This is
3198 // done to preserve accuracy between restarts of namd.
3199 //
3200 //**********************************************************************
3201 
// Reads n velocity Vectors in NAMD's binary restart format from file
// fname into vels.  Binary I/O is used to preserve full floating-point
// accuracy between restarts (unlike the text PDB path above).
// Error handling is delegated to read_binary_file (NamdOneTools.C).
void WorkDistrib::velocities_from_binfile(const char *fname, Vector *vels, int n)
{
  read_binary_file(fname,vels,n);
}
3206 // END OF FUNCTION velocities_from_binfile
3207 
3208 //**********************************************************************
3209 //
3210 // FUNCTION random_velocities
3211 //
3212 // INPUTS:
3213 // v - array of vectors to populate
3214 // Temp - Temperature to acheive
3215 //
3216 // This function assigns a random velocity distribution to a
3217 // simulation to achieve a desired initial temperature. The method
3218 // used here was stolen from the program X-PLOR.
3219 //
3220 //**********************************************************************
3221 
// Assigns a random (Maxwell-Boltzmann) velocity distribution to all
// atoms so that the system starts at temperature Temp.  Each velocity
// component is a Gaussian deviate of mean 0 and std dev sqrt(kB*T/m),
// generated by summing 12 uniform deviates (Abramowitz & Stegun method,
// adapted from X-PLOR).  Seeded from simParams->randomSeed, so the
// stream — and therefore the exact per-atom call order below — is
// reproducible; do not reorder the uniform() calls.
void WorkDistrib::random_velocities(BigReal Temp,Molecule *structure,
				    Vector *v, int totalAtoms)
{
  int i, j;			// Loop counter
  BigReal kbT;			// Boltzman constant * Temp
  BigReal randnum;		// Random number from -6.0 to 6.0
  BigReal kbToverM;		// sqrt(Kb*Temp/Mass)
  SimParameters *simParams = Node::Object()->simParameters;
  Bool lesOn = simParams->lesOn;
  Random vel_random(simParams->randomSeed);

  // Locally-enhanced-sampling atoms may use a reduced effective
  // temperature (scaled by 1/lesFactor) when lesReduceTemp is set.
  int lesReduceTemp = lesOn && simParams->lesReduceTemp;
  BigReal tempFactor = lesReduceTemp ? 1.0 / simParams->lesFactor : 1.0;

  kbT = Temp*BOLTZMANN;

  // Loop through all the atoms and assign velocities in
  // the x, y and z directions for each one
  for (i=0; i<totalAtoms; i++)
  {
    if (structure->atommass(i) <= 0.) {
      // Massless particles (e.g. lone pairs) get zero velocity
      kbToverM = 0.;
    } else {
      kbToverM = sqrt(kbT *
		      ( lesOn && structure->get_fep_type(i) ? tempFactor : 1.0 ) /
		      structure->atommass(i) );
    }

    // The following comment was stolen from X-PLOR where
    // the following section of code was adapted from.

    // This section generates a Gaussian random
    // deviate of 0.0 mean and standard deviation RFD for
    // each of the three spatial dimensions.
    // The algorithm is a "sum of uniform deviates algorithm"
    // which may be found in Abramowitz and Stegun,
    // "Handbook of Mathematical Functions", pg 952.
    for (randnum=0.0, j=0; j<12; j++)
    {
      randnum += vel_random.uniform();
    }

    randnum -= 6.0;

    v[i].x = randnum*kbToverM;

    for (randnum=0.0, j=0; j<12; j++)
    {
      randnum += vel_random.uniform();
    }

    randnum -= 6.0;

    v[i].y = randnum*kbToverM;

    for (randnum=0.0, j=0; j<12; j++)
    {
      randnum += vel_random.uniform();
    }

    randnum -= 6.0;

    v[i].z = randnum*kbToverM;
  }

  // Drude oscillator particles move rigidly with their parent atom, so
  // copy the parent's velocity onto each Drude particle.
  if ( simParams->drudeOn ) for (i=0; i<totalAtoms; i++) {
    if ( structure->is_drude(i) ) {
      v[i] = v[structure->get_mother_atom(i)]; // zero is good enough
    }
  }
}
3293 /* END OF FUNCTION random_velocities */
3294 
3295 //**********************************************************************
3296 //
3297 // FUNCTION remove_com_motion
3298 //
3299 // INPUTS:
3300 // vel - Array of initial velocity vectors
3301 //
3302 // This function removes the center of mass motion from a molecule.
3303 //
3304 //**********************************************************************
3305 
3306 void WorkDistrib::remove_com_motion(Vector *vel, Molecule *structure, int n)
3307 {
3308  Vector mv(0,0,0); // Sum of (mv)_i
3309  BigReal totalMass=0; // Total mass of system
3310  int i; // Loop counter
3311 
3312  // Loop through and compute the net momentum
3313  for (i=0; i<n; i++)
3314  {
3315  BigReal mass = structure->atommass(i);
3316  mv += mass * vel[i];
3317  totalMass += mass;
3318  }
3319 
3320  mv /= totalMass;
3321 
3322  iout << iINFO << "REMOVING COM VELOCITY "
3323  << ( PDBVELFACTOR * mv ) << "\n" << endi;
3324 
3325  for (i=0; i<n; i++) { vel[i] -= mv; }
3326 
3327 }
3328 /* END OF FUNCTION remove_com_motion */
3329 
3330 #if USE_TOPOMAP
3331 
3332 //Specifically designed for BGL and other 3d Tori architectures
3333 //Partition Torus and Patch grid together using recursive bisection.
// Partition the 3D torus processor grid and the patch grid together
// using recursive bisection (designed for BG/L and similar 3D-torus
// machines).  Returns the result of RecBisection::partitionProcGrid.
int WorkDistrib::assignPatchesTopoGridRecBisection() {

  PatchMap *patchMap = PatchMap::Object();
  int *assignedNode = new int[patchMap->numPatches()];
  int numNodes = Node::Object()->numNodes();
  SimParameters *simParams = Node::Object()->simParameters;
  if(simParams->simulateInitialMapping) {
    // Pretend to run on a different machine size for mapping experiments
    numNodes = simParams->simulatedPEs;
  }

  int usedNodes = numNodes;
  CkPrintf("assignPatchesTopoGridRecBisection\n");
  if ( simParams->noPatchesOnZero && numNodes > 1 ) {
    usedNodes -= 1;
    if ( simParams->noPatchesOnOne && numNodes > 2 )
      usedNodes -= 1;
  }
  // NOTE(review): usedNodes is computed above but never passed to the
  // partitioner, so the noPatchesOnZero/noPatchesOnOne adjustment has
  // no effect in this path — confirm whether that is intentional.
  RecBisection recBisec(patchMap->numPatches(), PatchMap::Object());

  int xsize = 0, ysize = 0, zsize = 0;

  // Right now assumes a T*** (e.g. TXYZ) mapping
  TopoManager tmgr;
  xsize = tmgr.getDimNX();
  ysize = tmgr.getDimNY();
  zsize = tmgr.getDimNZ();

  //Fix to not assign patches to processor 0
  int rc = recBisec.partitionProcGrid(xsize, ysize, zsize, assignedNode);

  delete [] assignedNode;

  return rc;
}
3368 #endif
3369 
3370 
3371 #if defined(NAMD_MIC)
3372  extern void mic_hostDeviceLDB();
3373  extern void mic_contributeHostDeviceLDB(int idLen, int * id);
3374  extern void mic_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2);
3375 #endif
3376 
3378  #if defined(NAMD_MIC)
3379  CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
3380  wdProxy.initHostDeviceLDB();
3381  #endif
3382 }
3383 
3385  #if defined(NAMD_MIC)
3386  mic_hostDeviceLDB();
3387  #endif
3388 }
3389 
// Forward this PE's host/device load-balancing contribution (a set of
// peSetLen PE ids) to the WorkDistrib chare on PE 0.  Compiles to a
// no-op unless built with NAMD_MIC.
void WorkDistrib::send_contributeHostDeviceLDB(int peSetLen, int * peSet) {
  #if defined(NAMD_MIC)
    CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
    wdProxy[0].contributeHostDeviceLDB(peSetLen, peSet);
  #endif
}
3396 
// Entry method (runs on PE 0): hand the contributed PE set to the MIC
// runtime's load balancer.  Compiles to a no-op unless built with
// NAMD_MIC.
void WorkDistrib::contributeHostDeviceLDB(int peSetLen, int * peSet) {
  #if defined(NAMD_MIC)
    mic_contributeHostDeviceLDB(peSetLen, peSet);
  #endif
}
3402 
// Broadcast MIC device load-balancer tuning parameters to the
// WorkDistrib group on all PEs.  Compiles to a no-op unless built with
// NAMD_MIC.
void WorkDistrib::send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
  #if defined(NAMD_MIC)
    CProxy_WorkDistrib wdProxy(CkpvAccess(BOCclass_group).workDistrib);
    wdProxy.setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
  #endif
}
3409 
// Entry method: apply MIC device load-balancer tuning parameters on
// this PE.  Compiles to a no-op unless built with NAMD_MIC.
void WorkDistrib::setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2) {
  #if defined(NAMD_MIC)
    mic_setDeviceLDBParams(dt, hs, sp1, pp1, pp2);
  #endif
}
3415 
3416 
3417 #include "WorkDistrib.def.h"
3418 
static Node * Object()
Definition: Node.h:86
#define LDBAL_HYBRID
Definition: SimParameters.h:63
int npatches
Definition: WorkDistrib.C:1486
static int offset_b(int i)
Definition: Lattice.h:248
unsigned char partition
Definition: NamdTypes.h:56
void setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3410
BlockLoad::TempStorage load
void enqueueMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3126
Real langevinParam
Definition: NamdTypes.h:110
Real langevin_param(int atomnum) const
Definition: Molecule.h:1296
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:107
static void sortPmePes(int *pmepes, int xdim, int ydim)
Definition: WorkDistrib.C:300
Bool simulateInitialMapping
static void messageFinishMIC(Compute *)
Definition: WorkDistrib.C:2929
int isSendSpanningTreeUnset()
patch_sortop_curve_b(PatchMap *m)
Definition: WorkDistrib.C:1932
void enqueueAngles(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2968
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:2896
void end(void)
Definition: MStream.C:176
Definition: PDB.h:35
int sequence(void)
Definition: Compute.h:64
PatchID assignToPatch(Position p, const Lattice &l)
Definition: PatchMap.inl:14
void setNewNumPartitions(ComputeID cid, char numPartitions)
Definition: ComputeMap.h:144
void setRecvSpanning()
Definition: ProxyMgr.C:371
static int offset_c(int i)
Definition: Lattice.h:249
Vector a_r() const
Definition: Lattice.h:268
static void recursive_bisect_with_curve(int *patch_begin, int *patch_end, int *node_begin, int *node_end, double *patchLoads, double *sortedLoads, int *assignedNode, TopoManagerWrapper &tmgr)
Definition: WorkDistrib.C:1972
int numComputes(void)
Definition: ComputeMap.h:101
void saveComputeMap(const char *fname)
Definition: ComputeMap.C:262
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
unsigned int hydrogenGroupSize
Definition: NamdTypes.h:58
#define BOLTZMANN
Definition: common.h:45
Definition: Node.h:78
static int * peCompactOrdering
Definition: WorkDistrib.h:117
int ComputeID
Definition: NamdTypes.h:183
unsigned int atomFixed
Definition: NamdTypes.h:96
void finishCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3122
void initPtrs()
Definition: ComputeMap.C:82
void enqueueCrossterms(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2998
Position fixedPosition
Definition: NamdTypes.h:102
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:41
int isRecvSpanningTreeUnset()
char computeMapFilename[128]
void enqueuePme(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3013
int gridsize_c(void) const
Definition: PatchMap.h:66
static PatchMap * Object()
Definition: PatchMap.h:27
int operator==(const nodesort &o) const
Definition: WorkDistrib.C:1488
void enqueueWorkA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3066
ComputeType
Definition: ComputeMap.h:20
void enqueueWork(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2950
BigReal max_c(int pid) const
Definition: PatchMap.h:96
int getNumFixedAtoms()
Definition: Patch.h:112
void enqueueGromacsPair(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3005
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
void enqueueSelfA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3024
void finishCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3119
Definition: Vector.h:64
#define DrudeAtom
Definition: structures.h:23
BigReal min_a(int pid) const
Definition: PatchMap.h:91
static void send_contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3390
#define HydrogenAtom
Definition: structures.h:16
SimParameters * simParameters
Definition: Node.h:178
Vector c_r() const
Definition: Lattice.h:270
void loadComputeMap(const char *fname)
Definition: ComputeMap.C:278
int index_a(int pid) const
Definition: PatchMap.h:86
static __thread atom * atoms
void createHomePatch(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:74
Atom * getAtoms() const
Definition: Molecule.h:490
void setSendSpanning()
Definition: ProxyMgr.C:362
void sendAtoms(PatchID pid, FullAtomList &a)
Definition: PatchMgr.C:157
TopoManager tmgr
Definition: WorkDistrib.C:1621
float Real
Definition: common.h:107
void enqueueExcls(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2956
#define DebugM(x, y)
Definition: Debug.h:59
void enqueueBonds(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2962
#define ALLBUTME
Definition: Communicate.h:14
BigReal z
Definition: Vector.h:66
void enqueueAniso(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2992
int packSize(void)
Definition: PatchMap.C:314
void enqueueSelfB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3040
#define FALSE
Definition: common.h:116
Real rigid_bond_length(int atomnum) const
Definition: Molecule.h:1453
Position position
Definition: NamdTypes.h:53
void enqueueWorkB1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3072
static void messageEnqueueWork(Compute *)
Definition: WorkDistrib.C:2727
static void peOrderingReady()
Definition: WorkDistrib.C:166
std::ostream & iWARN(std::ostream &s)
Definition: InfoStream.C:108
bool operator()(int pe1, int pe2) const
Definition: WorkDistrib.C:1844
MIStream * get(char &data)
Definition: MStream.h:29
#define iout
Definition: InfoStream.h:87
int operator<(const nodesort &o) const
Definition: WorkDistrib.C:1497
int sizeGrid(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:62
Velocity velocity
Definition: NamdTypes.h:101
ComputeID storeCompute(int node, int maxPids, ComputeType type, int partition=-1, int numPartitions=0)
Definition: ComputeMap.C:153
int num_atoms(void)
Definition: PDB.C:323
int numaway_b(void) const
Definition: PatchMap.h:69
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
void enqueueSelfA3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3034
Vector origin() const
Definition: Lattice.h:262
Position nearest(Position data, ScaledPosition ref) const
Definition: Lattice.h:90
unsigned int groupFixed
Definition: NamdTypes.h:97
Bool pairInteractionOn
int basenode(int pid) const
Definition: PatchMap.h:117
bool operator()(int a, int b) const
Definition: WorkDistrib.C:263
void movePatch(PatchID, NodeID)
Definition: PatchMgr.C:84
Vector b_r() const
Definition: Lattice.h:269
int getGBISPhase(void)
Definition: Compute.h:66
LocalWorkMsg *const localWorkMsg
Definition: Compute.h:46
BigReal min_b(int pid) const
Definition: PatchMap.h:93
void patchMapInit(void)
Definition: WorkDistrib.C:1105
void recvComputeMapChanges(ComputeMapChangeMsg *)
Definition: WorkDistrib.C:370
int allocateCids()
Definition: ComputeMap.C:143
int atomsInGroup
Definition: Hydrogen.h:19
#define MACHINE_PROGRESS
Compute * compute
Definition: WorkDistrib.h:33
char * patchMapData
Definition: WorkDistrib.C:975
char newNumPartitions(ComputeID cid)
Definition: ComputeMap.h:141
void unpack(char *buf)
Definition: PatchMap.C:365
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:1933
Charge charge
Definition: NamdTypes.h:54
virtual void doWork()
Definition: Compute.C:108
void reorder(Elem *a, int n)
Definition: Random.h:220
HydrogenGroup hydrogenGroup
Definition: Molecule.h:639
void enqueueCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3096
void sendComputeMap(void)
Definition: WorkDistrib.C:1078
void enqueueWorkB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3077
void read_binary_file(const char *fname, Vector *data, int n)
Definition: NamdOneTools.C:52
int numNodes()
Definition: Node.h:189
void enqueueCUDAP2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3102
void assignBaseNode(PatchID, NodeID)
Definition: PatchMap.C:472
static void recursive_bisect_coord(int x_begin, int x_end, int y_begin, int y_end, int *pe_begin, ScaledPosition *coord, int *result, int ydim)
Definition: WorkDistrib.C:268
void newCid(int pid, int cid)
Definition: PatchMap.C:512
void enqueueSelfB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3050
int coord(int pe, int dim)
Definition: WorkDistrib.C:1835
#define order
Definition: PmeRealSpace.C:235
Definition: Random.h:37
void enqueueWorkC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3090
pe_sortop_bit_reversed(int *r)
Definition: WorkDistrib.C:140
int gridsize_a(void) const
Definition: PatchMap.h:64
int compare_bit_reversed(int a, int b)
Definition: ComputePme.C:318
Bool is_drude(int)
void reinitAtoms(const char *basename=0)
Definition: WorkDistrib.C:952
void enqueueThole(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2986
void enqueueWorkA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3061
void createHomePatches(void)
Definition: WorkDistrib.C:889
Compute * compute
Definition: WorkDistrib.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:123
static int offset_a(int i)
Definition: Lattice.h:247
void enqueueImpropers(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2980
static int eventMachineProgress
Definition: WorkDistrib.C:96
AtomID atomID
Definition: Hydrogen.h:16
BigReal langevinDamping
void enqueueLCPO(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3019
void sendMovePatches()
Definition: PatchMgr.C:110
void pack(MOStream *msg)
Definition: ComputeMap.C:63
int oneOrTwoAwayNeighbors(int pid, PatchID *neighbor_ids, PatchID *downstream_ids=0, int *transform_ids=0)
Definition: PatchMap.C:579
Bool staticAtomAssignment
pe_sortop_coord_y(ScaledPosition *s)
Definition: WorkDistrib.C:262
int Bool
Definition: common.h:131
Bool replicaUniformPatchGrids
#define COMPUTEMAPTAG
Definition: common.h:157
int index_b(int pid) const
Definition: PatchMap.h:87
bool operator()(int a, int b) const
Definition: WorkDistrib.C:141
int priority(void)
Definition: Compute.h:65
void finishCUDA(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3113
~WorkDistrib(void)
Definition: WorkDistrib.C:117
BigReal x
Definition: Vector.h:66
int numAtoms
Definition: Molecule.h:556
int PatchID
Definition: NamdTypes.h:182
void setNewNode(ComputeID cid, NodeID node)
Definition: ComputeMap.h:120
virtual void finishPatch(int)
Definition: Compute.C:112
void NAMD_die(const char *err_msg)
Definition: common.C:83
PDB * pdb
Definition: Node.h:180
void enqueueCUDAP3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3105
static int * peDiffuseOrderingIndex
Definition: WorkDistrib.h:116
Bool outputPatchDetails
ConfigList * configList
Definition: Node.h:179
void enqueueWorkA1(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3056
#define BUFSIZE
Definition: Communicate.h:15
Bool pressureProfileEwaldOn
std::vector< std::string > split(const std::string &text, std::string delimiter)
Definition: MoleculeQM.C:73
static int * peDiffuseOrdering
Definition: WorkDistrib.h:115
void makePatches(ScaledPosition xmin, ScaledPosition xmax, const Lattice &lattice, BigReal patchSize, double maxNumPatches, int staticAtomAssignment, int replicaUniformPatchGrids, int lcpo, int asplit, int bsplit, int csplit)
Definition: PatchMap.C:171
unsigned char get_fep_type(int anum) const
Definition: Molecule.h:1345
BigReal max_b(int pid) const
Definition: PatchMap.h:94
int index_c(int pid) const
Definition: PatchMap.h:88
static int peOrderingInit
Definition: WorkDistrib.h:114
void sendPatchMap(void)
Definition: WorkDistrib.C:978
void find_extremes(const Lattice &, BigReal frac=1.0)
Definition: PDB.C:438
void saveComputeMapChanges(int, CkGroupID)
Definition: WorkDistrib.C:352
int add(const Elem &elem)
Definition: ResizeArray.h:97
int migrationGroupSize
Definition: NamdTypes.h:117
int32 status
Definition: NamdTypes.h:115
void finishCUDAPatch(FinishWorkMsg *msg)
Definition: WorkDistrib.C:3109
BigReal max_a(int pid) const
Definition: PatchMap.h:92
void savePatchMap(PatchMapMsg *msg)
Definition: WorkDistrib.C:1014
static int * peCompactOrderingIndex
Definition: WorkDistrib.h:118
ScaledPosition center(int pid) const
Definition: PatchMap.h:99
BlockRadixSort::TempStorage sort
void topo_getargs(char **)
Definition: WorkDistrib.C:90
static void buildNodeAwarePeOrdering(void)
Definition: WorkDistrib.C:176
patch_sortop_curve_a(PatchMap *m)
Definition: WorkDistrib.C:1911
int myid()
Definition: Node.h:188
unsigned int randomSeed
BigReal initialTemp
Real rigidBondLength
Definition: NamdTypes.h:118
long long int64
Definition: common.h:34
int pressureProfileAtomTypes
#define simParams
Definition: Output.C:127
short vdwType
Definition: NamdTypes.h:55
int atomsInMigrationGroup
Definition: Hydrogen.h:29
static int randtopo
Definition: WorkDistrib.C:84
void newPid(ComputeID cid, int pid, int trans=13)
Definition: ComputeMap.C:198
static void send_setDeviceLDBParams(int dt, int hs, int sp1, int pp1, int pp2)
Definition: WorkDistrib.C:3403
Index atomvdwtype(int anum) const
Definition: Molecule.h:1058
ScaledPosition * spos
Definition: WorkDistrib.C:253
int numPatches(void) const
Definition: PatchMap.h:59
int node(int pid) const
Definition: PatchMap.h:114
Position apply_transform(Position data, const Transform &t) const
Definition: Lattice.h:132
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:1954
void mapComputes(void)
Definition: WorkDistrib.C:2269
char * data
Definition: ConfigList.h:48
void enqueueSelfA2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3029
static ComputeMap * Object()
Definition: ComputeMap.h:89
static void build_ordering(void *)
Definition: WorkDistrib.C:86
BigReal y
Definition: Vector.h:66
int getNumAtoms()
Definition: Patch.h:105
Real atomcharge(int anum) const
Definition: Molecule.h:1048
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
Mass mass
Definition: NamdTypes.h:108
void distributeHomePatches(void)
Definition: WorkDistrib.C:930
StringList * find(const char *name) const
Definition: ConfigList.C:341
void assignNode(PatchID, NodeID)
Definition: PatchMap.C:465
bool operator()(int a, int b) const
Definition: WorkDistrib.C:255
bool less_than_bit_reversed(int a, int b)
Definition: ComputePme.C:327
patch_sortop_curve_c(PatchMap *m)
Definition: WorkDistrib.C:1953
void mic_initialize()
#define PDBVELFACTOR
Definition: common.h:48
Bool pairInteractionSelf
int type()
Definition: Compute.h:48
void enqueueSelfB2(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3045
BigReal patchDimension
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
gridSize y
void initHostDeviceLDB()
Definition: WorkDistrib.C:3384
MOStream * put(char data)
Definition: MStream.h:112
Real atommass(int anum) const
Definition: Molecule.h:1038
static void send_initHostDeviceLDB()
Definition: WorkDistrib.C:3377
int numaway_c(void) const
Definition: PatchMap.h:70
FullAtomList * createAtomLists(const char *basename=0)
Definition: WorkDistrib.C:618
#define LonepairAtom
Definition: structures.h:22
int size(void) const
Definition: ResizeArray.h:127
unsigned int nonbondedGroupSize
Definition: NamdTypes.h:57
BigReal min_c(int pid) const
Definition: PatchMap.h:95
infostream & endi(infostream &s)
Definition: InfoStream.C:38
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
pe_sortop_coord_x(ScaledPosition *s)
Definition: WorkDistrib.C:254
void enqueueDihedrals(LocalWorkMsg *msg)
Definition: WorkDistrib.C:2974
Bool is_atom_fixed(int atomnum) const
Definition: Molecule.h:1407
void cuda_initialize()
Definition: DeviceCUDA.C:20
int b_p() const
Definition: Lattice.h:274
void finishMIC(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3129
gridSize x
void contributeHostDeviceLDB(int peSetLen, int *peSet)
Definition: WorkDistrib.C:3397
void pack(char *buf, int size)
Definition: PatchMap.C:328
#define RIGID_NONE
Definition: SimParameters.h:77
int isOutputProcessor(int pe)
void doneSaveComputeMap(CkReductionMsg *)
Definition: WorkDistrib.C:423
void unpack(MIStream *msg)
Definition: ComputeMap.C:70
double recipMass
Definition: NamdTypes.h:103
int numaway_a(void) const
Definition: PatchMap.h:68
void get_all_positions(Vector *)
Definition: PDB.C:366
int a_p() const
Definition: Lattice.h:273
pe_sortop_topo(TopoManagerWrapper &t, int *d)
Definition: WorkDistrib.C:1843
Molecule * molecule
Definition: Node.h:176
bool operator()(int p1, int p2) const
Definition: WorkDistrib.C:1912
void coords(int pe, int *crds)
Definition: WorkDistrib.C:1821
void enqueueWorkB3(LocalWorkMsg *msg)
Definition: WorkDistrib.C:3082
int gridsize_b(void) const
Definition: PatchMap.h:65
#define TRUE
Definition: common.h:117
int get_mother_atom(int)
const ComputeID cid
Definition: Compute.h:43
Bool noPatchesOnOutputPEs
int fixpe(int pe)
Definition: WorkDistrib.C:1625
int * sortAndSplit(int *node_begin, int *node_end, int splitdim)
Definition: WorkDistrib.C:1856
ScaledPosition * spos
Definition: WorkDistrib.C:261
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:102
void sortAtomsForPatches(int *order, int *breaks, const FullAtom *atoms, int nmgrps, int natoms, int ni, int nj, int nk)
Definition: SortAtoms.C:131
double BigReal
Definition: common.h:112
Transform transform
Definition: NamdTypes.h:116
int c_p() const
Definition: Lattice.h:275
#define PDBVELINVFACTOR
Definition: common.h:49
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1319
int getPatchesInOctet(int pid, PatchID *pids, int *transform_ids=0)
Definition: PatchMap.C:634
NodeID newNode(ComputeID cid)
Definition: ComputeMap.h:116
iterator begin(void)
Definition: ResizeArray.h:36