NAMD
Node.C
Go to the documentation of this file.
1 
7 /*
8  Toplevel routines for initializing a Node for a simulation
9  one Node per Pe (processor element).
10 */
11 
12 #if !defined(WIN32) || defined(__CYGWIN__)
13 #include <unistd.h>
14 #endif
15 #include "InfoStream.h"
16 #include "Node.decl.h"
17 #include "Node.h"
18 #ifdef DPMTA
19 #include <pvm3.h>
20 #endif
21 
22 #include "ProcessorPrivate.h"
23 
24 #define MIN_DEBUG_LEVEL 3
25 //#define DEBUGM
26 #include "Debug.h"
27 
28 #include <stdio.h>
29 #include <converse.h>
30 #include "memusage.h"
31 #include "IMDOutput.h"
32 #include "Lattice.h"
33 #include "ComputeMsmMsa.h" // needed for MsmMsaData definition
34 #include "ComputeMsm.h" // needed for MsmInitMsg definition
35 #include "main.decl.h"
36 #include "main.h"
37 #include "WorkDistrib.h"
38 #include "PatchMgr.h"
39 #include "Patch.h"
40 #include "Compute.h"
41 #include "ComputeMap.h"
42 #include "ComputeMgr.h"
43 #include "Molecule.h"
44 #include "HomePatchList.h"
45 #include "AtomMap.h"
46 #include "Sequencer.h"
47 #include "Controller.h"
48 #include "NamdState.h"
49 #include "Output.h"
50 #include "ProxyMgr.h"
51 #include "PatchMap.h"
52 #include "PatchMap.inl"
53 #include "Parameters.h"
54 #include "SimParameters.h"
55 #include "Communicate.h"
56 #include "LdbCoordinator.h"
57 #include "ScriptTcl.h"
58 #include "ComputeMgr.decl.h"
59 #include "ComputePmeMgr.decl.h"
60 // #ifdef NAMD_CUDA
61 #include "ComputeCUDAMgr.decl.h"
62 #include "ComputeCUDAMgr.h"
63 #include "ComputePmeCUDAMgr.decl.h"
64 #include "ComputePmeCUDAMgr.h"
65 // #endif
66 #include "ComputeGridForceMgr.decl.h"
67 #include "Sync.h"
68 #include "BackEnd.h"
69 #include "PDB.h"
70 #include "packmsg.h"
71 #include "CollectionMgr.decl.h"
72 #include "ParallelIOMgr.decl.h"
73 #include "Vector.h"
74 // BEGIN LA
75 #include "Random.h"
76 // END LA
77 
78 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
79 extern "C" void CApplicationInit();
80 #endif
81 
82 #include "DumpBench.h"
83 
84 class CheckpointMsg : public CMessage_CheckpointMsg {
85 public:
86  int task;
87  int replica;
89  char *key;
90 };
91 
92 extern "C" {
93  void recvCheckpointCReq_handler(envelope*);
94  void recvCheckpointCAck_handler(envelope*);
95 }
96 
97 #ifdef CMK_BALANCED_INJECTION_API
98 #include "ckBIconfig.h"
99 #endif
100 
101 #include "CollectionMgr.h"
102 #include "CollectionMaster.h"
103 #include "CollectionMgr.decl.h"
104 #include "CollectionMaster.decl.h"
105 
106 #if USE_HPM
107 extern "C" void HPM_Init(int);
108 extern "C" void HPM_Start(char *label, int);
109 extern "C" void HPM_Stop(char *label, int);
110 extern "C" void HPM_Print(int, int);
111 #endif
112 
113 #if defined(NAMD_MIC)
114  extern void mic_dumpHostDeviceComputeMap();
115  extern void mic_initHostDeviceLDB();
116 #endif
117 
118 #ifdef MEASURE_NAMD_WITH_PAPI
119 #include "papi.h"
120 #if CMK_SMP
121 #include <pthread.h>
122 #endif
123 #define NUM_PAPI_EVENTS 6
124 CkpvDeclare(int *, papiEvents);
125 
126 #define MEASURE_PAPI_SPP 1
127 #define MEASURE_PAPI_CACHE 0
128 #define MEASURE_PAPI_FLOPS 0
129 
130 static void namdInitPapiCounters(){
131  if(CkMyRank()==0){
132  //only initialize per OS process (i.e. a charm node)
133  int retval = PAPI_library_init(PAPI_VER_CURRENT);
134  if(retval != PAPI_VER_CURRENT) {
135  if(CkMyPe()==0){
136  NAMD_die("PAPI library is not compatitible!");
137  }
138  }
139  #if CMK_SMP
140  //now only consider systems that are compatible with POSIX
141  if(PAPI_thread_init(pthread_self)!=PAPI_OK) {
142  if(CkMyPe()==0){
143  NAMD_die("Multi-thread mode in PAPI could not be initialized!");
144  }
145  }
146  #endif
147  }
148  CkpvInitialize(int *, papiEvents);
149  CkpvAccess(papiEvents) = new int[NUM_PAPI_EVENTS+1];
150 
151 #if MEASURE_PAPI_CACHE
152  if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
153  CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
154  }else{
155  if(CkMyPe()==0){
156  CkPrintf("WARNING: PAPI_L1_DCM doesn't exsit on this platform!\n");
157  }
158  //if not default to PAPI_TOT_INS
159  CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
160  }
161 
162  if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
163  CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
164  }else{
165  //if not default to PAPI_TOT_CYC
166  CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
167  }
168 #elif MEASURE_PAPI_FLOPS
169  if(PAPI_query_event(PAPI_FP_INS)==PAPI_OK) {
170  CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
171  }else{
172  if(CkMyPe()==0){
173  CkPrintf("WARNING: PAPI_FP_INS doesn't exsit on this platform!\n");
174  }
175  //if not default to PAPI_TOT_INS
176  CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
177  }
178 
179  if(PAPI_query_event(PAPI_FMA_INS)==PAPI_OK) {
180  CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
181  }else{
182  //if not default to PAPI_TOT_CYC
183  CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
184  }
185 #elif MEASURE_PAPI_SPP
186 /* for SPP we record these
187 1) PAPI_FP_OPS
188 2) PAPI_TOT_INS
189 3) perf::PERF_COUNT_HW_CACHE_LL:MISS
190 4) DATA_PREFETCHER:ALL
191 5) PAPI_L1_DCA
192 6) INSTRUCTION_FETCH_STALL
193 7) PAPI_TOT_CYC, and
194 8) real (wall) time
195 */
196  int papiEventSet = PAPI_NULL;
197  if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
198  CmiAbort("PAPI failed to create event set!\n");
199  }
200 
201  if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
202  CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
203  }else{
204  if(CkMyPe()==0){
205  CkAbort("WARNING: PAPI_FP_OPS doesn't exist on this platform!");
206  }
207  }
208  if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
209  CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
210  }else{
211  if(CkMyPe()==0){
212  CkAbort("WARNING: PAPI_TOT_INS doesn't exist on this platform!");
213  }
214  }
215  int EventCode;
216  int ret;
217  ret=PAPI_event_name_to_code("perf::PERF_COUNT_HW_CACHE_LL:MISS",&EventCode);
218  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
219  CkpvAccess(papiEvents)[2] = EventCode;
220  }else{
221  if(CkMyPe()==0){
222  CkAbort("WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
223  }
224  }
225  ret=PAPI_event_name_to_code("DATA_PREFETCHER:ALL",&EventCode);
226  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
227  CkpvAccess(papiEvents)[3] = EventCode;
228  }else{
229  if(CkMyPe()==0){
230  CkAbort("WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
231  }
232  }
233  if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
234  CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
235  }else{
236  if(CkMyPe()==0){
237  CkAbort("WARNING: PAPI_L1_DCA doesn't exist on this platform!");
238  }
239  }
240  /* ret=PAPI_event_name_to_code("INSTRUCTION_FETCH_STALL",&EventCode);
241  if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
242  CkpvAccess(papiEvents)[5] = EventCode;
243  }else{
244  if(CkMyPe()==0){
245  CkAbort("WARNING: INSTRUCTION_FETCH_STALL doesn't exist on this platform!");
246  }
247  }
248  */
249  if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
250  CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
251  }else{
252  if(CkMyPe()==0){
253  CkAbort("WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
254  }
255  }
256  for(int i=0;i<NUM_PAPI_EVENTS;i++)
257  {
258  int papiRetValue=PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i],1);
259  if (papiRetValue != PAPI_OK) {
260  CkPrintf("failure for event %d\n",i);
261  if (papiRetValue == PAPI_ECNFLCT) {
262  CmiAbort("PAPI events conflict! Please re-assign event types!\n");
263  } else {
264  CmiAbort("PAPI failed to add designated events!\n");
265  }
266  }
267 
268  }
269 #endif
270 }
271 #endif
272 
273 #ifdef OPENATOM_VERSION
274 static void startOA(){(char inDriverFile[1024], char inPhysicsFile[1024], CkCallback doneCB)
275 {
276  CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
277 }
278 #endif //OPENATOM_VERSION
279 
280 //======================================================================
281 // Public Functions
282 
283 //----------------------------------------------------------------------
284 
286 double startupTime;
287 
288 //----------------------------------------------------------------------
289 // BOC constructor
291 {
292  DebugM(4,"Creating Node\n");
293 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
294  CApplicationInit();
295 #endif
296  if (CkpvAccess(Node_instance) == 0) {
297  CkpvAccess(Node_instance) = this;
298  eventEndOfTimeStep = traceRegisterUserEvent("EndOfTimeStep", 135);
299  } else {
300  NAMD_bug("Node::Node() - another instance of Node exists!");
301  }
302 
303  CkpvAccess(BOCclass_group) = msg->group;
304  delete msg;
305 
306  CkpvAccess(BOCclass_group).node = thisgroup;
307 
308  recvCheckpointCReq_index = CmiRegisterHandler((CmiHandler)recvCheckpointCReq_handler);
309  recvCheckpointCAck_index = CmiRegisterHandler((CmiHandler)recvCheckpointCAck_handler);
310 
311  startupPhase = 0;
312 
313  molecule = NULL;
314  parameters = NULL;
315  simParameters = NULL;
316  configList = NULL;
317  pdb = NULL;
318  state = NULL;
319  output = NULL;
320  imd = new IMDOutput;
321  colvars = 0;
322 
323 #if USE_HPM
324  // assumes that this will be done only on BG/P
325  TopoManager *tmgr = new TopoManager();
326  int x, y, z;
327  tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);
328  delete tmgr;
329 #endif
330 
331  specialTracing = traceAvailable() && (traceIsOn()==0);
332 
333  DebugM(4,"Creating PatchMap, AtomMap, ComputeMap\n");
336  if ( CkMyRank() == 0 ) ComputeMap::Instance();
337 
338  //Note: Binding BOC vars such as workDistrib has been moved
339  //to the 1st phase of startup because the in-order message delivery
340  //is not always guaranteed --Chao Mei
341 #ifdef CMK_BALANCED_INJECTION_API
342  if(CkMyRank() == 0){
343  balancedInjectionLevel=ck_get_GNI_BIConfig();
344  // CkPrintf("[%d] get retrieved BI=%d\n",CkMyPe(),balancedInjectionLevel);
345  ck_set_GNI_BIConfig(20);
346  // CkPrintf("[%d] set retrieved BI=%d\n",CkMyPe(),ck_get_GNI_BIConfig());
347  }
348 #endif
349 
350 }
351 
352 //----------------------------------------------------------------------
353 // ~Node(void) needs to clean up everything.
354 
356 {
357  delete output;
358  delete computeMap;
359  delete atomMap;
360  delete patchMap;
361  delete CkpvAccess(comm);
362  // BEGIN LA
363  delete rand;
364  // END LA
365 #ifdef MEASURE_NAMD_WITH_PAPI
366  delete CkpvAccess(papiEvents);
367 #endif
368 }
369 
370 void Node::bindBocVars(){
371  DebugM(4,"Binding to BOC's\n");
372  CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
373  patchMgr = pm.ckLocalBranch();
374  CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
375  proxyMgr = prm.ckLocalBranch();
376  CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).workDistrib);
377  workDistrib = wd.ckLocalBranch();
378  CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).computeMgr);
379  computeMgr = cm.ckLocalBranch();
380  CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).ldbCoordinator);
381  ldbCoordinator = lc.ckLocalBranch();
382  #ifdef MEM_OPT_VERSION
383  CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
384  ioMgr = io.ckLocalBranch();
385  #endif
386 
387 }
388 
389 //----------------------------------------------------------------------
390 // Malloc Test Sequence
391 void Node::mallocTest(int step) {
392  int MB = 1024*1024;
393  int size = 100;
394  char* foo = (char*) malloc(size*MB);
395  if ( ! foo ) {
396  char buf[256];
397  sprintf(buf,"Malloc fails on Pe %d at %d MB.\n",CkMyPe(),step*size);
398  NAMD_die(buf);
399  }
400  memset(foo,0,size*MB*sizeof(char));
401 }
402 
404  if ( mallocTest_size ) {
405  CkPrintf("All PEs successfully allocated %d MB.\n", 100*mallocTest_size);
406  } else {
407  CkPrintf("Starting malloc test on all PEs.\n");
408  }
409  fflush(stdout);
410  ++mallocTest_size;
411  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
412  (CProxy_Node(CkpvAccess(BOCclass_group).node)).mallocTest(mallocTest_size);
413 }
414 
415 //----------------------------------------------------------------------
416 // Startup Sequence
417 
419  (CProxy_Node(CkpvAccess(BOCclass_group).node)).startup();
420 }
421 
425 
426 extern void registerUserEventsForAllComputeObjs(void);
427 
429  int gotoRun = false;
430  double newTime;
431 
432  if (!CkMyPe()) {
433  if (!startupPhase) {
434  iout << iINFO << "\n";
435  startupTime = CmiWallTimer();
436  iout << iINFO << "Entering startup at " << startupTime << " s, ";
437  } else {
438  newTime = CmiWallTimer();
439  iout << iINFO << "Startup phase " << startupPhase-1 << " took "
440  << newTime - startupTime << " s, ";
441  startupTime = newTime;
442  }
443  iout << memusage_MB() << " MB of memory in use\n" << endi;
444  fflush(stdout);
445  }
446  switch (startupPhase) {
447 
448  case 0:
450  namdOneCommInit(); // Namd1.X style
451  break;
452 
453  case 1:
454  bindBocVars();
455 
456  // send & receive molecule, simparameters... (Namd1.X style)
457  if (CkMyPe()) {
458  namdOneRecv();
459  } else {
460  namdOneSend();
461  }
462  break;
463 
464  case 2:
465  // fix up one-per-node objects (for SMP version)
469 
472 
473  #if !CMK_SMP || ! USE_CKLOOP
474  //the CkLoop library should be only used in SMP mode
476  #else
477  if ( CkNumPes() < 2 * CkNumNodes() ) simParameters->useCkLoop = 0;
478  #endif
479 
480 
481  if ( simParameters->mallocTest ) {
482  if (!CkMyPe()) {
483  mallocTest_size = 0;
484  CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
485  }
486  return;
487  }
488 
489 
490  #ifdef MEASURE_NAMD_WITH_PAPI
491  if(simParameters->papiMeasure) namdInitPapiCounters();
492  #endif
493 
494  #ifdef MEM_OPT_VERSION
495  //At this point, each Node object has received the simParameters,
496  //parameters and the atom signatures info from the master Node
497  //(proc 0). It's time to initialize the parallel IO manager and
498  //read the binary per-atom file --Chao Mei
499 
500  //Step 1: initialize the parallel IO manager per Node
501  ioMgr->initialize(this);
502  #endif
503 
504  break;
505 
506  case 3:
507 
508  #ifdef MEM_OPT_VERSION
509  //Step 2: read the binary per-atom files (signater index, coordinates etc.)
510  ioMgr->readPerAtomInfo();
511  #endif
512 
513  break;
514 
515  case 4:
516 
517  #ifdef MEM_OPT_VERSION
518  //Step 3: update counters of tuples and exclusions inside Molecule object
519  ioMgr->updateMolInfo();
520 
521  //Step 4: prepare distributing the atoms to neighboring procs if necessary
522  ioMgr->migrateAtomsMGrp();
523 
524  //step 5: initialize patchMap and send it to every other processors
525  //to decide atoms to patch distribution on every input processor
526  if(!CkMyPe()) {
527  workDistrib->patchMapInit(); // create space division
529  }
530  #endif
531 
532  #if USE_HPM
533  HPM_Init(localRankOnNode);
534  #endif
535 
536  // take care of inital thread setting
537  threadInit();
538 
539  // create blank AtomMap
541 
542  if (!CkMyPe()) {
543 #ifdef NAMD_CUDA
544  if (simParameters->usePMECUDA) {
545  // computePmeCUDAMgr was created in BackEnd.C
546  // This empty branch is to avoid initializing ComputePmeMgr
547  } else
548 #endif
549  if (simParameters->PMEOn) {
550  CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();
551  }
552  #ifdef OPENATOM_VERSION
553  if ( simParameters->openatomOn ) {
554  CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
555  }
556  #endif // OPENATOM_VERSION
557 
558  }
559 
560  #ifdef OPENATOM_VERSION
561  if ( simParameters->openatomOn ) {
562  // if ( ! CkMyPe() ) {
563  CkCallback doneMoaStart(CkIndexmain::doneMoaSetup(), thishandle);
564  startOA(simParameters->moaDriverFile, simParameters->moaPhysicsFile, doneMoaStart);
565  // }
566  }
567  #endif // OPENATOM_VERSION
568 
569  // BEGIN LA
571  rand->split(CkMyPe(), CkNumPes());
572  // END LA
573 
574  break;
575 
576  case 5:
577  #ifdef MEM_OPT_VERSION
578  //Now, every input proc has received all the atoms necessary
579  //to decide the patches those atoms belong to
580 
581  //step 1: integrate the migrated atoms into the atom list that
582  //contains the initally distributed atoms, and sort the atoms
583  //based on hydrogenList value
584  ioMgr->integrateMigratedAtoms();
585 
586  //step 2: integrate the cluster size of each atom on each output proc
587  ioMgr->integrateClusterSize();
588 
589  //step 3: calculate the number of atoms in each patch on every
590  //input procs (atoms belonging to a patch may lie on different
591  //procs), and reduce such info on proc 0. Such info is required
592  //for determing which node a particular patch is assigned to.
593  ioMgr->calcAtomsInEachPatch();
594 
595  //set to false to re-send PatchMap later
597  #endif
598  break;
599  case 6:
602  }
605  }
608  }
609  #ifdef PROCTRACE_DEBUG
610  DebugFileTrace::Instance("procTrace");
611  #endif
612 
613  if (!CkMyPe()) {
614  output = new Output; // create output object just on PE(0)
615 
616  #ifndef MEM_OPT_VERSION
617  workDistrib->patchMapInit(); // create space division
618  workDistrib->createHomePatches(); // load atoms into HomePatch(es)
619  #endif
620 
623  //ComputeMap::Object()->printComputeMap();
624 
625  // For MIC runs, take the additional step after the compute map has been created to
626  // assign the various computes to either the host or the device. This info will
627  // be distributed across the PEs.
628  #if defined(NAMD_MIC)
629  mic_initHostDeviceLDB();
630  #endif
631 
633  iout << iINFO << "Simulating initial mapping with " << simParameters->simulatedPEs
634  << " PEs with " << simParameters->simulatedNodeSize << " PEs per node\n" << endi;
635  outputPatchComputeMaps("init_mapping", 0);
636  iout << iINFO << "Simulating initial mapping is done, now NAMD exits\n" << endi;
637  BackEnd::exit();
638  }
639 
641 
642  //in MEM_OPT_VERSION, patchMap is resent
643  //because they have been updated since creation including
644  //#atoms per patch, the proc a patch should stay etc. --Chao Mei
646  #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
647  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
648  //a node broadcast
649  npm.createProxyInfo(PatchMap::Object()->numPatches());
650  #endif
651  }
652  {
653  #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
654  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
655  if(CkMyRank()==0) {
656  //just need to register once
657  npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
658  }
659  npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(), PatchMap::Object());
660  #endif
661  }
662  break;
663 
664  case 7:
665 #ifdef CHARM_HAS_MSA
667  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
668  msm[CkMyPe()].initialize(new CkQdMsg);
669  }
670 #else
672  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
673  MsmInitMsg *msg = new MsmInitMsg;
674  Lattice lattice = simParameters->lattice; // system lattice vectors
675  ScaledPosition smin=0, smax=0;
676  if (lattice.a_p() && lattice.b_p() && lattice.c_p()) {
677  msg->smin = smin;
678  msg->smax = smax;
679  msm[CkMyPe()].initialize(msg); // call from my own PE
680  }
681  else if ( ! CkMyPe() ) {
682  pdb->get_extremes(smin, smax); // only available on PE 0
683  msg->smin = smin;
684  msg->smax = smax;
685  msm.initialize(msg); // broadcast to chare group
686  }
687 
688  /*
689  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
690  Node *node = nd.ckLocalBranch();
691  ScaledPosition smin, smax;
692  node->pdb->get_extremes(smin, smax);
693  msg->smin = smin; // extreme positions in system
694  msg->smax = smax;
695  msm[CkMyPe()].initialize(msg);
696  */
697  }
698 #endif
699 
700  if ( simParameters->PMEOn ) {
701  #ifdef OPENATOM_VERSION
702  if ( simParameters->openatomOn ) {
703  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
704  moa[CkMyPe()].initialize(new CkQdMsg);
705  }
706  #endif // OPENATOM_VERSION
707 #ifdef NAMD_CUDA
708  if ( simParameters->usePMECUDA ) {
709  if(CkMyRank()==0) {
710  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
711  pme.ckLocalBranch()->initialize(new CkQdMsg); // must run on pe 0 to call ckNew
712  }
713  } else
714 #endif
715  {
716  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
717  pme[CkMyPe()].initialize(new CkQdMsg);
718  }
719  }
720  break;
721 
722  case 8:
723 #ifdef NAMD_CUDA
724  if ( (simParameters->useCUDA2 || simParameters->bondedCUDA) && CkMyRank()==0 ) {
725  CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
726  nb.ckLocalBranch()->initialize(new CkQdMsg);
727  }
728 #endif
729  break;
730 
731  case 9:
733  break;
734 
735  case 10:
736  #ifdef MEM_OPT_VERSION
737  //migrate atoms to HomePatch processors
738  ioMgr->sendAtomsToHomePatchProcs();
739  #endif
740  break;
741 
742  case 11:
743  // part 2 of MSM init
745  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
746  msm[CkMyPe()].initialize_create(); // call from my own PE
747  }
748 
749  if ( simParameters->PMEOn ) {
750  #ifdef OPENATOM_VERSION
751  if ( simParameters->openatomOn ) {
752  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
753  moa[CkMyPe()].initWorkers(new CkQdMsg);
754  }
755  #endif // OPENATOM_VERSION
756 #ifdef NAMD_CUDA
757  if ( simParameters->usePMECUDA ) {
758  if(CkMyRank()==0) {
759  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
760  pme[CkMyNode()].initialize_pencils(new CkQdMsg);
761  }
762  } else
763 #endif
764  {
765  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
766  pme[CkMyPe()].initialize_pencils(new CkQdMsg);
767  }
768  }
769 #ifdef CHARM_HAS_MSA
770  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
771  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
772  msm[CkMyPe()].initWorkers(new CkQdMsg);
773  }
774 #else
775  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
776  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
777  msm[CkMyPe()].update(new CkQdMsg);
778  }
779 #endif
780 
781  #ifdef MEM_OPT_VERSION
782  //Now every processor has all the atoms it needs to create the HomePatches.
783  //The HomePatches are created in parallel on every home patch procs.
784  ioMgr->createHomePatches();
785  #else
786  if (!CkMyPe()) {
788  }
789  #endif
790  break;
791 
792  case 12:
793  if ( simParameters->PMEOn ) {
794  #ifdef OPENATOM_VERSION
795  if ( simParameters->openatomOn ) {
796  CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
797  moa[CkMyPe()].startWorkers(new CkQdMsg);
798  }
799  #endif // OPENATOM_VERSION
800 #ifdef NAMD_CUDA
801  if ( simParameters->usePMECUDA ) {
802  if(CkMyRank()==0) {
803  CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
804  pme[CkMyNode()].activate_pencils(new CkQdMsg);
805  }
806  } else
807 #endif
808  {
809  CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
810  pme[CkMyPe()].activate_pencils(new CkQdMsg);
811  }
812  }
813 #ifdef CHARM_HAS_MSA
814  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
815  CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
816  msm[CkMyPe()].startWorkers(new CkQdMsg);
817  }
818 #else
819  /*
820  else if ( simParameters->MSMOn && ! simParameters->MsmSerialOn ) {
821  CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
822  //msm[CkMyPe()].startWorkers(new CkQdMsg);
823  }
824  */
825 #endif
826 
827  proxyMgr->createProxies(); // need Home patches before this
828  if (!CkMyPe()) LdbCoordinator::Object()->createLoadBalancer();
829 
830 #ifdef NAMD_TCL
831  // TclInitSubsystems() has a race condition so we create one interp per node here
832  if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
833 #endif
834 
835 #ifdef USE_NODEPATCHMGR
836  //at this point, PatchMap info has been recved on PEs. It is time to create
837  //the home patch spanning tree for receiving proxy list info
838  if(proxyMgr->getSendSpanning() || proxyMgr->getRecvSpanning()) {
839  if(CkMyRank()==0) {
840  CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
841  npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(PatchMap::Object());
842  }
843  }
844 #endif
845 
846  break;
847 
848  case 13:
849 
850  // DMK - DEBUG - If, in MIC runs, the debug option to dump all the compute maps to files
851  // for debugging/verification purposes has been enabled, have each PE do so now.
852  #if defined(NAMD_MIC)
853  mic_dumpHostDeviceComputeMap();
854  #endif
855 
856  if (!CkMyPe()) {
857  iout << iINFO << "CREATING " << ComputeMap::Object()->numComputes()
858  << " COMPUTE OBJECTS\n" << endi;
859  }
860  DebugM(4,"Creating Computes\n");
862  DebugM(4,"Building Sequencers\n");
863  buildSequencers();
864  DebugM(4,"Initializing LDB\n");
866  break;
867 
868  case 14:
869  // computes may create proxies on the fly so put these in separate phase
870  Sync::Object()->openSync(); // decide if to open local Sync
872 #ifdef CMK_BALANCED_INJECTION_API
873  if(CkMyRank() == 0){
874  // CkPrintf("[%d] get retrieved BI=%d\n",CkMyPe(),balancedInjectionLevel);
875  ck_set_GNI_BIConfig(balancedInjectionLevel);
876  // CkPrintf("[%d] set retrieved BI=%d\n",CkMyPe(),ck_get_GNI_BIConfig());
877  }
878 #endif
879 
880  break;
881 
882  case 15:
883  {
884  //For debugging
885  /*if(!CkMyPe()){
886  FILE *dumpFile = fopen("/tmp/NAMD_Bench.dump", "w");
887  dumpbench(dumpFile);
888  NAMD_die("Normal execution\n");
889  }*/
890  }
891  #ifdef MEM_OPT_VERSION
892  //free space in the Molecule object that are not used anymore
893  ioMgr->freeMolSpace();
894  #endif
895  gotoRun = true;
896  break;
897 
898  default:
899  NAMD_bug("Startup Phase has a bug - check case statement");
900  break;
901 
902  }
903 
904  startupPhase++;
905  if (!CkMyPe()) {
906  if (!gotoRun) {
907  CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
908  } else {
910  }
911  }
912 }
913 
914 #ifdef OPENATOM_VERSION
915 void Node::doneMoaStart()
916 {
917 #ifdef OPENATOM_VERSION_DEBUG
918  CkPrintf("doneMoaStart executed on processor %d.\n", CkMyPe() );
919 #endif //OPENATOM_VERSION_DEBUG
920 }
921 #endif //OPENATOM_VERSION
922 
923 void Node::namdOneCommInit()
924 {
925  if (CkpvAccess(comm) == NULL) {
926  CkpvAccess(comm) = new Communicate();
927 #ifdef DPMTA
928  pvmc_init();
929 #endif
930  }
931 }
932 
933 // Namd 1.X style Send/Recv of simulation information
934 
935 void Node::namdOneRecv() {
936  if ( CmiMyRank() ) return;
937 
938  MIStream *conv_msg;
939 
940  // Receive molecule and simulation parameter information
941  simParameters = node_simParameters = new SimParameters;
942  //****** BEGIN CHARMM/XPLOR type changes
943  parameters = node_parameters = new Parameters();
944  //****** END CHARMM/XPLOR type changes
945  molecule = node_molecule = new Molecule(simParameters,parameters);
946 
947  DebugM(4, "Getting SimParameters\n");
948  conv_msg = CkpvAccess(comm)->newInputStream(0, SIMPARAMSTAG);
950 
951  DebugM(4, "Getting Parameters\n");
952  conv_msg = CkpvAccess(comm)->newInputStream(0, STATICPARAMSTAG);
953  parameters->receive_Parameters(conv_msg);
954 
955  DebugM(4, "Getting Molecule\n");
956  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
957  // Modified by JLai -- 10.21.11
958  molecule->receive_Molecule(conv_msg);
960  iout << iINFO << "Compute Nodes receiving GoMolecule Information" << "\n" << endi;
961  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
962  molecule->receive_GoMolecule(conv_msg);
963  }
964  // End of modification
965  DebugM(4, "Done Receiving\n");
966 }
967 
968 void Node::namdOneSend() {
969  node_simParameters = simParameters;
970  node_parameters = parameters;
971  node_molecule = molecule;
972 
973  MOStream *conv_msg;
974  // I'm Pe(0) so I send what I know
975  DebugM(4, "Sending SimParameters\n");
976  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, SIMPARAMSTAG, BUFSIZE);
978 
979  DebugM(4, "Sending Parameters\n");
980  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, STATICPARAMSTAG, BUFSIZE);
981  parameters->send_Parameters(conv_msg);
982 
983  DebugM(4, "Sending Molecule\n");
984  int bufSize = BUFSIZE;
985  if(molecule->numAtoms>=1000000) bufSize = 16*BUFSIZE;
986  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, MOLECULETAG, bufSize);
987  // Modified by JLai -- 10.21.11
988  molecule->send_Molecule(conv_msg);
989 
991  iout << iINFO << "Master Node sending GoMolecule Information" << "\n" << endi;
992  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, MOLECULETAG, bufSize);
993  molecule->send_GoMolecule(conv_msg);
994  } // End of modification
995 }
996 
997 
998 void Node::reloadStructure(const char *fname, const char *pdbname) {
999  delete molecule;
1000  molecule = state->molecule = 0;
1001  delete pdb;
1002  pdb = state->pdb = 0;
1003  state->loadStructure(fname,pdbname,1);
1004  this->molecule = state->molecule;
1005  this->pdb = state->pdb;
1006  CProxy_Node nodeProxy(thisgroup);
1007  nodeProxy.resendMolecule();
1008 }
1009 
1010 
1012  if ( CmiMyRank() ) {
1013  return;
1014  }
1015  if ( CmiMyPe() == 0 ) {
1016  int bufSize = BUFSIZE;
1017  MOStream *conv_msg;
1018  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, STATICPARAMSTAG, bufSize);
1019  parameters->send_Parameters(conv_msg);
1020  if(molecule->numAtoms>=1000000) bufSize = 16*BUFSIZE;
1021  conv_msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, MOLECULETAG, bufSize);
1022  molecule->send_Molecule(conv_msg);
1023  } else {
1024  MIStream *conv_msg;
1025  delete parameters;
1026  parameters = new Parameters;
1027  conv_msg = CkpvAccess(comm)->newInputStream(0, STATICPARAMSTAG);
1028  parameters->receive_Parameters(conv_msg);
1029  delete molecule;
1031  conv_msg = CkpvAccess(comm)->newInputStream(0, MOLECULETAG);
1032  molecule->receive_Molecule(conv_msg);
1033  }
1034  node_parameters = parameters;
1035  node_molecule = molecule;
1039  CProxy_Node nodeProxy(thisgroup);
1040  for ( int i=0; i<CmiMyNodeSize(); ++i ) {
1041  nodeProxy[CmiMyPe()+i].resendMolecule2();
1042  }
1043 }
1044 
1049 }
1050 
1051 
1052 // Initial thread setup
1053 
1054 void Node::threadInit() {
1055  // Thread initialization
1056  if (CthImplemented()) {
1057  CthSetStrategyDefault(CthSelf());
1058  } else {
1059  NAMD_bug("Node::startup() Oh no, tiny elvis, threads not implemented");
1060  }
1061 }
1062 
1063 //
1064 void Node::buildSequencers() {
1067 
1068  // Controller object is only on Pe(0)
1069  if ( ! CkMyPe() ) {
1070  Controller *controller = new Controller(state);
1071  state->useController(controller);
1072  }
1073 
1074  // Assign Sequencer to all HomePatch(es)
1075  for (ai=ai.begin(); ai != ai.end(); ai++) {
1076  HomePatch *patch = (*ai).patch;
1077  Sequencer *sequencer = new Sequencer(patch);
1078  patch->useSequencer(sequencer);
1079  }
1080 }
1081 
1082 
1083 
1084 //-----------------------------------------------------------------------
1085 // Node run() - broadcast to all nodes
1086 //-----------------------------------------------------------------------
1088  (CProxy_Node(CkpvAccess(BOCclass_group).node)).run();
1089 }
1090 
1091 
1092 //-----------------------------------------------------------------------
1093 // run(void) runs the specified simulation for the specified number of
1094 // steps, overriding the contents of the configuration file
1095 //-----------------------------------------------------------------------
1097 {
1098  // Start Controller (aka scalar Sequencer) on Pe(0)
1099 // printf("\n\n I am in Node.C in run method about to call state->runController\n\n");
1100  if ( ! CkMyPe() ) {
1101  state->runController();
1102  }
1103 
1104  DebugM(4, "Starting Sequencers\n");
1105  // Run Sequencer on each HomePatch - i.e. start simulation
1108  for (ai=ai.begin(); ai != ai.end(); ai++) {
1109  HomePatch *patch = (*ai).patch;
1110 //CkPrintf("Proc#%d in Node calling Sequencer ",CkMyPe());
1111  patch->runSequencer();
1112  }
1113 
1114  if (!CkMyPe()) {
1115  double newTime = CmiWallTimer();
1116  iout << iINFO << "Startup phase " << startupPhase-1 << " took "
1117  << newTime - startupTime << " s, "
1118  << memusage_MB() << " MB of memory in use\n";
1119  iout << iINFO << "Finished startup at " << newTime << " s, "
1120  << memusage_MB() << " MB of memory in use\n\n" << endi;
1121  fflush(stdout);
1122  }
1123 
1124 }
1125 
1126 
1127 //-----------------------------------------------------------------------
1128 // Node scriptBarrier() - twiddle parameters with simulation halted
1129 //-----------------------------------------------------------------------
1130 
1132  CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
1133 }
1134 
1136  //script->awaken();
1137 }
1138 
1140  simParameters->scriptSet(msg->param,msg->value);
1141  delete msg;
1142 }
1143 
1144 void Node::reloadCharges(const char *filename) {
1145  FILE *file = fopen(filename,"r");
1146  if ( ! file ) NAMD_die("node::reloadCharges():Error opening charge file.");
1147 
1148  int n = molecule->numAtoms;
1149  float *charge = new float[n];
1150 
1151  for ( int i = 0; i < n; ++i ) {
1152  if ( ! fscanf(file,"%f",&charge[i]) )
1153  NAMD_die("Node::reloadCharges():Not enough numbers in charge file.");
1154  }
1155 
1156  fclose(file);
1157  CProxy_Node(thisgroup).reloadCharges(charge,n);
1158  delete [] charge;
1159 }
1160 
// Hand an array of `n` charges to this Node's Molecule.
// This is the overload that reloadCharges(const char*) calls through the
// Node group proxy after reading the charge file.
1161 void Node::reloadCharges(float charge[], int n) {
1162  molecule->reloadCharges(charge,n);
1163 }
1164 
1165 
1166 // BEGIN gf
// Reinitialize the grid-force grid selected by `key`, then invoke
// reloadGridforceGrid(int) with its index through the Node group proxy.
//
// key: lookup key into simParameters->mgridforcelist.
// Calls NAMD_die when the lookup yields a negative index or no parameters,
// and NAMD_bug when the Molecule has no grid at that index.
// NOTE(review): the body of the `key == NULL` branch (listing lines
// 1173-1174) is missing from this extract; presumably it selects a default
// grid - confirm against the full source.
1167 void Node::reloadGridforceGrid(const char * key) {
1168  DebugM(4, "reloadGridforceGrid(const char*) called on node " << CkMyPe() << "\n" << endi);
1169 
1170  int gridnum;
1171  MGridforceParams *mgridParams;
1172  if (key == NULL) {
1175  } else {
1176  gridnum = simParameters->mgridforcelist.index_for_key(key);
1177  mgridParams = simParameters->mgridforcelist.find_key(key);
1178  }
1179 
1180  if (gridnum < 0 || mgridParams == NULL) {
1181  NAMD_die("Node::reloadGridforceGrid(const char*):Could not find grid.");
1182  }
1183 
1184  GridforceGrid *grid = molecule->get_gridfrc_grid(gridnum);
1185  if (grid == NULL) {
1186  NAMD_bug("Node::reloadGridforceGrid(const char*):grid not found");
1187  }
1188  grid->reinitialize(simParameters, mgridParams);
1189 
      // Trigger the per-Node handler that sends/receives the grid itself.
1190  CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);
1191 
1192  DebugM(4, "reloadGridforceGrid(const char*) finished\n" << endi);
1193 }
1194 
// Resolve `key` to a grid index and send that index plus the x, y and z
// components of `scale` to updateGridScale(int, float, float, float)
// through the Node group proxy.
//
// key:   lookup key into simParameters->mgridforcelist.
// scale: new scale vector for the grid.
// Calls NAMD_die when the lookup yields a negative index or no parameters,
// and NAMD_bug when the Molecule has no grid at that index.
// NOTE(review): the body of the `key == NULL` branch (listing lines
// 1201-1202) is missing from this extract; presumably it selects a default
// grid - confirm against the full source.
1195 void Node::updateGridScale(const char* key, Vector scale) {
1196  DebugM(4, "updateGridScale(char*, Vector) called on node " << CkMyPe() << "\n" << endi);
1197 
1198  int gridnum;
1199  MGridforceParams* mgridParams;
1200  if (key == NULL) {
1203  } else {
1204  gridnum = simParameters->mgridforcelist.index_for_key(key);
1205  mgridParams = simParameters->mgridforcelist.find_key(key);
1206  }
1207 
1208  if (gridnum < 0 || mgridParams == NULL) {
1209  NAMD_die("Node::updateGridScale(char*, Vector): Could not find grid.");
1210  }
1211 
      // The grid is fetched here only to verify that it exists.
1212  GridforceGrid* grid = molecule->get_gridfrc_grid(gridnum);
1213  if (grid == NULL) {
1214  NAMD_bug("Node::updateGridScale(char*, Vector): grid not found");
1215  }
1216  CProxy_Node(thisgroup).updateGridScale(gridnum, scale.x, scale.y, scale.z);
1217 
1218  DebugM(4, "updateGridScale(char*, Vector) finished\n" << endi);
1219 }
// Apply the scale vector (sx, sy, sz) to grid `gridnum` of this Node's
// Molecule via GridforceGrid::set_scale().
// Returns without doing anything when CmiMyRank() is non-zero.
// Calls NAMD_bug when the Molecule has no grid at `gridnum`.
// NOTE(review): listing line 1230, between the construction of `scale` and
// the set_scale() call, is missing from this extract.
1220 void Node::updateGridScale(int gridnum, float sx, float sy, float sz) {
1221  if (CmiMyRank()) return;
1222  DebugM(4, "updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() << "\n" << endi);
1223 
1224  GridforceGrid *grid = molecule->get_gridfrc_grid(gridnum);
1225  if (grid == NULL) {
1226  NAMD_bug("Node::updateGridScale(char*, int, float, float, float):grid not found");
1227  }
1228 
1229  Vector scale(sx,sy,sz);
1231  grid->set_scale( scale );
1232 
1233  DebugM(4, "updateGridScale(char*, int, float, float, float) finished\n" << endi);
1234 }
1235 
1236 void Node::reloadGridforceGrid(int gridnum) {
1237  if (CmiMyRank()) return;
1238  DebugM(4, "reloadGridforceGrid(int) called on node " << CkMyPe() << "\n" << endi);
1239 
1240  GridforceGrid *grid = molecule->get_gridfrc_grid(gridnum);
1241  if (grid == NULL) {
1242  NAMD_bug("Node::reloadGridforceGrid(int):grid not found");
1243  }
1244 
1245  if (CkMyPe()) {
1246  // not node 0 -> receive grid
1247  DebugM(4, "Receiving grid\n");
1248 
1249  delete grid;
1250 
1251  MIStream *msg = CkpvAccess(comm)->newInputStream(0, GRIDFORCEGRIDTAG);
1252  grid = GridforceGrid::unpack_grid(gridnum, msg);
1253  molecule->set_gridfrc_grid(gridnum, grid);
1254  delete msg;
1255  } else {
1256  // node 0 -> send grid
1257  DebugM(4, "Sending grid\n");
1258 
1259  MOStream *msg = CkpvAccess(comm)->newOutputStream(ALLBUTME, GRIDFORCEGRIDTAG, BUFSIZE);
1260  GridforceGrid::pack_grid(grid, msg);
1261  msg->end();
1262  delete msg;
1263  }
1264 
1265  DebugM(4, "reloadGridforceGrid(int) finished\n" << endi);
1266 }
1267 // END gf
1268 
1269 
1270 // initiating replica
1271 void Node::sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs) {
1272  CheckpointMsg *msg = new (1+strlen(key),0) CheckpointMsg;
1273  msg->replica = CmiMyPartition();
1274  msg->task = task;
1275  msg->checkpoint.lattice = lat;
1276  msg->checkpoint.state = cs;
1277  strcpy(msg->key,key);
1278  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1279  CmiSetHandler(env,recvCheckpointCReq_index);
1280 #if CMK_HAS_PARTITION
1281  CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(char*)env);
1282 #else
1283  CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(char*)env);
1284 #endif
1285 }
1286 
1287 // responding replica
// C-linkage handler for checkpoint requests: converts the envelope back to a
// CheckpointMsg and passes it to this PE's Node::recvCheckpointReq().
1288 extern "C" {
1289  void recvCheckpointCReq_handler(envelope *env) {
1290  Node::Object()->recvCheckpointReq(CheckpointMsg::unpack(EnvToUsr(env)));
1291  }
1292 }
1293 
1294 // responding replica
1296  state->controller->recvCheckpointReq(msg->key,msg->task,msg->checkpoint);
1297 
1298  int remote = msg->replica;
1299  msg->replica = CmiMyPartition();
1300  envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1301  CmiSetHandler(env,recvCheckpointCAck_index);
1302 #if CMK_HAS_PARTITION
1303  CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(char*)env);
1304 #else
1305  CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(char*)env);
1306 #endif
1307 }
1308 
1309 // initiating replica
// C-linkage handler for checkpoint acknowledgements: converts the envelope
// back to a CheckpointMsg and passes it to this PE's
// Node::recvCheckpointAck().
1310 extern "C" {
1311  void recvCheckpointCAck_handler(envelope *env) {
1312  Node::Object()->recvCheckpointAck(CheckpointMsg::unpack(EnvToUsr(env)));
1313  }
1314 }
1315 
1316 // initiating replica
1318  state->controller->recvCheckpointAck(msg->checkpoint);
1319  delete msg;
1320 }
1321 
1322 
1324  //CmiPrintf("sendEnableExitScheduler\n");
1325  CProxy_Node nodeProxy(thisgroup);
1326  nodeProxy[0].recvEnableExitScheduler();
1327 }
1328 
1330  //CmiPrintf("recvEnableExitScheduler\n");
1332 }
1333 
1335  if ( CkMyPe() ) {
1337  } else {
1338  CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);
1339  }
1340 }
1341 
1343  //CmiPrintf("exitScheduler %d\n",CkMyPe());
1344  CsdExitScheduler();
1345 }
1346 
1348  CProxy_Node nodeProxy(thisgroup);
1349  nodeProxy[0].recvEnableEarlyExit();
1350 }
1351 
1353  enableEarlyExit();
1354 }
1355 
1357  if ( CkMyPe() ) {
1359  } else {
1360  CkStartQD(CkIndex_Node::earlyExit(),&thishandle);
1361  }
1362 }
1363 
// Terminate the run through NAMD_die with a message pointing at earlier
// error output.
1364 void Node::earlyExit(void) {
1365  NAMD_die("Exiting prematurely; see error messages above.");
1366 }
1367 
1368 
1369 //------------------------------------------------------------------------
1370 // Some odd utilities
1371 //------------------------------------------------------------------------
1373 {
1374  this->molecule = state->molecule;
1375  this->parameters = state->parameters;
1376  this->simParameters = state->simParameters;
1377  this->configList = state->configList;
1378  this->pdb = state->pdb;
1379  this->state = state;
1380 }
1381 
1382 // entry methods for BG/P HPM (performance counters) library
1384 #if USE_HPM
1385  HPM_Start("500 steps", localRankOnNode);
1386 #endif
1387 }
1388 
1390 #if USE_HPM
1391  HPM_Stop("500 steps", localRankOnNode);
1392  HPM_Print(CkMyPe(), localRankOnNode);
1393 #endif
1394 }
1395 
1396 void Node::traceBarrier(int turnOnTrace, int step){
1397  curTimeStep = step;
1398  if(turnOnTrace) traceBegin();
1399  else traceEnd();
1400 
1401  if(turnOnTrace) CmiTurnOnStats();
1402  else CmiTurnOffStats();
1403 
1404  //CkPrintf("traceBarrier (%d) at step %d called on proc %d\n", turnOnTrace, step, CkMyPe());
1405  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1406  CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
1407  contribute(0, NULL, CkReduction::sum_int, cb);
1408 
1409 }
1410 
// Reduction callback targeted by traceBarrier(); asserts that it runs on
// PE 0 and frees the reduction message.
// NOTE(review): listing line 1414, after `delete msg`, is missing from this
// extract - confirm what follow-up call it makes.
1411 void Node::resumeAfterTraceBarrier(CkReductionMsg *msg){
1412  CmiAssert(CmiMyPe()==0);
1413  delete msg;
1415 }
1416 
1417 void Node::papiMeasureBarrier(int turnOnMeasure, int step){
1418 #ifdef MEASURE_NAMD_WITH_PAPI
1419  curMFlopStep = step;
1420  double results[NUM_PAPI_EVENTS+1];
1421 
1422  if(turnOnMeasure){
1423  CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]=CmiWallTimer();
1424 
1425  long long counters[NUM_PAPI_EVENTS+1];
1426  int ret=PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
1427  if(ret==PAPI_OK)
1428  {
1429  // CkPrintf("traceBarrier start counters (%d) at step %d called on proc %d\n", turnOnMeasure, step, CkMyPe());
1430  }
1431  else
1432  {
1433  CkPrintf("error PAPI_start_counters (%d) at step %d called on proc %d\n",ret , step, CkMyPe());
1434  }
1435  if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)!=PAPI_OK)
1436  {
1437  CkPrintf("error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1438  };
1439  }else{
1440  long long counters[NUM_PAPI_EVENTS+1];
1441  for(int i=0;i<NUM_PAPI_EVENTS;i++) counters[i]=0LL;
1442  if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)==PAPI_OK)
1443  {
1444 #if !MEASURE_PAPI_SPP
1445  results[0] = (double)counters[0]/1e6;
1446  results[1] = (double)counters[1]/1e6;
1447 #else
1448  for(int i=0;i<NUM_PAPI_EVENTS;i++) results[i] = counters[i]/1e6;
1449 #endif
1450  // for(int i=0;i<NUM_PAPI_EVENTS;i++) CkPrintf("[%d] counter %d is %ld\n",CkMyPe(),i,counters[i]);
1451  }
1452  else
1453  {
1454  // CkPrintf("error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1455  }
1456  // CkPrintf("traceBarrier stop counters (%d) at step %d called on proc %d\n", turnOnMeasure, step, CkMyPe());
1457 
1458  PAPI_stop_counters(counters, NUM_PAPI_EVENTS);
1459  }
1460  if(CkMyPe()==0)
1461  // CkPrintf("traceBarrier (%d) at step %d called on proc %d\n", turnOnMeasure, step, CkMyPe());
1462  results[NUM_PAPI_EVENTS]=CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]; //starttime
1463  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1464  CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
1465  contribute(sizeof(double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
1466 #endif
1467 }
1468 
1469 void Node::resumeAfterPapiMeasureBarrier(CkReductionMsg *msg){
1470 #ifdef MEASURE_NAMD_WITH_PAPI
1471 
1472  if(simParameters->papiMeasureStartStep != curMFlopStep) {
1473  double *results = (double *)msg->getData();
1474  double endtime=CmiWallTimer();
1475  int bstep = simParameters->papiMeasureStartStep;
1476  int estep = bstep + simParameters->numPapiMeasureSteps;
1477 #if MEASURE_PAPI_SPP
1478  CkPrintf("SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[0]);
1479  CkPrintf("SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[1]);
1480  CkPrintf("SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[2]);
1481  CkPrintf("SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[3]);
1482  CkPrintf("SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep,estep,results[4]);
1483  CkPrintf("SPP INFO: PAPI_TOT_CYC timestep %d to % is %lf(1e6)\n", bstep,estep,results[5]);
1484  // CkPrintf("SPP INFO: INSTRUCTION_FETCH_STALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[6]);
1485  // CkPrintf("SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]/CkNumPes());
1486  CkPrintf("SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]);
1487  CkPrintf("SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime,results[NUM_PAPI_EVENTS]/CkNumPes(),results[NUM_PAPI_EVENTS] );
1488 #else
1489  if(CkpvAccess(papiEvents)[0] == PAPI_FP_INS){
1490  double totalFPIns = results[0];
1491  if(CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
1492  CkPrintf("FLOPS INFO: from timestep %d to %d, the total FP instruction of NAMD is %lf(x1e6) per processor\n",
1493  bstep, estep, totalFPIns/CkNumPes());
1494  }else{
1495  char nameBuf[PAPI_MAX_STR_LEN];
1496  CkPrintf("PAPI COUNTERS INFO: from timestep %d to %d, ",
1497  bstep, estep);
1498  for(int i=0; i<NUM_PAPI_EVENTS; i++) {
1499  PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
1500  CkPrintf("%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
1501  }
1502  CkPrintf("per processor\n");
1503  }
1504 #endif
1505  }
1506  delete msg;
1507  state->controller->resumeAfterPapiMeasureBarrier(curMFlopStep);
1508 #endif
1509 }
1510 
1511 extern char *gNAMDBinaryName;
// Write the PatchMap and ComputeMap to a text file named
// "mapdump_<filename>.<numpes>_<nodesize>_<tag>_<gNAMDBinaryName>".
//
// File layout:
//   header line: numpes nodesize numPatches numComputes gridsize_a/b/c
//   one line per patch:   atom count, node
//   one line per compute: node, type, pid(i,0), pid(i,1)
//
// filename, tag: used only to build the output file name.
// Calls NAMD_die when the file cannot be opened.
// NOTE(review): listing lines 1513 and 1517 are missing from this extract;
// the latter evidently opens the block that replaces numpes/nodesize with
// the simulated values - confirm its condition against the full source.
// NOTE(review): fname is a fixed 128-byte buffer filled by an unbounded
// sprintf, and the visible code never calls fclose(fp).
1512 void Node::outputPatchComputeMaps(const char *filename, int tag){
1514 
1515  int numpes = CkNumPes();
1516  int nodesize = CkMyNodeSize();
1518  numpes = simParameters->simulatedPEs;
1519  nodesize = simParameters->simulatedNodeSize;
1520  }
1521 
1522  char fname[128];
1523  sprintf(fname, "mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag, gNAMDBinaryName);
1524 
1525  FILE *fp = fopen(fname, "w");
1526  if(fp == NULL) {
1527  NAMD_die("Error in outputing PatchMap and ComputeMap info!\n");
1528  return;
1529  }
1530  PatchMap *pMap = PatchMap::Object();
1531  ComputeMap *cMap = ComputeMap::Object();
1532  int numPatches = pMap->numPatches();
1533  int numComputes = cMap->numComputes();
1534  fprintf(fp, "%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes,
1535  pMap->gridsize_a(), pMap->gridsize_b(), pMap->gridsize_c());
1536  //output PatchMap info
1537  for(int i=0; i<numPatches; i++) {
1538  #ifdef MEM_OPT_VERSION
1539  fprintf(fp, "%d %d\n", pMap->numAtoms(i), pMap->node(i));
1540  #else
1541  fprintf(fp, "%d %d\n", pMap->patch(i)->getNumAtoms(), pMap->node(i));
1542  #endif
1543  }
1544 
1545  //output ComputeMap info
1546  for(int i=0; i<numComputes; i++) {
1547  fprintf(fp, "%d %d %d %d\n", cMap->node(i), cMap->type(i), cMap->pid(i,0), cMap->pid(i,1));
1548  }
1549 }
1550 
1551 
1552 //======================================================================
1553 // Private functions
1554 
1555 #include "Node.def.h"
1556 
static Node * Object()
Definition: Node.h:86
#define GRIDFORCEGRIDTAG
Definition: common.h:156
void allocateMap(int nAtomIDs)
Definition: AtomMap.C:161
std::ostream & iINFO(std::ostream &s)
Definition: InfoStream.C:107
bool specialTracing
Definition: Node.h:163
Bool simulateInitialMapping
void recvCheckpointReq(const char *key, int task, checkpoint &cp)
Definition: Controller.C:4078
void recvCheckpointCReq_handler(envelope *)
Definition: Node.C:1289
void setPatchMapArrived(bool s)
Definition: WorkDistrib.h:107
void runSequencer(void)
Definition: HomePatch.C:269
void createProxies(void)
Definition: ProxyMgr.C:417
void end(void)
Definition: MStream.C:176
ControllerState state
Definition: Controller.h:274
void setRecvSpanning()
Definition: ProxyMgr.C:371
void receive_SimParameters(MIStream *)
int proxyRecvSpanning
Definition: ProxyMgr.C:46
BOCgroup group
Definition: Node.h:68
int numComputes(void)
Definition: ComputeMap.h:101
int getRecvSpanning()
Definition: ProxyMgr.C:376
int curTimeStep
Definition: Node.h:151
int eventEndOfTimeStep
Definition: Node.C:285
void send_GoMolecule(MOStream *)
Definition: GoMolecule.C:1635
void mallocTest(int)
Definition: Node.C:391
void startHPM()
Definition: Node.C:1383
static ProxyMgr * Object()
Definition: ProxyMgr.h:394
void exitScheduler(void)
Definition: Node.C:1342
IMDOutput * imd
Definition: Node.h:183
void saveMolDataPointers(NamdState *)
Definition: Node.C:1372
void receive_GoMolecule(MIStream *)
Definition: GoMolecule.C:1744
LdbCoordinator * ldbCoordinator
Definition: Node.h:203
int gridsize_c(void) const
Definition: PatchMap.h:66
static PatchMap * Object()
Definition: PatchMap.h:27
void sendEnableEarlyExit(void)
Definition: Node.C:1347
void send_Molecule(MOStream *)
Definition: Molecule.C:5448
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
static void exit(int status=0)
Definition: BackEnd.C:276
Definition: Vector.h:64
static AtomMap * Instance()
Definition: AtomMap.C:125
Output * output
Definition: Node.h:182
SimParameters * simParameters
Definition: Node.h:178
int task
Definition: Node.C:86
void setSendSpanning()
Definition: ProxyMgr.C:362
static void pack_grid(GridforceGrid *grid, MOStream *msg)
Definition: GridForceGrid.C:50
#define DebugM(x, y)
Definition: Debug.h:59
void createLoadBalancer()
double startupTime
Definition: Node.C:286
HomePatchList * homePatchList()
Definition: PatchMap.C:438
#define ALLBUTME
Definition: Communicate.h:14
BigReal z
Definition: Vector.h:66
void enableScriptBarrier()
Definition: Node.C:1131
int getSendSpanning()
Definition: ProxyMgr.C:367
void scriptSet(const char *, const char *)
char value[MAX_SCRIPT_PARAM_SIZE]
Definition: Node.h:75
void sendEnableExitScheduler(void)
Definition: Node.C:1323
void recvCheckpointReq(CheckpointMsg *)
Definition: Node.C:1295
static void messageStartUp()
Definition: Node.C:418
void stopHPM()
Definition: Node.C:1389
char * gNAMDBinaryName
Definition: BackEnd.C:237
void reloadCharges(float charge[], int n)
#define iout
Definition: InfoStream.h:87
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
int curMFlopStep
Definition: Node.h:156
#define STATICPARAMSTAG
Definition: common.h:151
Molecule * node_molecule
Definition: Node.C:424
void outputPatchComputeMaps(const char *filename, int tag)
Definition: Node.C:1512
int loadStructure(const char *, const char *, int)
Definition: NamdState.C:152
void createComputes(ComputeMap *map)
Definition: ComputeMgr.C:1009
int replica
Definition: Node.C:87
void split(int iStream, int numStreams)
Definition: Random.h:77
void recvCheckpointAck(CheckpointMsg *)
Definition: Node.C:1317
void patchMapInit(void)
Definition: WorkDistrib.C:1105
void openSync()
Definition: Sync.C:63
ComputeMap * computeMap
Definition: Node.h:202
void runController(void)
Definition: NamdState.C:79
double memusage_MB()
Definition: memusage.h:13
virtual void reinitialize(SimParameters *simParams, MGridforceParams *mgridParams)=0
void sendComputeMap(void)
Definition: WorkDistrib.C:1078
ResizeArrayIter< T > end(void) const
Definition: Output.h:43
void registerUserEventsForAllComputeObjs()
Definition: ComputeMgr.C:830
void sendBuildCudaExclusions()
Definition: ComputeMgr.C:1448
void resumeAfterPapiMeasureBarrier(CkReductionMsg *msg)
Definition: Node.C:1469
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
void scriptParam(ScriptParamMsg *)
Definition: Node.C:1139
Definition: Random.h:37
int index_for_key(const char *key)
void run()
Definition: Node.C:1096
int gridsize_a(void) const
Definition: PatchMap.h:64
void enableExitScheduler(void)
Definition: Node.C:1334
void buildProxySpanningTree()
Definition: ProxyMgr.C:559
void createHomePatches(void)
Definition: WorkDistrib.C:889
void NAMD_bug(const char *err_msg)
Definition: common.C:123
ComputeType type(ComputeID cid)
Definition: ComputeMap.C:120
ScaledPosition smax
Definition: ComputeMsm.h:21
Controller::checkpoint checkpoint
Definition: Node.C:88
NamdState * state
Definition: Node.h:181
#define MGRIDFORCEPARAMS_DEFAULTKEY
MGridforceParams * find_key(const char *key)
void traceBarrier(int turnOnTrace, int step)
Definition: Node.C:1396
void updateGridScale(const char *key, Vector scale)
Definition: Node.C:1195
gridSize z
static void pme_select()
static void messageRun()
Definition: Node.C:1087
void scriptBarrier(void)
Definition: Node.C:1135
static Sync * Object()
Definition: Sync.h:50
void recvCheckpointAck(checkpoint &cp)
Definition: Controller.C:4108
void reloadStructure(const char *, const char *)
Definition: Node.C:998
BigReal x
Definition: Vector.h:66
AtomMap * atomMap
Definition: Node.h:200
void recvEnableExitScheduler(void)
Definition: Node.C:1329
int numAtoms
Definition: Molecule.h:556
GridforceGrid * get_gridfrc_grid(int gridnum) const
Definition: Molecule.h:1276
void NAMD_die(const char *err_msg)
Definition: common.C:83
PDB * pdb
Definition: Node.h:180
static LdbCoordinator * Object()
ConfigList * configList
Definition: Node.h:179
static AtomMap * Object()
Definition: AtomMap.h:36
#define BUFSIZE
Definition: Communicate.h:15
MGridforceParamsList mgridforcelist
static void nonbonded_select()
void recvEnableEarlyExit(void)
Definition: Node.C:1352
void sendPatchMap(void)
Definition: WorkDistrib.C:978
void send_Parameters(MOStream *)
Definition: Parameters.C:5048
int isRecvSpanningTreeOn()
Parameters * parameters
Definition: Node.h:177
static ComputeMap * Instance()
Definition: ComputeMap.C:26
void resumeAfterTraceBarrier(CkReductionMsg *msg)
Definition: Node.C:1411
unsigned int randomSeed
CkpvDeclare(AtomMap *, AtomMap_instance)
WorkDistrib * workDistrib
Definition: Node.h:166
~Node(void)
Definition: Node.C:355
Parameters * node_parameters
Definition: Node.C:423
int numPatches(void) const
Definition: PatchMap.h:59
int node(int pid) const
Definition: PatchMap.h:114
static GridforceGrid * unpack_grid(int gridnum, MIStream *msg)
Definition: GridForceGrid.C:60
#define SIMPARAMSTAG
Definition: common.h:150
SimParameters * node_simParameters
Definition: Node.C:422
Random * rand
Definition: Node.h:172
void mallocTestQd(void)
Definition: Node.C:403
void mapComputes(void)
Definition: WorkDistrib.C:2269
void recvCheckpointCAck_handler(envelope *)
Definition: Node.C:1311
static ComputeMap * Object()
Definition: ComputeMap.h:89
PatchMap * patchMap
Definition: Node.h:201
ScaledPosition smin
Definition: ComputeMsm.h:21
void useController(Controller *controllerPtr)
Definition: NamdState.C:74
void papiMeasureBarrier(int turnOnMeasure, int step)
Definition: Node.C:1417
BigReal y
Definition: Vector.h:66
void resendMolecule2()
Definition: Node.C:1045
int getNumAtoms()
Definition: Patch.h:105
void resendMolecule()
Definition: Node.C:1011
void distributeHomePatches(void)
Definition: WorkDistrib.C:930
void setProxyTreeBranchFactor(int dim)
Definition: ProxyMgr.C:380
colvarmodule * colvars
Definition: Node.h:184
void startup()
Definition: Node.C:428
int node(ComputeID cid)
Definition: ComputeMap.h:106
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
gridSize y
int set_gridfrc_grid(int gridnum, GridforceGrid *grid)
Definition: Molecule.h:1285
int pid(ComputeID cid, int i)
Definition: ComputeMap.C:109
#define MOLECULETAG
Definition: common.h:152
int isSendSpanningTreeOn()
Node(GroupInitMsg *msg)
Definition: Node.C:290
void resumeAfterTraceBarrier(int)
Definition: Controller.C:4148
infostream & endi(infostream &s)
Definition: InfoStream.C:38
void sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs)
Definition: Node.C:1271
ComputeMgr * computeMgr
Definition: Node.h:169
int b_p() const
Definition: Lattice.h:274
int mallocTest_size
Definition: Node.h:129
gridSize x
void enableEarlyExit(void)
Definition: Node.C:1356
void receive_Molecule(MIStream *)
Definition: Molecule.C:5806
void earlyExit(void)
Definition: Node.C:1364
MGridforceParams * at_index(int idx)
int a_p() const
Definition: Lattice.h:273
static PatchMap * Instance()
Definition: PatchMap.C:32
Molecule * molecule
Definition: Node.h:176
int gridsize_b(void) const
Definition: PatchMap.h:65
void useSequencer(Sequencer *sequencerPtr)
Definition: HomePatch.C:265
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
Definition: PDB.h:102
ResizeArrayIter< T > begin(void) const
char param[MAX_SCRIPT_PARAM_SIZE]
Definition: Node.h:74
void receive_Parameters(MIStream *)
Definition: Parameters.C:5424
virtual void set_scale(Vector s)=0
void reloadCharges(const char *filename)
Definition: Node.C:1144
void send_SimParameters(MOStream *)
int c_p() const
Definition: Lattice.h:275
void reloadGridforceGrid(const char *key)
Definition: Node.C:1167
void assignNodeToPatch(void)
Definition: WorkDistrib.C:1319
char * key
Definition: Node.C:89
int proxySendSpanning
Definition: ProxyMgr.C:45