12 #if !defined(WIN32) || defined(__CYGWIN__)
16 #include "Node.decl.h"
24 #define MIN_DEBUG_LEVEL 3
35 #include "main.decl.h"
58 #include "ComputeMgr.decl.h"
59 #include "ComputePmeMgr.decl.h"
61 #include "ComputeCUDAMgr.decl.h"
63 #include "ComputePmeCUDAMgr.decl.h"
66 #include "ComputeGridForceMgr.decl.h"
71 #include "CollectionMgr.decl.h"
72 #include "ParallelIOMgr.decl.h"
78 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
79 extern "C" void CApplicationInit();
97 #ifdef CMK_BALANCED_INJECTION_API
98 #include "ckBIconfig.h"
103 #include "CollectionMgr.decl.h"
104 #include "CollectionMaster.decl.h"
107 extern "C" void HPM_Init(
int);
108 extern "C" void HPM_Start(
char *label,
int);
109 extern "C" void HPM_Stop(
char *label,
int);
110 extern "C" void HPM_Print(
int,
int);
113 #if defined(NAMD_MIC)
114 extern void mic_dumpHostDeviceComputeMap();
115 extern void mic_initHostDeviceLDB();
118 #ifdef MEASURE_NAMD_WITH_PAPI
123 #define NUM_PAPI_EVENTS 6
126 #define MEASURE_PAPI_SPP 1
127 #define MEASURE_PAPI_CACHE 0
128 #define MEASURE_PAPI_FLOPS 0
130 static void namdInitPapiCounters(){
133 int retval = PAPI_library_init(PAPI_VER_CURRENT);
134 if(retval != PAPI_VER_CURRENT) {
136 NAMD_die(
"PAPI library is not compatitible!");
141 if(PAPI_thread_init(pthread_self)!=PAPI_OK) {
143 NAMD_die(
"Multi-thread mode in PAPI could not be initialized!");
148 CkpvInitialize(
int *, papiEvents);
149 CkpvAccess(papiEvents) =
new int[NUM_PAPI_EVENTS+1];
151 #if MEASURE_PAPI_CACHE
152 if(PAPI_query_event(PAPI_L1_DCM)==PAPI_OK) {
153 CkpvAccess(papiEvents)[0] = PAPI_L1_DCM;
156 CkPrintf(
"WARNING: PAPI_L1_DCM doesn't exsit on this platform!\n");
159 CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
162 if(PAPI_query_event(PAPI_L2_DCM)==PAPI_OK) {
163 CkpvAccess(papiEvents)[1] = PAPI_L2_DCM;
166 CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
168 #elif MEASURE_PAPI_FLOPS
169 if(PAPI_query_event(PAPI_FP_INS)==PAPI_OK) {
170 CkpvAccess(papiEvents)[0] = PAPI_FP_INS;
173 CkPrintf(
"WARNING: PAPI_FP_INS doesn't exsit on this platform!\n");
176 CkpvAccess(papiEvents)[0] = PAPI_TOT_INS;
179 if(PAPI_query_event(PAPI_FMA_INS)==PAPI_OK) {
180 CkpvAccess(papiEvents)[1] = PAPI_FMA_INS;
183 CkpvAccess(papiEvents)[1] = PAPI_TOT_CYC;
185 #elif MEASURE_PAPI_SPP
196 int papiEventSet = PAPI_NULL;
197 if (PAPI_create_eventset(&papiEventSet) != PAPI_OK) {
198 CmiAbort(
"PAPI failed to create event set!\n");
201 if(PAPI_query_event(PAPI_FP_OPS)==PAPI_OK) {
202 CkpvAccess(papiEvents)[0] = PAPI_FP_OPS;
205 CkAbort(
"WARNING: PAPI_FP_OPS doesn't exist on this platform!");
208 if(PAPI_query_event(PAPI_TOT_INS)==PAPI_OK) {
209 CkpvAccess(papiEvents)[1] = PAPI_TOT_INS;
212 CkAbort(
"WARNING: PAPI_TOT_INS doesn't exist on this platform!");
217 ret=PAPI_event_name_to_code(
"perf::PERF_COUNT_HW_CACHE_LL:MISS",&EventCode);
218 if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
219 CkpvAccess(papiEvents)[2] = EventCode;
222 CkAbort(
"WARNING: perf::PERF_COUNT_HW_CACHE_LL:MISS doesn't exist on this platform!");
225 ret=PAPI_event_name_to_code(
"DATA_PREFETCHER:ALL",&EventCode);
226 if(ret==PAPI_OK && PAPI_query_event(EventCode)==PAPI_OK) {
227 CkpvAccess(papiEvents)[3] = EventCode;
230 CkAbort(
"WARNING: DATA_PREFETCHER:ALL doesn't exist on this platform!");
233 if(PAPI_query_event(PAPI_L1_DCA)==PAPI_OK) {
234 CkpvAccess(papiEvents)[4] = PAPI_L1_DCA;
237 CkAbort(
"WARNING: PAPI_L1_DCA doesn't exist on this platform!");
249 if(PAPI_query_event(PAPI_TOT_CYC)==PAPI_OK) {
250 CkpvAccess(papiEvents)[5] = PAPI_TOT_CYC;
253 CkAbort(
"WARNING: PAPI_TOT_CYC doesn't exist on this platform!");
256 for(
int i=0;i<NUM_PAPI_EVENTS;i++)
258 int papiRetValue=PAPI_add_events(papiEventSet, &CkpvAccess(papiEvents)[i],1);
259 if (papiRetValue != PAPI_OK) {
260 CkPrintf(
"failure for event %d\n",i);
261 if (papiRetValue == PAPI_ECNFLCT) {
262 CmiAbort(
"PAPI events conflict! Please re-assign event types!\n");
264 CmiAbort(
"PAPI failed to add designated events!\n");
273 #ifdef OPENATOM_VERSION
274 static void startOA(){(
char inDriverFile[1024],
char inPhysicsFile[1024], CkCallback doneCB)
276 CProxy_oaSetup moaInstance = CProxy_oaSetup::ckNew(inDriverFile, inPhysicsFile, doneCB);
278 #endif //OPENATOM_VERSION
292 DebugM(4,
"Creating Node\n");
293 #if(CMK_CCS_AVAILABLE && CMK_WEB_MODE)
296 if (CkpvAccess(Node_instance) == 0) {
297 CkpvAccess(Node_instance) =
this;
298 eventEndOfTimeStep = traceRegisterUserEvent(
"EndOfTimeStep", 135);
300 NAMD_bug(
"Node::Node() - another instance of Node exists!");
303 CkpvAccess(BOCclass_group) = msg->
group;
306 CkpvAccess(BOCclass_group).node = thisgroup;
325 TopoManager *tmgr =
new TopoManager();
327 tmgr->rankToCoordinates(CkMyPe(), x, y, z, localRankOnNode);
333 DebugM(4,
"Creating PatchMap, AtomMap, ComputeMap\n");
341 #ifdef CMK_BALANCED_INJECTION_API
343 balancedInjectionLevel=ck_get_GNI_BIConfig();
345 ck_set_GNI_BIConfig(20);
361 delete CkpvAccess(comm);
365 #ifdef MEASURE_NAMD_WITH_PAPI
366 delete CkpvAccess(papiEvents);
370 void Node::bindBocVars(){
371 DebugM(4,
"Binding to BOC's\n");
372 CProxy_PatchMgr pm(CkpvAccess(BOCclass_group).patchMgr);
373 patchMgr = pm.ckLocalBranch();
374 CProxy_ProxyMgr prm(CkpvAccess(BOCclass_group).proxyMgr);
375 proxyMgr = prm.ckLocalBranch();
376 CProxy_WorkDistrib wd(CkpvAccess(BOCclass_group).
workDistrib);
378 CProxy_ComputeMgr cm(CkpvAccess(BOCclass_group).
computeMgr);
380 CProxy_LdbCoordinator lc(CkpvAccess(BOCclass_group).
ldbCoordinator);
382 #ifdef MEM_OPT_VERSION
383 CProxy_ParallelIOMgr io(CkpvAccess(BOCclass_group).ioMgr);
384 ioMgr = io.ckLocalBranch();
394 char* foo = (
char*) malloc(size*MB);
397 sprintf(buf,
"Malloc fails on Pe %d at %d MB.\n",CkMyPe(),step*size);
400 memset(foo,0,size*MB*
sizeof(
char));
405 CkPrintf(
"All PEs successfully allocated %d MB.\n", 100*
mallocTest_size);
407 CkPrintf(
"Starting malloc test on all PEs.\n");
411 CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
419 (CProxy_Node(CkpvAccess(BOCclass_group).node)).
startup();
435 startupTime = CmiWallTimer();
436 iout << iINFO <<
"Entering startup at " << startupTime <<
" s, ";
438 newTime = CmiWallTimer();
439 iout <<
iINFO <<
"Startup phase " << startupPhase-1 <<
" took "
440 << newTime - startupTime <<
" s, ";
441 startupTime = newTime;
446 switch (startupPhase) {
473 #if !CMK_SMP || ! USE_CKLOOP
484 CkStartQD(CkIndex_Node::mallocTestQd(), &thishandle);
490 #ifdef MEASURE_NAMD_WITH_PAPI
494 #ifdef MEM_OPT_VERSION
501 ioMgr->initialize(
this);
508 #ifdef MEM_OPT_VERSION
510 ioMgr->readPerAtomInfo();
517 #ifdef MEM_OPT_VERSION
519 ioMgr->updateMolInfo();
522 ioMgr->migrateAtomsMGrp();
533 HPM_Init(localRankOnNode);
550 CkpvAccess(BOCclass_group).computePmeMgr = CProxy_ComputePmeMgr::ckNew();
552 #ifdef OPENATOM_VERSION
554 CkpvAccess(BOCclass_group).computeMoaMgr = CProxy_ComputeMoaMgr::ckNew();
556 #endif // OPENATOM_VERSION
560 #ifdef OPENATOM_VERSION
563 CkCallback doneMoaStart(CkIndexmain::doneMoaSetup(), thishandle);
567 #endif // OPENATOM_VERSION
577 #ifdef MEM_OPT_VERSION
584 ioMgr->integrateMigratedAtoms();
587 ioMgr->integrateClusterSize();
593 ioMgr->calcAtomsInEachPatch();
609 #ifdef PROCTRACE_DEBUG
610 DebugFileTrace::Instance(
"procTrace");
616 #ifndef MEM_OPT_VERSION
628 #if defined(NAMD_MIC)
629 mic_initHostDeviceLDB();
636 iout <<
iINFO <<
"Simulating initial mapping is done, now NAMD exits\n" <<
endi;
646 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
647 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
653 #if defined(NODEAWARE_PROXY_SPANNINGTREE) && defined(USE_NODEPATCHMGR)
654 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
657 npm[CkMyNode()].ckLocalBranch()->registerLocalProxyMgr(CkpvAccess(BOCclass_group).proxyMgr);
659 npm[CkMyNode()].ckLocalBranch()->registerLocalPatchMap(CkMyRank(),
PatchMap::Object());
667 CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
668 msm[CkMyPe()].initialize(
new CkQdMsg);
672 CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
676 if (lattice.
a_p() && lattice.
b_p() && lattice.
c_p()) {
679 msm[CkMyPe()].initialize(msg);
681 else if ( ! CkMyPe() ) {
701 #ifdef OPENATOM_VERSION
703 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
704 moa[CkMyPe()].initialize(
new CkQdMsg);
706 #endif // OPENATOM_VERSION
710 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
711 pme.ckLocalBranch()->initialize(
new CkQdMsg);
716 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
717 pme[CkMyPe()].initialize(
new CkQdMsg);
725 CProxy_ComputeCUDAMgr nb(CkpvAccess(BOCclass_group).computeCUDAMgr);
726 nb.ckLocalBranch()->initialize(
new CkQdMsg);
736 #ifdef MEM_OPT_VERSION
738 ioMgr->sendAtomsToHomePatchProcs();
745 CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
746 msm[CkMyPe()].initialize_create();
750 #ifdef OPENATOM_VERSION
752 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
753 moa[CkMyPe()].initWorkers(
new CkQdMsg);
755 #endif // OPENATOM_VERSION
759 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
760 pme[CkMyNode()].initialize_pencils(
new CkQdMsg);
765 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
766 pme[CkMyPe()].initialize_pencils(
new CkQdMsg);
771 CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
772 msm[CkMyPe()].initWorkers(
new CkQdMsg);
776 CProxy_ComputeMsmMgr msm(CkpvAccess(BOCclass_group).computeMsmMgr);
777 msm[CkMyPe()].update(
new CkQdMsg);
781 #ifdef MEM_OPT_VERSION
784 ioMgr->createHomePatches();
794 #ifdef OPENATOM_VERSION
796 CProxy_ComputeMoaMgr moa(CkpvAccess(BOCclass_group).computeMoaMgr);
797 moa[CkMyPe()].startWorkers(
new CkQdMsg);
799 #endif // OPENATOM_VERSION
803 CProxy_ComputePmeCUDAMgr pme(CkpvAccess(BOCclass_group).computePmeCUDAMgr);
804 pme[CkMyNode()].activate_pencils(
new CkQdMsg);
809 CProxy_ComputePmeMgr pme(CkpvAccess(BOCclass_group).computePmeMgr);
810 pme[CkMyPe()].activate_pencils(
new CkQdMsg);
815 CProxy_ComputeMsmMsaMgr msm(CkpvAccess(BOCclass_group).computeMsmMsaMgr);
816 msm[CkMyPe()].startWorkers(
new CkQdMsg);
832 if (CkMyPe() && CkMyNodeSize() > 1 && ! CkMyRank()) Tcl_DeleteInterp(Tcl_CreateInterp());
835 #ifdef USE_NODEPATCHMGR
840 CProxy_NodeProxyMgr npm(CkpvAccess(BOCclass_group).nodeProxyMgr);
841 npm[CkMyNode()].ckLocalBranch()->createSTForHomePatches(
PatchMap::Object());
852 #if defined(NAMD_MIC)
853 mic_dumpHostDeviceComputeMap();
858 <<
" COMPUTE OBJECTS\n" <<
endi;
860 DebugM(4,
"Creating Computes\n");
862 DebugM(4,
"Building Sequencers\n");
864 DebugM(4,
"Initializing LDB\n");
872 #ifdef CMK_BALANCED_INJECTION_API
875 ck_set_GNI_BIConfig(balancedInjectionLevel);
891 #ifdef MEM_OPT_VERSION
893 ioMgr->freeMolSpace();
899 NAMD_bug(
"Startup Phase has a bug - check case statement");
907 CkStartQD(CkCallback(CkIndex_Node::startup(), thisgroup));
914 #ifdef OPENATOM_VERSION
915 void Node::doneMoaStart()
917 #ifdef OPENATOM_VERSION_DEBUG
918 CkPrintf(
"doneMoaStart executed on processor %d.\n", CkMyPe() );
919 #endif //OPENATOM_VERSION_DEBUG
921 #endif //OPENATOM_VERSION
923 void Node::namdOneCommInit()
925 if (CkpvAccess(comm) == NULL) {
935 void Node::namdOneRecv() {
936 if ( CmiMyRank() )
return;
947 DebugM(4,
"Getting SimParameters\n");
948 conv_msg = CkpvAccess(comm)->newInputStream(0,
SIMPARAMSTAG);
951 DebugM(4,
"Getting Parameters\n");
955 DebugM(4,
"Getting Molecule\n");
956 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
960 iout <<
iINFO <<
"Compute Nodes receiving GoMolecule Information" <<
"\n" <<
endi;
961 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
965 DebugM(4,
"Done Receiving\n");
968 void Node::namdOneSend() {
975 DebugM(4,
"Sending SimParameters\n");
979 DebugM(4,
"Sending Parameters\n");
983 DebugM(4,
"Sending Molecule\n");
991 iout <<
iINFO <<
"Master Node sending GoMolecule Information" <<
"\n" <<
endi;
1006 CProxy_Node nodeProxy(thisgroup);
1007 nodeProxy.resendMolecule();
1012 if ( CmiMyRank() ) {
1015 if ( CmiMyPe() == 0 ) {
1031 conv_msg = CkpvAccess(comm)->newInputStream(0,
MOLECULETAG);
1039 CProxy_Node nodeProxy(thisgroup);
1040 for (
int i=0; i<CmiMyNodeSize(); ++i ) {
1041 nodeProxy[CmiMyPe()+i].resendMolecule2();
1054 void Node::threadInit() {
1056 if (CthImplemented()) {
1057 CthSetStrategyDefault(CthSelf());
1059 NAMD_bug(
"Node::startup() Oh no, tiny elvis, threads not implemented");
1064 void Node::buildSequencers() {
1075 for (ai=ai.begin(); ai != ai.end(); ai++) {
1088 (CProxy_Node(CkpvAccess(BOCclass_group).node)).
run();
1104 DebugM(4,
"Starting Sequencers\n");
1108 for (ai=ai.
begin(); ai != ai.
end(); ai++) {
1115 double newTime = CmiWallTimer();
1116 iout <<
iINFO <<
"Startup phase " << startupPhase-1 <<
" took "
1117 << newTime - startupTime <<
" s, "
1119 iout <<
iINFO <<
"Finished startup at " << newTime <<
" s, "
1132 CkStartQD(CkIndex_Node::scriptBarrier(), &thishandle);
1145 FILE *file = fopen(filename,
"r");
1146 if ( ! file )
NAMD_die(
"node::reloadCharges():Error opening charge file.");
1149 float *
charge =
new float[n];
1151 for (
int i = 0; i < n; ++i ) {
1152 if ( ! fscanf(file,
"%f",&charge[i]) )
1153 NAMD_die(
"Node::reloadCharges():Not enough numbers in charge file.");
1157 CProxy_Node(thisgroup).reloadCharges(charge,n);
1168 DebugM(4,
"reloadGridforceGrid(const char*) called on node " << CkMyPe() <<
"\n" << endi);
1180 if (gridnum < 0 || mgridParams == NULL) {
1181 NAMD_die(
"Node::reloadGridforceGrid(const char*):Could not find grid.");
1186 NAMD_bug(
"Node::reloadGridforceGrid(const char*):grid not found");
1190 CProxy_Node(thisgroup).reloadGridforceGrid(gridnum);
1192 DebugM(4,
"reloadGridforceGrid(const char*) finished\n" << endi);
1196 DebugM(4,
"updateGridScale(char*, Vector) called on node " << CkMyPe() <<
"\n" << endi);
1208 if (gridnum < 0 || mgridParams == NULL) {
1209 NAMD_die(
"Node::updateGridScale(char*, Vector): Could not find grid.");
1214 NAMD_bug(
"Node::updateGridScale(char*, Vector): grid not found");
1216 CProxy_Node(thisgroup).updateGridScale(gridnum, scale.
x, scale.
y, scale.
z);
1218 DebugM(4,
"updateGridScale(char*, Vector) finished\n" << endi);
1221 if (CmiMyRank())
return;
1222 DebugM(4,
"updateGridScale(char*, int, float, float, float) called on node " << CkMyPe() <<
"\n" << endi);
1226 NAMD_bug(
"Node::updateGridScale(char*, int, float, float, float):grid not found");
1233 DebugM(4,
"updateGridScale(char*, int, float, float, float) finished\n" << endi);
1237 if (CmiMyRank())
return;
1238 DebugM(4,
"reloadGridforceGrid(int) called on node " << CkMyPe() <<
"\n" << endi);
1242 NAMD_bug(
"Node::reloadGridforceGrid(int):grid not found");
1247 DebugM(4,
"Receiving grid\n");
1257 DebugM(4,
"Sending grid\n");
1265 DebugM(4,
"reloadGridforceGrid(int) finished\n" << endi);
1273 msg->
replica = CmiMyPartition();
1277 strcpy(msg->
key,key);
1278 envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1279 CmiSetHandler(env,recvCheckpointCReq_index);
1280 #if CMK_HAS_PARTITION
1281 CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(
char*)env);
1283 CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(
char*)env);
1299 msg->
replica = CmiMyPartition();
1300 envelope *env = UsrToEnv(CheckpointMsg::pack(msg));
1301 CmiSetHandler(env,recvCheckpointCAck_index);
1302 #if CMK_HAS_PARTITION
1303 CmiInterSyncSendAndFree(CkMyPe(),remote,env->getTotalsize(),(
char*)env);
1305 CmiSyncSendAndFree(CkMyPe(),env->getTotalsize(),(
char*)env);
1325 CProxy_Node nodeProxy(thisgroup);
1326 nodeProxy[0].recvEnableExitScheduler();
1338 CkStartQD(CkIndex_Node::exitScheduler(), &thishandle);
1348 CProxy_Node nodeProxy(thisgroup);
1349 nodeProxy[0].recvEnableEarlyExit();
1360 CkStartQD(CkIndex_Node::earlyExit(),&thishandle);
1365 NAMD_die(
"Exiting prematurely; see error messages above.");
1378 this->
pdb = state->pdb;
1379 this->state =
state;
1385 HPM_Start(
"500 steps", localRankOnNode);
1391 HPM_Stop(
"500 steps", localRankOnNode);
1392 HPM_Print(CkMyPe(), localRankOnNode);
1398 if(turnOnTrace) traceBegin();
1401 if(turnOnTrace) CmiTurnOnStats();
1402 else CmiTurnOffStats();
1405 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1406 CkCallback cb(CkIndex_Node::resumeAfterTraceBarrier(NULL), nd[0]);
1407 contribute(0, NULL, CkReduction::sum_int, cb);
1412 CmiAssert(CmiMyPe()==0);
1418 #ifdef MEASURE_NAMD_WITH_PAPI
1420 double results[NUM_PAPI_EVENTS+1];
1423 CkpvAccess(papiEvents)[NUM_PAPI_EVENTS]=CmiWallTimer();
1425 long long counters[NUM_PAPI_EVENTS+1];
1426 int ret=PAPI_start_counters(CkpvAccess(papiEvents), NUM_PAPI_EVENTS);
1433 CkPrintf(
"error PAPI_start_counters (%d) at step %d called on proc %d\n",ret , step, CkMyPe());
1435 if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)!=PAPI_OK)
1437 CkPrintf(
"error PAPI_read_counters %d\n",PAPI_read_counters(counters, NUM_PAPI_EVENTS));
1440 long long counters[NUM_PAPI_EVENTS+1];
1441 for(
int i=0;i<NUM_PAPI_EVENTS;i++) counters[i]=0LL;
1442 if(PAPI_read_counters(counters, NUM_PAPI_EVENTS)==PAPI_OK)
1444 #if !MEASURE_PAPI_SPP
1445 results[0] = (double)counters[0]/1e6;
1446 results[1] = (double)counters[1]/1e6;
1448 for(
int i=0;i<NUM_PAPI_EVENTS;i++) results[i] = counters[i]/1e6;
1458 PAPI_stop_counters(counters, NUM_PAPI_EVENTS);
1462 results[NUM_PAPI_EVENTS]=CkpvAccess(papiEvents)[NUM_PAPI_EVENTS];
1463 CProxy_Node nd(CkpvAccess(BOCclass_group).node);
1464 CkCallback cb(CkIndex_Node::resumeAfterPapiMeasureBarrier(NULL), nd[0]);
1465 contribute(
sizeof(
double)*(NUM_PAPI_EVENTS+1), &results, CkReduction::sum_double, cb);
1470 #ifdef MEASURE_NAMD_WITH_PAPI
1473 double *results = (
double *)msg->getData();
1474 double endtime=CmiWallTimer();
1477 #if MEASURE_PAPI_SPP
1478 CkPrintf(
"SPP INFO: PAPI_FP_OPS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[0]);
1479 CkPrintf(
"SPP INFO: PAPI_TOT_INS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[1]);
1480 CkPrintf(
"SPP INFO: perf::PERF_COUNT_HW_CACHE_LL:MISS timestep %d to %d is %lf(1e6)\n", bstep,estep,results[2]);
1481 CkPrintf(
"SPP INFO: DATA_PREFETCHER:ALL timestep %d to %d is %lf(1e6)\n", bstep,estep,results[3]);
1482 CkPrintf(
"SPP INFO: PAPI_L1_DCA timestep %d to %d is %lf(1e6)\n", bstep,estep,results[4]);
1483 CkPrintf(
"SPP INFO: PAPI_TOT_CYC timestep %d to % is %lf(1e6)\n", bstep,estep,results[5]);
1486 CkPrintf(
"SPP INFO: WALLtime timestep %d to %d is %lf\n", bstep,estep,endtime-results[NUM_PAPI_EVENTS]);
1487 CkPrintf(
"SPP INFO: endtime %lf avgtime %lf tottime %lf\n", endtime,results[NUM_PAPI_EVENTS]/CkNumPes(),results[NUM_PAPI_EVENTS] );
1489 if(CkpvAccess(papiEvents)[0] == PAPI_FP_INS){
1490 double totalFPIns = results[0];
1491 if(CkpvAccess(papiEvents)[1] == PAPI_FMA_INS) totalFPIns += (results[1]*2);
1492 CkPrintf(
"FLOPS INFO: from timestep %d to %d, the total FP instruction of NAMD is %lf(x1e6) per processor\n",
1493 bstep, estep, totalFPIns/CkNumPes());
1495 char nameBuf[PAPI_MAX_STR_LEN];
1496 CkPrintf(
"PAPI COUNTERS INFO: from timestep %d to %d, ",
1498 for(
int i=0; i<NUM_PAPI_EVENTS; i++) {
1499 PAPI_event_code_to_name(CkpvAccess(papiEvents)[i], nameBuf);
1500 CkPrintf(
"%s is %lf(x1e6), ", nameBuf, results[i]/CkNumPes());
1502 CkPrintf(
"per processor\n");
1515 int numpes = CkNumPes();
1516 int nodesize = CkMyNodeSize();
1523 sprintf(fname,
"mapdump_%s.%d_%d_%d_%s", filename, numpes, nodesize, tag, gNAMDBinaryName);
1525 FILE *fp = fopen(fname,
"w");
1527 NAMD_die(
"Error in outputing PatchMap and ComputeMap info!\n");
1534 fprintf(fp,
"%d %d %d %d %d %d %d\n", numpes, nodesize, numPatches, numComputes,
1538 #ifdef MEM_OPT_VERSION
1539 fprintf(fp,
"%d %d\n", pMap->numAtoms(i), pMap->
node(i));
1546 for(
int i=0; i<numComputes; i++) {
1547 fprintf(fp,
"%d %d %d %d\n", cMap->
node(i), cMap->
type(i), cMap->
pid(i,0), cMap->
pid(i,1));
1555 #include "Node.def.h"
void allocateMap(int nAtomIDs)
std::ostream & iINFO(std::ostream &s)
Bool simulateInitialMapping
void recvCheckpointReq(const char *key, int task, checkpoint &cp)
void recvCheckpointCReq_handler(envelope *)
void setPatchMapArrived(bool s)
void receive_SimParameters(MIStream *)
void send_GoMolecule(MOStream *)
static ProxyMgr * Object()
void saveMolDataPointers(NamdState *)
void receive_GoMolecule(MIStream *)
LdbCoordinator * ldbCoordinator
int gridsize_c(void) const
static PatchMap * Object()
void sendEnableEarlyExit(void)
void send_Molecule(MOStream *)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
static void exit(int status=0)
static AtomMap * Instance()
SimParameters * simParameters
int proxyTreeBranchFactor
static void pack_grid(GridforceGrid *grid, MOStream *msg)
void createLoadBalancer()
HomePatchList * homePatchList()
void enableScriptBarrier()
void scriptSet(const char *, const char *)
char value[MAX_SCRIPT_PARAM_SIZE]
void sendEnableExitScheduler(void)
void recvCheckpointReq(CheckpointMsg *)
static void messageStartUp()
void reloadCharges(float charge[], int n)
Patch * patch(PatchID pid)
void outputPatchComputeMaps(const char *filename, int tag)
int loadStructure(const char *, const char *, int)
void createComputes(ComputeMap *map)
void split(int iStream, int numStreams)
void recvCheckpointAck(CheckpointMsg *)
virtual void reinitialize(SimParameters *simParams, MGridforceParams *mgridParams)=0
void sendComputeMap(void)
ResizeArrayIter< T > end(void) const
void registerUserEventsForAllComputeObjs()
void sendBuildCudaExclusions()
void resumeAfterPapiMeasureBarrier(CkReductionMsg *msg)
void initialize(PatchMap *pmap, ComputeMap *cmap, int reinit=0)
void scriptParam(ScriptParamMsg *)
int index_for_key(const char *key)
int gridsize_a(void) const
void enableExitScheduler(void)
void buildProxySpanningTree()
void createHomePatches(void)
void NAMD_bug(const char *err_msg)
ComputeType type(ComputeID cid)
Controller::checkpoint checkpoint
#define MGRIDFORCEPARAMS_DEFAULTKEY
MGridforceParams * find_key(const char *key)
void traceBarrier(int turnOnTrace, int step)
void updateGridScale(const char *key, Vector scale)
void recvCheckpointAck(checkpoint &cp)
void reloadStructure(const char *, const char *)
void recvEnableExitScheduler(void)
GridforceGrid * get_gridfrc_grid(int gridnum) const
void NAMD_die(const char *err_msg)
static LdbCoordinator * Object()
static AtomMap * Object()
MGridforceParamsList mgridforcelist
static void nonbonded_select()
void recvEnableEarlyExit(void)
void send_Parameters(MOStream *)
int isRecvSpanningTreeOn()
static ComputeMap * Instance()
void resumeAfterTraceBarrier(CkReductionMsg *msg)
CkpvDeclare(AtomMap *, AtomMap_instance)
WorkDistrib * workDistrib
Parameters * node_parameters
int numPatches(void) const
static GridforceGrid * unpack_grid(int gridnum, MIStream *msg)
SimParameters * node_simParameters
void recvCheckpointCAck_handler(envelope *)
static ComputeMap * Object()
void useController(Controller *controllerPtr)
void papiMeasureBarrier(int turnOnMeasure, int step)
void distributeHomePatches(void)
void setProxyTreeBranchFactor(int dim)
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
int set_gridfrc_grid(int gridnum, GridforceGrid *grid)
int pid(ComputeID cid, int i)
int isSendSpanningTreeOn()
void resumeAfterTraceBarrier(int)
infostream & endi(infostream &s)
void sendCheckpointReq(int remote, const char *key, int task, Lattice &lat, ControllerState &cs)
void enableEarlyExit(void)
void receive_Molecule(MIStream *)
MGridforceParams * at_index(int idx)
static PatchMap * Instance()
int gridsize_b(void) const
void useSequencer(Sequencer *sequencerPtr)
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
ResizeArrayIter< T > begin(void) const
char param[MAX_SCRIPT_PARAM_SIZE]
void receive_Parameters(MIStream *)
virtual void set_scale(Vector s)=0
void reloadCharges(const char *filename)
void send_SimParameters(MOStream *)
void reloadGridforceGrid(const char *key)
void assignNodeToPatch(void)