#if defined(NAMD_CUDA) || defined(NAMD_HIP)

const int ComputeBondedCUDA::CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES] = {

  Compute(c), computeMgr(computeMgr), deviceID(deviceID), masterPe(CkMyPe()),
  bondedKernel(deviceID, cudaNonbondedTables)

  computes.resize(CkMyNodeSize());
  patchIDsPerRank.resize(CkMyNodeSize());
  numExclPerRank.resize(CkMyNodeSize());
  for (int i=0; i < numExclPerRank.size(); i++) {
    numExclPerRank[i].numModifiedExclusions = 0;
    numExclPerRank[i].numExclusions = 0;
  }

  energies_virials = NULL;

  initializeCalled = false;
  accelMDdoDihe = false;
ComputeBondedCUDA::~ComputeBondedCUDA() {

  if (atoms != NULL) deallocate_host<CudaAtom>(&atoms);
  if (forces != NULL) deallocate_host<FORCE_TYPE>(&forces);
  if (energies_virials != NULL) deallocate_host<double>(&energies_virials);
  if (tupleData != NULL) deallocate_host<char>(&tupleData);

  if (initializeCalled) {
    cudaCheck(cudaEventDestroy(forceDoneEvent));
void ComputeBondedCUDA::unregisterBoxesOnPe() {
  for (int i=0; i < patchIDsPerRank[CkMyRank()].size(); i++) {
    PatchID patchID = patchIDsPerRank[CkMyRank()][i];

    if (tpe == NULL || tpe->p == NULL) {
      NAMD_bug("ComputeBondedCUDA::unregisterBoxesOnPe, TuplePatchElem not found or setup incorrectly");
void ComputeBondedCUDA::registerCompute(int pe, int type, PatchIDList& pids) {

  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::registerCompute() called on non master PE");

  int rank = CkRankOf(pe);

  HomeCompute& homeCompute = computes[rank].homeCompute;
  if (homeCompute.patchIDs.size() == 0) {
    homeCompute.patchIDs.resize(pids.size());
    for (int i=0; i < pids.size(); i++) {
      homeCompute.patchIDs[i] = pids[i];
      homeCompute.isBasePatch[pids[i]] = 1;
    }

    if (homeCompute.patchIDs.size() != pids.size()) {
      NAMD_bug("ComputeBondedCUDA::registerCompute(), homeComputes, patch IDs do not match (1)");

    for (int i=0; i < pids.size(); i++) {
      if (homeCompute.patchIDs[i] != pids[i]) {
        NAMD_bug("ComputeBondedCUDA::registerCompute(), homeComputes, patch IDs do not match (2)");

      homeCompute.tuples.push_back(new HomeTuples<BondElem, Bond, BondValue>(Tuples::BOND));

      homeCompute.tuples.push_back(new HomeTuples<AngleElem, Angle, AngleValue>(Tuples::ANGLE));

      homeCompute.tuples.push_back(new HomeTuples<DihedralElem, Dihedral, DihedralValue>(Tuples::DIHEDRAL));

      homeCompute.tuples.push_back(new HomeTuples<ImproperElem, Improper, ImproperValue>(Tuples::IMPROPER));

      homeCompute.tuples.push_back(new HomeTuples<ExclElem, Exclusion, int>(Tuples::EXCLUSION));

      homeCompute.tuples.push_back(new HomeTuples<CrosstermElem, Crossterm, CrosstermValue>(Tuples::CROSSTERM));

      NAMD_bug("ComputeBondedCUDA::registerCompute(), Unsupported compute type");
void ComputeBondedCUDA::registerSelfCompute(int pe, int type, int pid) {

  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::registerSelfCompute() called on non master PE");

  int rank = CkRankOf(pe);

  std::vector< SelfCompute >& selfComputes = computes[rank].selfComputes;
  auto it = find(selfComputes.begin(), selfComputes.end(), SelfCompute(type));
  if (it == selfComputes.end()) {

    selfComputes.push_back(SelfCompute(type));
    it = selfComputes.begin() + (selfComputes.size() - 1);

      it->tuples = new SelfTuples<BondElem, Bond, BondValue>(Tuples::BOND);

      it->tuples = new SelfTuples<AngleElem, Angle, AngleValue>(Tuples::ANGLE);

      it->tuples = new SelfTuples<DihedralElem, Dihedral, DihedralValue>(Tuples::DIHEDRAL);

      it->tuples = new SelfTuples<ImproperElem, Improper, ImproperValue>(Tuples::IMPROPER);

      it->tuples = new SelfTuples<CrosstermElem, Crossterm, CrosstermValue>(Tuples::CROSSTERM);

      NAMD_bug("ComputeBondedCUDA::registerSelfCompute(), Unsupported compute type");

  it->patchIDs.push_back(pid);
void ComputeBondedCUDA::assignPatchesOnPe() {

  for (int i=0; i < patchIDsPerRank[CkMyRank()].size(); i++) {
    PatchID patchID = patchIDsPerRank[CkMyRank()][i];

      NAMD_bug("ComputeBondedCUDA::assignPatchesOnPe, patch not found");
    if (flags == NULL) flags = &patchMap->patch(patchID)->flags;

      NAMD_bug("ComputeBondedCUDA::assignPatchesOnPe, TuplePatchElem not found");

    if (tpe->p != NULL) {
      NAMD_bug("ComputeBondedCUDA::assignPatchesOnPe, TuplePatchElem already registered");

void ComputeBondedCUDA::atomUpdate() {
  atomsChangedIn = true;
int ComputeBondedCUDA::noWork() {

void ComputeBondedCUDA::messageEnqueueWork() {
  if (masterPe != CkMyPe())
    NAMD_bug("ComputeBondedCUDA::messageEnqueueWork() must be called from master PE");

void ComputeBondedCUDA::doWork() {

  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::doWork() called on non master PE");

  atomsChanged = atomsChangedIn;
  atomsChangedIn = false;

  if (getNumPatches() == 0) return;

    NAMD_bug("ComputeBondedCUDA::doWork(), no flags set");

  lattice = flags->lattice;
  doEnergy = flags->doEnergy;
  doVirial = flags->doVirial;
  doSlow = flags->doFullElectrostatics;
  doMolly = flags->doMolly;

void ComputeBondedCUDA::patchReady(PatchID pid, int doneMigration, int seq) {
void ComputeBondedCUDA::updatePatches() {

  for (int i=0; i < patches.size(); i++) {
    patches[i].atomStart = atomStart;
    atomStart += patches[i].numAtoms;
  }
  atomStorageSize = atomStart;

  reallocate_host<CudaAtom>(&atoms, &atomsSize, atomStorageSize, 1.4f);
void ComputeBondedCUDA::mapAtoms() {

  for (int i=0; i < getNumPatches(); i++) {

void ComputeBondedCUDA::unmapAtoms() {

  for (int i=0; i < getNumPatches(); i++) {
void ComputeBondedCUDA::openBoxesOnPe() {

  std::vector<int>& patchIDs = patchIDsPerRank[CkMyRank()];

  for (auto it=patchIDs.begin(); it != patchIDs.end(); it++) {

    int pi = patchIndex[patchID];
    int atomStart = patches[pi].atomStart;
    int numAtoms = patches[pi].numAtoms;

    for (int i=0; i < numAtoms; i++) {
      atoms[atomStart + j] = src[i];

  patchesCounter -= patchIDs.size();
  if (patchesCounter == 0) {
    patchesCounter = getNumPatches();

    computeMgr->sendLoadTuplesOnPe(pes, this);
void countNumExclusions(Tuples* tuples, int& numModifiedExclusions, int& numExclusions) {
  numModifiedExclusions = 0;
  int ntuples = tuples->getNumTuples();

  for (int ituple=0; ituple < ntuples; ituple++) {
    if (src[ituple].modified) numModifiedExclusions++;

  numExclusions = ntuples - numModifiedExclusions;
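//
// loadTuplesOnPe() runs on every PE of the node: it loads the self-compute and
// home-compute tuples registered for this rank and tallies exclusions,
// keeping modified (scaled 1-4) exclusions separate from plain exclusions so
// they can later be placed in separate regions of the tuple buffer.
//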
void ComputeBondedCUDA::loadTuplesOnPe() {

  int numModifiedExclusions = 0;
  int numExclusions = 0;

  std::vector< SelfCompute >& selfComputes = computes[CkMyRank()].selfComputes;

  for (auto it=selfComputes.begin(); it != selfComputes.end(); it++) {
    it->tuples->loadTuples(tuplePatchList, NULL, &atomMap, it->patchIDs);

    countNumExclusions(it->tuples, tmp1, tmp2);
    numModifiedExclusions += tmp1;
    numExclusions += tmp2;

  HomeCompute& homeCompute = computes[CkMyRank()].homeCompute;
  for (int i=0; i < homeCompute.tuples.size(); i++) {
    homeCompute.tuples[i]->loadTuples(tuplePatchList,
      homeCompute.isBasePatch.data(), &atomMap,
      homeCompute.patchIDs);

    countNumExclusions(homeCompute.tuples[i], tmp1, tmp2);
    numModifiedExclusions += tmp1;
    numExclusions += tmp2;

  numExclPerRank[CkMyRank()].numModifiedExclusions = numModifiedExclusions;
  numExclPerRank[CkMyRank()].numExclusions = numExclusions;

  patchesCounter -= patchIDsPerRank[CkMyRank()].size();
  if (patchesCounter == 0) {
    patchesCounter = getNumPatches();
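//
// The copy*Data() routines below flatten tuple lists into the GPU-side tuple
// structures. The pattern is the same for every tuple type: the global atom
// index is the tuple's local index plus the owning patch's atomStart, the
// parameter index (itype) is the offset of the tuple's value pointer into the
// corresponding parameter array, and each offset vector combines the lattice
// wrap (wrap_delta_scaled) with the difference of the two patch centers.
//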
void ComputeBondedCUDA::copyBondData(const int ntuples, const BondElem* __restrict__ src,

  for (int ituple=0; ituple < ntuples; ituple++) {

    auto p0 = src[ituple].p[0];
    auto p1 = src[ituple].p[1];
    int pi0 = patchIndex[p0->patchID];
    int pi1 = patchIndex[p1->patchID];
    int l0 = src[ituple].localIndex[0];
    int l1 = src[ituple].localIndex[1];
    dstval.i = l0 + patches[pi0].atomStart;
    dstval.j = l1 + patches[pi1].atomStart;
    dstval.itype = (src[ituple].value - bond_array);

    Vector shiftVec = lattice.wrap_delta_scaled(position1, position2);
    shiftVec += patchMap->center(p0->patchID) - patchMap->center(p1->patchID);
    dstval.ioffsetXYZ = make_float3((float)shiftVec.x, (float)shiftVec.y, (float)shiftVec.z);
    dstval.scale = src[ituple].scale;
    dst[ituple] = dstval;
void ComputeBondedCUDA::copyAngleData(const int ntuples, const AngleElem* __restrict__ src,

  for (int ituple=0; ituple < ntuples; ituple++) {

    auto p0 = src[ituple].p[0];
    auto p1 = src[ituple].p[1];
    auto p2 = src[ituple].p[2];
    int pi0 = patchIndex[p0->patchID];
    int pi1 = patchIndex[p1->patchID];
    int pi2 = patchIndex[p2->patchID];
    int l0 = src[ituple].localIndex[0];
    int l1 = src[ituple].localIndex[1];
    int l2 = src[ituple].localIndex[2];
    dstval.i = l0 + patches[pi0].atomStart;
    dstval.j = l1 + patches[pi1].atomStart;
    dstval.k = l2 + patches[pi2].atomStart;
    dstval.itype = (src[ituple].value - angle_array);

    Vector shiftVec12 = lattice.wrap_delta_scaled(position1, position2);
    Vector shiftVec32 = lattice.wrap_delta_scaled(position3, position2);
    shiftVec12 += patchMap->center(p0->patchID) - patchMap->center(p1->patchID);
    shiftVec32 += patchMap->center(p2->patchID) - patchMap->center(p1->patchID);
    dstval.ioffsetXYZ = make_float3((float)shiftVec12.x, (float)shiftVec12.y, (float)shiftVec12.z);
    dstval.koffsetXYZ = make_float3((float)shiftVec32.x, (float)shiftVec32.y, (float)shiftVec32.z);
    dstval.scale = src[ituple].scale;
    dst[ituple] = dstval;
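//
// copyDihedralData() is shared between dihedrals and impropers: the doDihedral
// template parameter selects dihedralMultMap or improperMultMap when
// translating the parameter pointer into a flattened multiplicity index.
//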
template <bool doDihedral, typename T, typename P>
void ComputeBondedCUDA::copyDihedralData(const int ntuples, const T* __restrict__ src,
  const P* __restrict__ p_array, CudaDihedral* __restrict__ dst) {

  for (int ituple=0; ituple < ntuples; ituple++) {

    auto p0 = src[ituple].p[0];
    auto p1 = src[ituple].p[1];
    auto p2 = src[ituple].p[2];
    auto p3 = src[ituple].p[3];
    int pi0 = patchIndex[p0->patchID];
    int pi1 = patchIndex[p1->patchID];
    int pi2 = patchIndex[p2->patchID];
    int pi3 = patchIndex[p3->patchID];
    int l0 = src[ituple].localIndex[0];
    int l1 = src[ituple].localIndex[1];
    int l2 = src[ituple].localIndex[2];
    int l3 = src[ituple].localIndex[3];
    dstval.i = l0 + patches[pi0].atomStart;
    dstval.j = l1 + patches[pi1].atomStart;
    dstval.k = l2 + patches[pi2].atomStart;
    dstval.l = l3 + patches[pi3].atomStart;

      dstval.itype = dihedralMultMap[(src[ituple].value - p_array)];

      dstval.itype = improperMultMap[(src[ituple].value - p_array)];

    Vector shiftVec12 = lattice.wrap_delta_scaled(position1, position2);
    Vector shiftVec23 = lattice.wrap_delta_scaled(position2, position3);
    Vector shiftVec43 = lattice.wrap_delta_scaled(position4, position3);
    shiftVec12 += patchMap->center(p0->patchID) - patchMap->center(p1->patchID);
    shiftVec23 += patchMap->center(p1->patchID) - patchMap->center(p2->patchID);
    shiftVec43 += patchMap->center(p3->patchID) - patchMap->center(p2->patchID);
    dstval.ioffsetXYZ = make_float3((float)shiftVec12.x, (float)shiftVec12.y, (float)shiftVec12.z);
    dstval.joffsetXYZ = make_float3((float)shiftVec23.x, (float)shiftVec23.y, (float)shiftVec23.z);
    dstval.loffsetXYZ = make_float3((float)shiftVec43.x, (float)shiftVec43.y, (float)shiftVec43.z);
    dstval.scale = src[ituple].scale;
    dst[ituple] = dstval;
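//
// copyExclusionData() splits exclusions into two destination streams: modified
// (scaled 1-4) exclusions and plain exclusions are written at separate
// positions in the tuple buffer (pos/pos2, set up in copyTupleData below), so
// the kernel can treat the two kinds through different code paths.
//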
void ComputeBondedCUDA::copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,

  for (int ituple=0; ituple < ntuples; ituple++) {
    auto p0 = src[ituple].p[0];
    auto p1 = src[ituple].p[1];
    int pi0 = patchIndex[p0->patchID];
    int pi1 = patchIndex[p1->patchID];
    int l0 = src[ituple].localIndex[0];
    int l1 = src[ituple].localIndex[1];

    Vector shiftVec = lattice.wrap_delta_scaled(position1, position2);
    shiftVec += patchMap->center(p0->patchID) - patchMap->center(p1->patchID);

    ce.i = l0 + patches[pi0].atomStart;
    ce.j = l1 + patches[pi1].atomStart;

    ce.ioffsetXYZ = make_float3((float)shiftVec.x, (float)shiftVec.y, (float)shiftVec.z);

    if (src[ituple].modified) {
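//
// copyCrosstermData() handles the eight-atom CMAP crossterm tuples: the six
// offset vectors (12, 23, 34 and 56, 67, 78) correspond to the bonded segments
// of the two dihedrals that make up each crossterm.
//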
void ComputeBondedCUDA::copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,

  for (int ituple=0; ituple < ntuples; ituple++) {
    auto p0 = src[ituple].p[0];
    auto p1 = src[ituple].p[1];
    auto p2 = src[ituple].p[2];
    auto p3 = src[ituple].p[3];
    auto p4 = src[ituple].p[4];
    auto p5 = src[ituple].p[5];
    auto p6 = src[ituple].p[6];
    auto p7 = src[ituple].p[7];
    int pi0 = patchIndex[p0->patchID];
    int pi1 = patchIndex[p1->patchID];
    int pi2 = patchIndex[p2->patchID];
    int pi3 = patchIndex[p3->patchID];
    int pi4 = patchIndex[p4->patchID];
    int pi5 = patchIndex[p5->patchID];
    int pi6 = patchIndex[p6->patchID];
    int pi7 = patchIndex[p7->patchID];
    int l0 = src[ituple].localIndex[0];
    int l1 = src[ituple].localIndex[1];
    int l2 = src[ituple].localIndex[2];
    int l3 = src[ituple].localIndex[3];
    int l4 = src[ituple].localIndex[4];
    int l5 = src[ituple].localIndex[5];
    int l6 = src[ituple].localIndex[6];
    int l7 = src[ituple].localIndex[7];
    dst[ituple].i1 = l0 + patches[pi0].atomStart;
    dst[ituple].i2 = l1 + patches[pi1].atomStart;
    dst[ituple].i3 = l2 + patches[pi2].atomStart;
    dst[ituple].i4 = l3 + patches[pi3].atomStart;
    dst[ituple].i5 = l4 + patches[pi4].atomStart;
    dst[ituple].i6 = l5 + patches[pi5].atomStart;
    dst[ituple].i7 = l6 + patches[pi6].atomStart;
    dst[ituple].i8 = l7 + patches[pi7].atomStart;
    dst[ituple].itype = (src[ituple].value - crossterm_array);

    Vector shiftVec12 = lattice.wrap_delta_scaled(position1, position2);
    Vector shiftVec23 = lattice.wrap_delta_scaled(position2, position3);
    Vector shiftVec34 = lattice.wrap_delta_scaled(position3, position4);
    Vector shiftVec56 = lattice.wrap_delta_scaled(position5, position6);
    Vector shiftVec67 = lattice.wrap_delta_scaled(position6, position7);
    Vector shiftVec78 = lattice.wrap_delta_scaled(position7, position8);
    shiftVec12 += patchMap->center(p0->patchID) - patchMap->center(p1->patchID);
    shiftVec23 += patchMap->center(p1->patchID) - patchMap->center(p2->patchID);
    shiftVec34 += patchMap->center(p2->patchID) - patchMap->center(p3->patchID);
    shiftVec56 += patchMap->center(p4->patchID) - patchMap->center(p5->patchID);
    shiftVec67 += patchMap->center(p5->patchID) - patchMap->center(p6->patchID);
    shiftVec78 += patchMap->center(p6->patchID) - patchMap->center(p7->patchID);
    dst[ituple].offset12XYZ = make_float3((float)shiftVec12.x, (float)shiftVec12.y, (float)shiftVec12.z);
    dst[ituple].offset23XYZ = make_float3((float)shiftVec23.x, (float)shiftVec23.y, (float)shiftVec23.z);
    dst[ituple].offset34XYZ = make_float3((float)shiftVec34.x, (float)shiftVec34.y, (float)shiftVec34.z);
    dst[ituple].offset56XYZ = make_float3((float)shiftVec56.x, (float)shiftVec56.y, (float)shiftVec56.z);
    dst[ituple].offset67XYZ = make_float3((float)shiftVec67.x, (float)shiftVec67.y, (float)shiftVec67.z);
    dst[ituple].offset78XYZ = make_float3((float)shiftVec78.x, (float)shiftVec78.y, (float)shiftVec78.z);
    dst[ituple].scale = src[ituple].scale;
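//
// Static trampoline used by CkLoop: each worker thread is handed an inclusive
// [first, last] slice of tupleCopyWorkList and forwards it to the member
// function below.
//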
void ComputeBondedCUDA::tupleCopyWorker(int first, int last, void *result, int paraNum, void *param) {
  ComputeBondedCUDA* c = (ComputeBondedCUDA *)param;
  c->tupleCopyWorker(first, last);
}

void ComputeBondedCUDA::tupleCopyWorker(int first, int last) {

    int pos = exclusionStartPos;
    int pos2 = exclusionStartPos2;

      int ntuples = (*it)->getNumTuples();

  for (int i=first; i <= last; i++) {
    switch (tupleCopyWorkList[i].tupletype) {

      copyBondData(tupleCopyWorkList[i].ntuples, (BondElem *)tupleCopyWorkList[i].tupleElemList,
        Node::Object()->parameters->bond_array, (CudaBond *)&tupleData[tupleCopyWorkList[i].tupleDataPos]);

      copyAngleData(tupleCopyWorkList[i].ntuples, (AngleElem *)tupleCopyWorkList[i].tupleElemList,
        Node::Object()->parameters->angle_array, (CudaAngle *)&tupleData[tupleCopyWorkList[i].tupleDataPos]);

      copyDihedralData<true, DihedralElem, DihedralValue>(tupleCopyWorkList[i].ntuples,
        (CudaDihedral *)&tupleData[tupleCopyWorkList[i].tupleDataPos]);

      copyDihedralData<false, ImproperElem, ImproperValue>(tupleCopyWorkList[i].ntuples,
        (CudaDihedral *)&tupleData[tupleCopyWorkList[i].tupleDataPos]);

      copyCrosstermData(tupleCopyWorkList[i].ntuples, (CrosstermElem *)tupleCopyWorkList[i].tupleElemList,

      NAMD_bug("ComputeBondedCUDA::tupleCopyWorker, Unsupported tuple type");
void ComputeBondedCUDA::copyTupleData() {

  int numModifiedExclusions = 0;
  int numExclusions = 0;
  for (int i=0; i < numExclPerRank.size(); i++) {
    numModifiedExclusions += numExclPerRank[i].numModifiedExclusions;
    numExclusions += numExclPerRank[i].numExclusions;
  }

  exclusionStartPos = 0;
  exclusionStartPos2 = 0;
  tupleCopyWorkList.clear();
  for (int tupletype=0; tupletype < Tuples::NUM_TUPLE_TYPES; tupletype++) {

      exclusionStartPos = pos;
      exclusionStartPos2 = pos + numModifiedExclusionsWA*CudaTupleTypeSize[Tuples::EXCLUSION];

    for (auto it = tupleList[tupletype].begin(); it != tupleList[tupletype].end(); it++) {
      int ntuples = (*it)->getNumTuples();

        TupleCopyWork tupleCopyWork;
        tupleCopyWork.tupletype = tupletype;
        tupleCopyWork.ntuples = ntuples;
        tupleCopyWork.tupleElemList = (*it)->getTupleList();
        tupleCopyWork.tupleDataPos = pos;
        tupleCopyWorkList.push_back(tupleCopyWork);
        pos += ntuples*CudaTupleTypeSize[tupletype];

    numTuplesPerType[tupletype] = num;

      posWA += (numModifiedExclusionsWA + numExclusionsWA)*CudaTupleTypeSize[tupletype];

  if (numModifiedExclusions + numExclusions != numTuplesPerType[Tuples::EXCLUSION]) {
    NAMD_bug("ComputeBondedCUDA::copyTupleData, invalid number of exclusions");

  hasExclusions = (numExclusions > 0);
  hasModifiedExclusions = (numModifiedExclusions > 0);

  reallocate_host<char>(&tupleData, &tupleDataSize, posWA, 1.2f);

#if CMK_SMP && USE_CKLOOP

  if (useCkLoop >= 1) {
    CkLoop_Parallelize(tupleCopyWorker, 1, (void *)this, CkMyNodeSize(), -1, tupleCopyWorkList.size() - 1);

    tupleCopyWorker(-1, tupleCopyWorkList.size() - 1);

  int forceStorageSize = bondedKernel.getAllForceSize(atomStorageSize, true);
  reallocate_host<FORCE_TYPE>(&forces, &forcesSize, forceStorageSize, 1.4f);
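//
// launchWork() converts the lattice vectors to float3 and launches the bonded
// force kernel on the device stream; once the kernel has been queued, the
// completion callback is armed via forceDoneSetCallback().
//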
void ComputeBondedCUDA::launchWork() {
  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::launchWork() called on non master PE");

  float3 lata = make_float3(lattice.a().x, lattice.a().y, lattice.a().z);
  float3 latb = make_float3(lattice.b().x, lattice.b().y, lattice.b().z);
  float3 latc = make_float3(lattice.c().x, lattice.c().y, lattice.c().z);

  bondedKernel.bondedForce(

    doEnergy, doVirial, doSlow,

  forceDoneSetCallback();
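//
// forceDoneCheck() polls the CUDA event recorded after the force kernel: if
// the event has completed, the results are processed; if cudaEventQuery()
// returns anything other than cudaErrorNotReady the error is fatal; otherwise
// the check is re-armed with CcdCallFnAfter, giving up after one million polls.
//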
void ComputeBondedCUDA::forceDoneCheck(void *arg, double walltime) {
  ComputeBondedCUDA* c = (ComputeBondedCUDA *)arg;

  if (CkMyPe() != c->masterPe)
    NAMD_bug("ComputeBondedCUDA::forceDoneCheck called on non masterPe");

  cudaError_t err = cudaEventQuery(c->forceDoneEvent);
  if (err == cudaSuccess) {

  } else if (err != cudaErrorNotReady) {

    sprintf(errmsg, "in ComputeBondedCUDA::forceDoneCheck after polling %d times over %f s",
      c->checkCount, walltime - c->beforeForceCompute);

  if (c->checkCount >= 1000000) {

    sprintf(errmsg, "ComputeBondedCUDA::forceDoneCheck polled %d times over %f s",
      c->checkCount, walltime - c->beforeForceCompute);

  CcdCallFnAfter(forceDoneCheck, arg, 0.1);
void ComputeBondedCUDA::forceDoneSetCallback() {
  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::forceDoneSetCallback called on non masterPe");

  beforeForceCompute = CkWallTimer();

  CcdCallFnAfter(forceDoneCheck, this, 0.1);
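//
// convertForceToDouble() unpacks one atom's force from the kernel's FORCE_TYPE
// buffer into double x/y/z components; with USE_STRIDED_FORCE the x, y and z
// components are stored forceStride elements apart in the buffer.
//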
inline void convertForceToDouble(const FORCE_TYPE *af, const int forceStride,
    double& afx, double& afy, double& afz) {
#ifdef USE_STRIDED_FORCE
template <bool sumNbond, bool sumSlow>
void finishForceLoop(const int numAtoms, const int forceStride,
  Force* __restrict__ f, Force* __restrict__ f_nbond, Force* __restrict__ f_slow) {

  for (int j=0; j < numAtoms; j++) {

      double afx, afy, afz;
      convertForceToDouble(af + j, forceStride, afx, afy, afz);

      double afx, afy, afz;
      convertForceToDouble(af_nbond + j, forceStride, afx, afy, afz);

      double afx, afy, afz;
      convertForceToDouble(af_slow + j, forceStride, afx, afy, afz);
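//
// finishPatchesOnPe() copies the per-patch bonded, nonbonded-correction and
// slow forces back into the patch force boxes. The sumNbond/sumSlow template
// flags of finishForceLoop() are chosen from whether modified exclusions and
// full-electrostatics exclusions were present this step, so the loop only
// touches the force arrays that were actually computed.
//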
void ComputeBondedCUDA::finishPatchesOnPe() {

  int myRank = CkMyRank();
#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
  sprintf(buf, "%s: %d", NamdProfileEventStr[NamdProfileEvent::COMPUTE_BONDED_CUDA_FINISH_PATCHES], myRank);

  const int forceStride = bondedKernel.getForceStride(atomStorageSize);
  const int forceSize = bondedKernel.getForceSize(atomStorageSize);
  const bool sumNbond = hasModifiedExclusions;
  const bool sumSlow = (hasModifiedExclusions || hasExclusions) && doSlow;

  for (int i=0; i < patchIDsPerRank[myRank].size(); i++) {
    PatchID patchID = patchIDsPerRank[myRank][i];

      NAMD_bug("ComputeBondedCUDA::finishPatchesOnPe, TuplePatchElem not found");

    int pi = patchIndex[patchID];
    int numAtoms = patches[pi].numAtoms;
    int atomStart = patches[pi].atomStart;

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
    sprintf(buf2, "%s: %d", NamdProfileEventStr[NamdProfileEvent::COMPUTE_BONDED_CUDA_FINISH_FORCE_LOOP], myRank);

    if (!sumNbond && !sumSlow) {
      finishForceLoop<false, false>(numAtoms, forceStride, af, af_nbond, af_slow, f, f_nbond, f_slow);
    } else if (sumNbond && !sumSlow) {
      finishForceLoop<true, false>(numAtoms, forceStride, af, af_nbond, af_slow, f, f_nbond, f_slow);
    } else if (!sumNbond && sumSlow) {
      finishForceLoop<false, true>(numAtoms, forceStride, af, af_nbond, af_slow, f, f_nbond, f_slow);
    } else if (sumNbond && sumSlow) {
      finishForceLoop<true, true>(numAtoms, forceStride, af, af_nbond, af_slow, f, f_nbond, f_slow);

      NAMD_bug("ComputeBondedCUDA::finishPatchesOnPe, logically impossible choice");

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
    NAMD_EVENT_STOP(1, NamdProfileEvent::COMPUTE_BONDED_CUDA_FINISH_FORCE_LOOP);

  patchesCounter -= patchIDsPerRank[CkMyRank()].size();
  if (patchesCounter == 0) {
    patchesCounter = getNumPatches();

#if defined(NAMD_NVTX_ENABLED) || defined(NAMD_CMK_TRACE_ENABLED) || defined(NAMD_ROCTX_ENABLED)
  NAMD_EVENT_STOP(1, NamdProfileEvent::COMPUTE_BONDED_CUDA_FINISH_PATCHES);
void ComputeBondedCUDA::finishPatches() {
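//
// With USE_FP_VIRIAL the kernel accumulates the virial in fixed point;
// convertVirial() reinterprets the nine accumulators as long long and scales
// them back to double with virial_to_double.
//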
#ifdef WRITE_FULL_VIRIALS
#ifdef USE_FP_VIRIAL
void convertVirial(double *virial) {
  long long int *virial_lli = (long long int *)virial;
  for (int i=0; i < 9; i++) {
    virial[i] = ((double)virial_lli[i])*virial_to_double;
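//
// finishReductions() runs on the master PE once all patches are done: it
// submits per-type tuple counts, adds the virial tensors (normal, nonbonded,
// slow, and accelMD dihedral) to the reduction, and calls reduction->submit().
// The elided switch bodies presumably add the corresponding energy terms.
//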
void ComputeBondedCUDA::finishReductions() {

  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::finishReductions() called on non masterPe");

  for (int tupletype=0; tupletype < Tuples::NUM_TUPLE_TYPES; tupletype++) {
    if (numTuplesPerType[tupletype] > 0) {

      switch (tupletype) {

        case Tuples::DIHEDRAL:

        case Tuples::IMPROPER:

        case Tuples::EXCLUSION:

        case Tuples::CROSSTERM:

          NAMD_bug("ComputeBondedCUDA::finishReductions, Unsupported tuple type");

      auto it = tupleList[tupletype].begin();
      (*it)->submitTupleCount(reduction, numTuplesPerType[tupletype]);

#ifdef WRITE_FULL_VIRIALS
#ifdef USE_FP_VIRIAL

#error "non-WRITE_FULL_VIRIALS not implemented"

  ADD_TENSOR(reduction, REDUCTION_VIRIAL_NORMAL, energies_virials, ComputeBondedCUDAKernel::normalVirialIndex);
  ADD_TENSOR(reduction, REDUCTION_VIRIAL_NBOND, energies_virials, ComputeBondedCUDAKernel::nbondVirialIndex);
  ADD_TENSOR(reduction, REDUCTION_VIRIAL_SLOW, energies_virials, ComputeBondedCUDAKernel::slowVirialIndex);
  ADD_TENSOR(reduction, REDUCTION_VIRIAL_AMD_DIHE, energies_virials, ComputeBondedCUDAKernel::amdDiheVirialIndex);

  ADD_TENSOR(reduction, REDUCTION_VIRIAL_NORMAL, energies_virials, ComputeBondedCUDAKernel::amdDiheVirialIndex);

  reduction->submit();
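//
// initialize() is called once on the master PE. It collects the patch IDs
// owned by each rank (self computes, home computes, and their neighbor
// patches), creates the high-priority CUDA stream and the forceDoneEvent,
// builds the per-type tuple lists, and uploads the bonded parameter tables
// (bonds, angles, dihedrals, impropers, crossterms) to the kernel.
//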
void ComputeBondedCUDA::initialize() {

  if (CkMyPe() != masterPe)
    NAMD_bug("ComputeBondedCUDA::initialize() called on non master PE");

  for (int rank=0; rank < computes.size(); rank++) {
    if (computes[rank].selfComputes.size() > 0 || computes[rank].homeCompute.patchIDs.size() > 0) {
      pes.push_back(CkNodeFirst(CkMyNode()) + rank);

  if (pes.size() == 0) return;

  initializeCalled = true;

#if CUDA_VERSION >= 5050 || defined(NAMD_HIP)
  int leastPriority, greatestPriority;
  cudaCheck(cudaDeviceGetStreamPriorityRange(&leastPriority, &greatestPriority));
  cudaCheck(cudaStreamCreateWithPriority(&stream, cudaStreamDefault, greatestPriority));

  cudaCheck(cudaEventCreate(&forceDoneEvent));
  lock = CmiCreateLock();

  for (int rank=0; rank < computes.size(); rank++) {
    std::vector< SelfCompute >& selfComputes = computes[rank].selfComputes;
    for (auto it=selfComputes.begin(); it != selfComputes.end(); it++) {
      for (auto jt=it->patchIDs.begin(); jt != it->patchIDs.end(); jt++) {

        patchIDsPerRank[rank].push_back(*jt);
        allPatchIDs.push_back(*jt);

  for (int rank=0; rank < computes.size(); rank++) {
    HomeCompute& homeCompute = computes[rank].homeCompute;
    std::vector<int>& patchIDs = homeCompute.patchIDs;
    for (int i=0; i < patchIDs.size(); i++) {
      int patchID = patchIDs[i];

      patchIDsPerRank[rank].push_back(patchID);
      allPatchIDs.push_back(patchID);

  std::vector< std::vector<int> > patchIDsToAppend(CkMyNodeSize());

  std::vector<int> neighborPids;
  for (int rank=0; rank < computes.size(); rank++) {

    HomeCompute& homeCompute = computes[rank].homeCompute;
    std::vector<int>& patchIDs = homeCompute.patchIDs;
    for (int i=0; i < patchIDs.size(); i++) {
      int patchID = patchIDs[i];

      for (int j=0; j < numNeighbors; j++) {

        neighborPids.push_back(neighbors[j]);

  std::sort(neighborPids.begin(), neighborPids.end());
  auto it_end = std::unique(neighborPids.begin(), neighborPids.end());
  neighborPids.resize(std::distance(neighborPids.begin(), it_end));

  for (int i=0; i < neighborPids.size(); i++) {
    for (int rank=0; rank < computes.size(); rank++) {
      int pid = neighborPids[i];
      int pe = rank + CkNodeFirst(CkMyNode());
      if (patchMap->node(pid) == pe) {

        patchIDsPerRank[rank].push_back(pid);
        allPatchIDs.push_back(pid);

        patchIDsToAppend[rank].push_back(pid);

        pes.push_back(CkNodeFirst(CkMyNode()) + rank);

  auto it_end = std::unique(pes.begin(), pes.end());
  pes.resize(std::distance(pes.begin(), it_end));

  for (int rank=0; rank < computes.size(); rank++) {

    HomeCompute& homeCompute = computes[rank].homeCompute;
    std::vector<int>& patchIDs = homeCompute.patchIDs;
    std::vector<int> neighborPatchIDs;
    for (int i=0; i < patchIDs.size(); i++) {
      int patchID = patchIDs[i];

      for (int j=0; j < numNeighbors; j++) {

        patchIDsPerRank[rank].push_back(neighbors[j]);
        allPatchIDs.push_back(neighbors[j]);

        if ( std::count(patchIDs.begin(), patchIDs.end(), neighbors[j]) == 0
            && std::count(neighborPatchIDs.begin(), neighborPatchIDs.end(), neighbors[j]) == 0 ) {
          neighborPatchIDs.push_back(neighbors[j]);

    for (int i=0; i < neighborPatchIDs.size(); i++) {
      patchIDsToAppend[rank].push_back(neighborPatchIDs[i]);

  for (int rank=0; rank < patchIDsToAppend.size(); rank++) {
    for (int i=0; i < patchIDsToAppend[rank].size(); i++) {
      computes[rank].homeCompute.patchIDs.push_back(patchIDsToAppend[rank][i]);

  std::sort(allPatchIDs.begin(), allPatchIDs.end());
  auto it_end = std::unique(allPatchIDs.begin(), allPatchIDs.end());
  allPatchIDs.resize(std::distance(allPatchIDs.begin(), it_end));

  setNumPatches(allPatchIDs.size());

  patchesCounter = getNumPatches();

  patches.resize(getNumPatches());

  for (int rank=0; rank < computes.size(); rank++) {
    std::vector< SelfCompute >& selfComputes = computes[rank].selfComputes;
    for (auto it=selfComputes.begin(); it != selfComputes.end(); it++) {
      tupleList[it->tuples->getType()].push_back(it->tuples);

    HomeCompute& homeCompute = computes[rank].homeCompute;
    for (int i=0; i < homeCompute.tuples.size(); i++) {
      tupleList[homeCompute.tuples[i]->getType()].push_back(homeCompute.tuples[i]);

  std::vector<char> patchIDset(patchMap->numPatches(), 0);
  int numPatchIDset = 0;
  int numPatchIDs = 0;
  for (int rank=0; rank < computes.size(); rank++) {
    numPatchIDs += patchIDsPerRank[rank].size();
    for (int i=0; i < patchIDsPerRank[rank].size(); i++) {
      PatchID patchID = patchIDsPerRank[rank][i];
      if (patchIDset[patchID] == 0) numPatchIDset++;
      patchIDset[patchID] = 1;
      if ( !std::count(allPatchIDs.begin(), allPatchIDs.end(), patchID) ) {
        NAMD_bug("ComputeBondedCUDA::initialize(), inconsistent patch mapping");

  if (numPatchIDs != getNumPatches() || numPatchIDset != getNumPatches()) {
    NAMD_bug("ComputeBondedCUDA::initialize(), inconsistent patch mapping");

  atomMappers.resize(getNumPatches());
  for (int i=0; i < getNumPatches(); i++) {
    atomMappers[i] = new AtomMapper(allPatchIDs[i], &atomMap);
    patchIndex[allPatchIDs[i]] = i;

  for (int tupletype=0; tupletype < Tuples::NUM_TUPLE_TYPES; tupletype++) {
    if (tupleList[tupletype].size() > 0) {

      std::vector<CudaBondValue> bondValues(NumBondParams);
      for (int i=0; i < NumBondParams; i++) {
        bondValues[i].k = bond_array[i].k;
        bondValues[i].x0 = bond_array[i].x0;
        bondValues[i].x1 = bond_array[i].x1;
      }
      bondedKernel.setupBondValues(NumBondParams, bondValues.data());
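      //
      // Angle parameters: theta0 is stored directly for normal angles but as
      // cos(theta0) when cosAngles is in use (normal == 0); Urey-Bradley terms
      // (k_ub, r_ub) are copied as-is, and combining cosAngles with
      // Urey-Bradley is rejected with NAMD_die.
      //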
      std::vector<CudaAngleValue> angleValues(NumAngleParams);
      bool normal_ub_error = false;
      for (int i=0; i < NumAngleParams; i++) {
        angleValues[i].k = angle_array[i].k;
        if (angle_array[i].normal == 1) {
          angleValues[i].theta0 = angle_array[i].theta0;
        } else {
          angleValues[i].theta0 = cos(angle_array[i].theta0);
        }
        normal_ub_error |= (angle_array[i].normal == 0 && angle_array[i].k_ub);
        angleValues[i].k_ub = angle_array[i].k_ub;
        angleValues[i].r_ub = angle_array[i].r_ub;
        angleValues[i].normal = angle_array[i].normal;
      }
      if (normal_ub_error) NAMD_die("ERROR: Can't use cosAngles with Urey-Bradley angles");
      bondedKernel.setupAngleValues(NumAngleParams, angleValues.data());
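      //
      // Dihedral and improper parameters are flattened over multiplicity:
      // dihedralMultMap/improperMultMap record where each parameter's terms
      // start, and bit 0 of the stored periodicity flags whether another term
      // of the same parameter follows ((n << 1) | (j < multiplicity - 1)).
      //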
    case Tuples::DIHEDRAL:

      int NumDihedralParamsMult = 0;
      for (int i=0; i < NumDihedralParams; i++) {
        NumDihedralParamsMult += std::max(0, dihedral_array[i].multiplicity);
      }
      std::vector<CudaDihedralValue> dihedralValues(NumDihedralParamsMult);
      dihedralMultMap.resize(NumDihedralParams);

      for (int i=0; i < NumDihedralParams; i++) {

        dihedralMultMap[i] = k;
        for (int j=0; j < multiplicity; j++) {
          dihedralValues[k].k = dihedral_array[i].values[j].k;
          dihedralValues[k].n = (dihedral_array[i].values[j].n << 1) | (j < (multiplicity - 1));
          dihedralValues[k].delta = dihedral_array[i].values[j].delta;

      bondedKernel.setupDihedralValues(NumDihedralParamsMult, dihedralValues.data());

    case Tuples::IMPROPER:

      int NumImproperParamsMult = 0;
      for (int i=0; i < NumImproperParams; i++) {
        NumImproperParamsMult += std::max(0, improper_array[i].multiplicity);
      }
      std::vector<CudaDihedralValue> improperValues(NumImproperParamsMult);
      improperMultMap.resize(NumImproperParams);

      for (int i=0; i < NumImproperParams; i++) {

        improperMultMap[i] = k;
        for (int j=0; j < multiplicity; j++) {
          improperValues[k].k = improper_array[i].values[j].k;
          improperValues[k].n = (improper_array[i].values[j].n << 1) | (j < (multiplicity - 1));
          improperValues[k].delta = improper_array[i].values[j].delta;

      bondedKernel.setupImproperValues(NumImproperParamsMult, improperValues.data());
    case Tuples::CROSSTERM:

      std::vector<CudaCrosstermValue> crosstermValues(NumCrosstermParams);
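      //
      // For each (i, j) cell of the CMAP grid, the 16 bicubic interpolation
      // coefficients are obtained by multiplying the standard 16x16 inverse
      // bicubic matrix (Ainv) with the vector of corner values d00 and corner
      // derivatives d10, d01, d11, scaled by the grid spacing h = M_PI/12.
      //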
      for (int ipar=0; ipar < NumCrosstermParams; ipar++) {
        for (int i=0; i < N; i++) {
          for (int j=0; j < N; j++) {

#define INDEX(ncols,i,j) ((i)*ncols + (j))

            const double Ainv[16][16] = {
              { 1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
              { 0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
              {-3,  3,  0,  0, -2, -1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
              { 2, -2,  0,  0,  1,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0},
              { 0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0},
              { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0},
              { 0,  0,  0,  0,  0,  0,  0,  0, -3,  3,  0,  0, -2, -1,  0,  0},
              { 0,  0,  0,  0,  0,  0,  0,  0,  2, -2,  0,  0,  1,  1,  0,  0},
              {-3,  0,  3,  0,  0,  0,  0,  0, -2,  0, -1,  0,  0,  0,  0,  0},
              { 0,  0,  0,  0, -3,  0,  3,  0,  0,  0,  0,  0, -2,  0, -1,  0},
              { 9, -9, -9,  9,  6,  3, -6, -3,  6, -6,  3, -3,  4,  2,  2,  1},
              {-6,  6,  6, -6, -3, -3,  3,  3, -4,  4, -2,  2, -2, -2, -1, -1},
              { 2,  0, -2,  0,  0,  0,  0,  0,  1,  0,  1,  0,  0,  0,  0,  0},
              { 0,  0,  0,  0,  2,  0, -2,  0,  0,  0,  0,  0,  1,  0,  1,  0},
              {-6,  6,  6, -6, -4, -2,  4,  2, -3,  3, -3,  3, -2, -1, -2, -1},
              { 4, -4, -4,  4,  2,  2, -2, -2,  2, -2,  2, -2,  1,  1,  1,  1}

#define M_PI 3.14159265358979323846

            const double h = M_PI/12.0;

            const double x[16] = {
              table[INDEX(D,i,j)].d00,     table[INDEX(D,i+1,j)].d00,     table[INDEX(D,i,j+1)].d00,     table[INDEX(D,i+1,j+1)].d00,
              table[INDEX(D,i,j)].d10*h,   table[INDEX(D,i+1,j)].d10*h,   table[INDEX(D,i,j+1)].d10*h,   table[INDEX(D,i+1,j+1)].d10*h,
              table[INDEX(D,i,j)].d01*h,   table[INDEX(D,i+1,j)].d01*h,   table[INDEX(D,i,j+1)].d01*h,   table[INDEX(D,i+1,j+1)].d01*h,
              table[INDEX(D,i,j)].d11*h*h, table[INDEX(D,i+1,j)].d11*h*h, table[INDEX(D,i,j+1)].d11*h*h, table[INDEX(D,i+1,j+1)].d11*h*h

            float* a = (float *)&crosstermValues[ipar].c[i][j][0];
            for (int k=0; k < 16; k++) {

              for (int l=0; l < 16; l++) {
                a_val += Ainv[k][l]*x[l];
              }
              a[k] = (float)a_val;

      bondedKernel.setupCrosstermValues(NumCrosstermParams, crosstermValues.data());
    case Tuples::EXCLUSION:

      NAMD_bug("ComputeBondedCUDA::initialize, Undefined tuple type");

#endif // BONDED_CUDA