4 #include <cuda_runtime.h>
7 #include <hip/hip_runtime.h>
20 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
28 patches[i].patchID = pids[i];
38 patches[0].patchID = pid;
46 if (patches[i].positionBox != NULL) {
49 if (patches[i].avgPositionBox != NULL) {
52 if (patches[i].forceBox != NULL) {
64 lock = CmiCreateLock();
68 if (simParams->
alchFepOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, alchFepOn not yet implemented");
69 if (simParams->
alchThermIntOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, alchThermIntOn not yet implemented");
70 if (simParams->
lesOn)
NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
73 sendAtomsDone =
false;
79 computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
80 mgr = computePmeCUDAMgrProxy.ckLocalBranch();
82 NAMD_bug(
"ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
86 if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
87 || patches[i].forceBox != NULL || patches[i].patch != NULL)
88 NAMD_bug(
"ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
90 NAMD_bug(
"ComputePmeCUDA::initialize() patch not found");
92 patches[i].positionBox = patches[i].patch->registerPositionPickup(
this);
93 patches[i].forceBox = patches[i].patch->registerForceDeposit(
this);
94 patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(
this);
107 void ComputePmeCUDA::setupActivePencils() {
115 patches[i].homePencilY = homey;
116 patches[i].homePencilZ = homez;
117 patches[i].homePencilNode = mgr->
getNode(homey,homez);
121 computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
130 if (patches[0].patch->flags.doFullElectrostatics)
return 0;
135 patches[i].positionBox->skip();
136 patches[i].forceBox->skip();
138 if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
148 sendAtomsDone =
false;
152 sendAtomsDone =
true;
158 void ComputePmeCUDA::sendAtoms() {
160 Lattice& lattice = patches[0].patch->lattice;
165 double ox = origin.
x;
166 double oy = origin.
y;
167 double oz = origin.
z;
168 double r1x = recip1.
x;
169 double r1y = recip1.
y;
170 double r1z = recip1.
z;
171 double r2x = recip2.
x;
172 double r2y = recip2.
y;
173 double r2z = recip2.
z;
174 double r3x = recip3.
x;
175 double r3y = recip3.
y;
176 double r3z = recip3.
z;
178 double selfEnergy = 0.;
181 if (patches[i].pmeForceMsg != NULL)
182 NAMD_bug(
"ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
187 bool doMolly = patches[i].patch->flags.doMolly;
188 bool doEnergy = patches[i].patch->flags.doEnergy;
189 bool doVirial = patches[i].patch->flags.doVirial;
197 CompAtom *
x = patches[i].positionBox->open();
199 patches[i].positionBox->close(&x);
200 x = patches[i].avgPositionBox->open();
203 int numAtoms = patches[i].patch->getNumAtoms();
205 if ( doEnergy ) selfEnergy += calcSelfEnergy(numAtoms, x);
218 msg->numAtoms = numAtoms;
220 msg->i = patches[i].homePencilY;
221 msg->j = patches[i].homePencilZ;
224 msg->doEnergy = doEnergy;
225 msg->doVirial = doVirial;
226 msg->lattice = lattice;
230 for (
int j=0;j < numAtoms;j++) {
237 double wx = px*r1x + py*r1y + pz*r1z;
238 double wy = px*r2x + py*r2y + pz*r2z;
239 double wz = px*r3x + py*r3y + pz*r3z;
243 wx = (wx - (floor(wx + 0.5) - 0.5));
244 wy = (wy - (floor(wy + 0.5) - 0.5));
245 wz = (wz - (floor(wz + 0.5) - 0.5));
255 if (atom.x >= 1.0f) atom.x -= 1.0f;
256 if (atom.y >= 1.0f) atom.y -= 1.0f;
257 if (atom.z >= 1.0f) atom.z -= 1.0f;
258 atom.q = (float)(q*coulomb_sqrt);
271 if (patches[i].homePencilNode == CkMyNode()) {
274 computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
278 patches[i].avgPositionBox->close(&x);
280 patches[i].positionBox->close(&x);
291 double ComputePmeCUDA::calcSelfEnergy(
int numAtoms,
CompAtom *x) {
292 double selfEnergy = 0.0;
293 for (
int i=0;i < numAtoms;i++) {
302 void ComputePmeCUDA::recvForces() {
304 Lattice& lattice = patches[0].patch->lattice;
309 double r1x = recip1.
x;
310 double r1y = recip1.
y;
311 double r1z = recip1.
z;
312 double r2x = recip2.
x;
313 double r2y = recip2.
y;
314 double r2z = recip2.
z;
315 double r3x = recip3.
x;
316 double r3y = recip3.
y;
317 double r3z = recip3.
z;
322 if (patches[i].pmeForceMsg == NULL)
323 NAMD_bug(
"ComputePmeCUDA::recvForces, no message in pmeForceMsg");
325 CudaForce* force = patches[i].pmeForceMsg->force;
326 Results *r = patches[i].forceBox->open();
327 int numAtoms = patches[i].pmeForceMsg->numAtoms;
330 if (!patches[i].pmeForceMsg->numStrayAtoms && !simParams->
commOnly) {
331 for(
int j=0;j < numAtoms;j++) {
332 double f1 = force[j].
x;
333 double f2 = force[j].
y;
334 double f3 = force[j].
z;
335 f[j].
x += f1*r1x + f2*r2x + f3*r3x;
336 f[j].
y += f1*r1y + f2*r2y + f3*r3y;
337 f[j].
z += f1*r1z + f2*r2z + f3*r3z;
341 patches[i].forceBox->close(&r);
342 delete patches[i].pmeForceMsg;
343 patches[i].pmeForceMsg = NULL;
354 if (patchCounter == 0) {
359 if (patches[i].pmeForceMsg != NULL)
360 NAMD_bug(
"ComputePmeCUDA::storePmeForceMsg, already contains message");
361 patches[i].pmeForceMsg = msg;
void setNumPatches(int n)
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
static BigReal dielectric_1
static PatchMap * Object()
void getHomePencil(PatchID patchID, int &homey, int &homez)
SimParameters * simParameters
static __thread atom * atoms
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
void recvAtoms(PmeAtomMsg *msg)
SubmitReduction * willSubmit(int setID, int size=-1)
static ReductionMgr * Object(void)
Patch * patch(PatchID pid)
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
bool storePmeForceMsg(PmeForceMsg *msg)
void NAMD_bug(const char *err_msg)
virtual ~ComputePmeCUDA()
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
int getNode(int i, int j)
#define SET_PRIORITY(MSG, SEQ, PRIO)