NAMD
ComputePmeCUDA.C
Go to the documentation of this file.
1 #include <numeric>
2 #include <algorithm>
3 #ifdef NAMD_CUDA
4 #include <cuda_runtime.h>
5 #endif
6 #include "Node.h"
7 #include "SimParameters.h"
8 #include "Priorities.h"
9 #include "ComputeNonbondedUtil.h"
10 #include "ComputePmeCUDA.h"
11 #include "ComputePmeCUDAMgr.h"
12 #include "PmeSolver.h"
13 #include "HomePatch.h"
14 
15 #ifdef NAMD_CUDA
16 //
17 // Class creator, multiple patches
18 //
// Body of the multiple-patch constructor: records one patch record per
// patch ID handed to this compute.  Boxes and Patch pointers stay NULL
// until initialize() registers them.
// NOTE(review): the constructor signature line (original line 19) was
// dropped by the listing extraction; the code lines below are unchanged.
20  setNumPatches(pids.size());
21  patches.resize(getNumPatches());
// Remember which patch IDs belong to this compute.
22  for (int i=0;i < getNumPatches();i++) {
23  patches[i].patchID = pids[i];
24  }
25 }
26 
27 //
28 // Class creator, single patch
29 //
// Body of the single-patch constructor: same as the multi-patch version
// but for exactly one patch ID.
// NOTE(review): the constructor signature line (original line 30) was
// dropped by the listing extraction; the code lines below are unchanged.
31  setNumPatches(1);
32  patches.resize(getNumPatches());
33  patches[0].patchID = pid;
34 }
35 
36 //
37 // Class destructor
38 //
// Destructor body: for every patch, unregister whichever pickup/deposit
// boxes were registered in initialize() (NULL means initialize() never
// ran for that box), then release the reduction handle and the lock
// created in initialize().
// NOTE(review): the destructor signature line (original line 39) was
// dropped by the listing extraction; the code lines below are unchanged.
40  for (int i=0;i < getNumPatches();i++) {
41  if (patches[i].positionBox != NULL) {
42  PatchMap::Object()->patch(patches[i].patchID)->unregisterPositionPickup(this, &patches[i].positionBox);
43  }
44  if (patches[i].avgPositionBox != NULL) {
45  PatchMap::Object()->patch(patches[i].patchID)->unregisterAvgPositionPickup(this, &patches[i].avgPositionBox);
46  }
47  if (patches[i].forceBox != NULL) {
48  PatchMap::Object()->patch(patches[i].patchID)->unregisterForceDeposit(this, &patches[i].forceBox);
49  }
50  }
51  delete reduction;
52  CmiDestroyLock(lock);
53 }
54 
55 //
56 // Initialize
57 //
// initialize() body: creates the lock used by storePmeForceMsg(), rejects
// unsupported simulation modes, locates the local ComputePmeCUDAMgr branch,
// registers position/force boxes with each patch, and finally maps patches
// to their home pencils via setupActivePencils().
// NOTE(review): the method signature (original line 58), the simParams
// declaration (line 62, presumably Node::Object()->simParameters), and
// line 69 (presumably the ReductionMgr willSubmit() call that sets up
// 'reduction') were dropped by the listing extraction; code below is
// unchanged.
59  lock = CmiCreateLock();
60 
61  // Sanity Check
// These PME variants are not implemented in the CUDA path; abort early.
63  if (simParams->alchFepOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, alchFepOn not yet implemented");
64  if (simParams->alchThermIntOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, alchThermIntOn not yet implemented");
65  if (simParams->lesOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, lesOn not yet implemented");
66  if (simParams->pairInteractionOn) NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, pairInteractionOn not yet implemented");
67 
// doWork() alternates between the send-atoms and recv-forces halves;
// start in the send-atoms state.
68  sendAtomsDone = false;
70  // basePriority = PME_PRIORITY;
// Countdown used by storePmeForceMsg() to detect when all patches have
// received their force message.
71  patchCounter = getNumPatches();
72 
73  // Get proxy to ComputePmeCUDAMgr
74  computePmeCUDAMgrProxy = CkpvAccess(BOCclass_group).computePmeCUDAMgr;
75  mgr = computePmeCUDAMgrProxy.ckLocalBranch();
76  if (mgr == NULL)
77  NAMD_bug("ComputePmeCUDA::ComputePmeCUDA, unable to locate local branch of BOC entry computePmeCUDAMgr");
78  pmeGrid = mgr->getPmeGrid();
79 
// Register the pickup/deposit boxes with each patch; the NULL checks guard
// against initialize() being called twice.
80  for (int i=0;i < getNumPatches();i++) {
81  if (patches[i].positionBox != NULL || patches[i].avgPositionBox != NULL
82  || patches[i].forceBox != NULL || patches[i].patch != NULL)
83  NAMD_bug("ComputePmeCUDA::initialize() called twice or boxes not set to NULL");
84  if (!(patches[i].patch = PatchMap::Object()->patch(patches[i].patchID))) {
85  NAMD_bug("ComputePmeCUDA::initialize() patch not found");
86  }
87  patches[i].positionBox = patches[i].patch->registerPositionPickup(this);
88  patches[i].forceBox = patches[i].patch->registerForceDeposit(this);
89  patches[i].avgPositionBox = patches[i].patch->registerAvgPositionPickup(this);
90  }
91 
92  setupActivePencils();
93 }
94 
// atomUpdate() body: flag that atom assignments changed so the next
// doWork()/sendAtoms() pass resends atom data.
// NOTE(review): the method signature (original line 95) was dropped by the
// listing extraction; the code lines below are unchanged.
96  atomsChanged = true;
97 }
98 
99 //
100 // Setup, see which pencils overlap with the patches held by this compute
101 //
102 void ComputePmeCUDA::setupActivePencils() {
103  PatchMap *patchMap = PatchMap::Object();
104 
105  for (int i=0;i < getNumPatches();i++) {
106  int homey = -1;
107  int homez = -1;
108  mgr->getHomePencil(patches[i].patchID, homey, homez);
109 
110  patches[i].homePencilY = homey;
111  patches[i].homePencilZ = homez;
112  patches[i].homePencilNode = mgr->getNode(homey,homez);
113  RegisterPatchMsg *msg = new RegisterPatchMsg();
114  msg->i = homey;
115  msg->j = homez;
116  computePmeCUDAMgrProxy[patches[i].homePencilNode].registerPatch(msg);
117  }
118 
119  atomsChanged = true;
120 
121 }
122 
// noWork() body: returns 0 (work pending) when this step computes full
// electrostatics; otherwise submits the (empty) reduction, tells every
// patch box and the home pencil to skip this step, and returns 1 (no work).
// NOTE(review): the method signature (original line 123) was dropped by the
// listing extraction; the code lines below are unchanged.
124 
125  if (patches[0].patch->flags.doFullElectrostatics) return 0;
126 
127  reduction->submit();
128 
129  for (int i=0;i < getNumPatches();i++) {
130  patches[i].positionBox->skip();
131  patches[i].forceBox->skip();
132  // We only need to call skip() once
133  if (patches[i].patchID == 0) computePmeCUDAMgrProxy[patches[i].homePencilNode].skip();
134  }
135 
136  return 1;
137 }
138 
// doWork() body: two-phase state machine toggled by sendAtomsDone.
// Phase 1 sends atoms to the PME pencils; when the manager has delivered
// the resulting forces, doWork() runs again and phase 2 stores them.
// NOTE(review): the method signature (original line 139) was dropped by the
// listing extraction; the code lines below are unchanged.
140  if (sendAtomsDone) {
141  // Second part of computation: receive forces from ComputePmeCUDAMgr
142  // basePriority = PME_OFFLOAD_PRIORITY;
143  sendAtomsDone = false;
144  recvForces();
145  } else {
146  // First part of computation: send atoms to ComputePmeCUDAMgr
147  sendAtomsDone = true;
148  // basePriority = COMPUTE_HOME_PRIORITY + PATCH_PRIORITY(patchID);
149  sendAtoms();
150  }
151 }
152 
// sendAtoms: for each patch, pack its atoms into a PmeAtomMsg (positions
// converted to fractional lattice coordinates wrapped into [0,1), charges
// scaled by coulomb_sqrt) and deliver it to the home pencil's manager —
// directly via the local branch when the pencil lives on this node,
// otherwise through the Charm++ proxy.  When energies are requested, the
// Ewald self-energy contribution is accumulated and submitted through the
// reduction.
153 void ComputePmeCUDA::sendAtoms() {
154 
// All patches share one lattice; cache origin and the reciprocal vectors
// used to convert Cartesian positions to fractional coordinates.
155  Lattice& lattice = patches[0].patch->lattice;
156  Vector origin = lattice.origin();
157  Vector recip1 = lattice.a_r();
158  Vector recip2 = lattice.b_r();
159  Vector recip3 = lattice.c_r();
160  double ox = origin.x;
161  double oy = origin.y;
162  double oz = origin.z;
163  double r1x = recip1.x;
164  double r1y = recip1.y;
165  double r1z = recip1.z;
166  double r2x = recip2.x;
167  double r2y = recip2.y;
168  double r2z = recip2.z;
169  double r3x = recip3.x;
170  double r3y = recip3.y;
171  double r3z = recip3.z;
172 
173  double selfEnergy = 0.;
174 
175  for (int i=0;i < getNumPatches();i++) {
// A pending force message means recvForces() never consumed the previous
// step's result — protocol violation.
176  if (patches[i].pmeForceMsg != NULL)
177  NAMD_bug("ComputePmeCUDA::sendAtoms, pmeForceMsg is not empty");
178 
// Charge scale factor so that q_i*q_j products carry the full Coulomb
// prefactor.
// NOTE(review): original line 180 — the continuation of this sqrt(...)
// expression (presumably "* ComputeNonbondedUtil::dielectric_1 );") — was
// dropped by the listing extraction; the visible code is unchanged.
179  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
181 
182  bool doMolly = patches[i].patch->flags.doMolly;
183  bool doEnergy = patches[i].patch->flags.doEnergy;
184  bool doVirial = patches[i].patch->flags.doVirial;
185  PatchMap *patchMap = PatchMap::Object();
186 
187  // Send atom patch to pencil(s)
188  // #ifdef NETWORK_PROGRESS
189  // CmiNetworkProgress();
190  // #endif
191 
// With MOLLY, use the averaged positions instead of the raw ones.
192  CompAtom *x = patches[i].positionBox->open();
193  if ( doMolly ) {
194  patches[i].positionBox->close(&x);
195  x = patches[i].avgPositionBox->open();
196  }
197 
198  int numAtoms = patches[i].patch->getNumAtoms();
199 
200  if ( doEnergy ) selfEnergy += calcSelfEnergy(numAtoms, x);
201 
202  // const Vector ucenter = patches[i].patch->lattice.unscale(patchMap->center(patches[i].patchID));
203  // const BigReal recip11 = patches[i].patch->lattice.a_r().x;
204  // const BigReal recip22 = patches[i].patch->lattice.b_r().y;
205  // const BigReal recip33 = patches[i].patch->lattice.c_r().z;
206 
// Varsize message: room for numAtoms CudaAtoms plus priority bits.
// NOTE(review): original line 208 (presumably a SET_PRIORITY(...) call on
// msg) was dropped by the listing extraction.
207  PmeAtomMsg *msg = new (numAtoms, PRIORITY_SIZE) PmeAtomMsg;
209  // NOTE:
210  // patch already contains the centered coordinates and scaled charges
211  // memcpy(msg->atoms, patch->getCudaAtomList(), sizeof(CudaAtom)*numAtoms);
212 
213  msg->numAtoms = numAtoms;
214  // msg->patchIndex = i;
215  msg->i = patches[i].homePencilY;
216  msg->j = patches[i].homePencilZ;
217  msg->compute = this;
218  msg->pe = CkMyPe();
219  msg->doEnergy = doEnergy;
220  msg->doVirial = doVirial;
221  msg->lattice = lattice;
222  CudaAtom *atoms = msg->atoms;
223  // BigReal miny = 1.0e20;
224  // BigReal minz = 1.0e20;
225  for (int j=0;j < numAtoms;j++) {
226  CudaAtom atom;
227  BigReal q = x[j].charge;
228  // Convert atom positions to range [0,1)
229  double px = x[j].position.x - ox;
230  double py = x[j].position.y - oy;
231  double pz = x[j].position.z - oz;
// Fractional coordinates: dot products with the reciprocal vectors.
232  double wx = px*r1x + py*r1y + pz*r1z;
233  double wy = px*r2x + py*r2y + pz*r2z;
234  double wz = px*r3x + py*r3y + pz*r3z;
235  // double wx = x[j].position.x*recip11;
236  // double wy = x[j].position.y*recip22;
237  // double wz = x[j].position.z*recip33;
// Wrap into [0,1): w - (floor(w+0.5)-0.5) maps any w into [0,1) in double.
238  wx = (wx - (floor(wx + 0.5) - 0.5));
239  wy = (wy - (floor(wy + 0.5) - 0.5));
240  wz = (wz - (floor(wz + 0.5) - 0.5));
241  // wx = (wx - floor(wx));
242  // wy = (wy - floor(wy));
243  // wz = (wz - floor(wz));
244  // if (wx >= 1.0) wx -= 1.0;
245  // if (wy >= 1.0) wy -= 1.0;
246  // if (wz >= 1.0) wz -= 1.0;
247  atom.x = (float)wx;
248  atom.y = (float)wy;
249  atom.z = (float)wz;
// Rounding in the double->float conversion can push a value to exactly
// 1.0f; fold it back so the invariant [0,1) holds in float too.
250  if (atom.x >= 1.0f) atom.x -= 1.0f;
251  if (atom.y >= 1.0f) atom.y -= 1.0f;
252  if (atom.z >= 1.0f) atom.z -= 1.0f;
253  atom.q = (float)(q*coulomb_sqrt);
254  atoms[j] = atom;
255  // miny = std::min(x[j].position.y, miny);
256  // minz = std::min(x[j].position.z, minz);
257  }
258  // Calculate corner with minimum y and z for this patch
259  // double wy = miny*recip22;
260  // double wz = minz*recip33;
261  // msg->miny = (int)((double)pmeGrid.K2*(wy - (floor(wy + 0.5) - 0.5)));
262  // msg->minz = (int)((double)pmeGrid.K3*(wz - (floor(wz + 0.5) - 0.5)));
263 
264  // For local (within shared memory node), get pointer to memory location and do direct memcpy
265  // For global (on different shared memory nodes),
266  if (patches[i].homePencilNode == CkMyNode()) {
267  mgr->recvAtoms(msg);
268  } else {
269  computePmeCUDAMgrProxy[patches[i].homePencilNode].recvAtoms(msg);
270  }
271 
// Close whichever box we opened above.
272  if ( doMolly )
273  patches[i].avgPositionBox->close(&x);
274  else
275  patches[i].positionBox->close(&x);
276  }
277 
// Report the accumulated self-energy (0.0 when doEnergy was false).
278  reduction->item(REDUCTION_ELECT_ENERGY_SLOW) += selfEnergy;
279  reduction->submit();
280 
281 }
282 
283 //
284 // Calculate self-energy and send to PmeSolver
285 //
// Accumulate the per-atom charge-squared sum used for the Ewald
// self-energy correction.
//   numAtoms - number of atoms in x
//   x        - atom array; only the charge field is read
// Returns the accumulated value added into REDUCTION_ELECT_ENERGY_SLOW by
// sendAtoms().
// NOTE(review): original lines 292-293 were dropped by the listing
// extraction; given the SQRT_PI reference they presumably scale selfEnergy
// by -ewaldcof/sqrt(pi) and the Coulomb/scaling prefactors before the
// return — confirm against the full source.
286 double ComputePmeCUDA::calcSelfEnergy(int numAtoms, CompAtom *x) {
287  double selfEnergy = 0.0;
288  for (int i=0;i < numAtoms;i++) {
289  selfEnergy += x[i].charge*x[i].charge;
290  }
291  //const double SQRT_PI = 1.7724538509055160273; /* mathematica 15 digits*/
294  return selfEnergy;
295 }
296 
// recvForces: consume the PmeForceMsg stored for every patch, transform the
// reciprocal-space force components back to Cartesian coordinates using the
// reciprocal lattice vectors, and accumulate them into each patch's slow
// force array.  Messages are deleted and slots reset to NULL afterwards.
// NOTE(review): original line 314 was dropped by the listing extraction;
// simParams is used below, so that line presumably declared it
// (SimParameters from Node::Object()) — confirm against the full source.
297 void ComputePmeCUDA::recvForces() {
298 
// Cache reciprocal lattice vectors for the basis transform below.
// (origin appears unused in the visible code of this function.)
299  Lattice& lattice = patches[0].patch->lattice;
300  Vector origin = lattice.origin();
301  Vector recip1 = lattice.a_r();
302  Vector recip2 = lattice.b_r();
303  Vector recip3 = lattice.c_r();
304  double r1x = recip1.x;
305  double r1y = recip1.y;
306  double r1z = recip1.z;
307  double r2x = recip2.x;
308  double r2y = recip2.y;
309  double r2z = recip2.z;
310  double r3x = recip3.x;
311  double r3y = recip3.y;
312  double r3z = recip3.z;
313 
315 
316  for (int i=0;i < getNumPatches();i++) {
// Every patch must have received its force message before this runs.
317  if (patches[i].pmeForceMsg == NULL)
318  NAMD_bug("ComputePmeCUDA::recvForces, no message in pmeForceMsg");
319 
320  CudaForce* force = patches[i].pmeForceMsg->force;
321  Results *r = patches[i].forceBox->open();
322  int numAtoms = patches[i].pmeForceMsg->numAtoms;
323 
// Skip accumulation when atoms strayed (results invalid) or when the run
// only measures communication (commOnly).
324  Force *f = r->f[Results::slow];
325  if (!patches[i].pmeForceMsg->numStrayAtoms && !simParams->commOnly) {
326  for(int j=0;j < numAtoms;j++) {
327  double f1 = force[j].x;
328  double f2 = force[j].y;
329  double f3 = force[j].z;
// Convert fractional-space force to Cartesian via the reciprocal vectors.
330  f[j].x += f1*r1x + f2*r2x + f3*r3x;
331  f[j].y += f1*r1y + f2*r2y + f3*r3y;
332  f[j].z += f1*r1z + f2*r2z + f3*r3z;
333  }
334  }
335 
336  patches[i].forceBox->close(&r);
337  delete patches[i].pmeForceMsg;
338  patches[i].pmeForceMsg = NULL;
339  }
340 
341 }
342 
// storePmeForceMsg body: called (possibly from other PEs on this node —
// hence the Converse lock) once per patch as force messages arrive.
// Decrements patchCounter under the lock and claims slot i = new counter
// value; when the counter hits zero it is reset and done=true is returned
// so the caller knows all patches now hold a message.
// NOTE(review): the method signature (original line 343,
// "bool ComputePmeCUDA::storePmeForceMsg(PmeForceMsg *msg) {") was dropped
// by the listing extraction; the code lines below are unchanged.
344  bool done = false;
345  int i;
346  CmiLock(lock);
347  patchCounter--;
348  i = patchCounter;
349  if (patchCounter == 0) {
350  patchCounter = getNumPatches();
351  done = true;
352  }
353  CmiUnlock(lock);
// Slot must be empty: a leftover message means a protocol violation.
354  if (patches[i].pmeForceMsg != NULL)
355  NAMD_bug("ComputePmeCUDA::storePmeForceMsg, already contains message");
356  patches[i].pmeForceMsg = msg;
357  return done;
358 }
359 #endif // NAMD_CUDA
static Node * Object()
Definition: Node.h:86
void setNumPatches(int n)
Definition: Compute.h:52
int sequence(void)
Definition: Compute.h:64
void unregisterAvgPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:140
Vector a_r() const
Definition: Lattice.h:268
int ComputeID
Definition: NamdTypes.h:183
static PatchMap * Object()
Definition: PatchMap.h:27
void getHomePencil(PatchID patchID, int &homey, int &homez)
Definition: Vector.h:64
SimParameters * simParameters
Definition: Node.h:178
Vector c_r() const
Definition: Lattice.h:270
static __thread atom * atoms
#define COULOMB
Definition: common.h:46
BigReal & item(int i)
Definition: ReductionMgr.h:312
void unregisterForceDeposit(Compute *cid, Box< Patch, Results > **const box)
Definition: Patch.C:239
float x
Definition: NamdTypes.h:158
void recvAtoms(PmeAtomMsg *msg)
BigReal z
Definition: Vector.h:66
Position position
Definition: NamdTypes.h:53
SubmitReduction * willSubmit(int setID, int size=-1)
Definition: ReductionMgr.C:365
static ReductionMgr * Object(void)
Definition: ReductionMgr.h:278
Patch * patch(PatchID pid)
Definition: PatchMap.h:235
Vector origin() const
Definition: Lattice.h:262
Bool pairInteractionOn
ComputePmeCUDA(ComputeID c, PatchIDList &pids)
Vector b_r() const
Definition: Lattice.h:269
#define SQRT_PI
Definition: ComputeExt.C:30
bool storePmeForceMsg(PmeForceMsg *msg)
Charge charge
Definition: NamdTypes.h:54
#define PRIORITY_SIZE
Definition: Priorities.h:13
#define PME_PRIORITY
Definition: Priorities.h:29
void NAMD_bug(const char *err_msg)
Definition: common.C:123
Force * f[maxNumForces]
Definition: PatchTypes.h:67
BigReal x
Definition: Vector.h:66
float z
Definition: NamdTypes.h:158
int PatchID
Definition: NamdTypes.h:182
virtual ~ComputePmeCUDA()
void unregisterPositionPickup(Compute *cid, Box< Patch, CompAtom > **const box)
Definition: Patch.C:122
#define simParams
Definition: Output.C:127
int getNumPatches()
Definition: Compute.h:53
BigReal y
Definition: Vector.h:66
int getNode(int i, int j)
float y
Definition: NamdTypes.h:158
void submit(void)
Definition: ReductionMgr.h:323
int size(void) const
Definition: ResizeArray.h:127
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
gridSize x
double BigReal
Definition: common.h:114
for(int i=0;i< n1;++i)