NAMD
CudaComputeNonbonded.h
Go to the documentation of this file. (Doxygen source listing: the leading numbers on each line are the header's own line numbers; gaps in the numbering indicate lines omitted by the extraction, not deleted code.)
1 #ifndef CUDACOMPUTENONBONDED_H
2 #define CUDACOMPUTENONBONDED_H
3 #include <vector>
4 #include "Compute.h"
5 #include "Box.h"
6 #include "PatchTypes.h"
7 #include "CudaUtils.h"
8 #include "ComputeNonbondedUtil.h"
9 #include "CudaNonbondedTables.h"
10 #include "CudaTileListKernel.h"
12 #include "CudaComputeGBISKernel.h"
13 #include "ComputeMgr.h"
14 #ifdef NAMD_CUDA
15 #include <cuda.h>
16 
17 // 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
18 // 2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
19 #define MAX_EXCLUSIONS (1<<27)
20 
22 public:
23  struct ComputeRecord {
26  // Index to patches[] -array
27  int patchInd[2];
29  };
30 
31  struct PatchRecord {
// Constructs a patch record bound to the given patch ID.
// Every pointer member — the Patch itself, the per-atom arrays
// (compAtom, results) and all communication boxes (position/force
// plus the five GBIS boxes) — is explicitly reset to NULL here.
// NOTE(review): presumably these are wired up later when the patch is
// assigned/opened (see assignPatch()/openBox() declared below) — confirm.
// Scalar members (numAtoms, atomStart, pe, isSameNode) are NOT
// initialized by this constructor.
32  PatchRecord(PatchID patchID) : patchID(patchID) {
33  patch = NULL;
34  compAtom = NULL;
35  results = NULL;
36  positionBox = NULL;
37  forceBox = NULL;
38  intRadBox = NULL;
39  psiSumBox = NULL;
40  bornRadBox = NULL;
41  dEdaSumBox = NULL;
42  dHdrPrefixBox = NULL;
43  }
46  int numAtoms;
48  int atomStart;
49  // Pe where the patch was registered
50  int pe;
51  // For priority sorting
54  bool isSameNode;
55  // Storage for open positionBox
57  // Storage for open forceBox
59  // Boxes
62  Box<Patch,Real> *intRadBox; //5 GBIS Boxes
67  Real *intRad; //5 GBIS arrays
// Strict weak ordering by ascending patchID, so PatchRecords can be
// kept in a sorted container / sorted with std::sort.
72  bool operator < (const PatchRecord& pr) const {
73  return (patchID < pr.patchID);
74  }
// Two records compare equal iff they refer to the same patchID;
// no other member participates in equality.
75  bool operator == (const PatchRecord& pr) const {
76  return (patchID == pr.patchID);
77  }
78  };
79 
80 private:
81  // This variable is set in atomUpdate() by any Pe
82  bool atomsChangedIn;
83  // This variable is set in doWork() by masterPe
84  bool atomsChanged;
85 
86  bool computesChanged;
87 
88  const int deviceID;
89  size_t maxShmemPerBlock;
90  cudaStream_t stream;
91 
92  // PME and VdW CUDA kernels
93  CudaComputeNonbondedKernel nonbondedKernel;
94 
95  // GBIS kernel
96  CudaComputeGBISKernel GBISKernel;
97 
98  // Tile list CUDA kernels
99  CudaTileListKernel tileListKernel;
100 
101  // Exclusions
102  int2 *exclusionsByAtom;
103 
104  // VdW-types
105  // Pinned host memory
106  int* vdwTypes;
107  int vdwTypesSize;
108 
109  // Maximum number of tiles per tile list
110  int maxTileListLen;
111 
112  // Pinned host memory
113  int2* exclIndexMaxDiff;
114  int exclIndexMaxDiffSize;
115 
116  // Pinned host memory
117  int* atomIndex;
118  int atomIndexSize;
119 
120  // Required (xyzq, vdwTypes) storage
121  int atomStorageSize;
122 
123  // Atom and charge storage
124  // Pinned host memory
125  CudaAtom* atoms;
126  int atomsSize;
127 
128  // Force storage
129  float4* h_forces;
130  int h_forcesSize;
131  float4* h_forcesSlow;
132  int h_forcesSlowSize;
133 
134  float4* d_forces;
135  int d_forcesSize;
136  float4* d_forcesSlow;
137  int d_forcesSlowSize;
138 
139  // Virial and energy storage
140  VirialEnergy* h_virialEnergy;
141  VirialEnergy* d_virialEnergy;
142 
143  // GBIS storage
144  //--------------
145  // Pinned host memory
146  float* intRad0H;
147  int intRad0HSize;
148  // Pinned host memory
149  float* intRadSH;
150  int intRadSHSize;
151  // Mapped host memory
152  GBReal* psiSumH;
153  int psiSumHSize;
154  // Pinned host memory
155  float* bornRadH;
156  int bornRadHSize;
157  // Mapped host memory
158  GBReal* dEdaSumH;
159  int dEdaSumHSize;
160  // Pinned host memory
161  float* dHdrPrefixH;
162  int dHdrPrefixHSize;
163 
164  // Event and sanity check flag for making sure event was actually recorded
165  cudaEvent_t forceDoneEvent;
166  bool forceDoneEventRecord;
167  // Check counter for event polling
168  int checkCount;
169 
170  // Node lock
171  CmiNodeLock lock;
172  // List of local PEs that have patches
173  std::vector<int> pes;
174  // List of patch indices on each rank
175  std::vector< std::vector<int> > rankPatches;
176  // Master Pe = Pe where this Compute and reduction lives
177  int masterPe;
178 
179  // Are we in skip?
180  bool doSkip;
181 
182  // Device-wide patch and compute records, and the list of patches
183  std::vector<ComputeRecord> computes;
184  std::vector<PatchRecord> patches;
185 
186  // CUDA versions of patches
187  // Pinned host memory
188  CudaPatchRecord* cudaPatches;
189 
190  SubmitReduction *reduction;
191 
192  // Pair lists
193  int pairlistsValid;
194  float pairlistTolerance;
195  int usePairlists;
196  int savePairlists;
197  float plcutoff2;
198 
199  bool reSortDone;
200 
201  // Flags
202  bool doSlow;
203  bool doEnergy;
204  bool doVirial;
205 
206  // Walltime for force compute start
207  double beforeForceCompute;
208 
209  static inline void updateVdwTypesExclLoop(int first, int last, void *result, int paraNum, void *param);
210  void updateVdwTypesExclSubset(int first, int last);
211 
212  static inline void copyAtomsLoop(int first, int last, void *result, int paraNum, void *param);
213  void copyAtomsSubset(int first, int last);
214 
215  void addPatch(PatchID pid);
216  void addCompute(ComputeID cid, PatchID pid1, PatchID pid2, Vector offset);
217  void updatePatches();
218  int calcNumTileLists();
219  void getMaxMovementTolerance(float& maxAtomMovement, float& maxPatchTolerance);
220  void updateVdwTypesExcl();
221  void buildNeighborlist();
222  void skip();
223  void doGBISphase1();
224  void doGBISphase2();
225  void doGBISphase3();
226  void doForce();
227  void finishSetOfPatchesOnPe(std::vector<int>& patchSet);
228  void finishPatches();
229  void finishGBISPhase(int i);
230  void finishTimers();
231  void reSortTileLists();
232  void forceDone();
233  static void forceDoneCheck(void *arg, double walltime);
234  void forceDoneSetCallback();
235  void updateComputes();
236  void buildExclusions();
237  void skipPatch(int i);
238  void openBox(int i);
239  void reallocateArrays();
240  void copyGBISphase(int i);
241  void updatePatch(int i);
242  int findPid(PatchID pid);
243  void assignPatch(int i);
244  ComputeMgr* computeMgr;
245  int patchesCounter;
246 
247  const bool doStreaming;
248  int* patchReadyQueue;
249  int patchReadyQueueNext, patchReadyQueueLen;
250 
251  void finishPatch(int i);
252  void unregisterBox(int i);
253 
254  // void writeId(const char* filename);
255  // void writeXYZ(const char* filename);
256 
257 public:
258  CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables& cudaNonbondedTables, bool doStreaming);
261  void registerComputePair(ComputeID cid, PatchID* pid, int* trans);
262  void assignPatches(ComputeMgr* computeMgrIn);
263  virtual void initialize();
264  virtual void atomUpdate();
265  virtual int noWork();
266  virtual void doWork();
267  void launchWork();
268  void finishReductions();
269  void unregisterBoxesOnPe();
270  void assignPatchesOnPe();
271  void openBoxesOnPe();
272  void skipPatchesOnPe();
273  void finishPatchesOnPe();
274  void finishPatchOnPe(int i);
275  void messageEnqueueWork();
276  virtual void patchReady(PatchID, int doneMigration, int seq);
277  virtual void gbisP2PatchReady(PatchID, int seq);
278  virtual void gbisP3PatchReady(PatchID, int seq);
279 };
280 
281 #endif // NAMD_CUDA
282 #endif // CUDACOMPUTENONBONDED_H
int ComputeID
Definition: NamdTypes.h:183
Definition: Vector.h:64
virtual void gbisP2PatchReady(PatchID, int seq)
float Real
Definition: common.h:107
Definition: Patch.h:35
virtual void gbisP3PatchReady(PatchID, int seq)
CudaComputeNonbonded(ComputeID c, int deviceID, CudaNonbondedTables &cudaNonbondedTables, bool doStreaming)
int PatchID
Definition: NamdTypes.h:182
void registerComputeSelf(ComputeID cid, PatchID pid)
bool operator<(const PatchRecord &pr) const
virtual void patchReady(PatchID, int doneMigration, int seq)
Box< Patch, CompAtom > * positionBox
void registerComputePair(ComputeID cid, PatchID *pid, int *trans)
void assignPatches(ComputeMgr *computeMgrIn)
const ComputeID cid
Definition: Compute.h:43
float GBReal
Definition: ComputeGBIS.inl:17
bool operator==(const PatchRecord &pr) const