NAMD
ComputeBondedCUDA.h
#ifndef COMPUTEBONDEDCUDA_H
#define COMPUTEBONDEDCUDA_H
#include "Compute.h"
#include "ComputeMap.h"
#include "CudaNonbondedTables.h"
#include "ComputeBondedCUDAKernel.h"
#include "ComputeHomeTuples.h"
#ifdef NAMD_CUDA
#ifdef BONDED_CUDA

#include <vector>
#include <array>
#include <list>

class ComputeBondedCUDA : public Compute {

public:

  static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];

private:
  bool initializeCalled;

  // Device ID and stream
  const int deviceID;
  cudaStream_t stream;

  // Master PE for this compute
  const int masterPe;

  // List of all patch IDs on this object
  std::vector<int> allPatchIDs;

  // List of tuple patches for the entire compute (i.e., across all PEs)
  TuplePatchList tuplePatchList;

  // For every PE, list of patches that it has registered
  std::vector< std::vector<int> > patchIDsPerRank;

  // List of PEs involved in the computation
  std::vector<int> pes;

  // Self compute
  struct SelfCompute {
    int type;
    std::vector<int> patchIDs;
    Tuples* tuples;
    SelfCompute(int type=-1) : type(type), tuples(NULL) {}
    int operator==(const SelfCompute &elem) const {
      return (elem.type == type);
    }
  };
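  // NOTE: operator== compares type only, so a self compute of a given
  // type can be located with, e.g. (illustrative sketch, not verbatim NAMD code):
  //   std::find(selfComputes.begin(), selfComputes.end(), SelfCompute(type))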

  // Home compute, each PE has one
  struct HomeCompute {
    std::vector<char> isBasePatch;
    std::vector<int> patchIDs;
    // Multiple tuples per PE, each of a different kind
    std::vector< Tuples* > tuples;
  };

  // Computes for each PE
  struct ComputeRecord {
    HomeCompute homeCompute;
    // Self computes, organized by type
    std::vector< SelfCompute > selfComputes;
  };

  // Collection of all computes for each PE
  std::vector< ComputeRecord > computes;

  // For every tuple type, list of tuples
  // NOTE: These are pointers to the data recorded in "computes" and
  // are here to make it easier to traverse across all tuples of a certain kind
  std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;
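  // For example, all tuples of one type can then be visited without walking
  // the per-PE "computes" hierarchy (illustrative sketch, not verbatim NAMD code):
  //   for (Tuples* t : tupleList[tupletype]) { /* ... */ }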

  int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];

  AtomMap atomMap;
  std::vector< AtomMapper* > atomMappers;

  struct PatchRecord {
    int atomStart;
    int numAtoms;
  };
  std::vector<PatchRecord> patches;

  // Patch "patchID" is found in patches[patchIndex[patchID]]
  std::vector<int> patchIndex;
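  // That is, the atoms of patch "pid" occupy the contiguous index range
  // (illustrative reading of PatchRecord above):
  //   [patches[patchIndex[pid]].atomStart,
  //    patches[patchIndex[pid]].atomStart + patches[patchIndex[pid]].numAtoms)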

  // Maps multiplicity indices
  std::vector<int> dihedralMultMap;
  std::vector<int> improperMultMap;

  // Number of exclusions per rank, separated into modified and non-modified
  struct NumExcl {
    int numModifiedExclusions;
    int numExclusions;
  };
  std::vector<NumExcl> numExclPerRank;

  // Flags that indicate whether this GPU has exclusions and modified exclusions
  bool hasExclusions;
  bool hasModifiedExclusions;

  // All tuple data
  char* tupleData;
  int tupleDataSize;

  // Bonded CUDA kernel
  ComputeBondedCUDAKernel bondedKernel;

  // Pointer to computeMgr that created this object
  ComputeMgr* computeMgr;

  // Node-wide counter for patches.
  int patchesCounter;

  // "Force done event" for event polling
  cudaEvent_t forceDoneEvent;

  // Check counter for event polling
  int checkCount;
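  // The polling pattern is roughly as follows (illustrative sketch, assuming
  // forceDoneCheck() re-registers itself as a timed callback until the
  // recorded event has completed, with checkCount bounding the retries):
  //   if (cudaEventQuery(forceDoneEvent) == cudaSuccess) finishPatches();
  //   else forceDoneSetCallback();  // poll again later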

  // Node lock
  CmiNodeLock lock;

  // This variable is set in atomUpdate() by any PE
  bool atomsChangedIn;
  // This variable is set in doWork() by masterPe
  bool atomsChanged;

  // Reduction
  SubmitReduction *reduction;

  // Required storage
  int atomStorageSize;

  // Flags pointer
  Flags* flags;

  // Lattice and energy/virial booleans
  Lattice lattice;
  bool doEnergy;
  bool doVirial;
  bool doSlow;
  bool doMolly;

  // Walltime for force compute start
  double beforeForceCompute;

  bool accelMDdoDihe;

  // Atom storage in pinned host memory
  CudaAtom* atoms;
  int atomsSize;

  // Force storage in pinned host memory
  FORCE_TYPE* forces;
  int forcesSize;

  double* energies_virials;

  void mapAtoms();
  void unmapAtoms();

  void updatePatches();

  static void forceDoneCheck(void *arg, double walltime);
  void forceDoneSetCallback();

  void finishPatches();

  // ------------ For copyTupleData -------------------
  struct TupleCopyWork {
    int tupletype;
    int ntuples;
    void* tupleElemList;
    int tupleDataPos;
  };
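  // Each entry describes one batch of tuples to copy into the flat
  // "tupleData" buffer: tupleElemList points at the source tuples and
  // tupleDataPos is the destination offset within tupleData
  // (illustrative reading of the fields above).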

  std::vector<TupleCopyWork> tupleCopyWorkList;

  int exclusionStartPos;
  int exclusionStartPos2;

  void copyBondData(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);

  void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
    const AngleValue* __restrict__ angle_array, CudaAngle* __restrict__ dst);

  template <bool doDihedral, typename T, typename P>
  void copyDihedralData(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);

  void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    CudaExclusion* __restrict__ dst1, CudaExclusion* __restrict__ dst2, int& pos, int& pos2);

  void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
    const CrosstermValue* __restrict__ crossterm_array, CudaCrossterm* __restrict__ dst);

  static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorker(int first, int last);
  // --------------------------------------------------

public:

  ComputeBondedCUDA(ComputeID c, ComputeMgr* computeMgr, int deviceID, CudaNonbondedTables& cudaNonbondedTables);
  ~ComputeBondedCUDA();
  void registerCompute(int pe, int type, PatchIDList& pids);
  void registerSelfCompute(int pe, int type, int pid);
  void unregisterBoxesOnPe();
  void assignPatchesOnPe();
  virtual void patchReady(PatchID, int doneMigration, int seq);
  virtual void initialize();
  virtual void atomUpdate();
  virtual int noWork();
  virtual void doWork();
  void messageEnqueueWork();
  // void updatePatches();
  void openBoxesOnPe();
  void loadTuplesOnPe();
  void copyTupleData();
  void launchWork();

  void finishPatchesOnPe();
  void finishReductions();

};

#endif // BONDED_CUDA
#endif // NAMD_CUDA
#endif // COMPUTEBONDEDCUDA_H