#ifndef COMPUTEBONDEDCUDA_H
#define COMPUTEBONDEDCUDA_H

// Standard library headers required by the declarations below.
#include <vector>
#include <array>
#include <list>
#include <atomic>
#include <cstdint>

#if defined(NAMD_CUDA) || defined(NAMD_HIP)
#ifdef BONDED_CUDA

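// ComputeBondedCUDA aggregates the bonded force terms (bonds, angles,
// dihedrals, impropers, exclusions, crossterms) contributed by the bonded
// Compute objects on this node and evaluates them on the GPU.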
class ComputeBondedCUDA : public Compute {

  static const int CudaTupleTypeSize[Tuples::NUM_TUPLE_TYPES];
  static const int CudaTupleTypeSizeStage[Tuples::NUM_TUPLE_TYPES];

  bool initializeCalled;

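  // GPU-resident (NODEGROUP_FORCE_REGISTER) path: atomic index used to
  // distribute tuple copy work among the PEs of this node.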
#ifdef NODEGROUP_FORCE_REGISTER
  std::atomic<int> tupleWorkIndex;
#endif // NODEGROUP_FORCE_REGISTER

  std::vector<int> allPatchIDs;

  std::vector< std::vector<int> > patchIDsPerRank;

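  // Bookkeeping for the bonded computes registered with this object: a
  // HomeCompute per PE plus any SelfComputes, each contributing tuples from a
  // set of patches (see registerCompute / registerSelfCompute below).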
  struct SelfCompute {
    int type;
    std::vector<int> patchIDs;
    Tuples* tuples;
    SelfCompute(int type=-1) : type(type), tuples(NULL) {}
    int operator==(const SelfCompute &elem) const {
      return (elem.type == type);
    }
  };

  struct HomeCompute {
    std::vector<char> isBasePatch;
    std::vector<int> patchIDs;
    std::vector< Tuples* > tuples;
  };

  struct ComputeRecord {
    HomeCompute homeCompute;
    std::vector< SelfCompute > selfComputes;
  };

  std::vector< ComputeRecord > computes;

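  // Tuples collected from the registered computes, grouped by tuple type,
  // along with per-patch bookkeeping (atom mappers, patch index, dihedral and
  // improper multiplicity maps) used to build the contiguous device arrays.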
  std::array< std::list<Tuples*>, Tuples::NUM_TUPLE_TYPES > tupleList;

  int numTuplesPerType[Tuples::NUM_TUPLE_TYPES];

  std::vector< AtomMapper* > atomMappers;

  std::vector<PatchRecord> patches;

  std::vector<int> patchIndex;

  std::vector<int> dihedralMultMap;
  std::vector<int> improperMultMap;

  int numModifiedExclusions;

  std::vector<NumExcl> numExclPerRank;

  bool hasModifiedExclusions;

  size_t tupleDataSize;

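  // Host-side staging buffers, one per tuple type, filled during tuple copy
  // and then uploaded to the device.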
  std::vector<CudaBondStage> bondTupleData;
  std::vector<CudaAngleStage> angleTupleData;
  std::vector<CudaDihedralStage> dihedralTupleData;
  std::vector<CudaDihedralStage> improperTupleData;
  std::vector<CudaExclusionStage> modifiedExclusionTupleData;
  std::vector<CudaExclusionStage> exclusionTupleData;
  std::vector<CudaCrosstermStage> crosstermTupleData;

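  // Device-side tuple migration kernel used by the GPU-resident
  // (NODEGROUP_FORCE_REGISTER) code path.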
#ifdef NODEGROUP_FORCE_REGISTER
  MigrationBondedCUDAKernel migrationKernel;
#endif // NODEGROUP_FORCE_REGISTER

  double3* h_patchMapCenter;
  double3* d_patchMapCenter;

  cudaEvent_t forceDoneEvent;

  CmiNodeLock printLock;

  double beforeForceCompute;

  double* energies_virials;

  int pswitchTable[3*3];

  void updatePatches();

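  // Completion of the asynchronous GPU work is detected by polling
  // forceDoneEvent: forceDoneSetCallback() registers forceDoneCheck() with the
  // Charm++ scheduler, and the check re-arms itself until the event reports
  // completion.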
  static void forceDoneCheck(void *arg, double walltime);
  void forceDoneSetCallback();

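  // Host-side tuple copying is split into chunks described by TupleCopyWork
  // and processed by the tupleCopyWorker* routines declared below.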
  struct TupleCopyWork {
    int64_t tupleDataPos;
  };

  std::vector<TupleCopyWork> tupleCopyWorkList;

  int64_t exclusionStartPos;
  int64_t exclusionStartPos2;
  std::vector<CudaBondStage> hostCudaBondStage;

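  // GPU-resident path: helpers that sort the per-type tuple lists into
  // bucketed order using the given counts and offsets, copy them into the
  // staging buffers above, and drive device-side tuple migration.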
#ifdef NODEGROUP_FORCE_REGISTER
  template <typename T>
  void sortTupleList(std::vector<T>& tuples, std::vector<int>& tupleCounts, std::vector<int>& tupleOffsets);
  void sortAndCopyToDevice();
  void migrateTuples(bool startup);

  template <typename T, typename P, typename D>
  void copyTupleToStage(const T& src, const P* __restrict__ p_array, D& dstval);

  template <typename T, typename P, typename D>
  void copyToStage(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, std::vector<D>& dst);

  void copyExclusionDataStage(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    std::vector<CudaExclusionStage>& dst1, std::vector<CudaExclusionStage>& dst2, int64_t& pos, int64_t& pos2);
#endif // NODEGROUP_FORCE_REGISTER

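  // Per-tuple-type copy routines that expand the tuple elements into the flat
  // CUDA structures; the *fp32 variants appear to feed the single-precision
  // staging path.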
  void copyBondData(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBond* __restrict__ dst);

  void copyBondDatafp32(const int ntuples, const BondElem* __restrict__ src,
    const BondValue* __restrict__ bond_array, CudaBondStage* __restrict__ dst);

  void copyAngleData(const int ntuples, const AngleElem* __restrict__ src,
    const AngleValue* __restrict__ angle_array, CudaAngle* __restrict__ dst);

  template <bool doDihedral, typename T, typename P>
  void copyDihedralData(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);

  template <bool doDihedral, typename T, typename P>
  void copyDihedralDatafp32(const int ntuples, const T* __restrict__ src,
    const P* __restrict__ p_array, CudaDihedral* __restrict__ dst);

  void copyExclusionData(const int ntuples, const ExclElem* __restrict__ src, const int typeSize,
    CudaExclusion* __restrict__ dst1, CudaExclusion* __restrict__ dst2, int64_t& pos, int64_t& pos2);

  void copyCrosstermData(const int ntuples, const CrosstermElem* __restrict__ src,
    const CrosstermValue* __restrict__ crossterm_array, CudaCrossterm* __restrict__ dst);

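  // Static entry points (CkLoop-style signature) that forward to the member
  // tupleCopyWorker / tupleCopyWorkerExcl implementations.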
  static void tupleCopyWorker(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorker(int first, int last);
  static void tupleCopyWorkerExcl(int first, int last, void *result, int paraNum, void *param);
  void tupleCopyWorkerExcl(int first, int last);

#ifdef NODEGROUP_FORCE_REGISTER
  void tupleCopyWorkerType(int tupletype);
#endif // NODEGROUP_FORCE_REGISTER

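  // Public interface: registration from ComputeMgr, per-PE setup and teardown,
  // and the per-step tuple copy / force / reduction sequence.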
public:
  ~ComputeBondedCUDA();
  void registerCompute(int pe, int type, PatchIDList& pids);
  void registerSelfCompute(int pe, int type, int pid);
  void unregisterBoxesOnPe();
  void assignPatchesOnPe();
  virtual void initialize();
  virtual void atomUpdate();
  virtual void patchReady(PatchID, int doneMigration, int seq);
  void messageEnqueueWork();
  void openBoxesOnPe(int startup = 1);
  void loadTuplesOnPe(const int startup = 1);
  void copyTupleData();
  void copyTupleDataSN();

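  // Alchemical (FEP/TI) support: flags, parameters, and lambda values are
  // mirrored between host-side structures and the kernel, and refreshed by
  // these update routines.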
  void updateCudaAlchParameters();
  void updateHostCudaAlchFlags();
  void updateKernelCudaAlchFlags();
  void updateHostCudaAlchParameters();
  void updateKernelCudaAlchParameters();
  void updateHostCudaAlchLambdas();
  void updateKernelCudaAlchLambdas();

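  // GPU-resident path: registers host buffers with the device, copies patch
  // metadata, and keeps the patch ordering in sync with CudaLocalRecord data.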
#ifdef NODEGROUP_FORCE_REGISTER
  void updatePatchRecords();
  void registerPointersToHost();
  void copyHostRegisterToDevice();
  void copyPatchData();
  void copyTupleDataGPU(const int startup);
  void updatePatchOrder(const std::vector<CudaLocalRecord>& data);
#endif // NODEGROUP_FORCE_REGISTER

  void finishPatchesOnPe();
  void finishPatches();
  void finishReductions();

  std::vector<int>& getBondedPes(void) { return pes; }

  std::vector<PatchRecord>& getPatches() { return patches; }

};

#endif // BONDED_CUDA
#endif // NAMD_CUDA || NAMD_HIP

#endif // COMPUTEBONDEDCUDA_H