NAMD
ComputeLonepairsCUDA.C
Go to the documentation of this file.
1 #include "ComputeLonepairsCUDA.h"
2 #include "CudaUtils.h"
3 #include "ProcessorPrivate.h"
4 #include "AtomMap.h"
5 #include "HomePatch.h"
6 #include "Node.h"
7 #include "Molecule.h"
8 #include "DeviceCUDA.h"
10 
11 #include <string>
12 #include <cmath>
13 
14 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
15 #ifdef WIN32
16 #define __thread __declspec(thread)
17 #endif
18 extern __thread DeviceCUDA *deviceCUDA;
19 #endif
20 
21 #if defined(NAMD_CUDA) || defined(NAMD_HIP)
22 
24  d_lpcolinear_list(nullptr), lpcolinear_list_size(0),
25  d_lprelative_list(nullptr), lprelative_list_size(0),
26  d_lpbisector_list(nullptr), lpbisector_list_size(0) {}
27 
29  deallocate_device(&d_lpcolinear_list);
30  lpcolinear_list_size = 0;
31  deallocate_device(&d_lprelative_list);
32  lprelative_list_size = 0;
33  deallocate_device(&d_lpbisector_list);
34  lpbisector_list_size = 0;
35 }
36 
38  std::vector<AtomMap*> &atomMapsList,
39  const std::vector<CudaLocalRecord> &localRecords,
40  const int* globalToLocalID, AtomID aid, bool errorOnNotFound) {
41  LocalID lid{-1, -1};
42  for (int j = 0; j < atomMapsList.size(); ++j) {
43  if (atomMapsList[j] == NULL) continue;
44  lid = atomMapsList[j]->localID(aid);
45  if (lid.pid != -1) break;
46  }
47  if (errorOnNotFound && (lid.pid == -1)) {
48  const std::string error = "Atom " + std::to_string(aid) + " not found in master PE " + std::to_string(deviceCUDA->getMasterPe());
49  NAMD_bug(error.c_str());
50  return -1;
51  }
52  const int soaPid = globalToLocalID[lid.pid];
53  const int soaIndex = localRecords[soaPid].bufferOffset + lid.index;
54  return soaIndex;
55 }
56 
58  std::vector<HomePatch*> patchList,
59  std::vector<AtomMap*> &atomMapsList,
60  const std::vector<CudaLocalRecord> &localRecords,
61  const int* h_globalToLocalID, cudaStream_t stream) {
62  std::vector<LonepairColinear> lpcolinear_list;
63  std::vector<LonepairRelative> lprelative_list;
64  std::vector<LonepairBisector> lpbisector_list;
65  for (size_t i = 0; i < patchList.size(); ++i) {
66  HomePatch *p = patchList[i];
67  const int numAtoms = p->getNumAtoms();
68  const auto& atomList = p->getAtomList();
69  for (int j = 0; j < numAtoms; ++j) {
70  const auto mass = atomList[j].mass;
71  // Iterate over all atoms with mass < 0.01 to find lone pair IDs
72  if (mass < 0.01) {
73  const AtomID aid = atomList[j].id;
74  // Find the LP host
75  Lphost *lph_ptr = Node::Object()->molecule->get_lphost(aid);
76  if (lph_ptr == NULL) {
77  const std::string error = "rebuildLonePairHostList: no Lphost exists for LP " + std::to_string(aid);
78  NAMD_die(error.c_str());
79  }
80  Lphost lph = *(lph_ptr);
81  if (lph.numhosts == 2) { // colinear
82  lpcolinear_list.push_back(LonepairColinear{
83  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom1, true),
84  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom2, true),
85  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom3, true),
86  lph.distance, lph.angle});
87  } else {
88  const double distance = lph.distance;
89  // Pre-calculate the sine and cosine
90  if (distance < 0) { // bisector
91  const double dcosa = -distance * std::cos(lph.angle);
92  const double dsina = -distance * std::sin(lph.angle);
93  const double dsinasint = dsina * std::sin(lph.dihedral);
94  const double dsinacost = dsina * std::cos(lph.dihedral);
95  lpbisector_list.push_back(LonepairBisector{
96  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom1, true),
97  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom2, true),
98  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom3, true),
99  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom4, true),
100  dcosa, dsinacost, dsinasint});
101  } else { // relative
102  const double dcosa = distance * std::cos(lph.angle);
103  const double dsina = distance * std::sin(lph.angle);
104  const double dsinasint = dsina * std::sin(lph.dihedral);
105  const double dsinacost = dsina * std::cos(lph.dihedral);
106  lprelative_list.push_back(LonepairRelative{
107  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom1, true),
108  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom2, true),
109  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom3, true),
110  globalAtomIDToSOAID(atomMapsList, localRecords, h_globalToLocalID, lph.atom4, true),
111  dcosa, dsinacost, dsinasint});
112  }
113  }
114  }
115  }
116  }
117  lpcolinear_list_size = lpcolinear_list.size();
118  if (!lpcolinear_list.empty()) {
119  deallocate_device_async(&d_lpcolinear_list, stream);
120  allocate_device_async(&d_lpcolinear_list, lpcolinear_list_size, stream);
121  copy_HtoD(lpcolinear_list.data(), d_lpcolinear_list, lpcolinear_list_size, stream);
122  }
123  lprelative_list_size = lprelative_list.size();
124  if (!lprelative_list.empty()) {
125  deallocate_device_async(&d_lprelative_list, stream);
126  allocate_device_async(&d_lprelative_list, lprelative_list_size, stream);
127  copy_HtoD(lprelative_list.data(), d_lprelative_list, lprelative_list_size, stream);
128  }
129  lpbisector_list_size = lpbisector_list.size();
130  if (!lpbisector_list.empty()) {
131  deallocate_device_async(&d_lpbisector_list, stream);
132  allocate_device_async(&d_lpbisector_list, lpbisector_list_size, stream);
133  copy_HtoD(lpbisector_list.data(), d_lpbisector_list, lpbisector_list_size, stream);
134  }
135  cudaCheck(cudaStreamSynchronize(stream));
136 }
137 
139  double* d_pos_x,
140  double* d_pos_y,
141  double* d_pos_z,
142  cudaStream_t stream) const {
143  if (lpcolinear_list_size > 0) {
144  repositionColinear(d_pos_x, d_pos_y, d_pos_z, d_lpcolinear_list, lpcolinear_list_size, stream);
145  }
146  if (lpbisector_list_size > 0) {
147  repositionBisector(d_pos_x, d_pos_y, d_pos_z, d_lpbisector_list, lpbisector_list_size, stream);
148  }
149  if (lprelative_list_size > 0) {
150  repositionRelative(d_pos_x, d_pos_y, d_pos_z, d_lprelative_list, lprelative_list_size, stream);
151  }
152 }
153 
155  double* d_f_normal_x,
156  double* d_f_normal_y,
157  double* d_f_normal_z,
158  double* d_f_nbond_x,
159  double* d_f_nbond_y,
160  double* d_f_nbond_z,
161  double* d_f_slow_x,
162  double* d_f_slow_y,
163  double* d_f_slow_z,
164  cudaTensor* d_virial_normal,
165  cudaTensor* d_virial_nbond,
166  cudaTensor* d_virial_slow,
167  const double* d_pos_x,
168  const double* d_pos_y,
169  const double* d_pos_z,
170  const int maxForceNumber,
171  const int doVirial,
172  cudaStream_t stream) const {
173  if (lpcolinear_list_size > 0) {
175  d_f_normal_x, d_f_normal_y, d_f_normal_z,
176  d_f_nbond_x, d_f_nbond_y, d_f_nbond_z,
177  d_f_slow_x, d_f_slow_y, d_f_slow_z,
178  d_virial_normal, d_virial_nbond, d_virial_slow,
179  d_pos_x, d_pos_y, d_pos_z, maxForceNumber,
180  doVirial, d_lpcolinear_list, lpcolinear_list_size,
181  stream);
182  }
183  if (lpbisector_list_size > 0) {
185  d_f_normal_x, d_f_normal_y, d_f_normal_z,
186  d_f_nbond_x, d_f_nbond_y, d_f_nbond_z,
187  d_f_slow_x, d_f_slow_y, d_f_slow_z,
188  d_virial_normal, d_virial_nbond, d_virial_slow,
189  d_pos_x, d_pos_y, d_pos_z, maxForceNumber,
190  doVirial, d_lpbisector_list, lpbisector_list_size,
191  stream);
192  }
193  if (lprelative_list_size > 0) {
195  d_f_normal_x, d_f_normal_y, d_f_normal_z,
196  d_f_nbond_x, d_f_nbond_y, d_f_nbond_z,
197  d_f_slow_x, d_f_slow_y, d_f_slow_z,
198  d_virial_normal, d_virial_nbond, d_virial_slow,
199  d_pos_x, d_pos_y, d_pos_z, maxForceNumber,
200  doVirial, d_lprelative_list, lprelative_list_size,
201  stream);
202  }
203 }
204 
205 #endif // NAMD_CUDA
static Node * Object()
Definition: Node.h:86
void redistributeForceColinear(double *d_f_normal_x, double *d_f_normal_y, double *d_f_normal_z, double *d_f_nbond_x, double *d_f_nbond_y, double *d_f_nbond_z, double *d_f_slow_x, double *d_f_slow_y, double *d_f_slow_z, cudaTensor *d_virial_normal, cudaTensor *d_virial_nbond, cudaTensor *d_virial_slow, const double *d_pos_x, const double *d_pos_y, const double *d_pos_z, const int maxForceNumber, const int doVirial, const ComputeLonepairsCUDA::LonepairColinear *d_lpcolinear_list, size_t lpcolinear_list_size, cudaStream_t stream)
int getNumAtoms() const
Definition: Patch.h:105
int32 atom2
Definition: structures.h:123
int32 numhosts
Either 2 or 3 host atoms, depending on LP type.
Definition: structures.h:126
void deallocate_device(T **pp)
Definition: CudaUtils.h:333
int globalAtomIDToSOAID(std::vector< AtomMap *> &atomMapsList, const std::vector< CudaLocalRecord > &localRecords, const int *globalToLocalID, AtomID aid, bool errorOnNotFound)
Real dihedral
Definition: structures.h:129
Real distance
Definition: structures.h:127
int32 atom3
Definition: structures.h:124
void repositionColinear(double *d_pos_x, double *d_pos_y, double *d_pos_z, const ComputeLonepairsCUDA::LonepairColinear *d_lpcolinear_list, size_t lpcolinear_list_size, cudaStream_t stream)
void allocate_device_async(T **pp, const size_t len, cudaStream_t stream)
Definition: CudaUtils.h:321
void repositionBisector(double *d_pos_x, double *d_pos_y, double *d_pos_z, const ComputeLonepairsCUDA::LonepairBisector *d_lpbisector_list, size_t lpbisector_list_size, cudaStream_t stream)
void reposition(double *d_pos_x, double *d_pos_y, double *d_pos_z, cudaStream_t stream) const
Determine the positions of lone pairs. Should be called before force evaluations. ...
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:23
int32 atom4
Definition: structures.h:125
int32 atom1
First index is to the LP.
Definition: structures.h:122
FullAtomList & getAtomList()
Definition: HomePatch.h:528
int getMasterPe()
Definition: DeviceCUDA.h:137
void NAMD_bug(const char *err_msg)
Definition: common.C:195
void redistributeForceBisector(double *d_f_normal_x, double *d_f_normal_y, double *d_f_normal_z, double *d_f_nbond_x, double *d_f_nbond_y, double *d_f_nbond_z, double *d_f_slow_x, double *d_f_slow_y, double *d_f_slow_z, cudaTensor *d_virial_normal, cudaTensor *d_virial_nbond, cudaTensor *d_virial_slow, const double *d_pos_x, const double *d_pos_y, const double *d_pos_z, const int maxForceNumber, const int doVirial, const ComputeLonepairsCUDA::LonepairBisector *d_lpbisector_list, size_t lpbisector_list_size, cudaStream_t stream)
void NAMD_die(const char *err_msg)
Definition: common.C:147
void redistributeForceRelative(double *d_f_normal_x, double *d_f_normal_y, double *d_f_normal_z, double *d_f_nbond_x, double *d_f_nbond_y, double *d_f_nbond_z, double *d_f_slow_x, double *d_f_slow_y, double *d_f_slow_z, cudaTensor *d_virial_normal, cudaTensor *d_virial_nbond, cudaTensor *d_virial_slow, const double *d_pos_x, const double *d_pos_y, const double *d_pos_z, const int maxForceNumber, const int doVirial, const ComputeLonepairsCUDA::LonepairRelative *d_lprelative_list, size_t lprelative_list_size, cudaStream_t stream)
Real angle
Definition: structures.h:128
void repositionRelative(double *d_pos_x, double *d_pos_y, double *d_pos_z, const ComputeLonepairsCUDA::LonepairRelative *d_lprelative_list, size_t lprelative_list_size, cudaStream_t stream)
void updateAtoms(std::vector< HomePatch *> patchList, std::vector< AtomMap *> &atomMapsList, const std::vector< CudaLocalRecord > &localRecords, const int *h_globalToLocalID, cudaStream_t stream)
Prepare the device lone pair lists from lphosts in the Molecule class.
int32 AtomID
Definition: NamdTypes.h:35
void redistributeForce(double *d_f_normal_x, double *d_f_normal_y, double *d_f_normal_z, double *d_f_nbond_x, double *d_f_nbond_y, double *d_f_nbond_z, double *d_f_slow_x, double *d_f_slow_y, double *d_f_slow_z, cudaTensor *d_virial_normal, cudaTensor *d_virial_nbond, cudaTensor *d_virial_slow, const double *d_pos_x, const double *d_pos_y, const double *d_pos_z, const int maxForceNumber, const int doVirial, cudaStream_t stream) const
Project the forces on lone pairs to host atoms. Should be called after force evaluations and before t...
void deallocate_device_async(T **pp, cudaStream_t stream)
Definition: CudaUtils.h:337
#define cudaCheck(stmt)
Definition: CudaUtils.h:233
Lphost * get_lphost(int atomid) const
Definition: Molecule.h:1150
Molecule * molecule
Definition: Node.h:179
void copy_HtoD(const T *h_array, T *d_array, size_t array_len, cudaStream_t stream=0)
Definition: CudaUtils.h:409