15 #define NAMD_AVXTILES_PAIR_THRESHOLD 4
17 #define NAMD_AVXTILES_IPAIRCOUNT 300
19 #define NAMD_AVXTILES_ORDER_PATCHES
28 inline int numTiles()
const {
return _numTiles; }
29 inline int maxTiles()
const {
return _numTilesAlloc; }
30 inline bool realloc(
const int n) {
32 if (n>_numTilesAlloc) {
47 int _numTiles, _numTilesAlloc;
68 void setSimParams(
const float scale,
const float scale14,
const float c1,
69 const float c3,
const float switchOn2,
float *fastTable,
70 float *fastEnergyTable,
float *slowTable,
71 float *slowEnergyTable,
float *eps4sigma,
72 float *eps4sigma14,
float *ljTable,
73 const float ljTableWidth,
float *modifiedTable,
74 float *modifiedEnergyTable,
float *excludedTable,
75 float *excludedEnergyTable,
const int interpolationMode);
77 inline void atomUpdate(AVXTiles *patch0tiles, AVXTiles *patch1tiles) {
78 tiles_p0 = patch0tiles;
79 tiles_p1 = patch1tiles;
82 #ifdef NAMD_AVXTILES_ORDER_PATCHES
86 const int rem0 = patch0tiles->numAtoms() & 15;
87 const int rem1 = patch1tiles->numAtoms() & 15;
88 if (rem1 && rem1 <= NAMD_AVXTILES_PAIR_THRESHOLD && rem1 < rem0)
90 else if ((rem0 > NAMD_AVXTILES_PAIR_THRESHOLD || rem0 == 0) &&
91 patch1tiles->numAtoms() < patch0tiles->numAtoms())
94 tiles_p0 = patch1tiles;
95 tiles_p1 = patch0tiles;
101 realloc(tiles_p0->numTiles());
104 inline void updateParams(
const Lattice &lattice,
const Vector &offset,
105 const double cutoff) {
106 _cutoff2 = cutoff * cutoff;
107 _paramMinvCut3 = -1.0 / (_cutoff2 * sqrt(_cutoff2));
108 _paramCutUnder3 = 3.0 / sqrt(_cutoff2);
109 _shx = offset.
x*lattice.
a().
x + offset.
y*lattice.
b().
x +
110 offset.
z*lattice.
c().
x;
111 _shy = offset.
x*lattice.
a().
y + offset.
y*lattice.
b().
y +
112 offset.
z*lattice.
c().
y;
113 _shz = offset.
x*lattice.
a().
z + offset.
y*lattice.
b().
z +
114 offset.
z*lattice.
c().
z;
117 inline void updateBuildInfo(
const int step,
const int minPart,
118 const int maxPart,
const int numParts,
119 const double plcutoff) {
123 _numParts = numParts;
124 _plcutoff2 = plcutoff * plcutoff;
127 inline int numLists()
const {
return _numLists; }
129 inline void realloc(
const int numLists) {
130 _numLists = numLists;
131 if (numLists > _numListsAlloc) _realloc();
134 inline void reallocModified(
const int numModified) {
135 _numModified = numModified;
136 if (numModified > _numModifiedAlloc) _reallocModified();
139 inline void reallocExcluded(
const int numExcluded) {
140 _numExcluded = numExcluded;
141 if (numExcluded > _numExcludedAlloc) _reallocExcluded();
144 inline void reallocPairLists(
const int numPairLists,
const int maxPairs) {
145 if (numPairLists > _maxPairLists || maxPairs > _maxPairs)
146 _reallocPairLists(numPairLists, maxPairs);
149 inline int exclusionChecksum()
const {
return _exclusionChecksum; }
150 inline float energyVdw()
const {
return _energyVdw; }
151 inline float energyElec()
const {
return _energyElec; }
152 inline float energySlow()
const {
return _energySlow; }
153 inline float virialXX()
const {
return _fNet_x * _shx; }
154 inline float virialXY()
const {
return _fNet_x * _shy; }
155 inline float virialXZ()
const {
return _fNet_x * _shz; }
156 inline float virialYY()
const {
return _fNet_y * _shy; }
157 inline float virialYZ()
const {
return _fNet_y * _shz; }
158 inline float virialZZ()
const {
return _fNet_z * _shz; }
159 inline float virialSlowXX()
const {
return _fNetSlow_x * _shx; }
160 inline float virialSlowXY()
const {
return _fNetSlow_x * _shy; }
161 inline float virialSlowXZ()
const {
return _fNetSlow_x * _shz; }
162 inline float virialSlowYY()
const {
return _fNetSlow_y * _shy; }
163 inline float virialSlowYZ()
const {
return _fNetSlow_y * _shz; }
164 inline float virialSlowZZ()
const {
return _fNetSlow_z * _shz; }
166 #ifdef NAMD_AVXTILES_ORDER_PATCHES
167 inline int patchOrder0()
const {
return _patchOrder0; }
168 inline int patchOrder1()
const {
return _patchOrder1; }
170 inline int patchOrder0()
const {
return 0; }
171 inline int patchOrder1()
const {
return 1; }
180 void delEmptyLists();
183 void nbForceAVX512(
const int doEnergy,
const int doVirial,
const int doList,
188 unsigned int *listDepth;
191 AVXTiles *tiles_p0, *tiles_p1;
196 template <
bool count,
bool partitionMode>
199 template <
bool doEnergy,
bool doVirial,
bool doSlow,
200 bool doList,
int interpMode>
201 __forceinline
void nbAVX512Tiles(__m512 &energyVdw, __m512 &energyElec,
202 __m512 &energySlow, __m512 &fNet_x,
203 __m512 &fNet_y, __m512 &fNet_z,
204 __m512 &fNetSlow_x, __m512 &fNetSlow_y,
206 template <
bool doEnergy,
bool doVirial,
bool doSlow,
int interpMode>
207 __forceinline
void nbAVX512Pairs(__m512 &energyVdw, __m512 &energyElec,
208 __m512 &energySlow, __m512 &fNet_x,
209 __m512 &fNet_y, __m512 &fNet_z,
210 __m512 &fNetSlow_x, __m512 &fNetSlow_y,
212 template <
bool doEnergy,
bool doVirial,
bool doSlow,
int interpMode>
213 inline void nbAVX512Modified(__m512 &energyVdw, __m512 &energyElec,
214 __m512 &energySlow, __m512 &fNet_x,
215 __m512 &fNet_y, __m512 &fNet_z,
216 __m512 &fNetSlow_x, __m512 &fNetSlow_y,
218 template <
bool doEnergy,
bool doVirial>
219 inline void nbAVX512Excluded(__m512 &energySlow, __m512 &fNetSlow_x,
220 __m512 &fNetSlow_y, __m512 &fNetSlow_z);
222 template <
bool doEnergy,
bool doVirial,
bool doSlow,
223 bool doList,
int interpMode>
227 void _reallocModified();
228 void _reallocExcluded();
229 void _reallocPairLists(
const int numPairLists,
const int maxPairs);
231 float _cutoff2, _plcutoff2;
233 float *_paramSlowTable, *_paramSlowEnergyTable;
235 float *_paramEps4Sigma, *_paramEps4Sigma14;
237 float _paramMinvCut3, _paramCutUnder3;
239 float *_paramModifiedTable, *_paramModifiedEnergyTable;
240 float *_paramExcludedTable, *_paramExcludedEnergyTable;
243 float *_paramFastTable, *_paramFastEnergyTable;
244 const float *_paramLjTable;
248 float _shx, _shy, _shz;
249 float _paramScale, _paramScale14, _paramC1, _paramC3, _paramSwitchOn2;
250 int _numLists, _numListsAlloc;
251 int _numModified, _numModifiedAlloc, _numExcluded, _numExcludedAlloc;
252 int *_modified_i, *_modified_j, *_excluded_i, *_excluded_j;
254 int _numPairLists, _maxPairLists, _maxPairs;
255 int *_pair_i, *_numPairs, *_pairStart, *_pairLists;
257 float _fNet_x, _fNet_y, _fNet_z, _fNetSlow_x, _fNetSlow_y, _fNetSlow_z;
258 int _exclusionChecksum;
259 float _energyVdw, _energyElec, _energySlow;
261 int _interpolationMode, _minPart, _maxPart, _numParts, _lastBuild;
263 #ifdef NAMD_AVXTILES_ORDER_PATCHES
264 int _patchOrder0, _patchOrder1;
267 #ifndef MEM_OPT_VERSION
268 char * _exclFlyListBuffer;
269 char * _exclFlyLists[16];
270 const int32 * _fullExcl[16], * _modExcl[16];
271 int _lastFlyListTile;
272 const char * buildExclFlyList(
const int itileList,
const int z,
273 const __m512i &atomIndex_i,
const int n,
278 #endif // NAMD_AVXTILES
279 #endif // AVXTILELISTS_H
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ cudaTextureObject_t const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ TileListVirialEnergy *__restrict__ virialEnergy int itileList