NAMD
ComputeNonbondedCUDAKernel.h
Go to the documentation of this file.
1 #ifdef NAMD_CUDA
2 //this type defined in multiple files
3 typedef float GBReal;
4 
5 void cuda_errcheck(const char *msg);
6 
7 #ifndef __CUDACC__
8 #undef __align__
9 #define __align__(X)
10 #endif
11 
12 
13 // Number of warps per block for Non-bonded CUDA kernel
14 #define NUM_WARP 4
15 #define WARPSIZE 32
16 
17 // Exclusion mask: bit 1 = atom pair is excluded
18 struct exclmask {
19  unsigned int excl[32];
20 };
21 
22 struct __align__(16) patch_pair {
23  float3 offset;
24  int patch1_start; // Coordinate/force start for this patch
25  int patch1_size; // Size of the patch
26  int patch2_start;
27  int patch2_size;
28  int patch1_ind; // Patch index
29  int patch2_ind;
30  int patch1_num_pairs; // Number of pairs that involve this patch
31  int patch2_num_pairs;
32  union {
33  bool patch_done[2]; // After-GPU-computation shared memory temporary storage
34  struct {
35  int plist_start; // Pair list start
36  int plist_size; // Pair list size
37  };
38  };
39  int exclmask_start; // Exclusion mask start
40  int patch1_free_size; // Size of the free atoms in patch
41  int patch2_free_size; // Size of the free atoms in patch
42 // int pad1, pad2;
43 };
44 
45 #define PATCH_PAIR_SIZE (sizeof(patch_pair)/4)
46 
47 struct __align__(16) atom { // must be multiple of 16!
48  float3 position;
49  float charge;
50 };
51 
52 struct __align__(16) atom_param { // must be multiple of 16!
53  int vdw_type;
54  int index;
55  int excl_index;
56  int excl_maxdiff; // maxdiff == 0 -> only excluded from self
57 };
58 
59 #define COPY_ATOM( DEST, SOURCE ) { \
60  DEST.position.x = SOURCE.position.x; \
61  DEST.position.y = SOURCE.position.y; \
62  DEST.position.z = SOURCE.position.z; \
63  DEST.charge = SOURCE.charge; \
64  }
65 
66 #define COPY_PARAM( DEST, SOURCE ) { \
67  DEST.sqrt_epsilon = SOURCE.sqrt_epsilon; \
68  DEST.half_sigma = SOURCE.half_sigma; \
69  DEST.index = SOURCE.index; \
70  DEST.excl_index = SOURCE.excl_index; \
71  DEST.excl_maxdiff = SOURCE.excl_maxdiff; \
72  }
73 
74 #define COPY_ATOM_TO_SHARED( ATOM, PARAM, SHARED ) { \
75  COPY_ATOM( SHARED, ATOM ) \
76  COPY_PARAM( SHARED, PARAM ) \
77  }
78 
79 #define COPY_ATOM_FROM_SHARED( ATOM, PARAM, SHARED ) { \
80  COPY_ATOM( ATOM, SHARED ) \
81  COPY_PARAM( PARAM, SHARED ) \
82  }
83 
84 // 2^11 ints * 2^5 bits = 2^16 bits = range of unsigned short excl_index
85 // 2^27 ints * 2^5 bits = 2^32 bits = range of unsigned int excl_index
86 #define MAX_EXCLUSIONS (1<<27)
87 #define MAX_CONST_EXCLUSIONS 2048 // cache size is 8k
88 
89 void cuda_bind_exclusions(const unsigned int *t, int n);
90 
91 void cuda_bind_lj_table(const float2 *t, int _lj_table_size);
92 
93 // #define FORCE_TABLE_SIZE 512
94 // maximum size of CUDA array 1D texture reference is 2^13 = 8192
95 // #define FORCE_TABLE_SIZE 8192
96 // CUDA docs lie, older devices can only handle 4096
97 #define FORCE_TABLE_SIZE 4096
98 
99 void cuda_bind_force_table(const float4 *t, const float4 *et);
100 
101 void cuda_init();
102 
103 void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs,
104  int npatches, int natoms, int nexclmask, int plist_len);
105 
106 void cuda_bind_atom_params(const atom_param *t);
107 void cuda_bind_vdw_types(const int *t);
108 
109 void cuda_bind_atoms(const atom *a);
110 
111 void cuda_bind_forces(float4 *f, float4 *f_slow);
112 
113 void cuda_bind_virials(float *v, int *queue, int *blockorder);
114 
115 void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc,
116  float cutoff2, float plcutoff2,
117  int cbegin, int ccount, int ctotal,
118  int doSlow, int doEnergy, int usePairlists, int savePairlists,
119  int doStreaming, int saveOrder, cudaStream_t &strm);
120 
121 //GBIS methods
122 void cuda_GBIS_P1(
123  int cbegin,
124  int ccount,
125  int pbegin,
126  int pcount,
127  float a_cut,
128  float rho_0,
129  float3 lata,
130  float3 latb,
131  float3 latc,
132  cudaStream_t &strm
133  );
134 void cuda_GBIS_P2(
135  int cbegin,
136  int ccount,
137  int pbegin,
138  int pcount,
139  float a_cut,
140  float r_cut,
141  float scaling,
142  float kappa,
143  float smoothDist,
144  float epsilon_p,
145  float epsilon_s,
146  float3 lata,
147  float3 latb,
148  float3 latc,
149  int doEnergy,
150  int doFullElec,
151  cudaStream_t &strm
152  );
153 void cuda_GBIS_P3(
154  int cbegin,
155  int ccount,
156  int pbegin,
157  int pcount,
158  float a_cut,
159  float rho_0,
160  float scaling,
161  float3 lata,
162  float3 latb,
163  float3 latc,
164  cudaStream_t &strm
165  );
166 
167 void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH);
169 void cuda_bind_GBIS_psiSum(GBReal *psiSumH);
170 void cuda_bind_GBIS_bornRad(float *bornRadH);
171 void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH);
173 
174 //end GBIS methods
175 
177 
178 #endif // NAMD_CUDA
179 
void cuda_bind_force_table(const float4 *t, const float4 *et)
void cuda_bind_forces(float4 *f, float4 *f_slow)
int cuda_stream_finished()
void cuda_bind_exclusions(const unsigned int *t, int n)
void cuda_bind_atoms(const atom *a)
static __thread float * bornRadH
static __thread float * dHdrPrefixH
static __thread int plist_size
void cuda_bind_GBIS_energy(float *e)
void cuda_bind_GBIS_dEdaSum(GBReal *dEdaSumH)
void cuda_nonbonded_forces(float3 lata, float3 latb, float3 latc, float cutoff2, float plcutoff2, int cbegin, int ccount, int ctotal, int doSlow, int doEnergy, int usePairlists, int savePairlists, int doStreaming, int saveOrder, cudaStream_t &strm)
static __thread float * intRadSH
void cuda_bind_patch_pairs(patch_pair *h_patch_pairs, int npatch_pairs, int npatches, int natoms, int plist_len, int nexclmask)
void cuda_bind_vdw_types(const int *t)
void cuda_bind_lj_table(const float2 *t, int _lj_table_size)
unsigned int excl[32]
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
void cuda_bind_GBIS_dHdrPrefix(float *dHdrPrefixH)
void cuda_bind_GBIS_bornRad(float *bornRadH)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2
void cuda_bind_virials(float *v, int *queue, int *blockorder)
void cuda_bind_GBIS_psiSum(GBReal *psiSumH)
void cuda_GBIS_P3(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float scaling, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
void cuda_GBIS_P1(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
void cuda_init()
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_intRad(float *intRad0H, float *intRadSH)
static __thread float * energy_gbis
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
void cuda_GBIS_P2(int cbegin, int ccount, int pbegin, int pcount, float a_cut, float r_cut, float scaling, float kappa, float smoothDist, float epsilon_p, float epsilon_s, float3 lata, float3 latb, float3 latc, int doEnergy, int doFullElec, cudaStream_t &strm)
void cuda_bind_atom_params(const atom_param *t)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
#define __align__(X)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
static __thread float * intRad0H
float GBReal
Definition: ComputeGBIS.inl:17