NAMD
Macros | Functions | Variables
ComputeNonbondedCUDAKernel.cu File Reference
#include "CudaUtils.h"
#include "ComputeNonbondedCUDAKernel.h"
#include <stdio.h>
#include "ComputeNonbondedCUDAKernelBase.h"
#include "ComputeGBISCUDAKernel.h"

Go to the source code of this file.

Macros

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
 
#define BLOCK_SIZE   128
 
#define SHARED_SIZE   32
 
#define MAKE_PAIRLIST
 
#define DO_SLOW
 
#define DO_ENERGY
 
#define DO_ENERGY
 
#define DO_SLOW
 
#define DO_ENERGY
 
#define DO_ENERGY
 
#define CALL(X)
 

Functions

void cuda_bind_exclusions (const unsigned int *t, int n)
 
void cuda_bind_lj_table (const float2 *t, int _lj_table_size)
 
void cuda_bind_force_table (const float4 *t, const float4 *et)
 
void cuda_init ()
 
void cuda_bind_patch_pairs (patch_pair *h_patch_pairs, int npatch_pairs, int npatches, int natoms, int plist_len, int nexclmask)
 
void cuda_bind_atom_params (const atom_param *t)
 
void cuda_bind_vdw_types (const int *t)
 
void cuda_bind_atoms (const atom *a)
 
void cuda_bind_forces (float4 *f, float4 *f_slow)
 
void cuda_bind_virials (float *v, int *queue, int *blockorder)
 
void cuda_bind_GBIS_energy (float *e)
 
void cuda_bind_GBIS_intRad (float *intRad0H, float *intRadSH)
 
void cuda_bind_GBIS_psiSum (GBReal *psiSumH)
 
void cuda_bind_GBIS_bornRad (float *bornRadH)
 
void cuda_bind_GBIS_dEdaSum (GBReal *dEdaSumH)
 
void cuda_bind_GBIS_dHdrPrefix (float *dHdrPrefixH)
 
void cuda_nonbonded_forces (float3 lata, float3 latb, float3 latc, float cutoff2, float plcutoff2, int cbegin, int ccount, int ctotal, int doSlow, int doEnergy, int usePairlists, int savePairlists, int doStreaming, int saveOrder, cudaStream_t &strm)
 
void cuda_GBIS_P1 (int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
 
void cuda_GBIS_P2 (int cbegin, int ccount, int pbegin, int pcount, float a_cut, float r_cut, float scaling, float kappa, float smoothDist, float epsilon_p, float epsilon_s, float3 lata, float3 latb, float3 latc, int doEnergy, int doFullElec, cudaStream_t &strm)
 
void cuda_GBIS_P3 (int cbegin, int ccount, int pbegin, int pcount, float a_cut, float rho_0, float scaling, float3 lata, float3 latb, float3 latc, cudaStream_t &strm)
 

Variables

texture< unsigned int,
1, cudaReadModeElementType > 
tex_exclusions
 
static __thread int exclusions_size
 
static __thread unsigned int * exclusions
 
__constant__ unsigned int const_exclusions [MAX_CONST_EXCLUSIONS]
 
static __thread unsigned int * overflow_exclusions
 
texture< float2,
1, cudaReadModeElementType > 
lj_table
 
static __thread int lj_table_size
 
texture< float4,
1, cudaReadModeElementType > 
force_table
 
texture< float4,
1, cudaReadModeElementType > 
energy_table
 
static __thread int num_patches
 
static __thread int num_virials
 
static __thread int num_atoms
 
static __thread int patch_pairs_size
 
static __thread patch_pair * patch_pairs
 
static __thread int atom_params_size
 
static __thread atom_param * atom_params
 
static __thread int vdw_types_size
 
static __thread int * vdw_types
 
static __thread int atoms_size
 
static __thread atom * atoms
 
static __thread int tmpforces_size
 
static __thread float4 * tmpforces
 
static __thread int slow_tmpforces_size
 
static __thread float4 * slow_tmpforces
 
static __thread int tmpvirials_size
 
static __thread float * tmpvirials
 
static __thread int slow_tmpvirials_size
 
static __thread float * slow_tmpvirials
 
static __thread int global_counters_size
 
static __thread unsigned int * global_counters
 
static __thread int plist_size
 
static __thread unsigned int * plist
 
static __thread int exclmasks_size
 
static __thread exclmaskexclmasks
 
static __thread float4 * forces
 
static __thread float4 * slow_forces
 
static __thread int * force_ready_queue
 
static __thread float * virials
 
static __thread float * slow_virials
 
static __thread int * block_order
 
static __thread int intRad0D_size
 
static __thread float * intRad0D
 
static __thread int intRadSD_size
 
static __thread float * intRadSD
 
static __thread GBRealpsiSumD
 
static __thread int tmp_psiSumD_size
 
static __thread GBRealtmp_psiSumD
 
static __thread int bornRadD_size
 
static __thread float * bornRadD
 
static __thread GBRealdEdaSumD
 
static __thread int tmp_dEdaSumD_size
 
static __thread GBRealtmp_dEdaSumD
 
static __thread int dHdrPrefixD_size
 
static __thread float * dHdrPrefixD
 
static __thread int GBIS_P1_counters_size
 
static __thread unsigned int * GBIS_P1_counters
 
static __thread int GBIS_P2_counters_size
 
static __thread unsigned int * GBIS_P2_counters
 
static __thread int GBIS_P3_counters_size
 
static __thread unsigned int * GBIS_P3_counters
 
static __thread float * energy_gbis
 
static __thread int tmp_energy_gbis_size
 
static __thread float * tmp_energy_gbis
 
__thread int max_grid_size
 
__thread cudaStream_t stream
 
__thread cudaStream_t stream2
 

Macro Definition Documentation

#define BLOCK_SIZE   128

Definition at line 474 of file ComputeNonbondedCUDAKernel.cu.

Referenced by GBIS_P2_Kernel().

#define CALL (   X)
Value:
X<<< grid_dim, nthread3, 0, strm >>> \
tmpforces, (doSlow?slow_tmpforces:NULL), \
forces, (doSlow?slow_forces:NULL), \
tmpvirials, (doSlow?slow_tmpvirials:NULL), \
virials, (doSlow?slow_virials:NULL), \
global_counters, (doStreaming?force_ready_queue:NULL), \
cbegin+cstart, ctotal, (saveOrder?block_order:NULL), \
static __thread int lj_table_size
static __thread atom * atoms
static __thread float4 * forces
static __thread unsigned int * plist
static __thread unsigned int * overflow_exclusions
static __thread float * slow_virials
static __thread exclmask * exclmasks
static __thread float * slow_tmpvirials
static __thread float * virials
static __thread float4 * slow_forces
static __thread float * tmpvirials
static __thread int * force_ready_queue
static __thread patch_pair * patch_pairs
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
static __thread int num_patches
static __thread atom_param * atom_params
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2
static __thread float4 * slow_tmpforces
static __thread unsigned int * global_counters
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
static __thread int * vdw_types
static __thread int * block_order
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
static __thread float4 * tmpforces
#define DO_ENERGY

Definition at line 495 of file ComputeNonbondedCUDAKernel.cu.

#define DO_ENERGY

Definition at line 495 of file ComputeNonbondedCUDAKernel.cu.

#define DO_ENERGY

Definition at line 495 of file ComputeNonbondedCUDAKernel.cu.

#define DO_ENERGY

Definition at line 495 of file ComputeNonbondedCUDAKernel.cu.

#define DO_SLOW

Definition at line 489 of file ComputeNonbondedCUDAKernel.cu.

#define DO_SLOW

Definition at line 489 of file ComputeNonbondedCUDAKernel.cu.

#define MAKE_PAIRLIST

Definition at line 477 of file ComputeNonbondedCUDAKernel.cu.

#define SET_EXCL (   EXCL,
  BASE,
  DIFF 
)    (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

Definition at line 19 of file ComputeNonbondedCUDAKernel.cu.

#define SHARED_SIZE   32

Definition at line 475 of file ComputeNonbondedCUDAKernel.cu.

Function Documentation

void cuda_bind_atom_params ( const atom_param *  t)

Definition at line 352 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, cuda_errcheck(), num_atoms, and stream.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

352  {
353  cudaMemcpyAsync(atom_params, t, num_atoms * sizeof(atom_param),
354  cudaMemcpyHostToDevice, stream);
355  cuda_errcheck("memcpy to atom_params");
356 }
__thread cudaStream_t stream
static __thread atom_param * atom_params
static __thread int num_atoms
void cuda_errcheck(const char *msg)
void cuda_bind_atoms ( const atom *  a)

Definition at line 364 of file ComputeNonbondedCUDAKernel.cu.

References atoms, cuda_errcheck(), num_atoms, and stream.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

364  {
365  cuda_errcheck("before memcpy to atoms");
366  cudaMemcpyAsync(atoms, a, num_atoms * sizeof(atom),
367  cudaMemcpyHostToDevice, stream);
368  cuda_errcheck("memcpy to atoms");
369 }
static __thread atom * atoms
__thread cudaStream_t stream
static __thread int num_atoms
void cuda_errcheck(const char *msg)
void cuda_bind_exclusions ( const unsigned int *  t,
int  n 
)

Definition at line 22 of file ComputeNonbondedCUDAKernel.cu.

References const_exclusions, cuda_errcheck(), exclusions, exclusions_size, MAX_CONST_EXCLUSIONS, overflow_exclusions, and tex_exclusions.

Referenced by ComputeNonbondedCUDA::build_exclusions().

22  {
23  exclusions_size = n;
24  static __thread int exclusions_alloc;
25  if ( exclusions && exclusions_alloc < exclusions_size ) {
26  cudaFree(exclusions);
27  cuda_errcheck("freeing exclusions");
28  cudaFree(overflow_exclusions);
29  cuda_errcheck("freeing overflow_exclusions");
30  exclusions = 0;
31  }
32  if ( ! exclusions ) {
33  exclusions_alloc = exclusions_size;
34  cudaMalloc((void**) &exclusions, n*sizeof(unsigned int));
35  cuda_errcheck("malloc exclusions");
36  cudaMalloc((void**) &overflow_exclusions, n*sizeof(unsigned int));
37  cuda_errcheck("malloc overflow_exclusions");
38  }
39  cudaMemcpy(exclusions, t, n*sizeof(unsigned int), cudaMemcpyHostToDevice);
40  cuda_errcheck("memcpy exclusions");
41  tex_exclusions.normalized = false;
42  tex_exclusions.addressMode[0] = cudaAddressModeClamp;
43  tex_exclusions.filterMode = cudaFilterModePoint;
44  cudaBindTexture(NULL, tex_exclusions, exclusions, n*sizeof(unsigned int));
45  cuda_errcheck("binding exclusions to texture");
46 
47  cudaMemcpy(overflow_exclusions, t,
48  n*sizeof(unsigned int), cudaMemcpyHostToDevice);
49  cuda_errcheck("memcpy to overflow_exclusions");
50  int nconst = ( n < MAX_CONST_EXCLUSIONS ? n : MAX_CONST_EXCLUSIONS );
51  cudaMemcpyToSymbol(const_exclusions, t, nconst*sizeof(unsigned int), 0);
52  cuda_errcheck("memcpy to const_exclusions");
53 }
static __thread unsigned int * exclusions
static __thread unsigned int * overflow_exclusions
__constant__ unsigned int const_exclusions[MAX_CONST_EXCLUSIONS]
texture< unsigned int, 1, cudaReadModeElementType > tex_exclusions
static __thread int exclusions_size
#define MAX_CONST_EXCLUSIONS
void cuda_errcheck(const char *msg)
void cuda_bind_force_table ( const float4 *  t,
const float4 *  et 
)

Definition at line 90 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), energy_table, force_table, and FORCE_TABLE_SIZE.

Referenced by ComputeNonbondedCUDA::build_force_table().

90  {
91  static __thread cudaArray *ct;
92  static __thread cudaArray *ect;
93  if ( ! ct ) {
94  cudaMallocArray(&ct, &force_table.channelDesc, FORCE_TABLE_SIZE, 1);
95  cuda_errcheck("allocating force table");
96  }
97  if ( ! ect ) {
98  cudaMallocArray(&ect, &energy_table.channelDesc, FORCE_TABLE_SIZE, 1);
99  cuda_errcheck("allocating energy table");
100  }
101  cudaMemcpyToArray(ct, 0, 0, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
102  // cudaMemcpy(ct, t, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
103  cuda_errcheck("memcpy to force table");
104  cudaMemcpyToArray(ect, 0, 0, et, FORCE_TABLE_SIZE*sizeof(float4), cudaMemcpyHostToDevice);
105  cuda_errcheck("memcpy to energy table");
106 
107  force_table.normalized = true;
108  force_table.addressMode[0] = cudaAddressModeClamp;
109  force_table.addressMode[1] = cudaAddressModeClamp;
110  force_table.filterMode = cudaFilterModeLinear;
111 
112  energy_table.normalized = true;
113  energy_table.addressMode[0] = cudaAddressModeClamp;
114  energy_table.addressMode[1] = cudaAddressModeClamp;
115  energy_table.filterMode = cudaFilterModeLinear;
116 
117  cudaBindTextureToArray(force_table, ct);
118  cuda_errcheck("binding force table to texture");
119 
120  cudaBindTextureToArray(energy_table, ect);
121  cuda_errcheck("binding energy table to texture");
122 }
#define FORCE_TABLE_SIZE
void cuda_errcheck(const char *msg)
texture< float4, 1, cudaReadModeElementType > energy_table
texture< float4, 1, cudaReadModeElementType > force_table
void cuda_bind_forces ( float4 *  f,
float4 *  f_slow 
)

Definition at line 371 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), forces, and slow_forces.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

371  {
372  cudaHostGetDevicePointer(&forces, f, 0);
373  cuda_errcheck("cudaHostGetDevicePointer forces");
374  cudaHostGetDevicePointer(&slow_forces, f_slow, 0);
375  cuda_errcheck("cudaHostGetDevicePointer slow_forces");
376 }
static __thread float4 * forces
static __thread float4 * slow_forces
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_bornRad ( float *  bornRadH)

Definition at line 406 of file ComputeNonbondedCUDAKernel.cu.

References bornRadD, cuda_errcheck(), num_atoms, and stream.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

406  {
407  cudaMemcpyAsync(bornRadD, bornRadH, num_atoms * sizeof(float),
408  cudaMemcpyHostToDevice, stream);
409  cuda_errcheck("memcpy to bornRad");
410 }
static __thread float * bornRadD
static __thread float * bornRadH
__thread cudaStream_t stream
static __thread int num_atoms
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_dEdaSum ( GBReal dEdaSumH)

Definition at line 412 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), and dEdaSumD.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

412  {
413  cudaHostGetDevicePointer(&dEdaSumD, dEdaSumH, 0);
414  cuda_errcheck("cudaHostGetDevicePointer dEdaSum");
415 }
static __thread GBReal * dEdaSumD
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_dHdrPrefix ( float *  dHdrPrefixH)

Definition at line 417 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), dHdrPrefixD, num_atoms, and stream.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

417  {
418  cudaMemcpyAsync(dHdrPrefixD, dHdrPrefixH, num_atoms * sizeof(float),
419  cudaMemcpyHostToDevice, stream);
420  cuda_errcheck("memcpy to dHdrPrefix");
421 }
static __thread float * dHdrPrefixH
__thread cudaStream_t stream
static __thread float * dHdrPrefixD
static __thread int num_atoms
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_energy ( float *  e)

Definition at line 389 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), and energy_gbis.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

389  {
390  cudaHostGetDevicePointer(&energy_gbis, e, 0);
391  cuda_errcheck("cudaHostGetDevicePointer energy_gbis");
392 }
static __thread float * energy_gbis
void cuda_errcheck(const char *msg)
void cuda_bind_GBIS_intRad ( float *  intRad0H,
float *  intRadSH 
)

Definition at line 393 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), intRad0D, intRadSD, num_atoms, and stream.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

393  {
394  cudaMemcpyAsync(intRad0D, intRad0H, num_atoms * sizeof(float),
395  cudaMemcpyHostToDevice, stream);
396  cudaMemcpyAsync(intRadSD, intRadSH, num_atoms * sizeof(float),
397  cudaMemcpyHostToDevice, stream);
398  cuda_errcheck("memcpy to intRad");
399 }
static __thread float * intRadSD
static __thread float * intRad0D
__thread cudaStream_t stream
static __thread float * intRadSH
static __thread int num_atoms
void cuda_errcheck(const char *msg)
static __thread float * intRad0H
void cuda_bind_GBIS_psiSum ( GBReal psiSumH)

Definition at line 401 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), and psiSumD.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

401  {
402  cudaHostGetDevicePointer(&psiSumD, psiSumH, 0);
403  cuda_errcheck("cudaHostGetDevicePointer psiSum");
404 }
void cuda_errcheck(const char *msg)
static __thread GBReal * psiSumD
void cuda_bind_lj_table ( const float2 t,
int  _lj_table_size 
)

Definition at line 59 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), lj_table, and lj_table_size.

Referenced by ComputeNonbondedCUDA::build_lj_table().

59  {
60  static __thread float2 *ct;
61  static __thread int lj_table_alloc;
62  lj_table_size = _lj_table_size;
63  if ( ct && lj_table_alloc < lj_table_size ) {
64  cudaFree(ct);
65  cuda_errcheck("freeing lj table");
66  ct = 0;
67  }
68  if ( ! ct ) {
69  lj_table_alloc = lj_table_size;
70  cudaMalloc((void**) &ct, lj_table_size*lj_table_size*sizeof(float2));
71  cuda_errcheck("allocating lj table");
72  }
73  cudaMemcpy(ct, t, lj_table_size*lj_table_size*sizeof(float2),
74  cudaMemcpyHostToDevice);
75  cuda_errcheck("memcpy to lj table");
76 
77  lj_table.normalized = false;
78  lj_table.addressMode[0] = cudaAddressModeClamp;
79  lj_table.filterMode = cudaFilterModePoint;
80 
81  cudaBindTexture((size_t*)0, lj_table, ct,
83  cuda_errcheck("binding lj table to texture");
84 }
static __thread int lj_table_size
texture< float2, 1, cudaReadModeElementType > lj_table
void cuda_errcheck(const char *msg)
void cuda_bind_patch_pairs ( patch_pair *  h_patch_pairs,
int  npatch_pairs,
int  npatches,
int  natoms,
int  plist_len,
int  nexclmask 
)

Definition at line 293 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atom_params_size, atoms, atoms_size, bornRadD, bornRadD_size, cuda_errcheck(), dHdrPrefixD, dHdrPrefixD_size, exclmasks, exclmasks_size, GBIS_P1_counters, GBIS_P1_counters_size, GBIS_P2_counters, GBIS_P2_counters_size, GBIS_P3_counters, GBIS_P3_counters_size, global_counters, global_counters_size, intRad0D, intRad0D_size, intRadSD, intRadSD_size, num_atoms, num_patches, num_virials, patch_pairs, patch_pairs_size, plist, plist_size, slow_tmpforces, slow_tmpforces_size, slow_tmpvirials, slow_tmpvirials_size, tmp_dEdaSumD, tmp_dEdaSumD_size, tmp_energy_gbis, tmp_energy_gbis_size, tmp_psiSumD, tmp_psiSumD_size, tmpforces, tmpforces_size, tmpvirials, tmpvirials_size, vdw_types, and vdw_types_size.

Referenced by ComputeNonbondedCUDA::doWork().

295  {
296  num_patches = npatches;
297  num_virials = npatches;
298  num_atoms = natoms;
299  reallocate_device<patch_pair>(&patch_pairs, &patch_pairs_size, npatch_pairs, 1.2f);
300  reallocate_device<atom>(&atoms, &atoms_size, num_atoms, 1.2f);
301  reallocate_device<atom_param>(&atom_params, &atom_params_size, num_atoms, 1.2f);
302  reallocate_device<int>(&vdw_types, &vdw_types_size, num_atoms, 1.2f);
303  reallocate_device<unsigned int>(&global_counters, &global_counters_size, num_patches+2, 1.2f);
304  reallocate_device<float4>(&tmpforces, &tmpforces_size, num_atoms, 1.2f);
305  reallocate_device<float4>(&slow_tmpforces, &slow_tmpforces_size, num_atoms, 1.2f);
306  reallocate_device<unsigned int>(&plist, &plist_size, plist_len, 1.2f);
307  reallocate_device<exclmask>(&exclmasks, &exclmasks_size, nexclmask, 1.2f);
308  reallocate_device<float>(&tmpvirials, &tmpvirials_size, num_patches*16, 1.2f);
309  reallocate_device<float>(&slow_tmpvirials, &slow_tmpvirials_size, num_patches*16, 1.2f);
310 
311  // For GBIS
312  reallocate_device<unsigned int>(&GBIS_P1_counters, &GBIS_P1_counters_size, num_patches, 1.2f);
313  reallocate_device<unsigned int>(&GBIS_P2_counters, &GBIS_P2_counters_size, num_patches, 1.2f);
314  reallocate_device<unsigned int>(&GBIS_P3_counters, &GBIS_P3_counters_size, num_patches, 1.2f);
315  reallocate_device<float>(&intRad0D, &intRad0D_size, num_atoms, 1.2f);
316  reallocate_device<float>(&intRadSD, &intRadSD_size, num_atoms, 1.2f);
317  reallocate_device<GBReal>(&tmp_psiSumD, &tmp_psiSumD_size, num_atoms, 1.2f);
318  reallocate_device<float>(&bornRadD, &bornRadD_size, num_atoms, 1.2f);
319  reallocate_device<GBReal>(&tmp_dEdaSumD, &tmp_dEdaSumD_size, num_atoms, 1.2f);
320  reallocate_device<float>(&dHdrPrefixD, &dHdrPrefixD_size, num_atoms, 1.2f);
321  reallocate_device<float>(&tmp_energy_gbis, &tmp_energy_gbis_size, num_patches, 1.2f);
322 
323  cudaMemcpy(patch_pairs, h_patch_pairs, npatch_pairs*sizeof(patch_pair), cudaMemcpyHostToDevice);
324  cuda_errcheck("memcpy to patch_pairs");
325 
326  cudaMemset(global_counters, 0, (num_patches+2)*sizeof(unsigned int));
327  cuda_errcheck("memset global_counters");
328 
329  cudaMemset(GBIS_P1_counters, 0, num_patches*sizeof(unsigned int));
330  cuda_errcheck("memset GBIS_P1_counters");
331 
332  cudaMemset(GBIS_P2_counters, 0, num_patches*sizeof(unsigned int));
333  cuda_errcheck("memset GBIS_P2_counters");
334 
335  cudaMemset(GBIS_P3_counters, 0, num_patches*sizeof(unsigned int));
336  cuda_errcheck("memset GBIS_P3_counters");
337 
338  cudaMemset(tmpforces, 0, num_atoms*sizeof(float4));
339  cuda_errcheck("memset tmpforces");
340 
341  cudaMemset(tmpvirials, 0, num_patches*sizeof(float)*16);
342  cuda_errcheck("memset tmpvirials");
343 
344  cudaMemset(slow_tmpforces, 0, num_atoms*sizeof(float4));
345  cuda_errcheck("memset slow_tmpforces");
346 
347  cudaMemset(slow_tmpvirials, 0, num_patches*sizeof(float)*16);
348  cuda_errcheck("memset slow_tmpvirials");
349 
350 }
static __thread unsigned int * GBIS_P2_counters
static __thread GBReal * tmp_dEdaSumD
static __thread float * tmp_energy_gbis
static __thread float * intRadSD
static __thread int patch_pairs_size
static __thread int intRad0D_size
static __thread float * bornRadD
static __thread atom * atoms
static __thread float * intRad0D
static __thread GBReal * tmp_psiSumD
static __thread unsigned int * plist
static __thread int GBIS_P1_counters_size
static __thread int intRadSD_size
static __thread exclmask * exclmasks
static __thread float * slow_tmpvirials
static __thread int plist_size
static __thread int vdw_types_size
static __thread int atom_params_size
static __thread float * dHdrPrefixD
static __thread int global_counters_size
static __thread int GBIS_P3_counters_size
static __thread float * tmpvirials
static __thread patch_pair * patch_pairs
static __thread unsigned int * GBIS_P1_counters
static __thread int num_patches
static __thread int num_virials
static __thread int tmpvirials_size
static __thread int tmp_psiSumD_size
static __thread int bornRadD_size
static __thread atom_param * atom_params
static __thread int GBIS_P2_counters_size
static __thread int tmp_dEdaSumD_size
static __thread int tmp_energy_gbis_size
static __thread unsigned int * GBIS_P3_counters
static __thread int num_atoms
static __thread int slow_tmpforces_size
static __thread float4 * slow_tmpforces
static __thread int tmpforces_size
static __thread unsigned int * global_counters
void cuda_errcheck(const char *msg)
static __thread int slow_tmpvirials_size
static __thread int dHdrPrefixD_size
static __thread int exclmasks_size
static __thread int * vdw_types
static __thread float4 * tmpforces
static __thread int atoms_size
void cuda_bind_vdw_types ( const int *  t)

Definition at line 358 of file ComputeNonbondedCUDAKernel.cu.

References cuda_errcheck(), num_atoms, stream, and vdw_types.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

358  {
359  cudaMemcpyAsync(vdw_types, t, num_atoms * sizeof(int),
360  cudaMemcpyHostToDevice, stream);
361  cuda_errcheck("memcpy to vdw_types");
362 }
__thread cudaStream_t stream
static __thread int num_atoms
void cuda_errcheck(const char *msg)
static __thread int * vdw_types
void cuda_bind_virials ( float *  v,
int *  queue,
int *  blockorder 
)

Definition at line 378 of file ComputeNonbondedCUDAKernel.cu.

References block_order, cuda_errcheck(), force_ready_queue, num_virials, slow_virials, and virials.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

378  {
379  cudaHostGetDevicePointer(&virials, v, 0);
380  cuda_errcheck("cudaHostGetDevicePointer virials");
382  cudaHostGetDevicePointer(&force_ready_queue, queue, 0);
383  cuda_errcheck("cudaHostGetDevicePointer force_ready_queue");
384  cudaHostGetDevicePointer(&block_order, blockorder, 0);
385  cuda_errcheck("cudaHostGetDevicePointer block_order");
386 }
static __thread float * slow_virials
static __thread float * virials
static __thread int * force_ready_queue
static __thread int num_virials
void cuda_errcheck(const char *msg)
static __thread int * block_order
void cuda_GBIS_P1 ( int  cbegin,
int  ccount,
int  pbegin,
int  pcount,
float  a_cut,
float  rho_0,
float3  lata,
float3  latb,
float3  latc,
cudaStream_t &  strm 
)

Definition at line 569 of file ComputeNonbondedCUDAKernel.cu.

References atoms, cuda_errcheck(), GBIS_P1_counters, intRad0D, intRadSD, lata, latb, latc, max_grid_size, num_atoms, NUM_WARP, patch_pairs, psiSumD, tmp_psiSumD, and WARPSIZE.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

580  {
581 
582  if ( ccount ) {
583  cudaMemsetAsync(tmp_psiSumD, 0, num_atoms*sizeof(GBReal), strm);
584 
585  int grid_dim = max_grid_size; // maximum allowed
586  for ( int cstart = 0; cstart < ccount; cstart += grid_dim ) {
587  if (grid_dim > ccount - cstart) {
588  grid_dim = ccount - cstart;
589  }
590 
591  dim3 nthread3(WARPSIZE, NUM_WARP, 1);
592  GBIS_P1_Kernel<<<grid_dim, nthread3, 0, strm>>>(
593  patch_pairs+cbegin+cstart,
594  atoms,
595  intRad0D,
596  intRadSD,
597  tmp_psiSumD,
598  psiSumD,
599  a_cut,
600  rho_0,
601  lata,
602  latb,
603  latc,
605  );
606  cuda_errcheck("dev_GBIS_P1");
607  } // end for
608  }
609 } // end GBIS P1
static __thread float * intRadSD
static __thread atom * atoms
static __thread float * intRad0D
static __thread GBReal * tmp_psiSumD
static __thread patch_pair * patch_pairs
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
static __thread unsigned int * GBIS_P1_counters
static __thread int num_atoms
void cuda_errcheck(const char *msg)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__thread int max_grid_size
#define NUM_WARP
#define WARPSIZE
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
static __thread GBReal * psiSumD
float GBReal
Definition: ComputeGBIS.inl:17
void cuda_GBIS_P2 ( int  cbegin,
int  ccount,
int  pbegin,
int  pcount,
float  a_cut,
float  r_cut,
float  scaling,
float  kappa,
float  smoothDist,
float  epsilon_p,
float  epsilon_s,
float3  lata,
float3  latb,
float3  latc,
int  doEnergy,
int  doFullElec,
cudaStream_t &  strm 
)

Definition at line 614 of file ComputeNonbondedCUDAKernel.cu.

References atoms, bornRadD, cuda_errcheck(), dEdaSumD, energy_gbis, forces, GBIS_P2_counters, lata, latb, latc, max_grid_size, num_atoms, num_patches, NUM_WARP, patch_pairs, tmp_dEdaSumD, tmp_energy_gbis, tmpforces, and WARPSIZE.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

632  {
633 
634  if ( ccount ) {
635  cudaMemsetAsync(tmp_dEdaSumD, 0, num_atoms*sizeof(GBReal), strm);
636  cudaMemsetAsync(tmp_energy_gbis, 0, num_patches*sizeof(float), strm);
637 
638  int grid_dim = max_grid_size; // maximum allowed
639  for ( int cstart = 0; cstart < ccount; cstart += grid_dim ) {
640  if (grid_dim > ccount - cstart)
641  grid_dim = ccount - cstart;
642 
643  dim3 nthread3(WARPSIZE, NUM_WARP, 1);
644  GBIS_P2_Kernel<<<grid_dim, nthread3, 0, strm>>>(
645  patch_pairs+cbegin+cstart,
646  atoms,
647  bornRadD,
648  tmp_dEdaSumD,
649  dEdaSumD,
650  a_cut,
651  r_cut,
652  scaling,
653  kappa,
654  smoothDist,
655  epsilon_p,
656  epsilon_s,
657  lata,
658  latb,
659  latc,
660  doEnergy,
661  doFullElec,
662  tmpforces,
663  forces,
665  energy_gbis,
667  );
668  cuda_errcheck("dev_GBIS_P2");
669  } // end for
670  }
671 } // end P2
static __thread unsigned int * GBIS_P2_counters
static __thread GBReal * tmp_dEdaSumD
static __thread float * tmp_energy_gbis
static __thread float * bornRadD
static __thread atom * atoms
static __thread float4 * forces
static __thread GBReal * dEdaSumD
static __thread float * energy_gbis
static __thread patch_pair * patch_pairs
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
static __thread int num_patches
static __thread int num_atoms
void cuda_errcheck(const char *msg)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__thread int max_grid_size
#define NUM_WARP
#define WARPSIZE
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
static __thread float4 * tmpforces
float GBReal
Definition: ComputeGBIS.inl:17
void cuda_GBIS_P3 ( int  cbegin,
int  ccount,
int  pbegin,
int  pcount,
float  a_cut,
float  rho_0,
float  scaling,
float3  lata,
float3  latb,
float3  latc,
cudaStream_t &  strm 
)

Definition at line 676 of file ComputeNonbondedCUDAKernel.cu.

References atoms, cuda_errcheck(), dHdrPrefixD, GBIS_P3_counters, intRad0D, intRadSD, lata, latb, latc, max_grid_size, NUM_WARP, patch_pairs, slow_forces, slow_tmpforces, and WARPSIZE.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

688  {
689  int grid_dim = max_grid_size; // maximum allowed
690  for ( int cstart = 0; cstart < ccount; cstart += grid_dim ) {
691  if (grid_dim > ccount - cstart)
692  grid_dim = ccount - cstart;
693 
694  dim3 nthread3(WARPSIZE, NUM_WARP, 1);
695  GBIS_P3_Kernel<<<grid_dim, nthread3, 0, strm>>>(
696  patch_pairs+cbegin+cstart,
697  atoms,
698  intRad0D,
699  intRadSD,
700  dHdrPrefixD,
701  a_cut,
702  rho_0,
703  scaling,
704  lata,
705  latb,
706  latc,
708  slow_forces,
710  );
711  cuda_errcheck("dev_GBIS_P3");
712  }
713 }
static __thread float * intRadSD
static __thread atom * atoms
static __thread float * intRad0D
static __thread float4 * slow_forces
static __thread float * dHdrPrefixD
static __thread patch_pair * patch_pairs
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
static __thread unsigned int * GBIS_P3_counters
static __thread float4 * slow_tmpforces
void cuda_errcheck(const char *msg)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__thread int max_grid_size
#define NUM_WARP
#define WARPSIZE
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
void cuda_init ( )

Definition at line 203 of file ComputeNonbondedCUDAKernel.cu.

References atom_params, atom_params_size, atoms, atoms_size, bornRadD, bornRadD_size, cuda_errcheck(), dEdaSumD, dHdrPrefixD, dHdrPrefixD_size, energy_gbis, exclmasks, exclmasks_size, exclusions, exclusions_size, force_ready_queue, forces, GBIS_P1_counters, GBIS_P1_counters_size, GBIS_P2_counters, GBIS_P2_counters_size, GBIS_P3_counters, GBIS_P3_counters_size, global_counters, global_counters_size, intRad0D, intRad0D_size, intRadSD, intRadSD_size, max_grid_size, patch_pairs, patch_pairs_size, plist, plist_size, psiSumD, slow_forces, slow_tmpforces, slow_tmpforces_size, slow_tmpvirials, slow_tmpvirials_size, tmp_dEdaSumD, tmp_dEdaSumD_size, tmp_energy_gbis, tmp_energy_gbis_size, tmp_psiSumD, tmp_psiSumD_size, tmpforces, tmpforces_size, tmpvirials, tmpvirials_size, vdw_types, and vdw_types_size.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA().

203  {
204  patch_pairs_size = 0;
205  patch_pairs = NULL;
206 
207  atom_params_size = 0;
208  atom_params = NULL;
209 
210  vdw_types_size = 0;
211  vdw_types = NULL;
212 
213  atoms_size = 0;
214  atoms = NULL;
215 
216  tmpforces_size = 0;
217  tmpforces = NULL;
218 
220  slow_tmpforces = NULL;
221 
222  tmpvirials_size = 0;
223  tmpvirials = NULL;
224 
226  slow_tmpvirials = NULL;
227 
229  global_counters = NULL;
230 
231  plist_size = 0;
232  plist = NULL;
233 
234  exclmasks_size = 0;
235  exclmasks = NULL;
236 
237  forces = NULL;
238  slow_forces = NULL;
239 
240  force_ready_queue = NULL;
241 
242  exclusions_size = 0;
243  exclusions = NULL;
244 
245  // --------------------
246  // For GBIS
247  // --------------------
248  intRad0D_size = 0;
249  intRad0D = NULL;
250 
251  intRadSD_size = 0;
252  intRadSD = NULL;
253 
254  psiSumD = NULL; // host-mapped memory
255 
256  tmp_psiSumD_size = 0;
257  tmp_psiSumD = NULL;
258 
259  bornRadD_size = 0;
260  bornRadD = NULL;
261 
262  dEdaSumD = NULL; // host-mapped memory
263 
264  tmp_dEdaSumD_size = 0;
265  tmp_dEdaSumD = NULL;
266 
267  dHdrPrefixD_size = 0;
268  dHdrPrefixD = NULL;
269 
271  GBIS_P1_counters = NULL;
272 
274  GBIS_P2_counters = NULL;
275 
277  GBIS_P3_counters = NULL;
278 
279  energy_gbis = NULL; // host-mapped memory
280 
282  tmp_energy_gbis = NULL;
283 
284  int dev;
285  cudaGetDevice(&dev);
286  cuda_errcheck("cudaGetDevice");
287  cudaDeviceProp deviceProp;
288  cudaGetDeviceProperties(&deviceProp, dev);
289  cuda_errcheck("cudaGetDeviceProperties");
290  max_grid_size = deviceProp.maxGridSize[1];
291 }
static __thread unsigned int * GBIS_P2_counters
static __thread GBReal * tmp_dEdaSumD
static __thread float * tmp_energy_gbis
static __thread float * intRadSD
static __thread int patch_pairs_size
static __thread int intRad0D_size
static __thread float * bornRadD
static __thread unsigned int * exclusions
static __thread atom * atoms
static __thread float * intRad0D
static __thread float4 * forces
static __thread GBReal * tmp_psiSumD
static __thread unsigned int * plist
static __thread GBReal * dEdaSumD
static __thread int GBIS_P1_counters_size
static __thread int intRadSD_size
static __thread float * energy_gbis
static __thread exclmask * exclmasks
static __thread float * slow_tmpvirials
static __thread int plist_size
static __thread float4 * slow_forces
static __thread int vdw_types_size
static __thread int atom_params_size
static __thread float * dHdrPrefixD
static __thread int global_counters_size
static __thread int GBIS_P3_counters_size
static __thread float * tmpvirials
static __thread int * force_ready_queue
static __thread patch_pair * patch_pairs
static __thread unsigned int * GBIS_P1_counters
static __thread int tmpvirials_size
static __thread int tmp_psiSumD_size
static __thread int bornRadD_size
static __thread atom_param * atom_params
static __thread int GBIS_P2_counters_size
static __thread int tmp_dEdaSumD_size
static __thread int tmp_energy_gbis_size
static __thread unsigned int * GBIS_P3_counters
static __thread int slow_tmpforces_size
static __thread int exclusions_size
static __thread float4 * slow_tmpforces
static __thread int tmpforces_size
static __thread unsigned int * global_counters
void cuda_errcheck(const char *msg)
static __thread int slow_tmpvirials_size
static __thread int dHdrPrefixD_size
static __thread int exclmasks_size
static __thread int * vdw_types
__thread int max_grid_size
static __thread float4 * tmpforces
static __thread GBReal * psiSumD
static __thread int atoms_size
void cuda_nonbonded_forces ( float3  lata,
float3  latb,
float3  latc,
float  cutoff2,
float  plcutoff2,
int  cbegin,
int  ccount,
int  ctotal,
int  doSlow,
int  doEnergy,
int  usePairlists,
int  savePairlists,
int  doStreaming,
int  saveOrder,
cudaStream_t &  strm 
)

Definition at line 500 of file ComputeNonbondedCUDAKernel.cu.

References CALL, cuda_errcheck(), cutoff2, max_grid_size, num_atoms, num_patches, NUM_WARP, slow_tmpforces, slow_tmpvirials, tmpforces, tmpvirials, and WARPSIZE.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

504  {
505 
506  if ( ccount ) {
507  if ( usePairlists ) {
508  if ( ! savePairlists ) plcutoff2 = 0.;
509  } else {
510  plcutoff2 = cutoff2;
511  }
512 
513  cudaMemsetAsync(tmpforces, 0, num_atoms*sizeof(float4), strm);
514  cudaMemsetAsync(tmpvirials, 0, num_patches*sizeof(float)*16, strm);
515  if ( doSlow ) {
516  cudaMemsetAsync(slow_tmpforces, 0, num_atoms*sizeof(float4), strm);
517  cudaMemsetAsync(slow_tmpvirials, 0, num_patches*sizeof(float)*16, strm);
518  }
519 
520  int grid_dim = max_grid_size; // maximum allowed
521  for ( int cstart = 0; cstart < ccount; cstart += grid_dim ) {
522  if ( grid_dim > ccount - cstart ) grid_dim = ccount - cstart;
523 
524  dim3 nthread3(WARPSIZE, NUM_WARP, 1);
525 
526 #define CALL(X) X<<< grid_dim, nthread3, 0, strm >>> \
527  (patch_pairs, atoms, atom_params, vdw_types, plist, \
528  tmpforces, (doSlow?slow_tmpforces:NULL), \
529  forces, (doSlow?slow_forces:NULL), \
530  tmpvirials, (doSlow?slow_tmpvirials:NULL), \
531  virials, (doSlow?slow_virials:NULL), \
532  global_counters, (doStreaming?force_ready_queue:NULL), \
533  overflow_exclusions, num_patches, \
534  cbegin+cstart, ctotal, (saveOrder?block_order:NULL), \
535  exclmasks, lj_table_size, \
536  lata, latb, latc, cutoff2, plcutoff2, doSlow)
537 //end definition
538 
539  if ( doEnergy ) {
540  if ( doSlow ) {
541  if ( plcutoff2 != 0. ) CALL(dev_nonbonded_slow_energy_pairlist);
542  else CALL(dev_nonbonded_slow_energy);
543  } else {
544  if ( plcutoff2 != 0. ) CALL(dev_nonbonded_energy_pairlist);
545  else CALL(dev_nonbonded_energy);
546  }
547  } else {
548  if ( doSlow ) {
549  if ( plcutoff2 != 0. ) CALL(dev_nonbonded_slow_pairlist);
550  else CALL(dev_nonbonded_slow);
551  } else {
552  if ( plcutoff2 != 0. ) CALL(dev_nonbonded_pairlist);
553  else CALL(dev_nonbonded);
554  }
555  }
556 
557  cuda_errcheck("dev_nonbonded");
558  }
559  }
560 
561 }
static __thread float * slow_tmpvirials
static __thread float * tmpvirials
static __thread int num_patches
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2
static __thread int num_atoms
static __thread float4 * slow_tmpforces
void cuda_errcheck(const char *msg)
__thread int max_grid_size
#define NUM_WARP
#define WARPSIZE
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
#define CALL(DOENERGY, DOVIRIAL)
static __thread float4 * tmpforces

Variable Documentation

__thread atom_param* atom_params
static
__thread int atom_params_size
static

Definition at line 130 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread atom* atoms
static
__thread int atoms_size
static

Definition at line 134 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread int* block_order
static

Definition at line 159 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_virials().

__thread float* bornRadD
static
__thread int bornRadD_size
static

Definition at line 173 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__constant__ unsigned int const_exclusions[MAX_CONST_EXCLUSIONS]

Definition at line 16 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_exclusions(), and void().

__thread GBReal* dEdaSumD
static

Definition at line 176 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_GBIS_dEdaSum(), cuda_GBIS_P2(), and cuda_init().

__thread float* dHdrPrefixD
static
__thread int dHdrPrefixD_size
static

Definition at line 181 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* energy_gbis
static

Definition at line 193 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_GBIS_energy(), cuda_GBIS_P2(), and cuda_init().

texture<float4, 1, cudaReadModeElementType> energy_table

Definition at line 88 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_force_table(), and void().

__thread exclmask* exclmasks
static

Definition at line 152 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread int exclmasks_size
static

Definition at line 151 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread unsigned int* exclusions
static
__thread int exclusions_size
static

Definition at line 13 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_exclusions(), and cuda_init().

__thread int* force_ready_queue
static

Definition at line 156 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_virials(), and cuda_init().

texture<float4, 1, cudaReadModeElementType> force_table

Definition at line 87 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_force_table(), and void().

__thread float4* forces
static
__thread unsigned int* GBIS_P1_counters
static

Definition at line 185 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_GBIS_P1(), and cuda_init().

__thread int GBIS_P1_counters_size
static

Definition at line 184 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread unsigned int* GBIS_P2_counters
static

Definition at line 188 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_GBIS_P2(), and cuda_init().

__thread int GBIS_P2_counters_size
static

Definition at line 187 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread unsigned int* GBIS_P3_counters
static

Definition at line 191 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_GBIS_P3(), and cuda_init().

__thread int GBIS_P3_counters_size
static

Definition at line 190 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread unsigned int* global_counters
static

Definition at line 148 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread int global_counters_size
static

Definition at line 147 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* intRad0D
static
__thread int intRad0D_size
static

Definition at line 162 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* intRadSD
static
__thread int intRadSD_size
static

Definition at line 165 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

texture<float2, 1, cudaReadModeElementType> lj_table

Definition at line 56 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_lj_table(), and void().

__thread int lj_table_size
static

Definition at line 57 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_lj_table(), and void().

__thread int max_grid_size
__thread int num_atoms
static
__thread int num_patches
static
__thread int num_virials
static

Definition at line 125 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_bind_virials().

__thread unsigned int* overflow_exclusions
static

Definition at line 17 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_exclusions().

__thread patch_pair* patch_pairs
static
__thread int patch_pairs_size
static

Definition at line 128 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread unsigned int* plist
static
__thread int plist_size
static

Definition at line 149 of file ComputeNonbondedCUDAKernel.cu.

Referenced by __align__(), cuda_bind_patch_pairs(), and cuda_init().

__thread GBReal* psiSumD
static

Definition at line 168 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_GBIS_psiSum(), cuda_GBIS_P1(), and cuda_init().

__thread float4* slow_forces
static

Definition at line 155 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_forces(), cuda_GBIS_P3(), cuda_init(), and void().

__thread float4* slow_tmpforces
static
__thread int slow_tmpforces_size
static

Definition at line 139 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* slow_tmpvirials
static
__thread int slow_tmpvirials_size
static

Definition at line 144 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* slow_virials
static

Definition at line 158 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_virials().

__thread cudaStream_t stream
__thread cudaStream_t stream2
texture<unsigned int, 1, cudaReadModeElementType> tex_exclusions

Definition at line 12 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_exclusions().

__thread GBReal* tmp_dEdaSumD
static

Definition at line 179 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_GBIS_P2(), and cuda_init().

__thread int tmp_dEdaSumD_size
static

Definition at line 178 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* tmp_energy_gbis
static

Definition at line 196 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_GBIS_P2(), and cuda_init().

__thread int tmp_energy_gbis_size
static

Definition at line 195 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread GBReal* tmp_psiSumD
static

Definition at line 171 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), cuda_GBIS_P1(), and cuda_init().

__thread int tmp_psiSumD_size
static

Definition at line 170 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float4* tmpforces
static
__thread int tmpforces_size
static

Definition at line 137 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* tmpvirials
static
__thread int tmpvirials_size
static

Definition at line 142 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread int* vdw_types
static
__thread int vdw_types_size
static

Definition at line 132 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_patch_pairs(), and cuda_init().

__thread float* virials
static

Definition at line 157 of file ComputeNonbondedCUDAKernel.cu.

Referenced by cuda_bind_virials().