CudaComputeNonbondedKernel.cu File Reference

#include <cuda.h>
#include <namd_cub/cub.cuh>
#include "CudaComputeNonbondedKernel.h"
#include "CudaTileListKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Defines

#define OVERALLOC   1.2f
#define MAX_CONST_EXCLUSIONS   2048
#define NONBONDKERNEL_NUM_WARP   4
#define LARGE_FLOAT   (float)(1.0e10)
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
#define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING)

Functions

void NAMD_die (const char *)
template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy (const float r2, const float qi, const float qj, const float dx, const float dy, const float dz, const int vdwtypei, const int vdwtypej, const float2 *__restrict__ vdwCoefTable, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, float3 &iforce, float3 &iforceSlow, float3 &jforce, float3 &jforceSlow, float &energyVdw, float &energyElec, float &energySlow)
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float4 *__restrict__ devForces, float4 *__restrict__ devForcesSlow)
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float *__restrict__ devForces_x, float *__restrict__ devForces_y, float *__restrict__ devForces_z, float *__restrict__ devForcesSlow_x, float *__restrict__ devForcesSlow_y, float *__restrict__ devForcesSlow_z)
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float3 *__restrict__ forces, float3 *__restrict__ forcesSlow)
template<bool doPairlist>
__device__ __forceinline__ void shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex, int &jexclMaxdiff, int &jexclIndex)
template<bool doPairlist>
__device__ __forceinline__ void shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex)
template<bool doSlow>
__device__ __forceinline__ void shuffleNext (float3 &jforce, float3 &jforceSlow)
__device__ __forceinline__ float distsq (const BoundingBox a, const float4 b)
template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ (WARPSIZE *NONBONDKERNEL_NUM_WARP, doPairlist?(10):(doEnergy?(10):(10))) nonbondedForceKernel(const int start
 if (itileList< numTileLists)
__global__ void reduceNonbondedVirialKernel (const bool doSlow, const int atomStorageSize, const float4 *__restrict__ xyzq, const float4 *__restrict__ devForces, const float4 *__restrict__ devForcesSlow, VirialEnergy *__restrict__ virialEnergy)
__global__ void reduceVirialEnergyKernel (const bool doEnergy, const bool doVirial, const bool doSlow, const int numTileLists, const TileListVirialEnergy *__restrict__ tileListVirialEnergy, VirialEnergy *__restrict__ virialEnergy)
__global__ void reduceGBISEnergyKernel (const int numTileLists, const TileListVirialEnergy *__restrict__ tileListVirialEnergy, VirialEnergy *__restrict__ virialEnergy)
template<int doSlow>
__global__ void transposeForcesKernel (float4 *f, float4 *fSlow, float *fx, float *fy, float *fz, float *fw, float *fSlowx, float *fSlowy, float *fSlowz, float *fSloww, int n)

Variables

__thread DeviceCUDA * deviceCUDA
__constant__ unsigned int constExclusions [MAX_CONST_EXCLUSIONS]
__global__ void const int numTileLists
__global__ void const int
const TileList *__restrict__ 
tileLists
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ 
tileExcls
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ 
tileJatomStart
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int 
vdwCoefTableWidth
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__ 
vdwCoefTable
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ 
vdwTypes
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 
lata
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 
latb
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 
latc
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ 
xyzq
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float 
cutoff2
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t 
vdwCoefTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t 
forceTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t 
energyTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int 
atomStorageSize
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float 
plcutoff2
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__ 
patchPairs
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ 
atomIndex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ 
exclIndexMaxDiff
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__ 
overflowExclusions
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ 
tileListDepth
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ 
tileListOrder
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ 
jtiles
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ 
tileListStat
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__ 
boundingBoxes
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ 
devForces
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ 
devForcesSlow
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ 
devForce_x
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_y
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_z
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_w
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_x
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_y
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_z
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_w
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int 
numPatches
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__ 
patchNumCount
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ 
cudaPatches
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ 
mapForces
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ 
mapForcesSlow
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ 
mapPatchReadyQueue
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ int
*__restrict__ 
outputOrder
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ int
*__restrict__
TileListVirialEnergy
*__restrict__ virialEnergy int 
itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x

Define Documentation

#define CALL ( DOENERGY,
DOVIRIAL,
DOSLOW,
DOPAIRLIST,
DOSTREAMING   ) 
Value:
nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING> \
  <<< nblock, nthread, shMemSize, stream >>>  \
  (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getVdwCoefTable(), \
    vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, \
    cudaNonbondedTables.getVdwCoefTableTex(), cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex(), \
    atomStorageSize, tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
    tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
    tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
    force_x, force_y, force_z, force_w, \
    forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
    numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
    outputOrderPtr, tlKernel.getTileListVirialEnergy()); called=true
#define LARGE_FLOAT   (float)(1.0e10)

Definition at line 175 of file CudaComputeNonbondedKernel.cu.

#define MAX_CONST_EXCLUSIONS   2048

Definition at line 19 of file CudaComputeNonbondedKernel.cu.

#define NONBONDKERNEL_NUM_WARP   4

Definition at line 22 of file CudaComputeNonbondedKernel.cu.

#define OVERALLOC   1.2f
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32

Function Documentation

template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ ( WARPSIZE * NONBONDKERNEL_NUM_WARP,
doPairlist ? (10) : (doEnergy ? (10) : (10))
 ) const [inline]
template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy ( const float  r2,
const float  qi,
const float  qj,
const float  dx,
const float  dy,
const float  dz,
const int  vdwtypei,
const int  vdwtypej,
const float2 *__restrict__  vdwCoefTable,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
float3 &  iforce,
float3 &  iforceSlow,
float3 &  jforce,
float3 &  jforceSlow,
float &  energyVdw,
float &  energyElec,
float &  energySlow 
) [inline]

Definition at line 26 of file CudaComputeNonbondedKernel.cu.

References __ldg, float2::x, and float2::y.

00032                                                           {
00033 
00034   int vdwIndex = vdwtypej + vdwtypei;
00035 #if __CUDA_ARCH__ >= 350
00036   float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00037 #else
00038   float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
00039 #endif
00040 
00041   float rinv = rsqrtf(r2);
00042   float4 ei;
00043   float4 fi = tex1D<float4>(forceTableTex, rinv);
00044   if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
00045 
00046   float fSlow = qi * qj;
00047   float f = ljab.x * fi.z + ljab.y * fi.y + fSlow * fi.x;
00048 
00049   if (doEnergy) {
00050     energyVdw  += ljab.x * ei.z + ljab.y * ei.y;
00051     energyElec += fSlow * ei.x;
00052     if (doSlow) energySlow += fSlow * ei.w;
00053   }
00054   if (doSlow) fSlow *= fi.w;
00055 
00056   float fx = dx * f;
00057   float fy = dy * f;
00058   float fz = dz * f;
00059   iforce.x += fx;
00060   iforce.y += fy;
00061   iforce.z += fz;
00062   jforce.x -= fx;
00063   jforce.y -= fy;
00064   jforce.z -= fz;
00065 
00066   if (doSlow) {
00067     float fxSlow = dx * fSlow;
00068     float fySlow = dy * fSlow;
00069     float fzSlow = dz * fSlow;
00070     iforceSlow.x += fxSlow;
00071     iforceSlow.y += fySlow;
00072     iforceSlow.z += fzSlow;
00073     jforceSlow.x -= fxSlow;
00074     jforceSlow.y -= fySlow;
00075     jforceSlow.z -= fzSlow;
00076   }
00077 }

__device__ __forceinline__ float distsq ( const BoundingBox  a,
const float4  b 
)

Definition at line 167 of file CudaComputeNonbondedKernel.cu.

References BoundingBox::wx, BoundingBox::wy, BoundingBox::wz, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

Referenced by buildTileListsBBKernel(), and if().

00167                                                                              {
00168   float dx = max(0.0f, fabsf(a.x - b.x) - a.wx);  // per-axis gap between point b and the box face; clamped to 0 when b lies within the box's x-extent (a.x = center, a.wx = half-extent, per this formula)
00169   float dy = max(0.0f, fabsf(a.y - b.y) - a.wy);
00170   float dz = max(0.0f, fabsf(a.z - b.z) - a.wz);
00171   float r2 = dx*dx + dy*dy + dz*dz;  // squared distance from b to the closest point of bounding box a (0 if b is inside)
00172   return r2;
00173 }

if (  ) 

Definition at line 228 of file CudaComputeNonbondedKernel.cu.

References CudaPatchRecord::atomStart, constExclusions, distsq(), PatchPairRecord::iatomFreeSize, PatchPairRecord::iatomSize, TileList::iatomStart, j, PatchPairRecord::jatomFreeSize, PatchPairRecord::jatomSize, TileList::jtileEnd, TileList::jtileStart, LARGE_FLOAT, MAX_CONST_EXCLUSIONS, CudaPatchRecord::numAtoms, TileList::offsetXYZ, TileList::patchInd, TileList::patchNumList, tempStorage, WARP_ALL, WARP_ANY, WARP_FULL_MASK, WARP_SHUFFLE_XOR, WARP_SYNC, WARPSIZE, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

00229   {
00230 
00231     float3 iforce;
00232     float3 iforceSlow;
00233     float energyVdw, energyElec, energySlow;
00234     int nexcluded;
00235     unsigned int itileListLen;
00236     int2 patchInd;
00237     int2 patchNumList;
00238     __shared__ float4 s_xyzq[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00239     __shared__ int s_vdwtypej[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00240     __shared__ float3 s_jforce[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00241     __shared__ float3 s_jforceSlow[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00242     __shared__ int s_jatomIndex[NONBONDKERNEL_NUM_WARP][WARPSIZE];            
00243 
00244     // Start computation
00245     {
00246       // Warp index (0...warpsize-1)
00247       const int wid = threadIdx.x % WARPSIZE;
00248       const int iwarp = threadIdx.x / WARPSIZE;
00249 
00250       TileList tmp = tileLists[itileList];
00251       int iatomStart = tmp.iatomStart;
00252       int jtileStart = tmp.jtileStart;
00253       int jtileEnd   = tmp.jtileEnd;
00254       patchInd     = tmp.patchInd;
00255       patchNumList = tmp.patchNumList;
00256 
00257       float shx = tmp.offsetXYZ.x*lata.x + tmp.offsetXYZ.y*latb.x + tmp.offsetXYZ.z*latc.x;
00258       float shy = tmp.offsetXYZ.x*lata.y + tmp.offsetXYZ.y*latb.y + tmp.offsetXYZ.z*latc.y;
00259       float shz = tmp.offsetXYZ.x*lata.z + tmp.offsetXYZ.y*latb.z + tmp.offsetXYZ.z*latc.z;
00260 
00261       // DH - set zeroShift flag if magnitude of shift vector is zero
00262       bool zeroShift = ! (shx*shx + shy*shy + shz*shz > 0);
00263 
00264       int iatomSize, iatomFreeSize, jatomSize, jatomFreeSize;
00265       if (doPairlist) {
00266         PatchPairRecord PPStmp = patchPairs[itileList];
00267         iatomSize     = PPStmp.iatomSize;
00268         iatomFreeSize = PPStmp.iatomFreeSize;
00269         jatomSize     = PPStmp.jatomSize;
00270         jatomFreeSize = PPStmp.jatomFreeSize;
00271       }
00272 
00273       // Write to global memory here to avoid register spilling
00274       if (doVirial) {
00275         if (wid == 0) {
00276           virialEnergy[itileList].shx = shx;
00277           virialEnergy[itileList].shy = shy;
00278           virialEnergy[itileList].shz = shz;
00279         }
00280       }
00281 
00282       // Load i-atom data (and shift coordinates)
00283       float4 xyzq_i = xyzq[iatomStart + wid];
00284       xyzq_i.x += shx;
00285       xyzq_i.y += shy;
00286       xyzq_i.z += shz;
00287       int vdwtypei = vdwTypes[iatomStart + wid]*vdwCoefTableWidth;
00288 
00289       // Load i-atom data (and shift coordinates)
00290       BoundingBox boundingBoxI;
00291       if (doPairlist) {
00292         boundingBoxI = boundingBoxes[iatomStart/WARPSIZE];
00293         boundingBoxI.x += shx;
00294         boundingBoxI.y += shy;
00295         boundingBoxI.z += shz;
00296       }
00297 
00298       // Get i-atom global index
00299 #ifdef USE_NEW_EXCL_METHOD
00300       int iatomIndex, minExclAtom, maxExclAtom;
00301 #else
00302       int iatomIndex;
00303 #endif
00304       if (doPairlist) {
00305 #ifdef USE_NEW_EXCL_METHOD
00306         iatomIndex = atomIndex[iatomStart + wid];
00307         int2 tmp = minmaxExclAtom[iatomStart + wid];
00308         minExclAtom = tmp.x;
00309         maxExclAtom = tmp.y;
00310 #else
00311         iatomIndex = atomIndex[iatomStart + wid];
00312 #endif
00313       }
00314 
00315       // i-forces in registers
00316       // float3 iforce;
00317       iforce.x = 0.0f;
00318       iforce.y = 0.0f;
00319       iforce.z = 0.0f;
00320 
00321       // float3 iforceSlow;
00322       if (doSlow) {
00323         iforceSlow.x = 0.0f;
00324         iforceSlow.y = 0.0f;
00325         iforceSlow.z = 0.0f;
00326       }
00327 
00328       // float energyVdw, energyElec, energySlow;
00329       if (doEnergy) {
00330         energyVdw = 0.0f;
00331         energyElec = 0.0f;
00332         if (doSlow) energySlow = 0.0f;
00333       }
00334 
00335       // Number of exclusions
00336       // NOTE: Lowest bit is used as indicator bit for tile pairs:
00337       //       bit 0 tile has no atoms within pairlist cutoff
00338       //       bit 1 tile has atoms within pairlist cutoff
00339       // int nexcluded;
00340       if (doPairlist) nexcluded = 0;
00341 
00342       // Number of i loops and free atoms
00343       int nfreei;
00344       if (doPairlist) {
00345         int nloopi = min(iatomSize - iatomStart, WARPSIZE);
00346         nfreei = max(iatomFreeSize - iatomStart, 0);
00347         if (wid >= nloopi) {
00348           xyzq_i.x = -LARGE_FLOAT;
00349           xyzq_i.y = -LARGE_FLOAT;
00350           xyzq_i.z = -LARGE_FLOAT;
00351         }
00352       }
00353 
00354       // tile list stuff
00355       // int itileListLen;
00356       // int minJatomStart;
00357       if (doPairlist) {
00358         // minJatomStart = tileJatomStart[jtileStart];
00359         itileListLen = 0;
00360       }
00361 
00362       // Exclusion index and maxdiff
00363       int iexclIndex, iexclMaxdiff;
00364       if (doPairlist) {
00365         int2 tmp = exclIndexMaxDiff[iatomStart + wid];
00366         iexclIndex   = tmp.x;
00367         iexclMaxdiff = tmp.y;
00368       }
00369 
00370       for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) {
00371 
00372         // Load j-atom starting index and exclusion mask
00373         int jatomStart = tileJatomStart[jtile];
00374 
00375         float4 xyzq_j = xyzq[jatomStart + wid];
00376         WARP_SYNC(WARP_FULL_MASK);        
00377 
00378         // Check for early bail
00379         if (doPairlist) {
00380           float r2bb = distsq(boundingBoxI, xyzq_j);
00381           if (WARP_ALL(WARP_FULL_MASK, r2bb > plcutoff2)) continue;
00382         }
00383         unsigned int excl = (doPairlist) ? 0 : tileExcls[jtile].excl[wid];
00384         int vdwtypej = vdwTypes[jatomStart + wid];
00385         s_vdwtypej[iwarp][wid] = vdwtypej;
00386 
00387         // Get i-atom global index
00388         if (doPairlist) {
00389           s_jatomIndex[iwarp][wid] = atomIndex[jatomStart + wid];
00390         }
00391 
00392         // Number of j loops and free atoms
00393         int nfreej;
00394         if (doPairlist) {
00395           int nloopj = min(jatomSize - jatomStart, WARPSIZE);
00396           nfreej = max(jatomFreeSize - jatomStart, 0);
00397           //if (nfreei == 0 && nfreej == 0) continue;
00398           if (wid >= nloopj) {
00399             xyzq_j.x = LARGE_FLOAT;
00400             xyzq_j.y = LARGE_FLOAT;
00401             xyzq_j.z = LARGE_FLOAT;
00402           }
00403         }
00404 
00405         s_xyzq[iwarp][wid] = xyzq_j; 
00406 
00407         // DH - self requires that zeroShift is also set
00408         const bool self = zeroShift && (iatomStart == jatomStart);
00409         const int modval = (self) ? 2*WARPSIZE-1 : WARPSIZE-1;
00410 
00411         s_jforce[iwarp][wid] = make_float3(0.0f, 0.0f, 0.0f);
00412         if (doSlow)
00413           s_jforceSlow[iwarp][wid] = make_float3(0.0f, 0.0f, 0.0f); 
00414         WARP_SYNC(WARP_FULL_MASK);
00415         
00416 
00417         int t = (self) ? 1 : 0;
00418 
00419         if (doPairlist) {
00420           // Build pair list
00421           // NOTE: Pairlist update, we must also include the diagonal since this is used
00422           //       in GBIS phase 2.
00423           // Clear the lowest (indicator) bit
00424           nexcluded &= (~1);
00425 
00426           // For self tiles, do the diagonal term (t=0).
00427           // NOTE: No energies are computed here, since this self-diagonal term is only for GBIS phase 2
00428           if (self) {
00429             int j = (0 + wid) & modval;
00430             xyzq_j = s_xyzq[iwarp][j];
00431             float dx = xyzq_j.x - xyzq_i.x;
00432             float dy = xyzq_j.y - xyzq_i.y;
00433             float dz = xyzq_j.z - xyzq_i.z;       
00434 
00435             float r2 = dx*dx + dy*dy + dz*dz;
00436 
00437             if (j < WARPSIZE && r2 < plcutoff2) {
00438               // We have atom pair within the pairlist cutoff => Set indicator bit
00439               nexcluded |= 1;
00440             }
00441           }
00442 
00443           for (;t < WARPSIZE;t++) {
00444             int j = (t + wid) & modval;
00445 
00446             excl >>= 1;
00447             if (j < WARPSIZE ) {
00448               xyzq_j = s_xyzq[iwarp][j];
00449               float dx = xyzq_j.x - xyzq_i.x;
00450               float dy = xyzq_j.y - xyzq_i.y;
00451               float dz = xyzq_j.z - xyzq_i.z;
00452               float r2 = dx*dx + dy*dy + dz*dz;
00453               // We have atom pair within the pairlist cutoff => Set indicator bit
00454               if(r2 < plcutoff2){
00455                 nexcluded |= 1;
00456                 if (j < nfreej || wid < nfreei) {
00457                   bool excluded = false;
00458                   int indexdiff = s_jatomIndex[iwarp][j] - iatomIndex;
00459                   if ( abs(indexdiff) <= iexclMaxdiff) {
00460                     indexdiff += iexclIndex;
00461                     int indexword = ((unsigned int) indexdiff) >> 5;
00462 
00463                     if ( indexword < MAX_CONST_EXCLUSIONS ) {
00464                       indexword = constExclusions[indexword];
00465                     } else {
00466                       indexword = overflowExclusions[indexword];
00467                     }
00468 
00469                     excluded = ((indexword & (1<<(indexdiff&31))) != 0);
00470                   }
00471                   if (excluded) nexcluded += 2;
00472                   if (!excluded) excl |= 0x80000000;
00473                   if (!excluded && r2 < cutoff2) {
00474                     calcForceEnergy<doEnergy, doSlow>(
00475                       r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
00476                       vdwtypei, s_vdwtypej[iwarp][j],
00477                       vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
00478                       iforce, iforceSlow,
00479                       s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
00480                       energyVdw,
00481                       energyElec, energySlow);
00482                   }
00483                 }
00484               }
00485             }
00486             WARP_SYNC(WARP_FULL_MASK);
00487          } // t
00488        } else {
00489           // Just compute forces
00490           if (self) {
00491             excl >>= 1;
00492           }
00493           for (;t < WARPSIZE;t++) {
00494             if ((excl & 1)) {
00495               xyzq_j = s_xyzq[iwarp][(wid+t) & (WARPSIZE-1)];
00496               float dx = xyzq_j.x - xyzq_i.x;
00497               float dy = xyzq_j.y - xyzq_i.y;
00498               float dz = xyzq_j.z - xyzq_i.z;              
00499 
00500               float r2 = dx*dx + dy*dy + dz*dz;
00501 
00502               if (r2 < cutoff2) {
00503                 calcForceEnergy<doEnergy, doSlow>(
00504                   r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
00505                   vdwtypei, s_vdwtypej[iwarp][(wid+t) & (WARPSIZE-1)], vdwCoefTable, 
00506                   vdwCoefTableTex, forceTableTex, energyTableTex,
00507                   iforce, iforceSlow,
00508                   s_jforce[iwarp][(wid+t) & (WARPSIZE-1)],
00509                   s_jforceSlow[iwarp][(wid+t) & (WARPSIZE-1)],
00510                   energyVdw, energyElec, energySlow);
00511               } // (r2 < cutoff2)
00512             } // (excl & 1)
00513             excl >>= 1;
00514             WARP_SYNC(WARP_FULL_MASK);
00515           } // t
00516         }
00517 
00518         // Write j-forces
00519         storeForces<doSlow>(jatomStart + wid, s_jforce[iwarp][wid], s_jforceSlow[iwarp][wid],
00520           devForce_x, devForce_y, devForce_z,
00521           devForceSlow_x, devForceSlow_y, devForceSlow_z);
00522 
00523         // Write exclusions
00524         if (doPairlist && WARP_ANY(WARP_FULL_MASK, nexcluded & 1)) {
00525           int anyexcl = (65536 | WARP_ANY(WARP_FULL_MASK, excl));
00526           // Mark this jtile as non-empty:
00527           //  VdW:      1 if tile has atom pairs within pairlist cutoff and some these atoms interact
00528           //  GBIS: 65536 if tile has atom pairs within pairlist cutoff but not necessary interacting (i.e. these atoms are fixed or excluded)
00529           if (wid == 0) jtiles[jtile] = anyexcl;
00530           // Store exclusions
00531           tileExcls[jtile].excl[wid] = excl;
00532           // itileListLen:
00533           // lower 16 bits number of tiles with atom pairs within pairlist cutoff that interact
00534           // upper 16 bits number of tiles with atom pairs within pairlist cutoff (but not necessary interacting)
00535           itileListLen += anyexcl;
00536           // NOTE, this minJatomStart is only stored once for the first tile list entry
00537           // minJatomStart = min(minJatomStart, jatomStart);
00538         }
00539 
00540       } // jtile
00541 
00542       // Write i-forces
00543       storeForces<doSlow>(iatomStart + wid, iforce, iforceSlow,
00544                           devForce_x, devForce_y, devForce_z,
00545                           devForceSlow_x, devForceSlow_y, devForceSlow_z);
00546     }
00547     // Done with computation
00548 
00549     // Save pairlist stuff
00550     if (doPairlist) {
00551 
00552       // Warp index (0...warpsize-1)
00553       const int wid = threadIdx.x % WARPSIZE;
00554 
00555       if (wid == 0) {
00556         // minJatomStart is in range [0 ... atomStorageSize-1]
00557         //int atom0 = (minJatomStart)/WARPSIZE;
00558         // int atom0 = 0;
00559         // int storageOffset = atomStorageSize/WARPSIZE;
00560         // int itileListLen = 0;
00561         // for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) itileListLen += jtiles[jtile];
00562         // Store 0 if itileListLen == 0
00563         // tileListDepth[itileList] = (itileListLen > 0)*(itileListLen*storageOffset + atom0);
00564         tileListDepth[itileList] = itileListLen;
00565         tileListOrder[itileList] = itileList;
00566         // Number of active tilelists with tile with atom pairs within pairlist cutoff that interact
00567         if ((itileListLen & 65535) > 0) atomicAdd(&tileListStat->numTileLists, 1);
00568         // Number of active tilelists with tiles with atom pairs within pairlist cutoff (but not necessary interacting)
00569         if (itileListLen > 0) atomicAdd(&tileListStat->numTileListsGBIS, 1);
00570         // NOTE: always numTileListsGBIS >= numTileLists
00571       }
00572 
00573       typedef cub::WarpReduce<int> WarpReduceInt;
00574       __shared__ typename WarpReduceInt::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
00575       int warpId = threadIdx.x / WARPSIZE;
00576       // Remove indicator bit
00577       nexcluded >>= 1;
00578       volatile int nexcludedWarp = WarpReduceInt(tempStorage[warpId]).Sum(nexcluded);
00579       if (wid == 0) atomicAdd(&tileListStat->numExcluded, nexcludedWarp);
00580 
00581     }
00582 
00583     if (doVirial) {
00584       // Warp index (0...warpsize-1)
00585       const int wid = threadIdx.x % WARPSIZE;
00586 
00587       typedef cub::WarpReduce<float> WarpReduce;
00588       __shared__ typename WarpReduce::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
00589       int warpId = threadIdx.x / WARPSIZE;
00590       volatile float iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforce.x);
00591       WARP_SYNC(WARP_FULL_MASK);
00592       volatile float iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforce.y);
00593       WARP_SYNC(WARP_FULL_MASK);
00594       volatile float iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforce.z);
00595       WARP_SYNC(WARP_FULL_MASK);
00596       if (wid == 0) {
00597         virialEnergy[itileList].forcex = iforcexSum;
00598         virialEnergy[itileList].forcey = iforceySum;
00599         virialEnergy[itileList].forcez = iforcezSum;
00600       }
00601 
00602       if (doSlow) {
00603         iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.x);
00604         WARP_SYNC(WARP_FULL_MASK);
00605         iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.y);
00606         WARP_SYNC(WARP_FULL_MASK);
00607         iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.z);
00608         WARP_SYNC(WARP_FULL_MASK);
00609         if (wid == 0) {
00610           virialEnergy[itileList].forceSlowx = iforcexSum;
00611           virialEnergy[itileList].forceSlowy = iforceySum;
00612           virialEnergy[itileList].forceSlowz = iforcezSum;
00613         }
00614       }
00615     }
00616 
00617     // Reduce energy
00618     if (doEnergy) {
00619       // NOTE: We must hand write these warp-wide reductions to avoid excess register spillage
00620       //       (Why does CUB suck here?)
00621 #pragma unroll
00622       for (int i=16;i >= 1;i/=2) {
00623         energyVdw += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, 32);
00624         energyElec += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec, i, 32);
00625         if (doSlow) energySlow += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, 32);
00626       }
00627 
00628       if (threadIdx.x % WARPSIZE == 0) {
00629         virialEnergy[itileList].energyVdw  = energyVdw;
00630         virialEnergy[itileList].energyElec = energyElec;
00631         if (doSlow) virialEnergy[itileList].energySlow = energySlow;
00632       }
00633     }
00634 
00635     if (doStreaming) {
00636       // Make sure devForces and devForcesSlow have been written into device memory
00637       WARP_SYNC(WARP_FULL_MASK);
00638       __threadfence();
00639 
00640       int patchDone[2] = {false, false};
00641       const int wid = threadIdx.x % WARPSIZE;
00642       if (wid == 0) {
00643         int patchCountOld0 = atomicInc(&patchNumCount[patchInd.x], (unsigned int)(patchNumList.x-1));
00644         patchDone[0] = (patchCountOld0 + 1 == patchNumList.x);
00645         if (patchInd.x != patchInd.y) {
00646           int patchCountOld1 = atomicInc(&patchNumCount[patchInd.y], (unsigned int)(patchNumList.y-1));
00647           patchDone[1] = (patchCountOld1 + 1 == patchNumList.y);
00648         }
00649       }
00650 
00651       patchDone[0] = WARP_ANY(WARP_FULL_MASK, patchDone[0]);
00652       patchDone[1] = WARP_ANY(WARP_FULL_MASK, patchDone[1]);
00653 
00654       if (patchDone[0]) {
00655         // Patch 1 is done, write onto host-mapped memory
00656         CudaPatchRecord patch = cudaPatches[patchInd.x];
00657         int start = patch.atomStart;
00658         int end   = start + patch.numAtoms;
00659         for (int i=start+wid;i < end;i+=WARPSIZE) {
00660           mapForces[i] = make_float4(devForce_x[i],
00661             devForce_y[i], devForce_z[i], devForce_w[i]);
00662           if (doSlow){
00663             mapForcesSlow[i] = make_float4(devForceSlow_x[i],
00664                                            devForceSlow_y[i], 
00665                                            devForceSlow_z[i], 
00666                                            devForceSlow_w[i]);
00667           }
00668         }
00669       }
00670       if (patchDone[1]) {
00671         // Patch 2 is done
00672         CudaPatchRecord patch = cudaPatches[patchInd.y];
00673         int start = patch.atomStart;
00674         int end   = start + patch.numAtoms;
00675         for (int i=start+wid;i < end;i+=WARPSIZE) {
00676           mapForces[i] = make_float4(devForce_x[i],  devForce_y[i], devForce_z[i], devForce_w[i]);
00677           if (doSlow){
00678             mapForcesSlow[i] = make_float4(devForceSlow_x[i],
00679                                           devForceSlow_y[i], 
00680                                           devForceSlow_z[i], 
00681                                           devForceSlow_w[i]);
00682           }
00683         }
00684       }
00685 
00686       if (patchDone[0] || patchDone[1]) {
00687         // Make sure mapForces and mapForcesSlow are up-to-date
00688         WARP_SYNC(WARP_FULL_MASK);
00689         __threadfence_system();
00690         // Add patch into "patchReadyQueue"
00691         if (wid == 0) {
00692           if (patchDone[0]) {
00693             int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
00694             // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
00695             mapPatchReadyQueue[ind] = patchInd.x;
00696           }
00697           if (patchDone[1]) {
00698             int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
00699             // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
00700             mapPatchReadyQueue[ind] = patchInd.y;
00701           }
00702         }
00703       }
00704     }
00705 
00706     if (doStreaming && outputOrder != NULL && threadIdx.x % WARPSIZE == 0) {
00707       int index = atomicAdd(&tileListStat->outputOrderIndex, 1);
00708       outputOrder[index] = itileList;
00709     }
00710   } // if (itileList < numTileLists)

void NAMD_die ( const char *   ) 

Definition at line 83 of file common.C.

00084 {  // Report a fatal error through Charm++ and terminate the run with failure status.
00085   if ( ! err_msg ) err_msg = "(unknown error)";  // tolerate a null message pointer
00086   CkPrintf("FATAL ERROR: %s\n", err_msg);
00087   fflush(stdout);  // make sure the message reaches the log before exiting
00088   char repstr[24] = "";
00089   if (CmiNumPartitions() > 1) {
00090     sprintf(repstr,"REPLICA %d ", CmiMyPartition());  // tag the error with the replica index in multi-partition runs; 24 bytes is enough for "REPLICA " + a 32-bit int + trailing space
00091   }
00092   CkError("%sFATAL ERROR: %s\n", repstr, err_msg);
00093 #if CHARM_VERSION < 61000
00094   CkExit();  // pre-6.10 Charm++ had no exit-status argument
00095 #else
00096   CkExit(1);  // non-zero status so callers/schedulers see the failure
00097 #endif
00098 }

__global__ void reduceGBISEnergyKernel ( const int  numTileLists,
const TileListVirialEnergy *__restrict__  tileListVirialEnergy,
VirialEnergy *__restrict__  virialEnergy 
)

Definition at line 959 of file CudaComputeNonbondedKernel.cu.

References BLOCK_SYNC, and tempStorage.

00961                                            {
00962 
00963   for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)  // grid-stride loop over tile lists
00964   {
00965     int itileList = ibase + threadIdx.x;
00966     double energyGBISt = 0.0;  // threads past the end contribute zero to the block reduction
00967     if (itileList < numTileLists) {
00968       energyGBISt = tileListVirialEnergy[itileList].energyGBIS;
00969     }
00970 
00971     typedef cub::BlockReduce<double, REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;  // use this kernel's own macro (was REDUCEVIRIALENERGYKERNEL_NUM_WARP; both are 32, so behavior is unchanged, but the block size now tracks the GBIS launch config)
00972     __shared__ typename BlockReduce::TempStorage tempStorage;
00973     volatile double energyGBIS = BlockReduce(tempStorage).Sum(energyGBISt); BLOCK_SYNC;  // sync before the next loop iteration reuses tempStorage
00974     if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);  // one atomic per block into the global accumulator
00975   }
00976 
00977 }

__global__ void reduceNonbondedVirialKernel ( const bool  doSlow,
const int  atomStorageSize,
const float4 *__restrict__  xyzq,
const float4 *__restrict__  devForces,
const float4 *__restrict__  devForcesSlow,
VirialEnergy *__restrict__  virialEnergy 
)

Definition at line 717 of file CudaComputeNonbondedKernel.cu.

References BLOCK_SYNC, and tempStorage.

00721                                            {  // Accumulate the nonbonded virial tensor: per-atom force (x) position outer products, block-reduced, then atomically added to the global tensor.
00722 
00723   for (int ibase = blockIdx.x*blockDim.x;ibase < atomStorageSize;ibase += blockDim.x*gridDim.x)  // grid-stride loop over atom storage
00724   {
00725     int i = ibase + threadIdx.x;
00726 
00727     // Set to zero to avoid nan*0
00728     float4 pos;
00729     pos.x = 0.0f;
00730     pos.y = 0.0f;
00731     pos.z = 0.0f;
00732     float4 force, forceSlow;
00733     force.x = 0.0f;
00734     force.y = 0.0f;
00735     force.z = 0.0f;
00736     forceSlow.x = 0.0f;
00737     forceSlow.y = 0.0f;
00738     forceSlow.z = 0.0f;
00739     if (i < atomStorageSize) {  // tail threads keep the zero values so they contribute nothing
00740       pos = xyzq[i];
00741       force = devForces[i];
00742       if (doSlow) forceSlow = devForcesSlow[i];
00743     }
00744     // Reduce across the entire thread block
00745     float vxxt = force.x*pos.x;  // the nine components of the force (x) position outer product
00746     float vxyt = force.x*pos.y;
00747     float vxzt = force.x*pos.z;
00748     float vyxt = force.y*pos.x;
00749     float vyyt = force.y*pos.y;
00750     float vyzt = force.y*pos.z;
00751     float vzxt = force.z*pos.x;
00752     float vzyt = force.z*pos.y;
00753     float vzzt = force.z*pos.z;
00754     // atomicAdd(&virialEnergy->virial[0], (double)vxx);
00755     // atomicAdd(&virialEnergy->virial[1], (double)vxy);
00756     // atomicAdd(&virialEnergy->virial[2], (double)vxz);
00757     // atomicAdd(&virialEnergy->virial[3], (double)vyx);
00758     // atomicAdd(&virialEnergy->virial[4], (double)vyy);
00759     // atomicAdd(&virialEnergy->virial[5], (double)vyz);
00760     // atomicAdd(&virialEnergy->virial[6], (double)vzx);
00761     // atomicAdd(&virialEnergy->virial[7], (double)vzy);
00762     // atomicAdd(&virialEnergy->virial[8], (double)vzz);
00763 
00764     typedef cub::BlockReduce<float, REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE> BlockReduce;  // template block size must match the launch configuration
00765     __shared__ typename BlockReduce::TempStorage tempStorage;
00766     volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;  // BLOCK_SYNC after each Sum: tempStorage is reused by the next reduction
00767     volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
00768     volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
00769     volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
00770     volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
00771     volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
00772     volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
00773     volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
00774     volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
00775     if (threadIdx.x == 0) {  // one thread per block commits the block's partial tensor with double-precision atomics
00776       atomicAdd(&virialEnergy->virial[0], (double)vxx);
00777       atomicAdd(&virialEnergy->virial[1], (double)vxy);
00778       atomicAdd(&virialEnergy->virial[2], (double)vxz);
00779       atomicAdd(&virialEnergy->virial[3], (double)vyx);
00780       atomicAdd(&virialEnergy->virial[4], (double)vyy);
00781       atomicAdd(&virialEnergy->virial[5], (double)vyz);
00782       atomicAdd(&virialEnergy->virial[6], (double)vzx);
00783       atomicAdd(&virialEnergy->virial[7], (double)vzy);
00784       atomicAdd(&virialEnergy->virial[8], (double)vzz);
00785     }
00786 
00787     if (doSlow) {  // repeat the same reduction for the slow-force virial
00788       // if (isnan(forceSlow.x) || isnan(forceSlow.y) || isnan(forceSlow.z))
00789       float vxxSlowt = forceSlow.x*pos.x;
00790       float vxySlowt = forceSlow.x*pos.y;
00791       float vxzSlowt = forceSlow.x*pos.z;
00792       float vyxSlowt = forceSlow.y*pos.x;
00793       float vyySlowt = forceSlow.y*pos.y;
00794       float vyzSlowt = forceSlow.y*pos.z;
00795       float vzxSlowt = forceSlow.z*pos.x;
00796       float vzySlowt = forceSlow.z*pos.y;
00797       float vzzSlowt = forceSlow.z*pos.z;
00798       // atomicAdd(&virialEnergy->virialSlow[0], (double)vxxSlow);
00799       // atomicAdd(&virialEnergy->virialSlow[1], (double)vxySlow);
00800       // atomicAdd(&virialEnergy->virialSlow[2], (double)vxzSlow);
00801       // atomicAdd(&virialEnergy->virialSlow[3], (double)vyxSlow);
00802       // atomicAdd(&virialEnergy->virialSlow[4], (double)vyySlow);
00803       // atomicAdd(&virialEnergy->virialSlow[5], (double)vyzSlow);
00804       // atomicAdd(&virialEnergy->virialSlow[6], (double)vzxSlow);
00805       // atomicAdd(&virialEnergy->virialSlow[7], (double)vzySlow);
00806       // atomicAdd(&virialEnergy->virialSlow[8], (double)vzzSlow);
00807       volatile float vxxSlow = BlockReduce(tempStorage).Sum(vxxSlowt); BLOCK_SYNC;
00808       volatile float vxySlow = BlockReduce(tempStorage).Sum(vxySlowt); BLOCK_SYNC;
00809       volatile float vxzSlow = BlockReduce(tempStorage).Sum(vxzSlowt); BLOCK_SYNC;
00810       volatile float vyxSlow = BlockReduce(tempStorage).Sum(vyxSlowt); BLOCK_SYNC;
00811       volatile float vyySlow = BlockReduce(tempStorage).Sum(vyySlowt); BLOCK_SYNC;
00812       volatile float vyzSlow = BlockReduce(tempStorage).Sum(vyzSlowt); BLOCK_SYNC;
00813       volatile float vzxSlow = BlockReduce(tempStorage).Sum(vzxSlowt); BLOCK_SYNC;
00814       volatile float vzySlow = BlockReduce(tempStorage).Sum(vzySlowt); BLOCK_SYNC;
00815       volatile float vzzSlow = BlockReduce(tempStorage).Sum(vzzSlowt); BLOCK_SYNC;
00816       if (threadIdx.x == 0) {
00817         atomicAdd(&virialEnergy->virialSlow[0], (double)vxxSlow);
00818         atomicAdd(&virialEnergy->virialSlow[1], (double)vxySlow);
00819         atomicAdd(&virialEnergy->virialSlow[2], (double)vxzSlow);
00820         atomicAdd(&virialEnergy->virialSlow[3], (double)vyxSlow);
00821         atomicAdd(&virialEnergy->virialSlow[4], (double)vyySlow);
00822         atomicAdd(&virialEnergy->virialSlow[5], (double)vyzSlow);
00823         atomicAdd(&virialEnergy->virialSlow[6], (double)vzxSlow);
00824         atomicAdd(&virialEnergy->virialSlow[7], (double)vzySlow);
00825         atomicAdd(&virialEnergy->virialSlow[8], (double)vzzSlow);
00826       }
00827     }
00828   
00829   }
00830 
00831 }

__global__ void reduceVirialEnergyKernel ( const bool  doEnergy,
const bool  doVirial,
const bool  doSlow,
const int  numTileLists,
const TileListVirialEnergy *__restrict__  tileListVirialEnergy,
VirialEnergy *__restrict__  virialEnergy 
)

Definition at line 834 of file CudaComputeNonbondedKernel.cu.

References BLOCK_SYNC, TileListVirialEnergy::energyElec, TileListVirialEnergy::energySlow, TileListVirialEnergy::energyVdw, TileListVirialEnergy::forceSlowx, TileListVirialEnergy::forceSlowy, TileListVirialEnergy::forceSlowz, TileListVirialEnergy::forcex, TileListVirialEnergy::forcey, TileListVirialEnergy::forcez, TileListVirialEnergy::shx, TileListVirialEnergy::shy, TileListVirialEnergy::shz, and tempStorage.

00838                                            {
00839 
// Block-strided loop over tile lists: each block consumes one
// blockDim.x-sized chunk per iteration and advances by the whole grid,
// so any grid size covers all numTileLists entries.
00840   for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
00841   {
00842     int itileList = ibase + threadIdx.x;
00843     TileListVirialEnergy ve;
00844     if (itileList < numTileLists) {
00845       ve = tileListVirialEnergy[itileList];
00846     } else {
00847       // Set to zero to avoid nan*0
// Tail threads (itileList >= numTileLists) still participate in the
// block-wide reductions below, so the fields they feed in must be
// zeroed; uninitialized reads would corrupt the sums.
00848       if (doVirial) {
00849         ve.shx = 0.0f;
00850         ve.shy = 0.0f;
00851         ve.shz = 0.0f;
00852         ve.forcex = 0.0f;
00853         ve.forcey = 0.0f;
00854         ve.forcez = 0.0f;
00855         ve.forceSlowx = 0.0f;
00856         ve.forceSlowy = 0.0f;
00857         ve.forceSlowz = 0.0f;
00858       }
00859       if (doEnergy) {
00860         ve.energyVdw = 0.0;
00861         ve.energyElec = 0.0;
00862         ve.energySlow = 0.0;
00863         // ve.energyGBIS = 0.0;
00864       }
00865     }
00866 
// doVirial is a kernel argument, hence uniform across the block, so the
// block-wide reductions (and their implied barriers) inside this branch
// are reached by every thread or by none.
00867     if (doVirial) {
00868       typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00869       __shared__ typename BlockReduce::TempStorage tempStorage;
// Virial tensor contribution of the fast forces: outer product of the
// per-tile-list force (forcex/y/z) with the patch shift vector (shx/y/z).
00870       float vxxt = ve.forcex*ve.shx;
00871       float vxyt = ve.forcex*ve.shy;
00872       float vxzt = ve.forcex*ve.shz;
00873       float vyxt = ve.forcey*ve.shx;
00874       float vyyt = ve.forcey*ve.shy;
00875       float vyzt = ve.forcey*ve.shz;
00876       float vzxt = ve.forcez*ve.shx;
00877       float vzyt = ve.forcez*ve.shy;
00878       float vzzt = ve.forcez*ve.shz;
// Nine sequential block reductions share one TempStorage buffer; the
// BLOCK_SYNC after each Sum is required before the storage may be
// reused (CUB temporary-storage reuse rule).
00879       volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
00880       volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
00881       volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
00882       volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
00883       volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
00884       volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
00885       volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
00886       volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
00887       volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
// Thread 0 holds the block total; fold it into the global 3x3 tensor.
// float partials are widened to double only here, at the atomic.
// NOTE(review): atomicAdd(double*) requires SM60+ — confirm build arch.
00888       if (threadIdx.x == 0) {
00889         atomicAdd(&virialEnergy->virial[0], (double)vxx);
00890         atomicAdd(&virialEnergy->virial[1], (double)vxy);
00891         atomicAdd(&virialEnergy->virial[2], (double)vxz);
00892         atomicAdd(&virialEnergy->virial[3], (double)vyx);
00893         atomicAdd(&virialEnergy->virial[4], (double)vyy);
00894         atomicAdd(&virialEnergy->virial[5], (double)vyz);
00895         atomicAdd(&virialEnergy->virial[6], (double)vzx);
00896         atomicAdd(&virialEnergy->virial[7], (double)vzy);
00897         atomicAdd(&virialEnergy->virial[8], (double)vzz);
00898       }
00899 
// Same reduction again for the slow (long-range) force virial. The
// typedef and tempStorage here deliberately shadow the outer ones,
// allocating a second shared buffer for this nested scope.
00900       if (doSlow) {
00901         typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00902         __shared__ typename BlockReduce::TempStorage tempStorage;
00903         float vxxt = ve.forceSlowx*ve.shx;
00904         float vxyt = ve.forceSlowx*ve.shy;
00905         float vxzt = ve.forceSlowx*ve.shz;
00906         float vyxt = ve.forceSlowy*ve.shx;
00907         float vyyt = ve.forceSlowy*ve.shy;
00908         float vyzt = ve.forceSlowy*ve.shz;
00909         float vzxt = ve.forceSlowz*ve.shx;
00910         float vzyt = ve.forceSlowz*ve.shy;
00911         float vzzt = ve.forceSlowz*ve.shz;
00912         volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
00913         volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
00914         volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
00915         volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
00916         volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
00917         volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
00918         volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
00919         volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
00920         volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
00921         if (threadIdx.x == 0) {
00922           atomicAdd(&virialEnergy->virialSlow[0], (double)vxx);
00923           atomicAdd(&virialEnergy->virialSlow[1], (double)vxy);
00924           atomicAdd(&virialEnergy->virialSlow[2], (double)vxz);
00925           atomicAdd(&virialEnergy->virialSlow[3], (double)vyx);
00926           atomicAdd(&virialEnergy->virialSlow[4], (double)vyy);
00927           atomicAdd(&virialEnergy->virialSlow[5], (double)vyz);
00928           atomicAdd(&virialEnergy->virialSlow[6], (double)vzx);
00929           atomicAdd(&virialEnergy->virialSlow[7], (double)vzy);
00930           atomicAdd(&virialEnergy->virialSlow[8], (double)vzz);
00931         }
00932       }
00933     }
00934 
// Energy reductions are done in double precision (separate BlockReduce
// instantiation and its own shared TempStorage).
00935     if (doEnergy) {
00936       typedef cub::BlockReduce<double, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00937       __shared__ typename BlockReduce::TempStorage tempStorage;
00938       volatile double energyVdw  = BlockReduce(tempStorage).Sum(ve.energyVdw); BLOCK_SYNC;
00939       volatile double energyElec = BlockReduce(tempStorage).Sum(ve.energyElec); BLOCK_SYNC;
00940       if (threadIdx.x == 0) {
00941           atomicAdd(&virialEnergy->energyVdw, (double)energyVdw);
00942           atomicAdd(&virialEnergy->energyElec, (double)energyElec);
00943       }
00944       if (doSlow) {
00945         volatile double energySlow = BlockReduce(tempStorage).Sum(ve.energySlow); BLOCK_SYNC;
00946         if (threadIdx.x == 0) atomicAdd(&virialEnergy->energySlow, (double)energySlow);
00947       }
00948       // if (doGBIS) {
00949       //   double energyGBIS = BlockReduce(tempStorage).Sum(ve.energyGBIS); BLOCK_SYNC;
00950       //   if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
00951       // }
00952     }
00953 
00954   }
00955 
00956 }

template<bool doSlow>
__device__ __forceinline__ void shuffleNext ( float3 &  jforce,
float3 &  jforceSlow 
) [inline]

Definition at line 151 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

template <bool doSlow>
__device__ __forceinline__ void shuffleNext(float3& jforce, float3& jforceSlow) {
  // Rotate each lane's accumulated j-force to the previous slot: lane i
  // receives lane i+1's value, wrapping at the warp boundary.
  const int srcLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  jforce.x = WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, srcLane, WARPSIZE);
  jforce.y = WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, srcLane, WARPSIZE);
  jforce.z = WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, srcLane, WARPSIZE);
  if (doSlow) {
    // Slow (long-range) force accumulator is rotated only when the
    // template flag compiles it in.
    jforceSlow.x = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, srcLane, WARPSIZE);
    jforceSlow.y = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, srcLane, WARPSIZE);
    jforceSlow.z = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, srcLane, WARPSIZE);
  }
}

template<bool doPairlist>
__device__ __forceinline__ void shuffleNext ( float &  xyzq_j_w,
int &  vdwtypej,
int &  jatomIndex 
) [inline]

Definition at line 141 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

template <bool doPairlist>
__device__ __forceinline__ void shuffleNext(float& xyzq_j_w, int& vdwtypej, int& jatomIndex) {
  // Rotate per-lane j-atom data (charge/w component and VdW type) to the
  // next lane in the warp, wrapping at the warp boundary.
  const int srcLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, srcLane, WARPSIZE);
  vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, srcLane, WARPSIZE);
  if (doPairlist) {
    // The atom index is only carried along when doPairlist is set.
    jatomIndex = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, srcLane, WARPSIZE);
  }
}

template<bool doPairlist>
__device__ __forceinline__ void shuffleNext ( float &  xyzq_j_w,
int &  vdwtypej,
int &  jatomIndex,
int &  jexclMaxdiff,
int &  jexclIndex 
) [inline]

Definition at line 129 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

template <bool doPairlist>
__device__ __forceinline__ void shuffleNext(float& xyzq_j_w, int& vdwtypej,
                                            int& jatomIndex, int& jexclMaxdiff,
                                            int& jexclIndex) {
  // Rotate per-lane j-atom data to the next lane in the warp (wraps at
  // the warp boundary); this variant also carries exclusion bookkeeping.
  const int srcLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, srcLane, WARPSIZE);
  vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, srcLane, WARPSIZE);
  if (doPairlist) {
    // Atom index and exclusion-list cursor/extent are only carried along
    // when doPairlist is set.
    jatomIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, srcLane, WARPSIZE);
    jexclIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jexclIndex, srcLane, WARPSIZE);
    jexclMaxdiff = WARP_SHUFFLE(WARP_FULL_MASK, jexclMaxdiff, srcLane, WARPSIZE);
  }
}

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float3 *__restrict__  forces,
float3 *__restrict__  forcesSlow 
) [inline]

Definition at line 115 of file CudaComputeNonbondedKernel.cu.

template <bool doSlow>
__device__ __forceinline__ void storeForces(const int pos, const float3 force,
                                            const float3 forceSlow,
                                            float3* __restrict__ forces,
                                            float3* __restrict__ forcesSlow) {
  // Atomically accumulate this thread's contribution on atom `pos`;
  // other threads may be adding to the same slot concurrently.
  float3* f = &forces[pos];
  atomicAdd(&f->x, force.x);
  atomicAdd(&f->y, force.y);
  atomicAdd(&f->z, force.z);
  if (doSlow) {
    // Slow (long-range) force buffer, compiled in only when doSlow.
    float3* fs = &forcesSlow[pos];
    atomicAdd(&fs->x, forceSlow.x);
    atomicAdd(&fs->y, forceSlow.y);
    atomicAdd(&fs->z, forceSlow.z);
  }
}

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float *__restrict__  devForces_x,
float *__restrict__  devForces_y,
float *__restrict__  devForces_z,
float *__restrict__  devForcesSlow_x,
float *__restrict__  devForcesSlow_y,
float *__restrict__  devForcesSlow_z 
) [inline]

Definition at line 95 of file CudaComputeNonbondedKernel.cu.

{
  // Structure-of-arrays variant: each coordinate lives in its own
  // contiguous array, so warp-wide stores to consecutive `pos` values
  // coalesce. Accumulation is atomic because multiple threads may
  // target the same atom slot.
  atomicAdd(devForces_x + pos, force.x);
  atomicAdd(devForces_y + pos, force.y);
  atomicAdd(devForces_z + pos, force.z);
  if (doSlow) {
    // Slow (long-range) force arrays, compiled in only when doSlow.
    atomicAdd(devForcesSlow_x + pos, forceSlow.x);
    atomicAdd(devForcesSlow_y + pos, forceSlow.y);
    atomicAdd(devForcesSlow_z + pos, forceSlow.z);
  }
}

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float4 *__restrict__  devForces,
float4 *__restrict__  devForcesSlow 
) [inline]

Definition at line 81 of file CudaComputeNonbondedKernel.cu.

template <bool doSlow>
__device__ __forceinline__ void storeForces(const int pos, const float3 force,
                                            const float3 forceSlow,
                                            float4* __restrict__ devForces,
                                            float4* __restrict__ devForcesSlow) {
  // float4-buffer variant: accumulate x/y/z atomically into the slot for
  // atom `pos` (the w component is left untouched).
  float4* f = &devForces[pos];
  atomicAdd(&f->x, force.x);
  atomicAdd(&f->y, force.y);
  atomicAdd(&f->z, force.z);
  if (doSlow) {
    // Slow (long-range) force buffer, compiled in only when doSlow.
    float4* fs = &devForcesSlow[pos];
    atomicAdd(&fs->x, forceSlow.x);
    atomicAdd(&fs->y, forceSlow.y);
    atomicAdd(&fs->z, forceSlow.z);
  }
}

template<int doSlow>
__global__ void transposeForcesKernel ( float4 *  f,
float4 *  fSlow,
float *  fx,
float *  fy,
float *  fz,
float *  fw,
float *  fSlowx,
float *  fSlowy,
float *  fSlowz,
float *  fSloww,
int  n 
) [inline]

Definition at line 1062 of file CudaComputeNonbondedKernel.cu.

01066 {
01067   int tid = blockIdx.x*blockDim.x + threadIdx.x;
01068   if (tid < n) {
01069     f[tid] = make_float4(fx[tid], fy[tid], fz[tid], fw[tid]);
01070     if (doSlow) {
01071       fSlow[tid] = make_float4(fSlowx[tid], fSlowy[tid], fSlowz[tid], fSloww[tid]);
01072     }
01073   }
01074 }


Variable Documentation

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ atomIndex

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by HomePatch::doAtomMigration().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ boundingBoxes

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS]

Definition at line 20 of file CudaComputeNonbondedKernel.cu.

Referenced by CudaComputeNonbondedKernel::bindExclusions().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ cudaPatches

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cutoff2

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForce_w

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ devForce_x

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ devForce_y

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForce_z

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ devForces

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_w

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_x

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_y

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_z

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ devForcesSlow

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Definition at line 18 of file DeviceCUDA.C.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t energyTableTex

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ exclIndexMaxDiff

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t forceTableTex

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ TileListVirialEnergy* __restrict__ virialEnergy

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

int itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ jtiles

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 lata

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 latb

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 latc

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ mapForces

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ mapForcesSlow

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ mapPatchReadyQueue

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int numPatches
__global__ void const int numTileLists

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ outputOrder

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ overflowExclusions

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ patchNumCount

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ patchPairs

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ tileExcls

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ tileJatomStart

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ tileListDepth

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ tileListOrder

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ tileLists

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ tileListStat

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ vdwCoefTable

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t vdwCoefTableTex

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int vdwCoefTableWidth

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ vdwTypes

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ xyzq

Definition at line 186 of file CudaComputeNonbondedKernel.cu.


Generated on 12 Jul 2020 for NAMD by  doxygen 1.6.1