CudaComputeNonbondedKernel.cu File Reference

#include <cuda.h>
#include <cub/cub.cuh>
#include "CudaComputeNonbondedKernel.h"
#include "CudaTileListKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Defines

#define OVERALLOC   1.2f
#define MAX_CONST_EXCLUSIONS   2048
#define NONBONDKERNEL_NUM_WARP   4
#define LARGE_FLOAT   (float)(1.0e10)
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
#define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING)

Functions

void NAMD_die (const char *)
template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy (const float r2, const float qi, const float qj, const float dx, const float dy, const float dz, const int vdwtypei, const int vdwtypej, const float2 *__restrict__ vdwCoefTable, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, float3 &iforce, float3 &iforceSlow, float3 &jforce, float3 &jforceSlow, float &energyVdw, float &energyElec, float &energySlow)
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float4 *__restrict__ devForces, float4 *__restrict__ devForcesSlow)
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float *__restrict__ devForces_x, float *__restrict__ devForces_y, float *__restrict__ devForces_z, float *__restrict__ devForcesSlow_x, float *__restrict__ devForcesSlow_y, float *__restrict__ devForcesSlow_z)
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float3 *__restrict__ forces, float3 *__restrict__ forcesSlow)
template<bool doPairlist>
__device__ __forceinline__ void shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex, int &jexclMaxdiff, int &jexclIndex)
template<bool doPairlist>
__device__ __forceinline__ void shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex)
template<bool doSlow>
__device__ __forceinline__ void shuffleNext (float3 &jforce, float3 &jforceSlow)
__device__ __forceinline__ float distsq (const BoundingBox a, const float4 b)
template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ (WARPSIZE * NONBONDKERNEL_NUM_WARP, doPairlist ? (10) : (doEnergy ? (10) : (10))) nonbondedForceKernel (const int start, ...)
 NOTE: the remaining kernel parameters appear under "Variables" below, and the fragment "if (itileList < numTileLists)" belongs to the kernel body — both are documentation-extraction artifacts.
__global__ void reduceNonbondedVirialKernel (const bool doSlow, const int atomStorageSize, const float4 *__restrict__ xyzq, const float4 *__restrict__ devForces, const float4 *__restrict__ devForcesSlow, VirialEnergy *__restrict__ virialEnergy)
__global__ void reduceVirialEnergyKernel (const bool doEnergy, const bool doVirial, const bool doSlow, const int numTileLists, const TileListVirialEnergy *__restrict__ tileListVirialEnergy, VirialEnergy *__restrict__ virialEnergy)
__global__ void reduceGBISEnergyKernel (const int numTileLists, const TileListVirialEnergy *__restrict__ tileListVirialEnergy, VirialEnergy *__restrict__ virialEnergy)
template<int doSlow>
__global__ void transposeForcesKernel (float4 *f, float4 *fSlow, float *fx, float *fy, float *fz, float *fw, float *fSlowx, float *fSlowy, float *fSlowz, float *fSloww, int n)

Variables

__thread DeviceCUDA * deviceCUDA
__constant__ unsigned int constExclusions [MAX_CONST_EXCLUSIONS]
__global__ void const int numTileLists
__global__ void const int
const TileList *__restrict__ 
tileLists
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ 
tileExcls
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ 
tileJatomStart
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int 
vdwCoefTableWidth
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__ 
vdwCoefTable
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ 
vdwTypes
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 
lata
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 
latb
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 
latc
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ 
xyzq
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float 
cutoff2
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t 
vdwCoefTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t 
forceTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t 
energyTableTex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int 
atomStorageSize
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float 
plcutoff2
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__ 
patchPairs
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ 
atomIndex
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ 
exclIndexMaxDiff
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__ 
overflowExclusions
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ 
tileListDepth
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ 
tileListOrder
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ 
jtiles
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ 
tileListStat
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__ 
boundingBoxes
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ 
devForces
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ 
devForcesSlow
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ 
devForce_x
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_y
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_z
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_w
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_x
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_y
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_z
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_w
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int 
numPatches
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__ 
patchNumCount
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ 
cudaPatches
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ 
mapForces
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ 
mapForcesSlow
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ 
mapPatchReadyQueue
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ int
*__restrict__ 
outputOrder
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ int
*__restrict__
TileListVirialEnergy
*__restrict__ virialEnergy int 
itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x

Define Documentation

#define CALL ( DOENERGY,
DOVIRIAL,
DOSLOW,
DOPAIRLIST,
DOSTREAMING   ) 
Value:
nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING> \
  <<< nblock, nthread, shMemSize, stream >>>  \
  (start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getVdwCoefTable(), \
    vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, \
    cudaNonbondedTables.getVdwCoefTableTex(), cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex(), \
    atomStorageSize, tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
    tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
    tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
    force_x, force_y, force_z, force_w, \
    forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
    numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
    outputOrderPtr, tlKernel.getTileListVirialEnergy()); called=true
#define LARGE_FLOAT   (float)(1.0e10)

Definition at line 171 of file CudaComputeNonbondedKernel.cu.

#define MAX_CONST_EXCLUSIONS   2048

Definition at line 15 of file CudaComputeNonbondedKernel.cu.

#define NONBONDKERNEL_NUM_WARP   4

Definition at line 18 of file CudaComputeNonbondedKernel.cu.

#define OVERALLOC   1.2f
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32

Function Documentation

template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ ( WARPSIZE *  NONBONDKERNEL_NUM_WARP,
doPairlist ? (10) : (doEnergy ? (10) : (10)) 
) [inline]
template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy ( const float  r2,
const float  qi,
const float  qj,
const float  dx,
const float  dy,
const float  dz,
const int  vdwtypei,
const int  vdwtypej,
const float2 *__restrict__  vdwCoefTable,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
float3 &  iforce,
float3 &  iforceSlow,
float3 &  jforce,
float3 &  jforceSlow,
float &  energyVdw,
float &  energyElec,
float &  energySlow 
) [inline]

Definition at line 22 of file CudaComputeNonbondedKernel.cu.

References __ldg, float2::x, and float2::y.

00028                                                           {
00029 
00030   int vdwIndex = vdwtypej + vdwtypei;
00031 #if __CUDA_ARCH__ >= 350
00032   float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00033 #else
00034   float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
00035 #endif
00036 
00037   float rinv = rsqrtf(r2);
00038   float4 ei;
00039   float4 fi = tex1D<float4>(forceTableTex, rinv);
00040   if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
00041 
00042   float fSlow = qi * qj;
00043   float f = ljab.x * fi.z + ljab.y * fi.y + fSlow * fi.x;
00044 
00045   if (doEnergy) {
00046     energyVdw  += ljab.x * ei.z + ljab.y * ei.y;
00047     energyElec += fSlow * ei.x;
00048     if (doSlow) energySlow += fSlow * ei.w;
00049   }
00050   if (doSlow) fSlow *= fi.w;
00051 
00052   float fx = dx * f;
00053   float fy = dy * f;
00054   float fz = dz * f;
00055   iforce.x += fx;
00056   iforce.y += fy;
00057   iforce.z += fz;
00058   jforce.x -= fx;
00059   jforce.y -= fy;
00060   jforce.z -= fz;
00061 
00062   if (doSlow) {
00063     float fxSlow = dx * fSlow;
00064     float fySlow = dy * fSlow;
00065     float fzSlow = dz * fSlow;
00066     iforceSlow.x += fxSlow;
00067     iforceSlow.y += fySlow;
00068     iforceSlow.z += fzSlow;
00069     jforceSlow.x -= fxSlow;
00070     jforceSlow.y -= fySlow;
00071     jforceSlow.z -= fzSlow;
00072   }
00073 }

__device__ __forceinline__ float distsq ( const BoundingBox  a,
const float4  b 
)

Definition at line 163 of file CudaComputeNonbondedKernel.cu.

References BoundingBox::wx, BoundingBox::wy, BoundingBox::wz, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

Referenced by buildTileListsBBKernel(), and by the tile-list loop body that Doxygen renders below as if().

// Returns the squared distance from point b to the surface of the
// axis-aligned bounding box a (0.0f when b lies inside the box).
// Used to cull whole tiles against the pairlist cutoff.
00163                                                                              {
00164   float dx = max(0.0f, fabsf(a.x - b.x) - a.wx);  // per-axis distance beyond the box half-width
00165   float dy = max(0.0f, fabsf(a.y - b.y) - a.wy);
00166   float dz = max(0.0f, fabsf(a.z - b.z) - a.wz);
00167   float r2 = dx*dx + dy*dy + dz*dz;
00168   return r2;
00169 }

if (  )  — Doxygen parsing artifact: this entry is not a standalone function but the interior of the "if (itileList < numTileLists)" guard inside the nonbonded force kernel.

Definition at line 224 of file CudaComputeNonbondedKernel.cu.

References CudaPatchRecord::atomStart, constExclusions, distsq(), PatchPairRecord::iatomFreeSize, PatchPairRecord::iatomSize, TileList::iatomStart, j, PatchPairRecord::jatomFreeSize, PatchPairRecord::jatomSize, TileList::jtileEnd, TileList::jtileStart, LARGE_FLOAT, MAX_CONST_EXCLUSIONS, CudaPatchRecord::numAtoms, TileList::offsetXYZ, TileList::patchInd, TileList::patchNumList, tempStorage, WARP_ALL, WARP_ANY, WARP_FULL_MASK, WARP_SHUFFLE_XOR, WARP_SYNC, WARPSIZE, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

// Per-tile-list body of the nonbonded force kernel (Doxygen renders this as
// "if()" because it is the interior of the "if (itileList < numTileLists)"
// guard). One warp processes one tile list: it loads the i-atom tile, loops
// over j-tiles, optionally builds the pairlist (doPairlist), accumulates
// forces/energies, then performs warp-level reductions for virial/energy and
// the optional force-streaming epilogue.
00225   {
00226 
00227     float3 iforce;
00228     float3 iforceSlow;
00229     float energyVdw, energyElec, energySlow;
00230     int nexcluded;
00231     unsigned int itileListLen;
00232     int2 patchInd;
00233     int2 patchNumList;
00234     // Per-warp shared-memory staging for the current j-tile (one slot per lane).
00235     __shared__ float4 s_xyzq[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00236     __shared__ int s_vdwtypej[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00237     __shared__ float3 s_jforce[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00238     __shared__ float3 s_jforceSlow[NONBONDKERNEL_NUM_WARP][WARPSIZE];
00239     __shared__ int s_jatomIndex[NONBONDKERNEL_NUM_WARP][WARPSIZE];            
00240 
00241     // Start computation
00242     {
00243       // Warp index (0...warpsize-1)
00244       const int wid = threadIdx.x % WARPSIZE;
00245       const int iwarp = threadIdx.x / WARPSIZE;
00246 
00247       TileList tmp = tileLists[itileList];
00248       int iatomStart = tmp.iatomStart;
00249       int jtileStart = tmp.jtileStart;
00250       int jtileEnd   = tmp.jtileEnd;
00251       patchInd     = tmp.patchInd;
00252       patchNumList = tmp.patchNumList;
00253 
00254       // Periodic-image shift of the i-tile in lattice coordinates.
00255       float shx = tmp.offsetXYZ.x*lata.x + tmp.offsetXYZ.y*latb.x + tmp.offsetXYZ.z*latc.x;
00256       float shy = tmp.offsetXYZ.x*lata.y + tmp.offsetXYZ.y*latb.y + tmp.offsetXYZ.z*latc.y;
00257       float shz = tmp.offsetXYZ.x*lata.z + tmp.offsetXYZ.y*latb.z + tmp.offsetXYZ.z*latc.z;
00258 
00259       // DH - set zeroShift flag if magnitude of shift vector is zero
00260       bool zeroShift = ! (shx*shx + shy*shy + shz*shz > 0);
00261 
00262       int iatomSize, iatomFreeSize, jatomSize, jatomFreeSize;
00263       if (doPairlist) {
00264         PatchPairRecord PPStmp = patchPairs[itileList];
00265         iatomSize     = PPStmp.iatomSize;
00266         iatomFreeSize = PPStmp.iatomFreeSize;
00267         jatomSize     = PPStmp.jatomSize;
00268         jatomFreeSize = PPStmp.jatomFreeSize;
00269       }
00270 
00271       // Write to global memory here to avoid register spilling
00272       if (doVirial) {
00273         if (wid == 0) {
00274           virialEnergy[itileList].shx = shx;
00275           virialEnergy[itileList].shy = shy;
00276           virialEnergy[itileList].shz = shz;
00277         }
00278       }
00279 
00280       // Load i-atom data (and shift coordinates)
00281       float4 xyzq_i = xyzq[iatomStart + wid];
00282       xyzq_i.x += shx;
00283       xyzq_i.y += shy;
00284       xyzq_i.z += shz;
00285       int vdwtypei = vdwTypes[iatomStart + wid]*vdwCoefTableWidth;  // pre-scaled row offset into vdwCoefTable
00286 
00287       // Load i-atom data (and shift coordinates)
00288       BoundingBox boundingBoxI;
00289       if (doPairlist) {
00290         boundingBoxI = boundingBoxes[iatomStart/WARPSIZE];
00291         boundingBoxI.x += shx;
00292         boundingBoxI.y += shy;
00293         boundingBoxI.z += shz;
00294       }
00295 
00296       // Get i-atom global index
00297 #ifdef USE_NEW_EXCL_METHOD
00298       int iatomIndex, minExclAtom, maxExclAtom;
00299 #else
00300       int iatomIndex;
00301 #endif
00302       if (doPairlist) {
00303 #ifdef USE_NEW_EXCL_METHOD
00304         iatomIndex = atomIndex[iatomStart + wid];
00305         int2 tmp = minmaxExclAtom[iatomStart + wid];
00306         minExclAtom = tmp.x;
00307         maxExclAtom = tmp.y;
00308 #else
00309         iatomIndex = atomIndex[iatomStart + wid];
00310 #endif
00311       }
00312 
00313       // i-forces in registers
00314       // float3 iforce;
00315       iforce.x = 0.0f;
00316       iforce.y = 0.0f;
00317       iforce.z = 0.0f;
00318 
00319       // float3 iforceSlow;
00320       if (doSlow) {
00321         iforceSlow.x = 0.0f;
00322         iforceSlow.y = 0.0f;
00323         iforceSlow.z = 0.0f;
00324       }
00325 
00326       // float energyVdw, energyElec, energySlow;
00327       if (doEnergy) {
00328         energyVdw = 0.0f;
00329         energyElec = 0.0f;
00330         if (doSlow) energySlow = 0.0f;
00331       }
00332 
00333       // Number of exclusions
00334       // NOTE: Lowest bit is used as indicator bit for tile pairs:
00335       //       bit 0 tile has no atoms within pairlist cutoff
00336       //       bit 1 tile has atoms within pairlist cutoff
00337       // int nexcluded;
00338       if (doPairlist) nexcluded = 0;
00339 
00340       // Number of i loops and free atoms
00341       int nfreei;
00342       if (doPairlist) {
00343         int nloopi = min(iatomSize - iatomStart, WARPSIZE);
00343         nfreei = max(iatomFreeSize - iatomStart, 0);
00344         if (wid >= nloopi) {
00345           // Padding lanes: park phantom i-atoms far away so r2 tests fail.
00346           xyzq_i.x = -LARGE_FLOAT;
00347           xyzq_i.y = -LARGE_FLOAT;
00348           xyzq_i.z = -LARGE_FLOAT;
00349         }
00350       }
00351 
00352       // tile list stuff
00353       // int itileListLen;
00354       // int minJatomStart;
00355       if (doPairlist) {
00356         // minJatomStart = tileJatomStart[jtileStart];
00357         itileListLen = 0;
00358       }
00359 
00360       // Exclusion index and maxdiff
00361       int iexclIndex, iexclMaxdiff;
00362       if (doPairlist) {
00363         int2 tmp = exclIndexMaxDiff[iatomStart + wid];
00364         iexclIndex   = tmp.x;
00365         iexclMaxdiff = tmp.y;
00366       }
00367 
00368       // Loop over the j-tiles of this tile list.
00369       for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) {
00369 
00370         // Load j-atom starting index and exclusion mask
00371         int jatomStart = tileJatomStart[jtile];
00372 
00373         float4 xyzq_j = xyzq[jatomStart + wid];
00374         WARP_SYNC(WARP_FULL_MASK);        
00375 
00376         // Check for early bail
00377         if (doPairlist) {
00378           float r2bb = distsq(boundingBoxI, xyzq_j);
00379           if (WARP_ALL(WARP_FULL_MASK, r2bb > plcutoff2)) continue;  // whole j-tile outside pairlist cutoff
00380         }
00381         unsigned int excl = (doPairlist) ? 0 : tileExcls[jtile].excl[wid];  // built here (pairlist) or loaded (force-only)
00382         int vdwtypej = vdwTypes[jatomStart + wid];
00383         s_vdwtypej[iwarp][wid] = vdwtypej;
00384 
00385         // Get i-atom global index
00386         if (doPairlist) {
00387           s_jatomIndex[iwarp][wid] = atomIndex[jatomStart + wid];
00388         }
00389 
00390         // Number of j loops and free atoms
00391         int nfreej;
00392         if (doPairlist) {
00393           int nloopj = min(jatomSize - jatomStart, WARPSIZE);
00394           nfreej = max(jatomFreeSize - jatomStart, 0);
00395           //if (nfreei == 0 && nfreej == 0) continue;
00396           if (wid >= nloopj) {
00397             // Padding lanes: phantom j-atoms at +LARGE_FLOAT (opposite sign of phantom i-atoms).
00398             xyzq_j.x = LARGE_FLOAT;
00399             xyzq_j.y = LARGE_FLOAT;
00400             xyzq_j.z = LARGE_FLOAT;
00401           }
00402         }
00403 
00404         s_xyzq[iwarp][wid] = xyzq_j; 
00405 
00406         // DH - self requires that zeroShift is also set
00407         const bool self = zeroShift && (iatomStart == jatomStart);
00408         // For self tiles the index is taken modulo 2*WARPSIZE, so half the j
00409         // values land >= WARPSIZE and are skipped below (avoids double-counting pairs).
00410         const int modval = (self) ? 2*WARPSIZE-1 : WARPSIZE-1;
00411 
00412         s_jforce[iwarp][wid] = make_float3(0.0f, 0.0f, 0.0f);
00413         if (doSlow)
00414           s_jforceSlow[iwarp][wid] = make_float3(0.0f, 0.0f, 0.0f); 
00415         WARP_SYNC(WARP_FULL_MASK);
00416         
00417 
00418         int t = (self) ? 1 : 0;  // self tiles skip the t=0 diagonal in the force loop
00419 
00420         if (doPairlist) {
00421           // Build pair list
00422           // NOTE: Pairlist update, we must also include the diagonal since this is used
00423           //       in GBIS phase 2.
00424           // Clear the lowest (indicator) bit
00425           nexcluded &= (~1);
00426 
00427           // For self tiles, do the diagonal term (t=0).
00428           // NOTE: No energies are computed here, since this self-diagonal term is only for GBIS phase 2
00429           if (self) {
00430             int j = (0 + wid) & modval;
00431             xyzq_j = s_xyzq[iwarp][j];
00432             float dx = xyzq_j.x - xyzq_i.x;
00433             float dy = xyzq_j.y - xyzq_i.y;
00434             float dz = xyzq_j.z - xyzq_i.z;       
00435 
00436             float r2 = dx*dx + dy*dy + dz*dz;
00437 
00438             if (j < WARPSIZE && r2 < plcutoff2) {
00439               // We have atom pair within the pairlist cutoff => Set indicator bit
00440               nexcluded |= 1;
00441             }
00442           }
00443 
00444           for (;t < WARPSIZE;t++) {
00445             int j = (t + wid) & modval;  // rotate so each lane hits a different j-atom per step
00446 
00447             excl >>= 1;
00448             if (j < WARPSIZE ) {
00449               xyzq_j = s_xyzq[iwarp][j];
00450               float dx = xyzq_j.x - xyzq_i.x;
00451               float dy = xyzq_j.y - xyzq_i.y;
00452               float dz = xyzq_j.z - xyzq_i.z;
00453               float r2 = dx*dx + dy*dy + dz*dz;
00454               // We have atom pair within the pairlist cutoff => Set indicator bit
00455               if(r2 < plcutoff2){
00456                 nexcluded |= 1;
00457                 if (j < nfreej || wid < nfreei) {
00458                   bool excluded = false;
00459                   int indexdiff = s_jatomIndex[iwarp][j] - iatomIndex;
00460                   if ( abs(indexdiff) <= iexclMaxdiff) {
00461                     indexdiff += iexclIndex;
00462                     int indexword = ((unsigned int) indexdiff) >> 5;  // word index into the exclusion bitmask
00463 
00464                     if ( indexword < MAX_CONST_EXCLUSIONS ) {
00465                       indexword = constExclusions[indexword];  // fast path: constant-memory exclusions
00466                     } else {
00467                       indexword = overflowExclusions[indexword];  // spill: global-memory exclusions
00468                     }
00469 
00470                     excluded = ((indexword & (1<<(indexdiff&31))) != 0);
00471                   }
00472                   if (excluded) nexcluded += 2;  // count in upper bits; bit 0 is the indicator
00473                   if (!excluded) excl |= 0x80000000;  // record interacting pair in the tile's exclusion mask
00474                   if (!excluded && r2 < cutoff2) {
00475                     calcForceEnergy<doEnergy, doSlow>(
00476                       r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
00477                       vdwtypei, s_vdwtypej[iwarp][j],
00478                       vdwCoefTable, vdwCoefTableTex, forceTableTex, energyTableTex,
00479                       iforce, iforceSlow,
00480                       s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
00481                       energyVdw,
00482                       energyElec, energySlow);
00483                   }
00484                 }
00485               }
00486             }
00487           } // t
00488         } else {
00489           // Just compute forces
00490           if (self) {
00491             excl >>= 1;  // skip the diagonal bit on self tiles
00492           }
00493           for (;t < WARPSIZE;t++) {
00494             if ((excl & 1)) {
00495               xyzq_j = s_xyzq[iwarp][(wid+t) & (WARPSIZE-1)];
00495               float dx = xyzq_j.x - xyzq_i.x;
00496               float dy = xyzq_j.y - xyzq_i.y;
00497               float dz = xyzq_j.z - xyzq_i.z;              
00498 
00499               float r2 = dx*dx + dy*dy + dz*dz;
00500 
00501               if (r2 < cutoff2) {
00502                 calcForceEnergy<doEnergy, doSlow>(
00503                   r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
00504                   vdwtypei, s_vdwtypej[iwarp][(wid+t) & (WARPSIZE-1)], vdwCoefTable, 
00505                   vdwCoefTableTex, forceTableTex, energyTableTex,
00506                   iforce, iforceSlow,
00507                   s_jforce[iwarp][(wid+t) & (WARPSIZE-1)],
00508                   s_jforceSlow[iwarp][(wid+t) & (WARPSIZE-1)],
00509                   energyVdw, energyElec, energySlow);
00510               } // (r2 < cutoff2)
00511             } // (excl & 1)
00512             excl >>= 1;
00513           } // t
00514           WARP_SYNC(WARP_FULL_MASK);
00515         }
00516 
00517 
00518 
00519         // Write j-forces
00520         storeForces<doSlow>(jatomStart + wid, s_jforce[iwarp][wid], s_jforceSlow[iwarp][wid],
00521           devForce_x, devForce_y, devForce_z,
00522           devForceSlow_x, devForceSlow_y, devForceSlow_z);
00523 
00524         // Write exclusions
00525         if (doPairlist && WARP_ANY(WARP_FULL_MASK, nexcluded & 1)) {
00526           int anyexcl = (65536 | WARP_ANY(WARP_FULL_MASK, excl));
00527           // Mark this jtile as non-empty:
00528           //  VdW:      1 if tile has atom pairs within pairlist cutoff and some of these atoms interact
00529           //  GBIS: 65536 if tile has atom pairs within pairlist cutoff but not necessarily interacting (i.e. these atoms are fixed or excluded)
00530           if (wid == 0) jtiles[jtile] = anyexcl;
00531           // Store exclusions
00532           tileExcls[jtile].excl[wid] = excl;
00533           // itileListLen:
00534           // lower 16 bits number of tiles with atom pairs within pairlist cutoff that interact
00535           // upper 16 bits number of tiles with atom pairs within pairlist cutoff (but not necessarily interacting)
00536           itileListLen += anyexcl;
00537           // NOTE, this minJatomStart is only stored once for the first tile list entry
00538           // minJatomStart = min(minJatomStart, jatomStart);
00539         }
00540 
00541       } // jtile
00542 
00543       // Write i-forces
00544       storeForces<doSlow>(iatomStart + wid, iforce, iforceSlow,
00545                           devForce_x, devForce_y, devForce_z,
00546                           devForceSlow_x, devForceSlow_y, devForceSlow_z);
00547     }
00548     // Done with computation
00549 
00550     // Save pairlist stuff
00551     if (doPairlist) {
00552 
00553       // Warp index (0...warpsize-1)
00554       const int wid = threadIdx.x % WARPSIZE;
00555 
00556       if (wid == 0) {
00557         // minJatomStart is in range [0 ... atomStorageSize-1]
00558         //int atom0 = (minJatomStart)/WARPSIZE;
00559         // int atom0 = 0;
00560         // int storageOffset = atomStorageSize/WARPSIZE;
00561         // int itileListLen = 0;
00562         // for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) itileListLen += jtiles[jtile];
00563         // Store 0 if itileListLen == 0
00564         // tileListDepth[itileList] = (itileListLen > 0)*(itileListLen*storageOffset + atom0);
00565         tileListDepth[itileList] = itileListLen;
00566         tileListOrder[itileList] = itileList;
00567         // Number of active tilelists with tile with atom pairs within pairlist cutoff that interact
00568         if ((itileListLen & 65535) > 0) atomicAdd(&tileListStat->numTileLists, 1);
00569         // Number of active tilelists with tiles with atom pairs within pairlist cutoff (but not necessarily interacting)
00570         if (itileListLen > 0) atomicAdd(&tileListStat->numTileListsGBIS, 1);
00571         // NOTE: always numTileListsGBIS >= numTileLists
00572       }
00573 
00574       typedef cub::WarpReduce<int> WarpReduceInt;
00575       __shared__ typename WarpReduceInt::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
00576       int warpId = threadIdx.x / WARPSIZE;
00577       // Remove indicator bit
00578       nexcluded >>= 1;
00579       volatile int nexcludedWarp = WarpReduceInt(tempStorage[warpId]).Sum(nexcluded);
00580       if (wid == 0) atomicAdd(&tileListStat->numExcluded, nexcludedWarp);  // one atomic per warp
00581 
00582     }
00583 
00584     if (doVirial) {
00585       // Warp index (0...warpsize-1)
00586       const int wid = threadIdx.x % WARPSIZE;
00587 
00588       typedef cub::WarpReduce<float> WarpReduce;
00589       __shared__ typename WarpReduce::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
00590       int warpId = threadIdx.x / WARPSIZE;
00591       volatile float iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforce.x);
00592       volatile float iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforce.y);
00593       volatile float iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforce.z);
00594       if (wid == 0) {
00595         virialEnergy[itileList].forcex = iforcexSum;
00596         virialEnergy[itileList].forcey = iforceySum;
00597         virialEnergy[itileList].forcez = iforcezSum;
00598       }
00599 
00600       if (doSlow) {
00601         iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.x);
00602         iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.y);
00603         iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.z);
00604         if (wid == 0) {
00605           virialEnergy[itileList].forceSlowx = iforcexSum;
00606           virialEnergy[itileList].forceSlowy = iforceySum;
00607           virialEnergy[itileList].forceSlowz = iforcezSum;
00608         }
00609       }
00610     }
00611 
00612     // Reduce energy
00613     if (doEnergy) {
00614       // NOTE: We must hand write these warp-wide reductions to avoid excess register spillage
00615       //       (Why does CUB suck here?)
00616 #pragma unroll
00617       for (int i=16;i >= 1;i/=2) {
00618         energyVdw += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, 32);
00619         energyElec += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec, i, 32);
00620         if (doSlow) energySlow += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, 32);
00621       }
00622 
00623       if (threadIdx.x % WARPSIZE == 0) {
00624         virialEnergy[itileList].energyVdw  = energyVdw;
00625         virialEnergy[itileList].energyElec = energyElec;
00626         if (doSlow) virialEnergy[itileList].energySlow = energySlow;
00627       }
00628     }
00629 
00630     if (doStreaming) {
00631       // Make sure devForces and devForcesSlow have been written into device memory
00632       // NO NEED TO SYNCHRONIZE THREADS, THIS IS WARP-LEVEL
00633       __threadfence();
00634 
00635       int patchDone[2] = {false, false};
00636       const int wid = threadIdx.x % WARPSIZE;
00637       if (wid == 0) {
00638         // atomicInc wraps to 0 at patchNumList-1, so the counter resets for the next step.
00639         int patchCountOld0 = atomicInc(&patchNumCount[patchInd.x], (unsigned int)(patchNumList.x-1));
00640         patchDone[0] = (patchCountOld0 + 1 == patchNumList.x);
00641         if (patchInd.x != patchInd.y) {
00642           int patchCountOld1 = atomicInc(&patchNumCount[patchInd.y], (unsigned int)(patchNumList.y-1));
00643           patchDone[1] = (patchCountOld1 + 1 == patchNumList.y);
00644         }
00645       }
00646 
00647       // Broadcast lane 0's result to the whole warp.
00648       patchDone[0] = WARP_ANY(WARP_FULL_MASK, patchDone[0]);
00649       patchDone[1] = WARP_ANY(WARP_FULL_MASK, patchDone[1]);
00650 
00651       if (patchDone[0]) {
00652         // Patch 1 is done, write onto host-mapped memory
00653         CudaPatchRecord patch = cudaPatches[patchInd.x];
00654         int start = patch.atomStart;
00655         int end   = start + patch.numAtoms;
00656         for (int i=start+wid;i < end;i+=WARPSIZE) {
00657           mapForces[i] = make_float4(devForce_x[i],
00658             devForce_y[i], devForce_z[i], devForce_w[i]);
00659           if (doSlow){
00660             mapForcesSlow[i] = make_float4(devForceSlow_x[i],
00661                                            devForceSlow_y[i], 
00662                                            devForceSlow_z[i], 
00663                                            devForceSlow_w[i]);
00664           }
00665         }
00666       }
00667       if (patchDone[1]) {
00668         // Patch 2 is done
00669         CudaPatchRecord patch = cudaPatches[patchInd.y];
00670         int start = patch.atomStart;
00671         int end   = start + patch.numAtoms;
00672         for (int i=start+wid;i < end;i+=WARPSIZE) {
00673           mapForces[i] = make_float4(devForce_x[i],  devForce_y[i], devForce_z[i], devForce_w[i]);
00674           if (doSlow){
00675             mapForcesSlow[i] = make_float4(devForceSlow_x[i],
00676                                           devForceSlow_y[i], 
00677                                           devForceSlow_z[i], 
00678                                           devForceSlow_w[i]);
00679           }
00680         }
00681       }
00682 
00683       if (patchDone[0] || patchDone[1]) {
00684         // Make sure mapForces and mapForcesSlow are up-to-date
00685         __threadfence_system();
00686         // Add patch into "patchReadyQueue"
00687         if (wid == 0) {
00688           if (patchDone[0]) {
00689             int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
00690             // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
00691             mapPatchReadyQueue[ind] = patchInd.x;
00692           }
00693           if (patchDone[1]) {
00694             int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
00695             // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
00696             mapPatchReadyQueue[ind] = patchInd.y;
00697           }
00698         }
00699         // Make sure "patchReadyQueue" is visible in page-locked host memory
00700         __threadfence_system();
00701       }
00702     }
00703 
00704     if (doStreaming && outputOrder != NULL && threadIdx.x % WARPSIZE == 0) {
00705       int index = atomicAdd(&tileListStat->outputOrderIndex, 1);
00706       outputOrder[index] = itileList;
00707     }
00708   } // if (itileList < numTileLists)

void NAMD_die ( const char *   ) 

Definition at line 79 of file common.C.

// Reports a fatal error on stdout and via CkError (prefixed with the replica
// number when running multi-partition), then terminates Charm++ with exit
// status 1. A NULL message is replaced with "(unknown error)".
00080 {
00081   if ( ! err_msg ) err_msg = "(unknown error)";
00082   CkPrintf("FATAL ERROR: %s\n", err_msg);
00083   fflush(stdout);  // make sure the message is flushed before exiting
00084   char repstr[24] = "";
00085   if (CmiNumPartitions() > 1) {
00086     sprintf(repstr,"REPLICA %d ", CmiMyPartition());
00087   }
00088   CkError("%sFATAL ERROR: %s\n", repstr, err_msg);
00089   CkExit(1);
00090 }

__global__ void reduceGBISEnergyKernel ( const int  numTileLists,
const TileListVirialEnergy *__restrict__  tileListVirialEnergy,
VirialEnergy *__restrict__  virialEnergy 
)

Definition at line 951 of file CudaComputeNonbondedKernel.cu.

References BLOCK_SYNC, and tempStorage.

// Sums per-tile-list GBIS energies into virialEnergy->energyGBIS: grid-stride
// loop over tile lists, block-wide CUB reduction, then a single atomicAdd
// from thread 0 of each block.
00953                                            {
00954 
00955   for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)  // grid-stride over tile lists
00956   {
00957     int itileList = ibase + threadIdx.x;
00958     double energyGBISt = 0.0;  // out-of-range threads contribute zero
00959     if (itileList < numTileLists) {
00960       energyGBISt = tileListVirialEnergy[itileList].energyGBIS;
00961     }
00962 
00963     // NOTE(review): uses REDUCEVIRIALENERGYKERNEL_NUM_WARP rather than
00964     // REDUCEGBISENERGYKERNEL_NUM_WARP; both are defined as 32 so behavior is
00965     // unchanged, but the GBIS macro was presumably intended -- confirm the
00966     // launch configuration matches this logical block size.
00963     typedef cub::BlockReduce<double, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00964     __shared__ typename BlockReduce::TempStorage tempStorage;
00965     volatile double energyGBIS = BlockReduce(tempStorage).Sum(energyGBISt); BLOCK_SYNC;
00966     if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
00967   }
00968 
00969 }

__global__ void reduceNonbondedVirialKernel ( const bool  doSlow,
const int  atomStorageSize,
const float4 *__restrict__  xyzq,
const float4 *__restrict__  devForces,
const float4 *__restrict__  devForcesSlow,
VirialEnergy *__restrict__  virialEnergy 
)

Definition at line 709 of file CudaComputeNonbondedKernel.cu.

References BLOCK_SYNC, and tempStorage.

// Accumulates the nonbonded virial tensor (and, when doSlow, the slow-force
// virial) over all stored atoms: grid-stride loop, per-thread outer products
// force*pos, block-wide CUB reductions, and one atomicAdd per tensor element
// from thread 0 of each block.
00713                                            {
00714 
00715   for (int ibase = blockIdx.x*blockDim.x;ibase < atomStorageSize;ibase += blockDim.x*gridDim.x)  // grid-stride over atoms
00716   {
00717     int i = ibase + threadIdx.x;
00718 
00719     // Set to zero to avoid nan*0
00720     float4 pos;
00721     pos.x = 0.0f;
00722     pos.y = 0.0f;
00723     pos.z = 0.0f;
00724     float4 force, forceSlow;
00725     force.x = 0.0f;
00726     force.y = 0.0f;
00727     force.z = 0.0f;
00728     forceSlow.x = 0.0f;
00729     forceSlow.y = 0.0f;
00730     forceSlow.z = 0.0f;
00731     if (i < atomStorageSize) {
00732       pos = xyzq[i];
00733       force = devForces[i];
00734       if (doSlow) forceSlow = devForcesSlow[i];
00735     }
00736     // Reduce across the entire thread block
00737     float vxxt = force.x*pos.x;  // per-thread virial terms (outer product force x pos)
00738     float vxyt = force.x*pos.y;
00739     float vxzt = force.x*pos.z;
00740     float vyxt = force.y*pos.x;
00741     float vyyt = force.y*pos.y;
00742     float vyzt = force.y*pos.z;
00743     float vzxt = force.z*pos.x;
00744     float vzyt = force.z*pos.y;
00745     float vzzt = force.z*pos.z;
00746     // atomicAdd(&virialEnergy->virial[0], (double)vxx);
00747     // atomicAdd(&virialEnergy->virial[1], (double)vxy);
00748     // atomicAdd(&virialEnergy->virial[2], (double)vxz);
00749     // atomicAdd(&virialEnergy->virial[3], (double)vyx);
00750     // atomicAdd(&virialEnergy->virial[4], (double)vyy);
00751     // atomicAdd(&virialEnergy->virial[5], (double)vyz);
00752     // atomicAdd(&virialEnergy->virial[6], (double)vzx);
00753     // atomicAdd(&virialEnergy->virial[7], (double)vzy);
00754     // atomicAdd(&virialEnergy->virial[8], (double)vzz);
00755 
00756     // NOTE(review): BlockReduce assumes blockDim.x == REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE -- confirm launch config.
00756     typedef cub::BlockReduce<float, REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00757     __shared__ typename BlockReduce::TempStorage tempStorage;
00758     volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;  // tempStorage is reused; BLOCK_SYNC between reductions
00759     volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
00760     volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
00761     volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
00762     volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
00763     volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
00764     volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
00765     volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
00766     volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
00767     if (threadIdx.x == 0) {
00768       atomicAdd(&virialEnergy->virial[0], (double)vxx);
00769       atomicAdd(&virialEnergy->virial[1], (double)vxy);
00770       atomicAdd(&virialEnergy->virial[2], (double)vxz);
00771       atomicAdd(&virialEnergy->virial[3], (double)vyx);
00772       atomicAdd(&virialEnergy->virial[4], (double)vyy);
00773       atomicAdd(&virialEnergy->virial[5], (double)vyz);
00774       atomicAdd(&virialEnergy->virial[6], (double)vzx);
00775       atomicAdd(&virialEnergy->virial[7], (double)vzy);
00776       atomicAdd(&virialEnergy->virial[8], (double)vzz);
00777     }
00778 
00779     if (doSlow) {
00780       // if (isnan(forceSlow.x) || isnan(forceSlow.y) || isnan(forceSlow.z))
00781       float vxxSlowt = forceSlow.x*pos.x;  // same outer product, slow-force component
00782       float vxySlowt = forceSlow.x*pos.y;
00783       float vxzSlowt = forceSlow.x*pos.z;
00784       float vyxSlowt = forceSlow.y*pos.x;
00785       float vyySlowt = forceSlow.y*pos.y;
00786       float vyzSlowt = forceSlow.y*pos.z;
00787       float vzxSlowt = forceSlow.z*pos.x;
00788       float vzySlowt = forceSlow.z*pos.y;
00789       float vzzSlowt = forceSlow.z*pos.z;
00790       // atomicAdd(&virialEnergy->virialSlow[0], (double)vxxSlow);
00791       // atomicAdd(&virialEnergy->virialSlow[1], (double)vxySlow);
00792       // atomicAdd(&virialEnergy->virialSlow[2], (double)vxzSlow);
00793       // atomicAdd(&virialEnergy->virialSlow[3], (double)vyxSlow);
00794       // atomicAdd(&virialEnergy->virialSlow[4], (double)vyySlow);
00795       // atomicAdd(&virialEnergy->virialSlow[5], (double)vyzSlow);
00796       // atomicAdd(&virialEnergy->virialSlow[6], (double)vzxSlow);
00797       // atomicAdd(&virialEnergy->virialSlow[7], (double)vzySlow);
00798       // atomicAdd(&virialEnergy->virialSlow[8], (double)vzzSlow);
00799       volatile float vxxSlow = BlockReduce(tempStorage).Sum(vxxSlowt); BLOCK_SYNC;
00800       volatile float vxySlow = BlockReduce(tempStorage).Sum(vxySlowt); BLOCK_SYNC;
00801       volatile float vxzSlow = BlockReduce(tempStorage).Sum(vxzSlowt); BLOCK_SYNC;
00802       volatile float vyxSlow = BlockReduce(tempStorage).Sum(vyxSlowt); BLOCK_SYNC;
00803       volatile float vyySlow = BlockReduce(tempStorage).Sum(vyySlowt); BLOCK_SYNC;
00804       volatile float vyzSlow = BlockReduce(tempStorage).Sum(vyzSlowt); BLOCK_SYNC;
00805       volatile float vzxSlow = BlockReduce(tempStorage).Sum(vzxSlowt); BLOCK_SYNC;
00806       volatile float vzySlow = BlockReduce(tempStorage).Sum(vzySlowt); BLOCK_SYNC;
00807       volatile float vzzSlow = BlockReduce(tempStorage).Sum(vzzSlowt); BLOCK_SYNC;
00808       if (threadIdx.x == 0) {
00809         atomicAdd(&virialEnergy->virialSlow[0], (double)vxxSlow);
00810         atomicAdd(&virialEnergy->virialSlow[1], (double)vxySlow);
00811         atomicAdd(&virialEnergy->virialSlow[2], (double)vxzSlow);
00812         atomicAdd(&virialEnergy->virialSlow[3], (double)vyxSlow);
00813         atomicAdd(&virialEnergy->virialSlow[4], (double)vyySlow);
00814         atomicAdd(&virialEnergy->virialSlow[5], (double)vyzSlow);
00815         atomicAdd(&virialEnergy->virialSlow[6], (double)vzxSlow);
00816         atomicAdd(&virialEnergy->virialSlow[7], (double)vzySlow);
00817         atomicAdd(&virialEnergy->virialSlow[8], (double)vzzSlow);
00818       }
00819     }
00820   
00821   }
00822 
00823 }

__global__ void reduceVirialEnergyKernel ( const bool  doEnergy,
const bool  doVirial,
const bool  doSlow,
const int  numTileLists,
const TileListVirialEnergy *__restrict__  tileListVirialEnergy,
VirialEnergy *__restrict__  virialEnergy 
)

Definition at line 826 of file CudaComputeNonbondedKernel.cu.

References BLOCK_SYNC, TileListVirialEnergy::energyElec, TileListVirialEnergy::energySlow, TileListVirialEnergy::energyVdw, TileListVirialEnergy::forceSlowx, TileListVirialEnergy::forceSlowy, TileListVirialEnergy::forceSlowz, TileListVirialEnergy::forcex, TileListVirialEnergy::forcey, TileListVirialEnergy::forcez, TileListVirialEnergy::shx, TileListVirialEnergy::shy, TileListVirialEnergy::shz, and tempStorage.

// Block-parallel reduction of per-tile-list virial/energy partials into the
// single global VirialEnergy accumulator.  One thread handles one tile-list
// entry per grid-stride iteration; CUB BlockReduce collapses the block's
// partials, and thread 0 commits each component with one atomicAdd.
00830                                            {
00831 
  // Grid-stride loop: any launch configuration covers all numTileLists entries.
00832   for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
00833   {
00834     int itileList = ibase + threadIdx.x;
00835     TileListVirialEnergy ve;
00836     if (itileList < numTileLists) {
00837       ve = tileListVirialEnergy[itileList];
00838     } else {
      // Tail threads past numTileLists still participate in the block-wide
      // reductions below, so their contribution must be exactly zero
      // (uninitialized garbage could be NaN/Inf and poison the sums).
00839       // Set to zero to avoid nan*0
00840       if (doVirial) {
00841         ve.shx = 0.0f;
00842         ve.shy = 0.0f;
00843         ve.shz = 0.0f;
00844         ve.forcex = 0.0f;
00845         ve.forcey = 0.0f;
00846         ve.forcez = 0.0f;
00847         ve.forceSlowx = 0.0f;
00848         ve.forceSlowy = 0.0f;
00849         ve.forceSlowz = 0.0f;
00850       }
00851       if (doEnergy) {
00852         ve.energyVdw = 0.0;
00853         ve.energyElec = 0.0;
00854         ve.energySlow = 0.0;
00855         // ve.energyGBIS = 0.0;
00856       }
00857     }
00858 
    // Fast-force virial: each thread forms its 3x3 outer product
    // force (x) shift, then the nine components are block-reduced in float.
    // doVirial is a kernel argument, so this branch is uniform across the
    // block and the BLOCK_SYNC barriers inside it are safe.
00859     if (doVirial) {
00860       typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00861       __shared__ typename BlockReduce::TempStorage tempStorage;
00862       float vxxt = ve.forcex*ve.shx;
00863       float vxyt = ve.forcex*ve.shy;
00864       float vxzt = ve.forcex*ve.shz;
00865       float vyxt = ve.forcey*ve.shx;
00866       float vyyt = ve.forcey*ve.shy;
00867       float vyzt = ve.forcey*ve.shz;
00868       float vzxt = ve.forcez*ve.shx;
00869       float vzyt = ve.forcez*ve.shy;
00870       float vzzt = ve.forcez*ve.shz;
      // tempStorage is reused by nine consecutive reductions; BLOCK_SYNC
      // after each Sum() keeps one reduction from clobbering the next.
      // The reduced value is valid only on thread 0 (CUB contract).
00871       volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
00872       volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
00873       volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
00874       volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
00875       volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
00876       volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
00877       volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
00878       volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
00879       volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
      // One atomic per tensor component, accumulated in double in global
      // memory (atomicAdd(double*) requires SM60+).
00880       if (threadIdx.x == 0) {
00881         atomicAdd(&virialEnergy->virial[0], (double)vxx);
00882         atomicAdd(&virialEnergy->virial[1], (double)vxy);
00883         atomicAdd(&virialEnergy->virial[2], (double)vxz);
00884         atomicAdd(&virialEnergy->virial[3], (double)vyx);
00885         atomicAdd(&virialEnergy->virial[4], (double)vyy);
00886         atomicAdd(&virialEnergy->virial[5], (double)vyz);
00887         atomicAdd(&virialEnergy->virial[6], (double)vzx);
00888         atomicAdd(&virialEnergy->virial[7], (double)vzy);
00889         atomicAdd(&virialEnergy->virial[8], (double)vzz);
00890       }
00891 
      // Same nine-component reduction for the slow (long-range) force virial.
      // doSlow is also a kernel argument, so this nested branch is uniform.
00892       if (doSlow) {
00893         typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00894         __shared__ typename BlockReduce::TempStorage tempStorage;
00895         float vxxt = ve.forceSlowx*ve.shx;
00896         float vxyt = ve.forceSlowx*ve.shy;
00897         float vxzt = ve.forceSlowx*ve.shz;
00898         float vyxt = ve.forceSlowy*ve.shx;
00899         float vyyt = ve.forceSlowy*ve.shy;
00900         float vyzt = ve.forceSlowy*ve.shz;
00901         float vzxt = ve.forceSlowz*ve.shx;
00902         float vzyt = ve.forceSlowz*ve.shy;
00903         float vzzt = ve.forceSlowz*ve.shz;
00904         volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
00905         volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
00906         volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
00907         volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
00908         volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
00909         volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
00910         volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
00911         volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
00912         volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
00913         if (threadIdx.x == 0) {
00914           atomicAdd(&virialEnergy->virialSlow[0], (double)vxx);
00915           atomicAdd(&virialEnergy->virialSlow[1], (double)vxy);
00916           atomicAdd(&virialEnergy->virialSlow[2], (double)vxz);
00917           atomicAdd(&virialEnergy->virialSlow[3], (double)vyx);
00918           atomicAdd(&virialEnergy->virialSlow[4], (double)vyy);
00919           atomicAdd(&virialEnergy->virialSlow[5], (double)vyz);
00920           atomicAdd(&virialEnergy->virialSlow[6], (double)vzx);
00921           atomicAdd(&virialEnergy->virialSlow[7], (double)vzy);
00922           atomicAdd(&virialEnergy->virialSlow[8], (double)vzz);
00923         }
00924       }
00925     }
00926 
    // Energies are block-reduced in double precision (separate BlockReduce
    // instantiation, hence a separate shared tempStorage).
00927     if (doEnergy) {
00928       typedef cub::BlockReduce<double, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
00929       __shared__ typename BlockReduce::TempStorage tempStorage;
00930       volatile double energyVdw  = BlockReduce(tempStorage).Sum(ve.energyVdw); BLOCK_SYNC;
00931       volatile double energyElec = BlockReduce(tempStorage).Sum(ve.energyElec); BLOCK_SYNC;
00932       if (threadIdx.x == 0) {
00933           atomicAdd(&virialEnergy->energyVdw, (double)energyVdw);
00934           atomicAdd(&virialEnergy->energyElec, (double)energyElec);
00935       }
00936       if (doSlow) {
00937         volatile double energySlow = BlockReduce(tempStorage).Sum(ve.energySlow); BLOCK_SYNC;
00938         if (threadIdx.x == 0) atomicAdd(&virialEnergy->energySlow, (double)energySlow);
00939       }
00940       // if (doGBIS) {
00941       //   double energyGBIS = BlockReduce(tempStorage).Sum(ve.energyGBIS); BLOCK_SYNC;
00942       //   if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
00943       // }
00944     }
00945 
00946   }
00947 
00948 }

template<bool doSlow>
__device__ __forceinline__ void shuffleNext ( float3 &  jforce,
float3 &  jforceSlow 
) [inline]

Definition at line 147 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

// Rotate the accumulated j-atom force (and, when doSlow, the slow-force)
// one lane around the warp ring: each lane receives the partial held by
// lane (threadIdx.x+1) mod WARPSIZE.  All lanes must participate
// (WARP_FULL_MASK); doSlow is a template parameter, so the branch is
// resolved at compile time.
template <bool doSlow>
__device__ __forceinline__ void shuffleNext(float3& jforce, float3& jforceSlow) {
  const int srcLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  jforce.x = WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, srcLane, WARPSIZE);
  jforce.y = WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, srcLane, WARPSIZE);
  jforce.z = WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, srcLane, WARPSIZE);
  if (doSlow) {
    jforceSlow.x = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, srcLane, WARPSIZE);
    jforceSlow.y = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, srcLane, WARPSIZE);
    jforceSlow.z = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, srcLane, WARPSIZE);
  }
}

template<bool doPairlist>
__device__ __forceinline__ void shuffleNext ( float &  xyzq_j_w,
int &  vdwtypej,
int &  jatomIndex 
) [inline]

Definition at line 137 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

// Rotate the j-atom charge (xyzq w-component) and VDW type one lane around
// the warp ring; when doPairlist (compile-time), the j-atom global index is
// rotated as well so pairlist construction keeps identity and data in step.
template <bool doPairlist>
__device__ __forceinline__ void shuffleNext(float& xyzq_j_w, int& vdwtypej, int& jatomIndex) {
  const int srcLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, srcLane, WARPSIZE);
  vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, srcLane, WARPSIZE);
  if (doPairlist) {
    jatomIndex = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, srcLane, WARPSIZE);
  }
}

template<bool doPairlist>
__device__ __forceinline__ void shuffleNext ( float &  xyzq_j_w,
int &  vdwtypej,
int &  jatomIndex,
int &  jexclMaxdiff,
int &  jexclIndex 
) [inline]

Definition at line 125 of file CudaComputeNonbondedKernel.cu.

References WARP_FULL_MASK, WARP_SHUFFLE, and WARPSIZE.

// Rotate the full per-j-atom state one lane around the warp ring: charge
// (xyzq w-component) and VDW type always; when doPairlist (compile-time),
// also the j-atom index and its exclusion-table index/max-diff so exclusion
// lookups stay matched to the rotated atom.
template <bool doPairlist>
__device__ __forceinline__ void shuffleNext(float& xyzq_j_w, int& vdwtypej,
    int& jatomIndex, int& jexclMaxdiff, int& jexclIndex) {
  const int srcLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, srcLane, WARPSIZE);
  vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, srcLane, WARPSIZE);
  if (doPairlist) {
    jatomIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex,   srcLane, WARPSIZE);
    jexclIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jexclIndex,   srcLane, WARPSIZE);
    jexclMaxdiff = WARP_SHUFFLE(WARP_FULL_MASK, jexclMaxdiff, srcLane, WARPSIZE);
  }
}

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float3 *__restrict__  forces,
float3 *__restrict__  forcesSlow 
) [inline]

Definition at line 111 of file CudaComputeNonbondedKernel.cu.

// Atomically accumulate one atom's force into the float3 force array at
// index `pos`; when doSlow (compile-time), also accumulate the slow
// (long-range) force.  Atomics are required because several tiles may
// contribute to the same atom concurrently.
template <bool doSlow>
__device__ __forceinline__ void storeForces(const int pos,
    const float3 force, const float3 forceSlow,
    float3* __restrict__ forces, float3* __restrict__ forcesSlow) {
  atomicAdd(&forces[pos].x, force.x);
  atomicAdd(&forces[pos].y, force.y);
  atomicAdd(&forces[pos].z, force.z);
  if (doSlow) {
    atomicAdd(&forcesSlow[pos].x, forceSlow.x);
    atomicAdd(&forcesSlow[pos].y, forceSlow.y);
    atomicAdd(&forcesSlow[pos].z, forceSlow.z);
  }
}

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float *__restrict__  devForces_x,
float *__restrict__  devForces_y,
float *__restrict__  devForces_z,
float *__restrict__  devForcesSlow_x,
float *__restrict__  devForcesSlow_y,
float *__restrict__  devForcesSlow_z 
) [inline]

Definition at line 91 of file CudaComputeNonbondedKernel.cu.

// Atomically accumulate one atom's force into structure-of-arrays output
// buffers (separate x/y/z arrays, which coalesce better than interleaved
// layouts); when doSlow (compile-time), the slow-force SoA buffers are
// updated as well.
template <bool doSlow>
__device__ __forceinline__ void storeForces(const int pos,
    const float3 force, const float3 forceSlow,
    float* __restrict__ devForces_x,
    float* __restrict__ devForces_y,
    float* __restrict__ devForces_z,
    float* __restrict__ devForcesSlow_x,
    float* __restrict__ devForcesSlow_y,
    float* __restrict__ devForcesSlow_z)
{
  atomicAdd(devForces_x + pos, force.x);
  atomicAdd(devForces_y + pos, force.y);
  atomicAdd(devForces_z + pos, force.z);
  if (doSlow) {
    atomicAdd(devForcesSlow_x + pos, forceSlow.x);
    atomicAdd(devForcesSlow_y + pos, forceSlow.y);
    atomicAdd(devForcesSlow_z + pos, forceSlow.z);
  }
}

template<bool doSlow>
__device__ __forceinline__ void storeForces ( const int  pos,
const float3  force,
const float3  forceSlow,
float4 *__restrict__  devForces,
float4 *__restrict__  devForcesSlow 
) [inline]

Definition at line 77 of file CudaComputeNonbondedKernel.cu.

// Atomically accumulate one atom's force into the float4 force array at
// index `pos` (the w component is left untouched); when doSlow
// (compile-time), the slow-force array is updated as well.
template <bool doSlow>
__device__ __forceinline__ void storeForces(const int pos,
    const float3 force, const float3 forceSlow,
    float4* __restrict__ devForces, float4* __restrict__ devForcesSlow) {
  atomicAdd(&devForces[pos].x, force.x);
  atomicAdd(&devForces[pos].y, force.y);
  atomicAdd(&devForces[pos].z, force.z);
  if (doSlow) {
    atomicAdd(&devForcesSlow[pos].x, forceSlow.x);
    atomicAdd(&devForcesSlow[pos].y, forceSlow.y);
    atomicAdd(&devForcesSlow[pos].z, forceSlow.z);
  }
}

template<int doSlow>
__global__ void transposeForcesKernel ( float4 *  f,
float4 *  fSlow,
float *  fx,
float *  fy,
float *  fz,
float *  fw,
float *  fSlowx,
float *  fSlowy,
float *  fSlowz,
float *  fSloww,
int  n 
) [inline]

Definition at line 1054 of file CudaComputeNonbondedKernel.cu.

01058 {
01059   int tid = blockIdx.x*blockDim.x + threadIdx.x;
01060   if (tid < n) {
01061     f[tid] = make_float4(fx[tid], fy[tid], fz[tid], fw[tid]);
01062     if (doSlow) {
01063       fSlow[tid] = make_float4(fSlowx[tid], fSlowy[tid], fSlowz[tid], fSloww[tid]);
01064     }
01065   }
01066 }


Variable Documentation

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ atomIndex

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

Referenced by HomePatch::doAtomMigration().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ boundingBoxes

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS]

Definition at line 16 of file CudaComputeNonbondedKernel.cu.

Referenced by CudaComputeNonbondedKernel::bindExclusions().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ cudaPatches

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cutoff2

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForce_w

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ devForce_x

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ devForce_y

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForce_z

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ devForces

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_w

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_x

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_y

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_z

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ devForcesSlow

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t energyTableTex

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ exclIndexMaxDiff

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t forceTableTex

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ TileListVirialEnergy* __restrict__ virialEnergy

int itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ jtiles

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 lata

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 latb

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 latc

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ mapForces

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ mapForcesSlow

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ mapPatchReadyQueue

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int numPatches

__global__ void const int numTileLists

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ outputOrder

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ overflowExclusions

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ patchNumCount

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ patchPairs

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ tileExcls

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ tileJatomStart

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ tileListDepth

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ tileListOrder

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ tileLists

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ tileListStat

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ vdwCoefTable

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t vdwCoefTableTex

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int vdwCoefTableWidth

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ vdwTypes

Definition at line 182 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ xyzq

Definition at line 182 of file CudaComputeNonbondedKernel.cu.


Generated on 10 Apr 2020 for NAMD by  doxygen 1.6.1