NAMD
Macros | Functions | Variables
CudaComputeNonbondedKernel.cu File Reference
#include <cuda.h>
#include <namd_cub/cub.cuh>
#include "CudaComputeNonbondedKernel.h"
#include "CudaTileListKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Macros

#define OVERALLOC   1.2f
 
#define MAX_CONST_EXCLUSIONS   2048
 
#define NONBONDKERNEL_NUM_WARP   4
 
#define LARGE_FLOAT   (float)(1.0e10)
 
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
 
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32
 
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
 
#define CALL(DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING)
 

Functions

void NAMD_die (const char *)
 
template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy (const float r2, const float qi, const float qj, const float dx, const float dy, const float dz, const int vdwtypei, const int vdwtypej, const float2 *__restrict__ vdwCoefTable, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, float3 &iforce, float3 &iforceSlow, float3 &jforce, float3 &jforceSlow, float &energyVdw, float &energyElec, float &energySlow)
 
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float4 *__restrict__ devForces, float4 *__restrict__ devForcesSlow)
 
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float *__restrict__ devForces_x, float *__restrict__ devForces_y, float *__restrict__ devForces_z, float *__restrict__ devForcesSlow_x, float *__restrict__ devForcesSlow_y, float *__restrict__ devForcesSlow_z)
 
template<bool doSlow>
__device__ __forceinline__ void storeForces (const int pos, const float3 force, const float3 forceSlow, float3 *__restrict__ forces, float3 *__restrict__ forcesSlow)
 
template<bool doPairlist>
__device__ __forceinline__ void shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex, int &jexclMaxdiff, int &jexclIndex)
 
template<bool doPairlist>
__device__ __forceinline__ void shuffleNext (float &xyzq_j_w, int &vdwtypej, int &jatomIndex)
 
template<bool doSlow>
__device__ __forceinline__ void shuffleNext (float3 &jforce, float3 &jforceSlow)
 
__device__ __forceinline__ float distsq (const BoundingBox a, const float4 b)
 
template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ (WARPSIZE *NONBONDKERNEL_NUM_WARP, doPairlist?(10):(doEnergy?(10):(10))) nonbondedForceKernel(const int start
 
 if (itileList< numTileLists)
 
__global__ void reduceNonbondedVirialKernel (const bool doSlow, const int atomStorageSize, const float4 *__restrict__ xyzq, const float4 *__restrict__ devForces, const float4 *__restrict__ devForcesSlow, VirialEnergy *__restrict__ virialEnergy)
 
__global__ void reduceVirialEnergyKernel (const bool doEnergy, const bool doVirial, const bool doSlow, const int numTileLists, const TileListVirialEnergy *__restrict__ tileListVirialEnergy, VirialEnergy *__restrict__ virialEnergy)
 
__global__ void reduceGBISEnergyKernel (const int numTileLists, const TileListVirialEnergy *__restrict__ tileListVirialEnergy, VirialEnergy *__restrict__ virialEnergy)
 
template<int doSlow>
__global__ void transposeForcesKernel (float4 *f, float4 *fSlow, float *fx, float *fy, float *fz, float *fw, float *fSlowx, float *fSlowy, float *fSlowz, float *fSloww, int n)
 

Variables

__thread DeviceCUDA *deviceCUDA
 
__constant__ unsigned int constExclusions [MAX_CONST_EXCLUSIONS]
 
__global__ void const int numTileLists
 
__global__ void const int
const TileList *__restrict__ 
tileLists
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ 
tileExcls
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ 
tileJatomStart
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int 
vdwCoefTableWidth
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__ 
vdwCoefTable
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ 
vdwTypes
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 
lata
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 
latb
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 
latc
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ 
xyzq
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float 
cutoff2
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t 
vdwCoefTableTex
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t 
forceTableTex
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t 
energyTableTex
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int 
atomStorageSize
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float 
plcutoff2
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__ 
patchPairs
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ 
atomIndex
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ 
exclIndexMaxDiff
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__ 
overflowExclusions
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ 
tileListDepth
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ 
tileListOrder
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ 
jtiles
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ 
tileListStat
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__ 
boundingBoxes
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ 
devForces
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ 
devForcesSlow
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ 
devForce_x
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_y
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_z
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForce_w
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_x
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_y
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_z
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ 
devForceSlow_w
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int 
numPatches
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__ 
patchNumCount
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ 
cudaPatches
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ 
mapForces
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ 
mapForcesSlow
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ 
mapPatchReadyQueue
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ int
*__restrict__ 
outputOrder
 
__global__ void const int
const TileList *__restrict__
TileExcl *__restrict__ const
int *__restrict__ const int
const float2 *__restrict__
const int *__restrict__ const
float3 const float3 const
float3 const float4
*__restrict__ const float
cudaTextureObject_t
cudaTextureObject_t
cudaTextureObject_t const int
const float const
PatchPairRecord *__restrict__
const int *__restrict__ const
int2 *__restrict__ const
unsigned int *__restrict__
unsigned int *__restrict__ int
*__restrict__ int
*__restrict__ TileListStat
*__restrict__ const
BoundingBox *__restrict__
float4 *__restrict__ float4
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ float
*__restrict__ const int
unsigned int *__restrict__
const CudaPatchRecord
*__restrict__ float4
*__restrict__ float4
*__restrict__ int
*__restrict__ int
*__restrict__
TileListVirialEnergy
*__restrict__ virialEnergy int 
itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x
 

Macro Definition Documentation

#define CALL (   DOENERGY,
  DOVIRIAL,
  DOSLOW,
  DOPAIRLIST,
  DOSTREAMING 
)
Value:
nonbondedForceKernel<DOENERGY, DOVIRIAL, DOSLOW, DOPAIRLIST, DOSTREAMING> \
<<< nblock, nthread, shMemSize, stream >>> \
(start, tlKernel.getNumTileLists(), tlKernel.getTileLists(), tlKernel.getTileExcls(), tlKernel.getTileJatomStart(), \
cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getVdwCoefTable(), \
vdwTypes, lata, latb, latc, tlKernel.get_xyzq(), cutoff2, \
cudaNonbondedTables.getVdwCoefTableTex(), cudaNonbondedTables.getForceTableTex(), cudaNonbondedTables.getEnergyTableTex(), \
atomStorageSize, tlKernel.get_plcutoff2(), tlKernel.getPatchPairs(), atomIndex, exclIndexMaxDiff, overflowExclusions, \
tlKernel.getTileListDepth(), tlKernel.getTileListOrder(), tlKernel.getJtiles(), tlKernel.getTileListStatDevPtr(), \
tlKernel.getBoundingBoxes(), d_forces, d_forcesSlow, \
force_x, force_y, force_z, force_w, \
forceSlow_x, forceSlow_y, forceSlow_z, forceSlow_w, \
numPatches, patchNumCountPtr, tlKernel.getCudaPatches(), m_forces, m_forcesSlow, m_patchReadyQueue, \
outputOrderPtr, tlKernel.getTileListVirialEnergy()); called=true
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ atomIndex
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ exclIndexMaxDiff
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ vdwTypes
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ overflowExclusions
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
#define LARGE_FLOAT   (float)(1.0e10)

Definition at line 175 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

#define MAX_CONST_EXCLUSIONS   2048

Definition at line 19 of file CudaComputeNonbondedKernel.cu.

Referenced by CudaComputeNonbondedKernel::bindExclusions(), and if().

#define NONBONDKERNEL_NUM_WARP   4

Definition at line 22 of file CudaComputeNonbondedKernel.cu.

Referenced by if(), and CudaComputeNonbondedKernel::nonbondedForce().

#define OVERALLOC   1.2f
#define REDUCEGBISENERGYKERNEL_NUM_WARP   32
#define REDUCENONBONDEDVIRIALKERNEL_NUM_WARP   32
#define REDUCEVIRIALENERGYKERNEL_NUM_WARP   32

Function Documentation

template<bool doEnergy, bool doVirial, bool doSlow, bool doPairlist, bool doStreaming>
__global__ void __launch_bounds__ ( WARPSIZE * NONBONDKERNEL_NUM_WARP,
doPairlist ? (10) : (doEnergy ? (10) : (10)) 
) const
template<bool doEnergy, bool doSlow>
__device__ __forceinline__ void calcForceEnergy ( const float  r2,
const float  qi,
const float  qj,
const float  dx,
const float  dy,
const float  dz,
const int  vdwtypei,
const int  vdwtypej,
const float2 *__restrict__  vdwCoefTable,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
float3 &  iforce,
float3 &  iforceSlow,
float3 &  jforce,
float3 &  jforceSlow,
float &  energyVdw,
float &  energyElec,
float &  energySlow 
)

Definition at line 26 of file CudaComputeNonbondedKernel.cu.

References __ldg, energyTableTex, forceTableTex, vdwCoefTableTex, float2::x, and float2::y.

32  {
33 
 // Flat index into the LJ coefficient table. NOTE: the caller pre-scales
 // vdwtypei by vdwCoefTableWidth (see the kernel: vdwTypes[...]*vdwCoefTableWidth),
 // so adding the raw j-type gives a row-major (i-type, j-type) lookup.
34  int vdwIndex = vdwtypej + vdwtypei;
35 #if __CUDA_ARCH__ >= 350
 // Kepler and newer: load the LJ coefficient pair through the read-only
 // data cache (__ldg).
36  float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
37 #else
 // Older architectures: read the same table through a bound texture instead.
38  float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
39 #endif
40 
 // Force and energy tables are sampled with the reciprocal distance as the
 // texture coordinate; tex1D gives hardware-interpolated table values.
41  float rinv = rsqrtf(r2);
42  float4 ei;
43  float4 fi = tex1D<float4>(forceTableTex, rinv);
 // Energy table is only fetched when energies are requested (doEnergy is a
 // compile-time template flag, so the branch is free).
44  if (doEnergy) ei = tex1D<float4>(energyTableTex, rinv);
45 
 // fSlow starts as the charge product q_i*q_j; the table entries combine the
 // two LJ coefficients (ljab, presumably the A/B terms - matches the .z/.y
 // slots) with the electrostatic term in .x.
46  float fSlow = qi * qj;
47  float f = ljab.x * fi.z + ljab.y * fi.y + fSlow * fi.x;
48 
49  if (doEnergy) {
50  energyVdw += ljab.x * ei.z + ljab.y * ei.y;
51  energyElec += fSlow * ei.x;
 // .w slot of the energy table holds the slow (long-range correction) energy.
52  if (doSlow) energySlow += fSlow * ei.w;
53  }
 // .w slot of the force table scales the slow-force contribution.
54  if (doSlow) fSlow *= fi.w;
55 
 // Accumulate equal and opposite forces on atoms i and j (Newton's 3rd law):
 // f is a scalar force-over-distance factor applied to the separation vector.
56  float fx = dx * f;
57  float fy = dy * f;
58  float fz = dz * f;
59  iforce.x += fx;
60  iforce.y += fy;
61  iforce.z += fz;
62  jforce.x -= fx;
63  jforce.y -= fy;
64  jforce.z -= fz;
65 
 // Same accumulation for the slow-force component when enabled.
66  if (doSlow) {
67  float fxSlow = dx * fSlow;
68  float fySlow = dy * fSlow;
69  float fzSlow = dz * fSlow;
70  iforceSlow.x += fxSlow;
71  iforceSlow.y += fySlow;
72  iforceSlow.z += fzSlow;
73  jforceSlow.x -= fxSlow;
74  jforceSlow.y -= fySlow;
75  jforceSlow.z -= fzSlow;
76  }
77 }
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t energyTableTex
float x
Definition: PmeSolver.C:3
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t forceTableTex
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t vdwCoefTableTex
float y
Definition: PmeSolver.C:3
#define __ldg
__device__ __forceinline__ float distsq ( const BoundingBox  a,
const float4  b 
)

Definition at line 167 of file CudaComputeNonbondedKernel.cu.

References BoundingBox::wx, BoundingBox::wy, BoundingBox::wz, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

Referenced by buildTileListsBBKernel(), and if().

167  {
 // Squared distance from point b to the axis-aligned bounding box a:
 // per axis, take the distance of b from the box center (a.x/y/z) minus the
 // half-width (a.wx/wy/wz), clamped to zero when b lies inside the box
 // extent along that axis. Result is 0 when b is inside the box.
168  float dx = max(0.0f, fabsf(a.x - b.x) - a.wx);
169  float dy = max(0.0f, fabsf(a.y - b.y) - a.wy);
170  float dz = max(0.0f, fabsf(a.z - b.z) - a.wz);
 // Squared distance avoids a sqrt; callers compare against squared cutoffs.
171  float r2 = dx*dx + dy*dy + dz*dz;
172  return r2;
173 }
if ( )

Definition at line 228 of file CudaComputeNonbondedKernel.cu.

References CudaPatchRecord::atomStart, constExclusions, devForce_x, devForce_y, devForce_z, devForceSlow_x, devForceSlow_y, devForceSlow_z, distsq(), energyTableTex, forceTableTex, PatchPairRecord::iatomFreeSize, PatchPairRecord::iatomSize, TileList::iatomStart, itileList, PatchPairRecord::jatomFreeSize, PatchPairRecord::jatomSize, TileList::jtileEnd, TileList::jtileStart, LARGE_FLOAT, MAX_CONST_EXCLUSIONS, NONBONDKERNEL_NUM_WARP, CudaPatchRecord::numAtoms, TileList::offsetXYZ, TileList::patchInd, TileList::patchNumList, tempStorage, vdwCoefTable, vdwCoefTableTex, vdwCoefTableWidth, WARP_ALL, WARP_ANY, WARP_FULL_MASK, WARP_SHUFFLE_XOR, WARP_SYNC, WARPSIZE, BoundingBox::x, BoundingBox::y, and BoundingBox::z.

229  {
230 
231  float3 iforce;
232  float3 iforceSlow;
233  float energyVdw, energyElec, energySlow;
234  int nexcluded;
235  unsigned int itileListLen;
236  int2 patchInd;
237  int2 patchNumList;
238  __shared__ float4 s_xyzq[NONBONDKERNEL_NUM_WARP][WARPSIZE];
239  __shared__ int s_vdwtypej[NONBONDKERNEL_NUM_WARP][WARPSIZE];
240  __shared__ float3 s_jforce[NONBONDKERNEL_NUM_WARP][WARPSIZE];
241  __shared__ float3 s_jforceSlow[NONBONDKERNEL_NUM_WARP][WARPSIZE];
242  __shared__ int s_jatomIndex[NONBONDKERNEL_NUM_WARP][WARPSIZE];
243 
244  // Start computation
245  {
246  // Warp index (0...warpsize-1)
247  const int wid = threadIdx.x % WARPSIZE;
248  const int iwarp = threadIdx.x / WARPSIZE;
249 
251  int iatomStart = tmp.iatomStart;
252  int jtileStart = tmp.jtileStart;
253  int jtileEnd = tmp.jtileEnd;
254  patchInd = tmp.patchInd;
255  patchNumList = tmp.patchNumList;
256 
257  float shx = tmp.offsetXYZ.x*lata.x + tmp.offsetXYZ.y*latb.x + tmp.offsetXYZ.z*latc.x;
258  float shy = tmp.offsetXYZ.x*lata.y + tmp.offsetXYZ.y*latb.y + tmp.offsetXYZ.z*latc.y;
259  float shz = tmp.offsetXYZ.x*lata.z + tmp.offsetXYZ.y*latb.z + tmp.offsetXYZ.z*latc.z;
260 
261  // DH - set zeroShift flag if magnitude of shift vector is zero
262  bool zeroShift = ! (shx*shx + shy*shy + shz*shz > 0);
263 
264  int iatomSize, iatomFreeSize, jatomSize, jatomFreeSize;
265  if (doPairlist) {
267  iatomSize = PPStmp.iatomSize;
268  iatomFreeSize = PPStmp.iatomFreeSize;
269  jatomSize = PPStmp.jatomSize;
270  jatomFreeSize = PPStmp.jatomFreeSize;
271  }
272 
273  // Write to global memory here to avoid register spilling
274  if (doVirial) {
275  if (wid == 0) {
276  virialEnergy[itileList].shx = shx;
277  virialEnergy[itileList].shy = shy;
278  virialEnergy[itileList].shz = shz;
279  }
280  }
281 
282  // Load i-atom data (and shift coordinates)
283  float4 xyzq_i = xyzq[iatomStart + wid];
284  xyzq_i.x += shx;
285  xyzq_i.y += shy;
286  xyzq_i.z += shz;
287  int vdwtypei = vdwTypes[iatomStart + wid]*vdwCoefTableWidth;
288 
289  // Load i-atom bounding box (and shift its center coordinates)
290  BoundingBox boundingBoxI;
291  if (doPairlist) {
292  boundingBoxI = boundingBoxes[iatomStart/WARPSIZE];
293  boundingBoxI.x += shx;
294  boundingBoxI.y += shy;
295  boundingBoxI.z += shz;
296  }
297 
298  // Get i-atom global index
299 #ifdef USE_NEW_EXCL_METHOD
300  int iatomIndex, minExclAtom, maxExclAtom;
301 #else
302  int iatomIndex;
303 #endif
304  if (doPairlist) {
305 #ifdef USE_NEW_EXCL_METHOD
306  iatomIndex = atomIndex[iatomStart + wid];
307  int2 tmp = minmaxExclAtom[iatomStart + wid];
308  minExclAtom = tmp.x;
309  maxExclAtom = tmp.y;
310 #else
311  iatomIndex = atomIndex[iatomStart + wid];
312 #endif
313  }
314 
315  // i-forces in registers
316  // float3 iforce;
317  iforce.x = 0.0f;
318  iforce.y = 0.0f;
319  iforce.z = 0.0f;
320 
321  // float3 iforceSlow;
322  if (doSlow) {
323  iforceSlow.x = 0.0f;
324  iforceSlow.y = 0.0f;
325  iforceSlow.z = 0.0f;
326  }
327 
328  // float energyVdw, energyElec, energySlow;
329  if (doEnergy) {
330  energyVdw = 0.0f;
331  energyElec = 0.0f;
332  if (doSlow) energySlow = 0.0f;
333  }
334 
335  // Number of exclusions
336  // NOTE: Lowest bit is used as indicator bit for tile pairs:
337  // bit 0 tile has no atoms within pairlist cutoff
338  // bit 1 tile has atoms within pairlist cutoff
339  // int nexcluded;
340  if (doPairlist) nexcluded = 0;
341 
342  // Number of i loops and free atoms
343  int nfreei;
344  if (doPairlist) {
345  int nloopi = min(iatomSize - iatomStart, WARPSIZE);
346  nfreei = max(iatomFreeSize - iatomStart, 0);
347  if (wid >= nloopi) {
348  xyzq_i.x = -LARGE_FLOAT;
349  xyzq_i.y = -LARGE_FLOAT;
350  xyzq_i.z = -LARGE_FLOAT;
351  }
352  }
353 
354  // tile list stuff
355  // int itileListLen;
356  // int minJatomStart;
357  if (doPairlist) {
358  // minJatomStart = tileJatomStart[jtileStart];
359  itileListLen = 0;
360  }
361 
362  // Exclusion index and maxdiff
363  int iexclIndex, iexclMaxdiff;
364  if (doPairlist) {
365  int2 tmp = exclIndexMaxDiff[iatomStart + wid];
366  iexclIndex = tmp.x;
367  iexclMaxdiff = tmp.y;
368  }
369 
370  for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) {
371 
372  // Load j-atom starting index and exclusion mask
373  int jatomStart = tileJatomStart[jtile];
374 
375  float4 xyzq_j = xyzq[jatomStart + wid];
377 
378  // Check for early bail
379  if (doPairlist) {
380  float r2bb = distsq(boundingBoxI, xyzq_j);
381  if (WARP_ALL(WARP_FULL_MASK, r2bb > plcutoff2)) continue;
382  }
383  unsigned int excl = (doPairlist) ? 0 : tileExcls[jtile].excl[wid];
384  int vdwtypej = vdwTypes[jatomStart + wid];
385  s_vdwtypej[iwarp][wid] = vdwtypej;
386 
387  // Get j-atom global index
388  if (doPairlist) {
389  s_jatomIndex[iwarp][wid] = atomIndex[jatomStart + wid];
390  }
391 
392  // Number of j loops and free atoms
393  int nfreej;
394  if (doPairlist) {
395  int nloopj = min(jatomSize - jatomStart, WARPSIZE);
396  nfreej = max(jatomFreeSize - jatomStart, 0);
397  //if (nfreei == 0 && nfreej == 0) continue;
398  if (wid >= nloopj) {
399  xyzq_j.x = LARGE_FLOAT;
400  xyzq_j.y = LARGE_FLOAT;
401  xyzq_j.z = LARGE_FLOAT;
402  }
403  }
404 
405  s_xyzq[iwarp][wid] = xyzq_j;
406 
407  // DH - self requires that zeroShift is also set
408  const bool self = zeroShift && (iatomStart == jatomStart);
409  const int modval = (self) ? 2*WARPSIZE-1 : WARPSIZE-1;
410 
411  s_jforce[iwarp][wid] = make_float3(0.0f, 0.0f, 0.0f);
412  if (doSlow)
413  s_jforceSlow[iwarp][wid] = make_float3(0.0f, 0.0f, 0.0f);
415 
416 
417  int t = (self) ? 1 : 0;
418 
419  if (doPairlist) {
420  // Build pair list
421  // NOTE: Pairlist update, we must also include the diagonal since this is used
422  // in GBIS phase 2.
423  // Clear the lowest (indicator) bit
424  nexcluded &= (~1);
425 
426  // For self tiles, do the diagonal term (t=0).
427  // NOTE: No energies are computed here, since this self-diagonal term is only for GBIS phase 2
428  if (self) {
429  int j = (0 + wid) & modval;
430  xyzq_j = s_xyzq[iwarp][j];
431  float dx = xyzq_j.x - xyzq_i.x;
432  float dy = xyzq_j.y - xyzq_i.y;
433  float dz = xyzq_j.z - xyzq_i.z;
434 
435  float r2 = dx*dx + dy*dy + dz*dz;
436 
437  if (j < WARPSIZE && r2 < plcutoff2) {
438  // We have atom pair within the pairlist cutoff => Set indicator bit
439  nexcluded |= 1;
440  }
441  }
442 
443  for (;t < WARPSIZE;t++) {
444  int j = (t + wid) & modval;
445 
446  excl >>= 1;
447  if (j < WARPSIZE ) {
448  xyzq_j = s_xyzq[iwarp][j];
449  float dx = xyzq_j.x - xyzq_i.x;
450  float dy = xyzq_j.y - xyzq_i.y;
451  float dz = xyzq_j.z - xyzq_i.z;
452  float r2 = dx*dx + dy*dy + dz*dz;
453  // We have atom pair within the pairlist cutoff => Set indicator bit
454  if(r2 < plcutoff2){
455  nexcluded |= 1;
456  if (j < nfreej || wid < nfreei) {
457  bool excluded = false;
458  int indexdiff = s_jatomIndex[iwarp][j] - iatomIndex;
459  if ( abs(indexdiff) <= iexclMaxdiff) {
460  indexdiff += iexclIndex;
461  int indexword = ((unsigned int) indexdiff) >> 5;
462 
463  if ( indexword < MAX_CONST_EXCLUSIONS ) {
464  indexword = constExclusions[indexword];
465  } else {
466  indexword = overflowExclusions[indexword];
467  }
468 
469  excluded = ((indexword & (1<<(indexdiff&31))) != 0);
470  }
471  if (excluded) nexcluded += 2;
472  if (!excluded) excl |= 0x80000000;
473  if (!excluded && r2 < cutoff2) {
474  calcForceEnergy<doEnergy, doSlow>(
475  r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
476  vdwtypei, s_vdwtypej[iwarp][j],
478  iforce, iforceSlow,
479  s_jforce[iwarp][j], s_jforceSlow[iwarp][j],
480  energyVdw,
481  energyElec, energySlow);
482  }
483  }
484  }
485  }
487  } // t
488  } else {
489  // Just compute forces
490  if (self) {
491  excl >>= 1;
492  }
493  for (;t < WARPSIZE;t++) {
494  if ((excl & 1)) {
495  xyzq_j = s_xyzq[iwarp][(wid+t) & (WARPSIZE-1)];
496  float dx = xyzq_j.x - xyzq_i.x;
497  float dy = xyzq_j.y - xyzq_i.y;
498  float dz = xyzq_j.z - xyzq_i.z;
499 
500  float r2 = dx*dx + dy*dy + dz*dz;
501 
502  if (r2 < cutoff2) {
503  calcForceEnergy<doEnergy, doSlow>(
504  r2, xyzq_i.w, xyzq_j.w, dx, dy, dz,
505  vdwtypei, s_vdwtypej[iwarp][(wid+t) & (WARPSIZE-1)], vdwCoefTable,
507  iforce, iforceSlow,
508  s_jforce[iwarp][(wid+t) & (WARPSIZE-1)],
509  s_jforceSlow[iwarp][(wid+t) & (WARPSIZE-1)],
510  energyVdw, energyElec, energySlow);
511  } // (r2 < cutoff2)
512  } // (excl & 1)
513  excl >>= 1;
515  } // t
516  }
517 
518  // Write j-forces
519  storeForces<doSlow>(jatomStart + wid, s_jforce[iwarp][wid], s_jforceSlow[iwarp][wid],
522 
523  // Write exclusions
524  if (doPairlist && WARP_ANY(WARP_FULL_MASK, nexcluded & 1)) {
525  int anyexcl = (65536 | WARP_ANY(WARP_FULL_MASK, excl));
526  // Mark this jtile as non-empty:
527  // VdW: 1 if tile has atom pairs within pairlist cutoff and some these atoms interact
528  // GBIS: 65536 if tile has atom pairs within pairlist cutoff but not necessary interacting (i.e. these atoms are fixed or excluded)
529  if (wid == 0) jtiles[jtile] = anyexcl;
530  // Store exclusions
531  tileExcls[jtile].excl[wid] = excl;
532  // itileListLen:
533  // lower 16 bits number of tiles with atom pairs within pairlist cutoff that interact
534  // upper 16 bits number of tiles with atom pairs within pairlist cutoff (but not necessary interacting)
535  itileListLen += anyexcl;
536  // NOTE, this minJatomStart is only stored once for the first tile list entry
537  // minJatomStart = min(minJatomStart, jatomStart);
538  }
539 
540  } // jtile
541 
542  // Write i-forces
543  storeForces<doSlow>(iatomStart + wid, iforce, iforceSlow,
546  }
547  // Done with computation
548 
549  // Save pairlist stuff
550  if (doPairlist) {
551 
552  // Warp index (0...warpsize-1)
553  const int wid = threadIdx.x % WARPSIZE;
554 
555  if (wid == 0) {
556  // minJatomStart is in range [0 ... atomStorageSize-1]
557  //int atom0 = (minJatomStart)/WARPSIZE;
558  // int atom0 = 0;
559  // int storageOffset = atomStorageSize/WARPSIZE;
560  // int itileListLen = 0;
561  // for (int jtile=jtileStart;jtile <= jtileEnd;jtile++) itileListLen += jtiles[jtile];
562  // Store 0 if itileListLen == 0
563  // tileListDepth[itileList] = (itileListLen > 0)*(itileListLen*storageOffset + atom0);
564  tileListDepth[itileList] = itileListLen;
566  // Number of active tilelists with tile with atom pairs within pairlist cutoff that interact
567  if ((itileListLen & 65535) > 0) atomicAdd(&tileListStat->numTileLists, 1);
568  // Number of active tilelists with tiles with atom pairs within pairlist cutoff (but not necessary interacting)
569  if (itileListLen > 0) atomicAdd(&tileListStat->numTileListsGBIS, 1);
570  // NOTE: always numTileListsGBIS >= numTileLists
571  }
572 
573  typedef cub::WarpReduce<int> WarpReduceInt;
574  __shared__ typename WarpReduceInt::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
575  int warpId = threadIdx.x / WARPSIZE;
576  // Remove indicator bit
577  nexcluded >>= 1;
578  volatile int nexcludedWarp = WarpReduceInt(tempStorage[warpId]).Sum(nexcluded);
579  if (wid == 0) atomicAdd(&tileListStat->numExcluded, nexcludedWarp);
580 
581  }
582 
583  if (doVirial) {
584  // Warp index (0...warpsize-1)
585  const int wid = threadIdx.x % WARPSIZE;
586 
587  typedef cub::WarpReduce<float> WarpReduce;
588  __shared__ typename WarpReduce::TempStorage tempStorage[NONBONDKERNEL_NUM_WARP];
589  int warpId = threadIdx.x / WARPSIZE;
590  volatile float iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforce.x);
592  volatile float iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforce.y);
594  volatile float iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforce.z);
596  if (wid == 0) {
597  virialEnergy[itileList].forcex = iforcexSum;
598  virialEnergy[itileList].forcey = iforceySum;
599  virialEnergy[itileList].forcez = iforcezSum;
600  }
601 
602  if (doSlow) {
603  iforcexSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.x);
605  iforceySum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.y);
607  iforcezSum = WarpReduce(tempStorage[warpId]).Sum(iforceSlow.z);
609  if (wid == 0) {
610  virialEnergy[itileList].forceSlowx = iforcexSum;
611  virialEnergy[itileList].forceSlowy = iforceySum;
612  virialEnergy[itileList].forceSlowz = iforcezSum;
613  }
614  }
615  }
616 
617  // Reduce energy
618  if (doEnergy) {
619  // NOTE: We must hand write these warp-wide reductions to avoid excess register spillage
620  // (Why does CUB suck here?)
621 #pragma unroll
622  for (int i=16;i >= 1;i/=2) {
623  energyVdw += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyVdw, i, 32);
624  energyElec += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energyElec, i, 32);
625  if (doSlow) energySlow += WARP_SHUFFLE_XOR(WARP_FULL_MASK, energySlow, i, 32);
626  }
627 
628  if (threadIdx.x % WARPSIZE == 0) {
629  virialEnergy[itileList].energyVdw = energyVdw;
630  virialEnergy[itileList].energyElec = energyElec;
631  if (doSlow) virialEnergy[itileList].energySlow = energySlow;
632  }
633  }
634 
635  if (doStreaming) {
636  // Make sure devForces and devForcesSlow have been written into device memory
638  __threadfence();
639 
640  int patchDone[2] = {false, false};
641  const int wid = threadIdx.x % WARPSIZE;
642  if (wid == 0) {
643  int patchCountOld0 = atomicInc(&patchNumCount[patchInd.x], (unsigned int)(patchNumList.x-1));
644  patchDone[0] = (patchCountOld0 + 1 == patchNumList.x);
645  if (patchInd.x != patchInd.y) {
646  int patchCountOld1 = atomicInc(&patchNumCount[patchInd.y], (unsigned int)(patchNumList.y-1));
647  patchDone[1] = (patchCountOld1 + 1 == patchNumList.y);
648  }
649  }
650 
651  patchDone[0] = WARP_ANY(WARP_FULL_MASK, patchDone[0]);
652  patchDone[1] = WARP_ANY(WARP_FULL_MASK, patchDone[1]);
653 
654  if (patchDone[0]) {
655  // Patch 1 is done, write onto host-mapped memory
656  CudaPatchRecord patch = cudaPatches[patchInd.x];
657  int start = patch.atomStart;
658  int end = start + patch.numAtoms;
659  for (int i=start+wid;i < end;i+=WARPSIZE) {
660  mapForces[i] = make_float4(devForce_x[i],
661  devForce_y[i], devForce_z[i], devForce_w[i]);
662  if (doSlow){
663  mapForcesSlow[i] = make_float4(devForceSlow_x[i],
664  devForceSlow_y[i],
665  devForceSlow_z[i],
666  devForceSlow_w[i]);
667  }
668  }
669  }
670  if (patchDone[1]) {
671  // Patch 2 is done
672  CudaPatchRecord patch = cudaPatches[patchInd.y];
673  int start = patch.atomStart;
674  int end = start + patch.numAtoms;
675  for (int i=start+wid;i < end;i+=WARPSIZE) {
676  mapForces[i] = make_float4(devForce_x[i], devForce_y[i], devForce_z[i], devForce_w[i]);
677  if (doSlow){
678  mapForcesSlow[i] = make_float4(devForceSlow_x[i],
679  devForceSlow_y[i],
680  devForceSlow_z[i],
681  devForceSlow_w[i]);
682  }
683  }
684  }
685 
686  if (patchDone[0] || patchDone[1]) {
687  // Make sure mapForces and mapForcesSlow are up-to-date
689  __threadfence_system();
690  // Add patch into "patchReadyQueue"
691  if (wid == 0) {
692  if (patchDone[0]) {
693  int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
694  // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
695  mapPatchReadyQueue[ind] = patchInd.x;
696  }
697  if (patchDone[1]) {
698  int ind = atomicAdd(&tileListStat->patchReadyQueueCount, 1);
699  // int ind = atomicInc((unsigned int *)&mapPatchReadyQueue[numPatches], numPatches-1);
700  mapPatchReadyQueue[ind] = patchInd.y;
701  }
702  }
703  }
704  }
705 
706  if (doStreaming && outputOrder != NULL && threadIdx.x % WARPSIZE == 0) {
707  int index = atomicAdd(&tileListStat->outputOrderIndex, 1);
708  outputOrder[index] = itileList;
709  }
710  } // if (itileList < numTileLists)
#define WARP_ALL(MASK, P)
Definition: CudaUtils.h:40
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t energyTableTex
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ patchNumCount
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ mapForcesSlow
#define WARP_FULL_MASK
Definition: CudaUtils.h:11
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ devForce_y
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ tileListStat
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ TileListVirialEnergy *__restrict__ virialEnergy int itileList
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ devForce_x
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t forceTableTex
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ tileJatomStart
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ cudaPatches
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ tileListDepth
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ tileListOrder
#define LARGE_FLOAT
#define NONBONDKERNEL_NUM_WARP
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t vdwCoefTableTex
__global__ void const int const TileList *__restrict__ tileLists
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ atomIndex
__device__ __forceinline__ float distsq(const BoundingBox a, const float4 b)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int vdwCoefTableWidth
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ boundingBoxes
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ exclIndexMaxDiff
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ devForce_z
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ vdwCoefTable
#define MAX_CONST_EXCLUSIONS
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ vdwTypes
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ devForceSlow_z
float3 offsetXYZ
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ devForceSlow_x
__constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS]
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ mapForces
__shared__ union @43 tempStorage
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ devForce_w
#define WARP_SYNC(MASK)
Definition: CudaUtils.h:43
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ jtiles
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ overflowExclusions
#define WARP_SHUFFLE_XOR(MASK, VAR, LANE, SIZE)
Definition: CudaUtils.h:32
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ mapPatchReadyQueue
#define WARPSIZE
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ patchPairs
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ tileExcls
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ devForceSlow_y
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ outputOrder
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ devForceSlow_w
#define WARP_ANY(MASK, P)
Definition: CudaUtils.h:41
void NAMD_die ( const char *  )

Definition at line 83 of file common.C.

84 {
85  if ( ! err_msg ) err_msg = "(unknown error)";
86  CkPrintf("FATAL ERROR: %s\n", err_msg);
87  fflush(stdout);
88  char repstr[24] = "";
89  if (CmiNumPartitions() > 1) {
90  sprintf(repstr,"REPLICA %d ", CmiMyPartition());
91  }
92  CkError("%sFATAL ERROR: %s\n", repstr, err_msg);
93 #if CHARM_VERSION < 61000
94  CkExit();
95 #else
96  CkExit(1);
97 #endif
98 }
// Sum per-tile-list GBIS energies into virialEnergy->energyGBIS.
// Grid-stride over tile lists; each block reduces its slice with CUB and
// thread 0 of the block issues one double-precision atomicAdd (requires SM60+).
__global__ void reduceGBISEnergyKernel(const int numTileLists,
  const TileListVirialEnergy* __restrict__ tileListVirialEnergy,
  VirialEnergy* __restrict__ virialEnergy) {

  for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
  {
    int itileList = ibase + threadIdx.x;
    double energyGBISt = 0.0;  // tail threads contribute zero
    if (itileList < numTileLists) {
      energyGBISt = tileListVirialEnergy[itileList].energyGBIS;
    }

    // Use the GBIS-specific warp-count macro so the BlockReduce width matches
    // this kernel's launch configuration. (The original referenced
    // REDUCEVIRIALENERGYKERNEL_NUM_WARP; both macros are 32, so behavior is
    // unchanged, but this is the macro defined for this kernel.)
    typedef cub::BlockReduce<double, REDUCEGBISENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
    __shared__ typename BlockReduce::TempStorage tempStorage;
    volatile double energyGBIS = BlockReduce(tempStorage).Sum(energyGBISt); BLOCK_SYNC;
    if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
  }

}
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ TileListVirialEnergy *__restrict__ virialEnergy int itileList
__global__ void const int numTileLists
__shared__ union @43 tempStorage
// Accumulate the nonbonded virial tensor(s) into virialEnergy->virial[9]
// (and ->virialSlow[9] when doSlow). Each thread forms the outer product
// force (x) position for one atom; the block reduces each of the nine
// components with CUB, and thread 0 adds the block totals atomically
// (double-precision atomicAdd, SM60+). Grid-stride over atomStorageSize.
__global__ void reduceNonbondedVirialKernel(const bool doSlow,
  const int atomStorageSize,
  const float4* __restrict__ xyzq,
  const float4* __restrict__ devForces,
  const float4* __restrict__ devForcesSlow,
  VirialEnergy* __restrict__ virialEnergy) {

  for (int ibase = blockIdx.x*blockDim.x;ibase < atomStorageSize;ibase += blockDim.x*gridDim.x)
  {
    const int i = ibase + threadIdx.x;

    // Tail threads keep zeros so their (possibly nan-free) products add nothing.
    float4 pos = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    float4 force = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    float4 forceSlow = make_float4(0.0f, 0.0f, 0.0f, 0.0f);
    if (i < atomStorageSize) {
      pos = xyzq[i];
      force = devForces[i];
      if (doSlow) forceSlow = devForcesSlow[i];
    }

    typedef cub::BlockReduce<float, REDUCENONBONDEDVIRIALKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
    __shared__ typename BlockReduce::TempStorage tempStorage;

    // Row-major outer product: v[3*r + c] = force_r * pos_c.
    float vt[9] = { force.x*pos.x, force.x*pos.y, force.x*pos.z,
                    force.y*pos.x, force.y*pos.y, force.y*pos.z,
                    force.z*pos.x, force.z*pos.y, force.z*pos.z };
    float v[9];
#pragma unroll
    for (int k = 0;k < 9;k++) {
      // tempStorage is reused; BLOCK_SYNC separates consecutive reductions.
      v[k] = BlockReduce(tempStorage).Sum(vt[k]); BLOCK_SYNC;
    }
    if (threadIdx.x == 0) {
#pragma unroll
      for (int k = 0;k < 9;k++) atomicAdd(&virialEnergy->virial[k], (double)v[k]);
    }

    if (doSlow) {
      // Same nine-component reduction for the slow (long-range) forces.
      float vst[9] = { forceSlow.x*pos.x, forceSlow.x*pos.y, forceSlow.x*pos.z,
                       forceSlow.y*pos.x, forceSlow.y*pos.y, forceSlow.y*pos.z,
                       forceSlow.z*pos.x, forceSlow.z*pos.y, forceSlow.z*pos.z };
      float vs[9];
#pragma unroll
      for (int k = 0;k < 9;k++) {
        vs[k] = BlockReduce(tempStorage).Sum(vst[k]); BLOCK_SYNC;
      }
      if (threadIdx.x == 0) {
#pragma unroll
        for (int k = 0;k < 9;k++) atomicAdd(&virialEnergy->virialSlow[k], (double)vs[k]);
      }
    }

  }

}
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ devForces
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ devForcesSlow
double virialSlow[9]
__shared__ union @43 tempStorage
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
// Reduce per-tile-list virial and energy contributions into *virialEnergy.
// Grid-stride over tile lists; CUB block reductions followed by one atomicAdd
// per component from thread 0 (double-precision atomicAdd, SM60+).
__global__ void reduceVirialEnergyKernel(const bool doEnergy,
  const bool doVirial,
  const bool doSlow,
  const int numTileLists,
  const TileListVirialEnergy* __restrict__ tileListVirialEnergy,
  VirialEnergy* __restrict__ virialEnergy) {

  for (int ibase = blockIdx.x*blockDim.x;ibase < numTileLists;ibase += blockDim.x*gridDim.x)
  {
    int itileList = ibase + threadIdx.x;
    // NOTE: this declaration (source line 843) was dropped by the extraction;
    // without it `ve` is undeclared and the kernel does not compile.
    TileListVirialEnergy ve;
    if (itileList < numTileLists) {
      ve = tileListVirialEnergy[itileList];
    } else {
      // Tail threads: zero only the fields the active options will read,
      // to avoid nan*0 in the reductions below.
      if (doVirial) {
        ve.shx = 0.0f;
        ve.shy = 0.0f;
        ve.shz = 0.0f;
        ve.forcex = 0.0f;
        ve.forcey = 0.0f;
        ve.forcez = 0.0f;
        ve.forceSlowx = 0.0f;
        ve.forceSlowy = 0.0f;
        ve.forceSlowz = 0.0f;
      }
      if (doEnergy) {
        ve.energyVdw = 0.0;
        ve.energyElec = 0.0;
        ve.energySlow = 0.0;
        // ve.energyGBIS = 0.0;
      }
    }

    if (doVirial) {
      typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
      __shared__ typename BlockReduce::TempStorage tempStorage;
      // Outer product (per-tile-list force) x (shift vector sh*).
      float vxxt = ve.forcex*ve.shx;
      float vxyt = ve.forcex*ve.shy;
      float vxzt = ve.forcex*ve.shz;
      float vyxt = ve.forcey*ve.shx;
      float vyyt = ve.forcey*ve.shy;
      float vyzt = ve.forcey*ve.shz;
      float vzxt = ve.forcez*ve.shx;
      float vzyt = ve.forcez*ve.shy;
      float vzzt = ve.forcez*ve.shz;
      // tempStorage is reused; BLOCK_SYNC separates consecutive reductions.
      volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
      volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
      volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
      volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
      volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
      volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
      volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
      volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
      volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
      if (threadIdx.x == 0) {
        atomicAdd(&virialEnergy->virial[0], (double)vxx);
        atomicAdd(&virialEnergy->virial[1], (double)vxy);
        atomicAdd(&virialEnergy->virial[2], (double)vxz);
        atomicAdd(&virialEnergy->virial[3], (double)vyx);
        atomicAdd(&virialEnergy->virial[4], (double)vyy);
        atomicAdd(&virialEnergy->virial[5], (double)vyz);
        atomicAdd(&virialEnergy->virial[6], (double)vzx);
        atomicAdd(&virialEnergy->virial[7], (double)vzy);
        atomicAdd(&virialEnergy->virial[8], (double)vzz);
      }

      if (doSlow) {
        // Same reduction for the slow-force virial.
        typedef cub::BlockReduce<float, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
        __shared__ typename BlockReduce::TempStorage tempStorage;
        float vxxt = ve.forceSlowx*ve.shx;
        float vxyt = ve.forceSlowx*ve.shy;
        float vxzt = ve.forceSlowx*ve.shz;
        float vyxt = ve.forceSlowy*ve.shx;
        float vyyt = ve.forceSlowy*ve.shy;
        float vyzt = ve.forceSlowy*ve.shz;
        float vzxt = ve.forceSlowz*ve.shx;
        float vzyt = ve.forceSlowz*ve.shy;
        float vzzt = ve.forceSlowz*ve.shz;
        volatile float vxx = BlockReduce(tempStorage).Sum(vxxt); BLOCK_SYNC;
        volatile float vxy = BlockReduce(tempStorage).Sum(vxyt); BLOCK_SYNC;
        volatile float vxz = BlockReduce(tempStorage).Sum(vxzt); BLOCK_SYNC;
        volatile float vyx = BlockReduce(tempStorage).Sum(vyxt); BLOCK_SYNC;
        volatile float vyy = BlockReduce(tempStorage).Sum(vyyt); BLOCK_SYNC;
        volatile float vyz = BlockReduce(tempStorage).Sum(vyzt); BLOCK_SYNC;
        volatile float vzx = BlockReduce(tempStorage).Sum(vzxt); BLOCK_SYNC;
        volatile float vzy = BlockReduce(tempStorage).Sum(vzyt); BLOCK_SYNC;
        volatile float vzz = BlockReduce(tempStorage).Sum(vzzt); BLOCK_SYNC;
        if (threadIdx.x == 0) {
          atomicAdd(&virialEnergy->virialSlow[0], (double)vxx);
          atomicAdd(&virialEnergy->virialSlow[1], (double)vxy);
          atomicAdd(&virialEnergy->virialSlow[2], (double)vxz);
          atomicAdd(&virialEnergy->virialSlow[3], (double)vyx);
          atomicAdd(&virialEnergy->virialSlow[4], (double)vyy);
          atomicAdd(&virialEnergy->virialSlow[5], (double)vyz);
          atomicAdd(&virialEnergy->virialSlow[6], (double)vzx);
          atomicAdd(&virialEnergy->virialSlow[7], (double)vzy);
          atomicAdd(&virialEnergy->virialSlow[8], (double)vzz);
        }
      }
    }

    if (doEnergy) {
      // Energies are accumulated in double precision.
      typedef cub::BlockReduce<double, REDUCEVIRIALENERGYKERNEL_NUM_WARP*WARPSIZE> BlockReduce;
      __shared__ typename BlockReduce::TempStorage tempStorage;
      volatile double energyVdw  = BlockReduce(tempStorage).Sum(ve.energyVdw); BLOCK_SYNC;
      volatile double energyElec = BlockReduce(tempStorage).Sum(ve.energyElec); BLOCK_SYNC;
      if (threadIdx.x == 0) {
        atomicAdd(&virialEnergy->energyVdw, (double)energyVdw);
        atomicAdd(&virialEnergy->energyElec, (double)energyElec);
      }
      if (doSlow) {
        volatile double energySlow = BlockReduce(tempStorage).Sum(ve.energySlow); BLOCK_SYNC;
        if (threadIdx.x == 0) atomicAdd(&virialEnergy->energySlow, (double)energySlow);
      }
      // if (doGBIS) {
      //   double energyGBIS = BlockReduce(tempStorage).Sum(ve.energyGBIS); BLOCK_SYNC;
      //   if (threadIdx.x == 0) atomicAdd(&virialEnergy->energyGBIS, (double)energyGBIS);
      // }
    }

  }

}
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ TileListVirialEnergy *__restrict__ virialEnergy int itileList
double virialSlow[9]
__global__ void const int numTileLists
__shared__ union @43 tempStorage
// Rotate the j-atom state one lane forward within the warp (lane k takes
// lane k+1's values, wrapping at WARPSIZE). The pairlist-building bookkeeping
// (jatomIndex, jexclIndex, jexclMaxdiff) is rotated only when doPairlist.
template <bool doPairlist>
__device__ __forceinline__
void shuffleNext(float& xyzq_j_w, int& vdwtypej, int& jatomIndex,
  int& jexclMaxdiff, int& jexclIndex)
{
  // Source lane for every shuffle below: the next lane, with wrap-around.
  const int nextLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, nextLane, WARPSIZE);
  vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, nextLane, WARPSIZE);
  if (doPairlist) {
    jatomIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex,   nextLane, WARPSIZE);
    jexclIndex   = WARP_SHUFFLE(WARP_FULL_MASK, jexclIndex,   nextLane, WARPSIZE);
    jexclMaxdiff = WARP_SHUFFLE(WARP_FULL_MASK, jexclMaxdiff, nextLane, WARPSIZE);
  }
}
#define WARP_FULL_MASK
Definition: CudaUtils.h:11
#define WARPSIZE
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE)
Definition: CudaUtils.h:38
// Rotate the j-atom charge and VDW type one lane forward within the warp
// (wrap-around rotation); jatomIndex rotates too when doPairlist.
template <bool doPairlist>
__device__ __forceinline__
void shuffleNext(float& xyzq_j_w, int& vdwtypej, int& jatomIndex)
{
  const int nextLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  xyzq_j_w = WARP_SHUFFLE(WARP_FULL_MASK, xyzq_j_w, nextLane, WARPSIZE);
  vdwtypej = WARP_SHUFFLE(WARP_FULL_MASK, vdwtypej, nextLane, WARPSIZE);
  if (doPairlist) {
    jatomIndex = WARP_SHUFFLE(WARP_FULL_MASK, jatomIndex, nextLane, WARPSIZE);
  }
}
#define WARP_FULL_MASK
Definition: CudaUtils.h:11
#define WARPSIZE
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE)
Definition: CudaUtils.h:38
// Rotate the accumulated j-forces one lane forward within the warp
// (wrap-around rotation); the slow-force accumulator rotates only when doSlow.
template <bool doSlow>
__device__ __forceinline__
void shuffleNext(float3& jforce, float3& jforceSlow)
{
  const int nextLane = (threadIdx.x + 1) & (WARPSIZE - 1);
  jforce.x = WARP_SHUFFLE(WARP_FULL_MASK, jforce.x, nextLane, WARPSIZE);
  jforce.y = WARP_SHUFFLE(WARP_FULL_MASK, jforce.y, nextLane, WARPSIZE);
  jforce.z = WARP_SHUFFLE(WARP_FULL_MASK, jforce.z, nextLane, WARPSIZE);
  if (doSlow) {
    jforceSlow.x = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.x, nextLane, WARPSIZE);
    jforceSlow.y = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.y, nextLane, WARPSIZE);
    jforceSlow.z = WARP_SHUFFLE(WARP_FULL_MASK, jforceSlow.z, nextLane, WARPSIZE);
  }
}
#define WARP_FULL_MASK
Definition: CudaUtils.h:11
#define WARPSIZE
#define WARP_SHUFFLE(MASK, VAR, LANE, SIZE)
Definition: CudaUtils.h:38
// Atomically accumulate a force contribution into the AoS float4 force array
// at slot `pos` (atomics because several tiles may target the same atom).
// The slow-force array is touched only when doSlow.
template <bool doSlow>
__device__ __forceinline__
void storeForces(const int pos, const float3 force, const float3 forceSlow,
  float4* __restrict__ devForces, float4* __restrict__ devForcesSlow)
{
  float4* dst = &devForces[pos];
  atomicAdd(&dst->x, force.x);
  atomicAdd(&dst->y, force.y);
  atomicAdd(&dst->z, force.z);
  if (doSlow) {
    float4* dstSlow = &devForcesSlow[pos];
    atomicAdd(&dstSlow->x, forceSlow.x);
    atomicAdd(&dstSlow->y, forceSlow.y);
    atomicAdd(&dstSlow->z, forceSlow.z);
  }
}
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ devForces
gridSize z
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ devForcesSlow
gridSize y
gridSize x
// Atomically accumulate a force contribution into SoA (separate x/y/z)
// force arrays at slot `pos`; the slow-force arrays only when doSlow.
template <bool doSlow>
__device__ __forceinline__
void storeForces(const int pos, const float3 force, const float3 forceSlow,
  float* __restrict__ devForces_x,
  float* __restrict__ devForces_y,
  float* __restrict__ devForces_z,
  float* __restrict__ devForcesSlow_x,
  float* __restrict__ devForcesSlow_y,
  float* __restrict__ devForcesSlow_z)
{
  atomicAdd(devForces_x + pos, force.x);
  atomicAdd(devForces_y + pos, force.y);
  atomicAdd(devForces_z + pos, force.z);
  if (doSlow) {
    atomicAdd(devForcesSlow_x + pos, forceSlow.x);
    atomicAdd(devForcesSlow_y + pos, forceSlow.y);
    atomicAdd(devForcesSlow_z + pos, forceSlow.z);
  }
}
// Atomically accumulate a force contribution into the float3 force array
// at slot `pos`; the slow-force array only when doSlow.
template <bool doSlow>
__device__ __forceinline__
void storeForces(const int pos, const float3 force, const float3 forceSlow,
  float3* __restrict__ forces, float3* __restrict__ forcesSlow)
{
  float3* dst = &forces[pos];
  atomicAdd(&dst->x, force.x);
  atomicAdd(&dst->y, force.y);
  atomicAdd(&dst->z, force.z);
  if (doSlow) {
    float3* dstSlow = &forcesSlow[pos];
    atomicAdd(&dstSlow->x, forceSlow.x);
    atomicAdd(&dstSlow->y, forceSlow.y);
    atomicAdd(&dstSlow->z, forceSlow.z);
  }
}
static __thread float4 * forces
gridSize z
gridSize y
gridSize x
// Gather SoA force components (fx/fy/fz/fw) into the AoS float4 array f,
// and likewise for the slow-force arrays when doSlow. One thread per element;
// the launch must cover n elements (tail threads return immediately).
template <int doSlow>
__global__ void transposeForcesKernel(float4* f, float4* fSlow,
  float* fx, float* fy, float* fz, float* fw,
  float* fSlowx, float* fSlowy, float* fSlowz, float* fSloww,
  int n)
{
  const int i = blockIdx.x*blockDim.x + threadIdx.x;
  if (i >= n) return;
  f[i] = make_float4(fx[i], fy[i], fz[i], fw[i]);
  if (doSlow) {
    fSlow[i] = make_float4(fSlowx[i], fSlowy[i], fSlowz[i], fSloww[i]);
  }
}

Variable Documentation

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ atomIndex
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ boundingBoxes

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__constant__ unsigned int constExclusions[MAX_CONST_EXCLUSIONS]

Definition at line 20 of file CudaComputeNonbondedKernel.cu.

Referenced by CudaComputeNonbondedKernel::bindExclusions(), and if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ cudaPatches

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cutoff2
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForce_w

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ devForce_x

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ devForce_y

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForce_z

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ devForces

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_w

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_x

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_y

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ devForceSlow_z

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ devForcesSlow

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t energyTableTex

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by calcForceEnergy(), if(), and modifiedExclusionForce().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ exclIndexMaxDiff

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t forceTableTex

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by calcForceEnergy(), if(), and modifiedExclusionForce().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ TileListVirialEnergy* __restrict__ virialEnergy

int itileList = start + threadIdx.x/WARPSIZE + blockDim.x/WARPSIZE*blockIdx.x
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ jtiles

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 lata
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 latb
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 latc
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ mapForces

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ mapForcesSlow

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ mapPatchReadyQueue

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int numPatches
__global__ void const int numTileLists

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by buildTileListsBBKernel().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ const CudaPatchRecord* __restrict__ float4* __restrict__ float4* __restrict__ int* __restrict__ int* __restrict__ outputOrder

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ overflowExclusions

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ const BoundingBox* __restrict__ float4* __restrict__ float4* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ float* __restrict__ const int unsigned int* __restrict__ patchNumCount

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ patchPairs

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ tileExcls

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ tileJatomStart

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ tileListDepth

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ tileListOrder

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ tileLists

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord* __restrict__ const int* __restrict__ const int2* __restrict__ const unsigned int* __restrict__ unsigned int* __restrict__ int* __restrict__ int* __restrict__ TileListStat* __restrict__ tileListStat

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ vdwCoefTable

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

Referenced by if(), and modifiedExclusionForcesKernel().

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ const float cudaTextureObject_t vdwCoefTableTex
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int vdwCoefTableWidth
__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ vdwTypes

Definition at line 186 of file CudaComputeNonbondedKernel.cu.

__global__ void const int const TileList* __restrict__ TileExcl* __restrict__ const int* __restrict__ const int const float2* __restrict__ const int* __restrict__ const float3 const float3 const float3 const float4* __restrict__ xyzq