1001 maxTileListLen = maxTileListLenIn;
1006 reallocate_device<int>(&patchNumLists, &patchNumListsSize,
numPatches);
1007 reallocate_device<int>(&emptyPatches, &emptyPatchesSize,
numPatches+1);
1008 reallocate_host<int>(&h_emptyPatches, &h_emptyPatchesSize,
numPatches+1);
1012 reallocate_device<TileList>(&tileLists1, &tileLists1Size, numTileListsPrev,
OVERALLOC);
1013 reallocate_device<PatchPairRecord>(&patchPairs1, &patchPairs1Size, numTileListsPrev,
OVERALLOC);
1020 reallocate_device<int>(&tilePos, &tilePosSize, numComputes,
OVERALLOC);
1025 calcTileListPosKernel<1024> <<< nblock, nthread, 0,
stream >>>
1026 (numComputes, cudaComputes,
cudaPatches, tilePos);
1034 updatePatchesKernel<32> <<< nblock, nthread, 0,
stream >>>
1035 (numComputes, tilePos, cudaComputes,
cudaPatches, tileLists1);
1044 reallocate_device<unsigned int>(&tileListDepth2, &tileListDepth2Size, numTileListsPrev + 1,
OVERALLOC);
1045 reallocate_device<int>(&tileListOrder2, &tileListOrder2Size, numTileListsPrev,
OVERALLOC);
1048 reallocate_device<unsigned int>(&tileListDepth1, &tileListDepth1Size, numTileListsPrev + 1,
OVERALLOC);
1050 reallocate_device<int>(&tileListOrder1, &tileListOrder1Size, numTileListsPrev,
OVERALLOC);
1058 int numBoundingBoxes = atomStorageSize/
WARPSIZE;
1074 if(shmem_size > maxShmemPerBlock){
1075 NAMD_die(
"CudaTileListKernel::buildTileLists, maximum shared memory allocation exceeded. Too many atoms in a patch");
1082 int reallocCount = 0;
1084 reallocate_device<int>(&tileJatomStart1, &tileJatomStart1Size, numJtiles,
OVERALLOC);
1089 buildTileListsBBKernel <<< nblock, nthread, shmem_size, stream >>>
1090 (numTileListsPrev, tileLists1,
cudaPatches, tilePos,
1093 tileListDepth1, tileListOrder1, patchPairs1,
1099 copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1,
stream);
1101 numJtiles = h_tileListStat->numJtiles;
1103 if (h_tileListStat->tilesSizeExceeded) {
1105 if (reallocCount > 1) {
1106 NAMD_die(
"CudaTileListKernel::buildTileLists, multiple reallocations detected");
1120 reallocate_device<TileListVirialEnergy>(&tileListVirialEnergy, &tileListVirialEnergySize,
numTileLists,
OVERALLOC);
1124 reallocate_device<int>(&tileJatomStart2, &tileJatomStart2Size, numJtiles,
OVERALLOC);
1125 reallocate_device<TileExcl>(&tileExcls1, &tileExcls1Size, numJtiles,
OVERALLOC);
1126 reallocate_device<TileExcl>(&tileExcls2, &tileExcls2Size, numJtiles,
OVERALLOC);
1129 int numJtilesSrc = numJtiles;
1131 int numJtilesDst = numJtiles;
1137 numTileListsSrc, numJtilesSrc,
1138 PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
1139 PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
1140 PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
1141 numTileListsDst, numJtilesDst,
1142 PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
1143 PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
1144 PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int numPatches
void clearTileListStat(cudaStream_t stream)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ cudaPatches
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ boundingBoxes
const int numTileListsDst
__host__ __device__ __forceinline__ int buildTileListsBBKernel_shmem_sizePerThread(const int maxTileListLen)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2
void NAMD_die(const char *err_msg)
__global__ void const int numTileLists
#define BOUNDINGBOXKERNEL_NUM_WARP
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ jtiles
__thread DeviceCUDA * deviceCUDA
#define TILELISTKERNELNEW_NUM_WARP
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float const PatchPairRecord *__restrict__ const int *__restrict__ const int2 *__restrict__ const unsigned int *__restrict__ unsigned int *__restrict__ int *__restrict__ int *__restrict__ TileListStat *__restrict__ const BoundingBox *__restrict__ float4 *__restrict__ float4 *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ float *__restrict__ const int unsigned int *__restrict__ const CudaPatchRecord *__restrict__ float4 *__restrict__ float4 *__restrict__ int *__restrict__ int *__restrict__ outputOrder