CudaTileListKernel Class Reference

#include <CudaTileListKernel.h>

List of all members.

Classes

struct  PtrSize

Public Member Functions

 CudaTileListKernel (int deviceID, bool doStreaming)
 ~CudaTileListKernel ()
int getNumEmptyPatches ()
int * getEmptyPatches ()
int getNumExcluded ()
float get_plcutoff2 ()
int getNumTileLists ()
int getNumTileListsGBIS ()
int getNumJtiles ()
BoundingBoxgetBoundingBoxes ()
int * getJtiles ()
float4 * get_xyzq ()
TileListStatgetTileListStatDevPtr ()
void clearTileListStat (cudaStream_t stream)
int * getTileJatomStart ()
TileListgetTileLists ()
unsigned int * getTileListDepth ()
int * getTileListOrder ()
TileExclgetTileExcls ()
PatchPairRecordgetPatchPairs ()
int * getTileJatomStartGBIS ()
TileListgetTileListsGBIS ()
TileListVirialEnergygetTileListVirialEnergy ()
CudaPatchRecordgetCudaPatches ()
void prepareTileList (cudaStream_t stream)
void finishTileList (cudaStream_t stream)
void updateComputes (const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
void buildTileLists (const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, const size_t maxShmemPerBlock, cudaStream_t stream)
void reSortTileLists (const bool doGBIS, cudaStream_t stream)
void setTileListVirialEnergyLength (int len)
void setTileListVirialEnergyGBISLength (int len)
int getTileListVirialEnergyLength ()
int getTileListVirialEnergyGBISLength ()
int getNumPatches ()
int getNumComputes ()
int * getOutputOrder ()

Detailed Description

Definition at line 87 of file CudaTileListKernel.h.


Constructor & Destructor Documentation

CudaTileListKernel::CudaTileListKernel ( int  deviceID,
bool  doStreaming 
)

Definition at line 705 of file CudaTileListKernel.cu.

References cudaCheck.

00705                                                                      :
00706 deviceID(deviceID), doStreaming(doStreaming) {
00707 
00708   cudaCheck(cudaSetDevice(deviceID));
00709 
00710   activeBuffer = 1;
00711 
00712   numPatches = 0;
00713   numComputes = 0;
00714 
00715   cudaPatches = NULL;
00716   cudaPatchesSize = 0;
00717 
00718   cudaComputes = NULL;
00719   cudaComputesSize = 0;
00720 
00721   patchNumLists = NULL;
00722   patchNumListsSize = 0;
00723 
00724   emptyPatches = NULL;
00725   emptyPatchesSize = 0;
00726   h_emptyPatches = NULL;
00727   h_emptyPatchesSize = 0;
00728   numEmptyPatches = 0;
00729 
00730   sortKeySrc = NULL;
00731   sortKeySrcSize = 0;
00732   sortKeyDst = NULL;
00733   sortKeyDstSize = 0;
00734 
00735   tileLists1 = NULL;
00736   tileLists1Size = 0;
00737   tileLists2 = NULL;
00738   tileLists2Size = 0;
00739 
00740   patchPairs1 = NULL;
00741   patchPairs1Size = 0;
00742   patchPairs2 = NULL;
00743   patchPairs2Size = 0;
00744 
00745   tileJatomStart1 = NULL;
00746   tileJatomStart1Size = 0;
00747   tileJatomStart2 = NULL;
00748   tileJatomStart2Size = 0;
00749 
00750   boundingBoxes = NULL;
00751   boundingBoxesSize = 0;
00752 
00753   tileListDepth1 = NULL;
00754   tileListDepth1Size = 0;
00755   tileListDepth2 = NULL;
00756   tileListDepth2Size = 0;
00757 
00758   tileListOrder1 = NULL;
00759   tileListOrder1Size = 0;
00760   tileListOrder2 = NULL;
00761   tileListOrder2Size = 0;
00762 
00763   tileExcls1 = NULL;
00764   tileExcls1Size = 0;
00765   tileExcls2 = NULL;
00766   tileExcls2Size = 0;
00767 
00768   xyzq = NULL;
00769   xyzqSize = 0;
00770 
00771   allocate_device<TileListStat>(&d_tileListStat, 1);
00772   allocate_host<TileListStat>(&h_tileListStat, 1);
00773 
00774   tileListPos = NULL;
00775   tileListPosSize = 0;
00776   tempStorage = NULL;
00777   tempStorageSize = 0;
00778 
00779   jtiles = NULL;
00780   jtilesSize = 0;
00781 
00782   tilePos = NULL;
00783   tilePosSize = 0;
00784 
00785   tileListsGBIS = NULL;
00786   tileListsGBISSize = 0;
00787 
00788   tileJatomStartGBIS = NULL;
00789   tileJatomStartGBISSize = 0;
00790 
00791   tileListVirialEnergy = NULL;
00792   tileListVirialEnergySize = 0;
00793 
00794   atomStorageSize = 0;
00795   numTileLists = 0;
00796   numTileListsGBIS = 0;
00797   numJtiles = 1;
00798 
00799   outputOrder = NULL;
00800   outputOrderSize = 0;
00801   doOutputOrder = false;
00802 
00803   minmaxListLen = NULL;
00804   minmaxListLenSize = 0;
00805 
00806   sortKeys = NULL;
00807   sortKeysSize = 0;
00808   sortKeys_endbit = 0;
00809 
00810   cudaCheck(cudaEventCreate(&tileListStatEvent));
00811   tileListStatEventRecord = false;
00812 }

CudaTileListKernel::~CudaTileListKernel (  ) 

Definition at line 814 of file CudaTileListKernel.cu.

References cudaCheck.

00814                                         {
00815   cudaCheck(cudaSetDevice(deviceID));
00816   deallocate_device<TileListStat>(&d_tileListStat);
00817   deallocate_host<TileListStat>(&h_tileListStat);
00818   //
00819   if (patchNumLists != NULL) deallocate_device<int>(&patchNumLists);
00820   if (emptyPatches != NULL) deallocate_device<int>(&emptyPatches);
00821   if (h_emptyPatches != NULL) deallocate_host<int>(&h_emptyPatches);
00822   if (sortKeySrc != NULL) deallocate_device<unsigned int>(&sortKeySrc);
00823   if (sortKeyDst != NULL) deallocate_device<unsigned int>(&sortKeyDst);
00824   //
00825   if (cudaPatches != NULL) deallocate_device<CudaPatchRecord>(&cudaPatches);
00826   if (cudaComputes != NULL) deallocate_device<CudaComputeRecord>(&cudaComputes);
00827   if (patchPairs1 != NULL) deallocate_device<PatchPairRecord>(&patchPairs1);
00828   if (patchPairs2 != NULL) deallocate_device<PatchPairRecord>(&patchPairs2);
00829   if (tileLists1 != NULL) deallocate_device<TileList>(&tileLists1);
00830   if (tileLists2 != NULL) deallocate_device<TileList>(&tileLists2);
00831   if (tileJatomStart1 != NULL) deallocate_device<int>(&tileJatomStart1);
00832   if (tileJatomStart2 != NULL) deallocate_device<int>(&tileJatomStart2);
00833   if (boundingBoxes != NULL) deallocate_device<BoundingBox>(&boundingBoxes);
00834   if (tileListDepth1 != NULL) deallocate_device<unsigned int>(&tileListDepth1);
00835   if (tileListDepth2 != NULL) deallocate_device<unsigned int>(&tileListDepth2);
00836   if (tileListOrder1 != NULL) deallocate_device<int>(&tileListOrder1);
00837   if (tileListOrder2 != NULL) deallocate_device<int>(&tileListOrder2);
00838   if (tileListPos != NULL) deallocate_device<int>(&tileListPos);
00839   if (tileExcls1 != NULL) deallocate_device<TileExcl>(&tileExcls1);
00840   if (tileExcls2 != NULL) deallocate_device<TileExcl>(&tileExcls2);
00841   if (tempStorage != NULL) deallocate_device<char>(&tempStorage);
00842   if (jtiles != NULL) deallocate_device<int>(&jtiles);
00843   if (tilePos != NULL) deallocate_device<int>(&tilePos);
00844 
00845   if (tileListsGBIS != NULL) deallocate_device<TileList>(&tileListsGBIS);
00846   if (tileJatomStartGBIS != NULL) deallocate_device<int>(&tileJatomStartGBIS);
00847 
00848   if (tileListVirialEnergy != NULL) deallocate_device<TileListVirialEnergy>(&tileListVirialEnergy);
00849 
00850   if (xyzq != NULL) deallocate_device<float4>(&xyzq);
00851 
00852   if (sortKeys != NULL) deallocate_device<unsigned int>(&sortKeys);
00853   if (minmaxListLen != NULL) deallocate_device<int2>(&minmaxListLen);
00854 
00855   cudaCheck(cudaEventDestroy(tileListStatEvent));
00856 }


Member Function Documentation

void CudaTileListKernel::buildTileLists ( const int  numTileListsPrev,
const int  numPatchesIn,
const int  atomStorageSizeIn,
const int  maxTileListLenIn,
const float3  lata,
const float3  latb,
const float3  latc,
const CudaPatchRecord h_cudaPatches,
const float4 *  h_xyzq,
const float  plcutoff2In,
const size_t  maxShmemPerBlock,
cudaStream_t  stream 
)

Definition at line 992 of file CudaTileListKernel.cu.

References BOUNDINGBOXKERNEL_NUM_WARP, buildTileListsBBKernel_shmem_sizePerThread(), clearTileListStat(), cudaCheck, deviceCUDA, DeviceCUDA::getMaxNumBlocks(), NAMD_die(), TileListStat::numTileLists, OVERALLOC, TILELISTKERNELNEW_NUM_WARP, TileListStat::tilesSizeExceeded, and WARPSIZE.

00997                        {
00998 
00999   numPatches = numPatchesIn;
01000   atomStorageSize = atomStorageSizeIn;
01001   maxTileListLen = maxTileListLenIn;
01002   plcutoff2 = plcutoff2In;
01003 
01004   if (doStreaming) {
01005     // Re-allocate patchNumLists
01006     reallocate_device<int>(&patchNumLists, &patchNumListsSize, numPatches);
01007     reallocate_device<int>(&emptyPatches, &emptyPatchesSize, numPatches+1);
01008     reallocate_host<int>(&h_emptyPatches, &h_emptyPatchesSize, numPatches+1);
01009   }
01010 
01011   // Re-allocate (tileLists1, patchPairs1
01012   reallocate_device<TileList>(&tileLists1, &tileLists1Size, numTileListsPrev, OVERALLOC);
01013   reallocate_device<PatchPairRecord>(&patchPairs1, &patchPairs1Size, numTileListsPrev, OVERALLOC);
01014 
01015   // Copy cudaPatches to device
01016   reallocate_device<CudaPatchRecord>(&cudaPatches, &cudaPatchesSize, numPatches);
01017   copy_HtoD<CudaPatchRecord>(h_cudaPatches, cudaPatches, numPatches, stream);
01018 
01019   // Re-allocate temporary storage
01020   reallocate_device<int>(&tilePos, &tilePosSize, numComputes, OVERALLOC);
01021   // Calculate tile list positions (tilePos)
01022   {
01023     int nthread = 1024;
01024     int nblock = 1;
01025     calcTileListPosKernel<1024> <<< nblock, nthread, 0, stream >>>
01026     (numComputes, cudaComputes, cudaPatches, tilePos);
01027     cudaCheck(cudaGetLastError());
01028   }
01029 
01030   // Build (tileLists1.patchInd, tileLists1.offsetXYZ)
01031   {
01032     int nthread = 512;
01033     int nblock = min(deviceCUDA->getMaxNumBlocks(), (numComputes-1)/(nthread/32)+1);
01034     updatePatchesKernel<32> <<< nblock, nthread, 0, stream >>>
01035     (numComputes, tilePos, cudaComputes, cudaPatches, tileLists1);
01036     cudaCheck(cudaGetLastError());
01037   }
01038 
01039   // ---------------------------------------------------------------------------------------------
01040 
01041 
01042   // NOTE: tileListDepth2 and tileListOrder2 must have at least same size as
01043   // tileListDepth2 and tileListOrder2 since they're used in sorting
01044   reallocate_device<unsigned int>(&tileListDepth2, &tileListDepth2Size, numTileListsPrev + 1, OVERALLOC);
01045   reallocate_device<int>(&tileListOrder2, &tileListOrder2Size, numTileListsPrev, OVERALLOC);
01046 
01047   // Allocate with +1 to include last term in the exclusive sum
01048   reallocate_device<unsigned int>(&tileListDepth1, &tileListDepth1Size, numTileListsPrev + 1, OVERALLOC);
01049 
01050   reallocate_device<int>(&tileListOrder1, &tileListOrder1Size, numTileListsPrev, OVERALLOC);
01051 
01052   reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, OVERALLOC);
01053 
01054   copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
01055 
01056   // Fills in boundingBoxes[0 ... numBoundingBoxes-1]
01057   {
01058     int numBoundingBoxes = atomStorageSize/WARPSIZE;
01059     reallocate_device<BoundingBox>(&boundingBoxes, &boundingBoxesSize, numBoundingBoxes, OVERALLOC);
01060 
01061     int nwarp = BOUNDINGBOXKERNEL_NUM_WARP;
01062     int nthread = WARPSIZE*nwarp;
01063     int nblock = min(deviceCUDA->getMaxNumBlocks(), (atomStorageSize-1)/nthread+1);
01064     buildBoundingBoxesKernel <<< nblock, nthread, 0, stream >>> (atomStorageSize, xyzq, boundingBoxes);
01065     cudaCheck(cudaGetLastError());
01066   }
01067 
01068   {
01069     int nwarp = TILELISTKERNELNEW_NUM_WARP;
01070     int nthread = WARPSIZE*nwarp;
01071     int nblock = min(deviceCUDA->getMaxNumBlocks(), (numTileListsPrev-1)/nthread+1);
01072 
01073     int shmem_size = buildTileListsBBKernel_shmem_sizePerThread(maxTileListLen)*nthread;
01074     if(shmem_size > maxShmemPerBlock){
01075       NAMD_die("CudaTileListKernel::buildTileLists, maximum shared memory allocation exceeded. Too many atoms in a patch");
01076     }
01077 
01078     // NOTE: In the first call numJtiles = 1. buildTileListsBBKernel will return and
01079     //       tell the required size in h_tileListStat->numJtiles. In subsequent calls,
01080     //       re-allocation only happens when the size is exceeded.
01081     h_tileListStat->tilesSizeExceeded = true;
01082     int reallocCount = 0;
01083     while (h_tileListStat->tilesSizeExceeded) {
01084       reallocate_device<int>(&tileJatomStart1, &tileJatomStart1Size, numJtiles, OVERALLOC);
01085 
01086       clearTileListStat(stream);
01087       // clear_device_array<TileListStat>(d_tileListStat, 1, stream);
01088 
01089       buildTileListsBBKernel <<< nblock, nthread, shmem_size, stream >>>
01090       (numTileListsPrev, tileLists1, cudaPatches, tilePos,
01091         lata, latb, latc, plcutoff2, maxTileListLen,
01092         boundingBoxes, tileJatomStart1, tileJatomStart1Size,
01093         tileListDepth1, tileListOrder1, patchPairs1,
01094         d_tileListStat);
01095 
01096       cudaCheck(cudaGetLastError());
01097 
01098       // get (numATileLists, numJtiles, tilesSizeExceeded)
01099       copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1, stream);
01100       cudaCheck(cudaStreamSynchronize(stream));
01101       numJtiles = h_tileListStat->numJtiles;
01102 
01103       if (h_tileListStat->tilesSizeExceeded) {
01104         reallocCount++;
01105         if (reallocCount > 1) {
01106           NAMD_die("CudaTileListKernel::buildTileLists, multiple reallocations detected");
01107         }
01108       }
01109 
01110     }
01111 
01112     numTileLists = h_tileListStat->numTileLists;
01113 
01114     reallocate_device<int>(&jtiles, &jtilesSize, numJtiles, OVERALLOC);
01115   }
01116 
01117   // Re-allocate tileListVirialEnergy.
01118   // NOTE: Since numTileLists here is an upper estimate (since it's based on bounding boxes),
01119   //       we're quaranteed to have enough space
01120   reallocate_device<TileListVirialEnergy>(&tileListVirialEnergy, &tileListVirialEnergySize, numTileLists, OVERALLOC);
01121 
01122   reallocate_device<TileList>(&tileLists2, &tileLists2Size, numTileLists, OVERALLOC);
01123   reallocate_device<PatchPairRecord>(&patchPairs2, &patchPairs2Size, numTileLists, OVERALLOC);
01124   reallocate_device<int>(&tileJatomStart2, &tileJatomStart2Size, numJtiles, OVERALLOC);
01125   reallocate_device<TileExcl>(&tileExcls1, &tileExcls1Size, numJtiles, OVERALLOC);
01126   reallocate_device<TileExcl>(&tileExcls2, &tileExcls2Size, numJtiles, OVERALLOC);
01127 
01128   int numTileListsSrc = numTileListsPrev;
01129   int numJtilesSrc    = numJtiles;
01130   int numTileListsDst = numTileLists;
01131   int numJtilesDst    = numJtiles;
01132 
01133   // Sort tiles
01134   sortTileLists(
01135     false,
01136     0, false,
01137     numTileListsSrc, numJtilesSrc,
01138     PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
01139     PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01140     PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
01141     numTileListsDst, numJtilesDst,
01142     PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01143     PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01144     PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
01145     stream);
01146 
01147   // Set active buffer to 2
01148   setActiveBuffer(2);
01149 
01150   if (doOutputOrder) reallocate_device<int>(&outputOrder, &outputOrderSize, numTileLists, OVERALLOC);
01151 }

void CudaTileListKernel::clearTileListStat ( cudaStream_t  stream  ) 

Definition at line 862 of file CudaTileListKernel.cu.

References getNumEmptyPatches(), and TileListStat::patchReadyQueueCount.

Referenced by buildTileLists(), and CudaComputeNonbondedKernel::nonbondedForce().

00862                                                               {
00863   // clear tileListStat, for patchReadyQueueCount, which is set equal to the number of empty patches
00864   memset(h_tileListStat, 0, sizeof(TileListStat));
00865   h_tileListStat->patchReadyQueueCount = getNumEmptyPatches();
00866   copy_HtoD<TileListStat>(h_tileListStat, d_tileListStat, 1, stream);
00867 }

void CudaTileListKernel::finishTileList ( cudaStream_t  stream  ) 

Definition at line 869 of file CudaTileListKernel.cu.

References cudaCheck.

00869                                                            {
00870   copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1, stream);
00871   cudaCheck(cudaEventRecord(tileListStatEvent, stream));
00872   tileListStatEventRecord = true;
00873 }

float CudaTileListKernel::get_plcutoff2 (  )  [inline]

Definition at line 277 of file CudaTileListKernel.h.

00277 {return plcutoff2;}

float4* CudaTileListKernel::get_xyzq (  )  [inline]
BoundingBox* CudaTileListKernel::getBoundingBoxes (  )  [inline]

Definition at line 281 of file CudaTileListKernel.h.

00281 {return boundingBoxes;}

CudaPatchRecord* CudaTileListKernel::getCudaPatches (  )  [inline]

Definition at line 302 of file CudaTileListKernel.h.

00302 {return cudaPatches;}

int* CudaTileListKernel::getEmptyPatches (  )  [inline]

Definition at line 273 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::launchWork().

00273 {return h_emptyPatches;}

int* CudaTileListKernel::getJtiles (  )  [inline]

Definition at line 282 of file CudaTileListKernel.h.

00282 {return jtiles;}

int CudaTileListKernel::getNumComputes (  )  [inline]

Definition at line 326 of file CudaTileListKernel.h.

00326 {return numComputes;}

int CudaTileListKernel::getNumEmptyPatches (  )  [inline]

Definition at line 272 of file CudaTileListKernel.h.

Referenced by clearTileListStat(), and CudaComputeNonbonded::launchWork().

00272 {return numEmptyPatches;}

int CudaTileListKernel::getNumExcluded (  )  [inline]

Definition at line 275 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::finishReductions().

00275 {return numExcluded;}

int CudaTileListKernel::getNumJtiles (  )  [inline]

Definition at line 280 of file CudaTileListKernel.h.

00280 {return numJtiles;}

int CudaTileListKernel::getNumPatches (  )  [inline]
int CudaTileListKernel::getNumTileLists (  )  [inline]

Definition at line 278 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

00278 {return numTileLists;}

int CudaTileListKernel::getNumTileListsGBIS (  )  [inline]

Definition at line 279 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), CudaComputeGBISKernel::GBISphase2(), and CudaComputeGBISKernel::GBISphase3().

00279 {return numTileListsGBIS;}

int* CudaTileListKernel::getOutputOrder (  )  [inline]

Definition at line 327 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

00327                         {
00328     if (!doStreaming) return NULL;
00329     if (doOutputOrder) {
00330       return outputOrder;
00331     } else {
00332       return NULL;
00333     }
00334   }

PatchPairRecord* CudaTileListKernel::getPatchPairs (  )  [inline]

Definition at line 295 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), and CudaComputeGBISKernel::GBISphase3().

00295 {return ((activeBuffer == 1) ? patchPairs1 : patchPairs2);}

TileExcl* CudaTileListKernel::getTileExcls (  )  [inline]

Definition at line 294 of file CudaTileListKernel.h.

00294 {return ((activeBuffer == 1) ? tileExcls1 : tileExcls2);}

int* CudaTileListKernel::getTileJatomStart (  )  [inline]

Definition at line 288 of file CudaTileListKernel.h.

00288 {return ((activeBuffer == 1) ? tileJatomStart1 : tileJatomStart2);}

int* CudaTileListKernel::getTileJatomStartGBIS (  )  [inline]

Definition at line 297 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), and CudaComputeGBISKernel::GBISphase3().

00297 {return tileJatomStartGBIS;}

unsigned int* CudaTileListKernel::getTileListDepth (  )  [inline]

Definition at line 292 of file CudaTileListKernel.h.

00292 {return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}

int* CudaTileListKernel::getTileListOrder (  )  [inline]

Definition at line 293 of file CudaTileListKernel.h.

00293 {return ((activeBuffer == 1) ? tileListOrder1 : tileListOrder2);}

TileList* CudaTileListKernel::getTileLists (  )  [inline]

Definition at line 289 of file CudaTileListKernel.h.

00289                            {
00290     return ((activeBuffer == 1) ? tileLists1 : tileLists2);
00291   }

TileList* CudaTileListKernel::getTileListsGBIS (  )  [inline]

Definition at line 298 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), and CudaComputeGBISKernel::GBISphase3().

00298 {return tileListsGBIS;}

TileListStat* CudaTileListKernel::getTileListStatDevPtr (  )  [inline]

Definition at line 285 of file CudaTileListKernel.h.

00285 {return d_tileListStat;}

TileListVirialEnergy* CudaTileListKernel::getTileListVirialEnergy (  )  [inline]

Definition at line 300 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00300 {return tileListVirialEnergy;}

int CudaTileListKernel::getTileListVirialEnergyGBISLength (  )  [inline]

Definition at line 322 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00322 {return tileListVirialEnergyGBISLength;}

int CudaTileListKernel::getTileListVirialEnergyLength (  )  [inline]

Definition at line 321 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00321 {return tileListVirialEnergyLength;}

void CudaTileListKernel::prepareTileList ( cudaStream_t  stream  ) 

Definition at line 858 of file CudaTileListKernel.cu.

00858                                                             {
00859   clear_device_array<int>(jtiles, numJtiles, stream);
00860 }

void CudaTileListKernel::reSortTileLists ( const bool  doGBIS,
cudaStream_t  stream 
)

Definition at line 1564 of file CudaTileListKernel.cu.

References cudaCheck, NAMD_die(), TileListStat::numExcluded, TileListStat::numTileLists, TileListStat::numTileListsGBIS, and OVERALLOC.

01564                                                                                {
01565   // Store previous number of active lists
01566   int numTileListsPrev = numTileLists;
01567 
01568   // Wait for finishTileList() to stop copying
01569   if (!tileListStatEventRecord)
01570     NAMD_die("CudaTileListKernel::reSortTileLists, tileListStatEvent not recorded");
01571   cudaCheck(cudaEventSynchronize(tileListStatEvent));
01572 
01573   // Get numTileLists, numTileListsGBIS, and numExcluded
01574   {
01575     numTileLists     = h_tileListStat->numTileLists;
01576     numTileListsGBIS = h_tileListStat->numTileListsGBIS;
01577     numExcluded      = h_tileListStat->numExcluded;
01578   }
01579 
01580   // Sort {tileLists2, tileJatomStart2, tileExcl2} => {tileLists1, tileJatomStart1, tileExcl1}
01581   // VdW tile list in {tileLists1, tileJatomStart1, tileExcl1}
01582   sortTileLists(true, 0, true,
01583     numTileListsPrev, numJtiles,
01584     PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01585     PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01586     PtrSize<PatchPairRecord>(NULL, 0), PtrSize<TileExcl>(tileExcls2, tileExcls2Size),
01587     numTileLists, numJtiles,
01588     PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
01589     PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01590     PtrSize<PatchPairRecord>(NULL, 0), PtrSize<TileExcl>(tileExcls1, tileExcls1Size),
01591     stream);
01592 
01593   // fprintf(stderr, "reSortTileLists, writing tile lists to disk...\n");
01594   // writeTileList("tileList.txt", numTileLists, tileLists1, stream);
01595   // writeTileJatomStart("tileJatomStart.txt", numJtiles, tileJatomStart1, stream);
01596 
01597   // markJtileOverlap(4, numTileLists, tileLists1, numJtiles, tileJatomStart1, stream);
01598 
01599   // NOTE:
01600   // Only {tileList1, tileJatomStart1, tileExcl1} are used from here on,
01601   // the rest {tileListDepth1, tileListOrder1, patchPairs1} may be re-used by the GBIS sorting
01602 
01603   if (doGBIS) {
01604     // GBIS is used => produce a second tile list
01605     // GBIS tile list in {tileListGBIS, tileJatomStartGBIS, patchPairs1}
01606     reallocate_device<TileList>(&tileListsGBIS, &tileListsGBISSize, numTileListsGBIS, OVERALLOC);
01607     reallocate_device<int>(&tileJatomStartGBIS, &tileJatomStartGBISSize, numJtiles, OVERALLOC);
01608 
01609     sortTileLists(true, 16, true,
01610       numTileListsPrev, numJtiles,
01611       PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01612       PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01613       PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
01614       numTileListsGBIS, numJtiles,
01615       PtrSize<TileList>(tileListsGBIS, tileListsGBISSize), PtrSize<int>(tileJatomStartGBIS, tileJatomStartGBISSize),
01616       PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01617       PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
01618       stream);
01619   }
01620 
01621   // Set active buffer to be 1
01622   setActiveBuffer(1);
01623 
01624 }

void CudaTileListKernel::setTileListVirialEnergyGBISLength ( int  len  ) 

Definition at line 1662 of file CudaTileListKernel.cu.

References NAMD_die().

Referenced by CudaComputeGBISKernel::GBISphase2().

01662                                                                   {
01663   if (len > tileListVirialEnergySize) {
01664     NAMD_die("CudaTileListKernel::setTileListVirialEnergyGBISLength, size overflow");
01665   }
01666   tileListVirialEnergyGBISLength = len;
01667 }

void CudaTileListKernel::setTileListVirialEnergyLength ( int  len  ) 

Definition at line 1655 of file CudaTileListKernel.cu.

References NAMD_die().

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

01655                                                               {
01656   if (len > tileListVirialEnergySize) {
01657     NAMD_die("CudaTileListKernel::setTileListVirialEnergyLength, size overflow");
01658   }
01659   tileListVirialEnergyLength = len;
01660 }

void CudaTileListKernel::updateComputes ( const int  numComputesIn,
const CudaComputeRecord h_cudaComputes,
cudaStream_t  stream 
)

Definition at line 875 of file CudaTileListKernel.cu.

00876                                                                 {
00877 
00878   numComputes = numComputesIn;
00879 
00880   reallocate_device<CudaComputeRecord>(&cudaComputes, &cudaComputesSize, numComputes);
00881   copy_HtoD<CudaComputeRecord>(h_cudaComputes, cudaComputes, numComputes, stream);
00882 
00883   if (doStreaming) doOutputOrder = true;
00884 }


The documentation for this class was generated from the following files:

Generated on 16 Jul 2020 for NAMD by  doxygen 1.6.1