CudaTileListKernel Class Reference

#include <CudaTileListKernel.h>

List of all members.

Classes

struct  PtrSize

Public Member Functions

 CudaTileListKernel (int deviceID, bool doStreaming)
 ~CudaTileListKernel ()
int getNumEmptyPatches ()
int * getEmptyPatches ()
int getNumExcluded ()
float get_plcutoff2 ()
int getNumTileLists ()
int getNumTileListsGBIS ()
int getNumJtiles ()
BoundingBoxgetBoundingBoxes ()
int * getJtiles ()
float4 * get_xyzq ()
TileListStatgetTileListStatDevPtr ()
void clearTileListStat (cudaStream_t stream)
int * getTileJatomStart ()
TileListgetTileLists ()
unsigned int * getTileListDepth ()
int * getTileListOrder ()
TileExclgetTileExcls ()
PatchPairRecordgetPatchPairs ()
int * getTileJatomStartGBIS ()
TileListgetTileListsGBIS ()
TileListVirialEnergygetTileListVirialEnergy ()
CudaPatchRecordgetCudaPatches ()
void prepareTileList (cudaStream_t stream)
void finishTileList (cudaStream_t stream)
void updateComputes (const int numComputesIn, const CudaComputeRecord *h_cudaComputes, cudaStream_t stream)
void buildTileLists (const int numTileListsPrev, const int numPatchesIn, const int atomStorageSizeIn, const int maxTileListLenIn, const float3 lata, const float3 latb, const float3 latc, const CudaPatchRecord *h_cudaPatches, const float4 *h_xyzq, const float plcutoff2In, const size_t maxShmemPerBlock, cudaStream_t stream)
void reSortTileLists (const bool doGBIS, cudaStream_t stream)
void setTileListVirialEnergyLength (int len)
void setTileListVirialEnergyGBISLength (int len)
int getTileListVirialEnergyLength ()
int getTileListVirialEnergyGBISLength ()
int getNumPatches ()
int getNumComputes ()
int * getOutputOrder ()

Detailed Description

Definition at line 87 of file CudaTileListKernel.h.


Constructor & Destructor Documentation

CudaTileListKernel::CudaTileListKernel ( int  deviceID,
bool  doStreaming 
)

Definition at line 699 of file CudaTileListKernel.cu.

References cudaCheck.

00699                                                                      :
00700 deviceID(deviceID), doStreaming(doStreaming) {
00701 
00702   cudaCheck(cudaSetDevice(deviceID));
00703 
00704   activeBuffer = 1;
00705 
00706   numPatches = 0;
00707   numComputes = 0;
00708 
00709   cudaPatches = NULL;
00710   cudaPatchesSize = 0;
00711 
00712   cudaComputes = NULL;
00713   cudaComputesSize = 0;
00714 
00715   patchNumLists = NULL;
00716   patchNumListsSize = 0;
00717 
00718   emptyPatches = NULL;
00719   emptyPatchesSize = 0;
00720   h_emptyPatches = NULL;
00721   h_emptyPatchesSize = 0;
00722   numEmptyPatches = 0;
00723 
00724   sortKeySrc = NULL;
00725   sortKeySrcSize = 0;
00726   sortKeyDst = NULL;
00727   sortKeyDstSize = 0;
00728 
00729   tileLists1 = NULL;
00730   tileLists1Size = 0;
00731   tileLists2 = NULL;
00732   tileLists2Size = 0;
00733 
00734   patchPairs1 = NULL;
00735   patchPairs1Size = 0;
00736   patchPairs2 = NULL;
00737   patchPairs2Size = 0;
00738 
00739   tileJatomStart1 = NULL;
00740   tileJatomStart1Size = 0;
00741   tileJatomStart2 = NULL;
00742   tileJatomStart2Size = 0;
00743 
00744   boundingBoxes = NULL;
00745   boundingBoxesSize = 0;
00746 
00747   tileListDepth1 = NULL;
00748   tileListDepth1Size = 0;
00749   tileListDepth2 = NULL;
00750   tileListDepth2Size = 0;
00751 
00752   tileListOrder1 = NULL;
00753   tileListOrder1Size = 0;
00754   tileListOrder2 = NULL;
00755   tileListOrder2Size = 0;
00756 
00757   tileExcls1 = NULL;
00758   tileExcls1Size = 0;
00759   tileExcls2 = NULL;
00760   tileExcls2Size = 0;
00761 
00762   xyzq = NULL;
00763   xyzqSize = 0;
00764 
00765   allocate_device<TileListStat>(&d_tileListStat, 1);
00766   allocate_host<TileListStat>(&h_tileListStat, 1);
00767 
00768   tileListPos = NULL;
00769   tileListPosSize = 0;
00770   tempStorage = NULL;
00771   tempStorageSize = 0;
00772 
00773   jtiles = NULL;
00774   jtilesSize = 0;
00775 
00776   tilePos = NULL;
00777   tilePosSize = 0;
00778 
00779   tileListsGBIS = NULL;
00780   tileListsGBISSize = 0;
00781 
00782   tileJatomStartGBIS = NULL;
00783   tileJatomStartGBISSize = 0;
00784 
00785   tileListVirialEnergy = NULL;
00786   tileListVirialEnergySize = 0;
00787 
00788   atomStorageSize = 0;
00789   numTileLists = 0;
00790   numTileListsGBIS = 0;
00791   numJtiles = 1;
00792 
00793   outputOrder = NULL;
00794   outputOrderSize = 0;
00795   doOutputOrder = false;
00796 
00797   minmaxListLen = NULL;
00798   minmaxListLenSize = 0;
00799 
00800   sortKeys = NULL;
00801   sortKeysSize = 0;
00802   sortKeys_endbit = 0;
00803 
00804   cudaCheck(cudaEventCreate(&tileListStatEvent));
00805   tileListStatEventRecord = false;
00806 }

CudaTileListKernel::~CudaTileListKernel (  ) 

Definition at line 808 of file CudaTileListKernel.cu.

References cudaCheck.

00808                                         {
00809   cudaCheck(cudaSetDevice(deviceID));
00810   deallocate_device<TileListStat>(&d_tileListStat);
00811   deallocate_host<TileListStat>(&h_tileListStat);
00812   //
00813   if (patchNumLists != NULL) deallocate_device<int>(&patchNumLists);
00814   if (emptyPatches != NULL) deallocate_device<int>(&emptyPatches);
00815   if (h_emptyPatches != NULL) deallocate_host<int>(&h_emptyPatches);
00816   if (sortKeySrc != NULL) deallocate_device<unsigned int>(&sortKeySrc);
00817   if (sortKeyDst != NULL) deallocate_device<unsigned int>(&sortKeyDst);
00818   //
00819   if (cudaPatches != NULL) deallocate_device<CudaPatchRecord>(&cudaPatches);
00820   if (cudaComputes != NULL) deallocate_device<CudaComputeRecord>(&cudaComputes);
00821   if (patchPairs1 != NULL) deallocate_device<PatchPairRecord>(&patchPairs1);
00822   if (patchPairs2 != NULL) deallocate_device<PatchPairRecord>(&patchPairs2);
00823   if (tileLists1 != NULL) deallocate_device<TileList>(&tileLists1);
00824   if (tileLists2 != NULL) deallocate_device<TileList>(&tileLists2);
00825   if (tileJatomStart1 != NULL) deallocate_device<int>(&tileJatomStart1);
00826   if (tileJatomStart2 != NULL) deallocate_device<int>(&tileJatomStart2);
00827   if (boundingBoxes != NULL) deallocate_device<BoundingBox>(&boundingBoxes);
00828   if (tileListDepth1 != NULL) deallocate_device<unsigned int>(&tileListDepth1);
00829   if (tileListDepth2 != NULL) deallocate_device<unsigned int>(&tileListDepth2);
00830   if (tileListOrder1 != NULL) deallocate_device<int>(&tileListOrder1);
00831   if (tileListOrder2 != NULL) deallocate_device<int>(&tileListOrder2);
00832   if (tileListPos != NULL) deallocate_device<int>(&tileListPos);
00833   if (tileExcls1 != NULL) deallocate_device<TileExcl>(&tileExcls1);
00834   if (tileExcls2 != NULL) deallocate_device<TileExcl>(&tileExcls2);
00835   if (tempStorage != NULL) deallocate_device<char>(&tempStorage);
00836   if (jtiles != NULL) deallocate_device<int>(&jtiles);
00837   if (tilePos != NULL) deallocate_device<int>(&tilePos);
00838 
00839   if (tileListsGBIS != NULL) deallocate_device<TileList>(&tileListsGBIS);
00840   if (tileJatomStartGBIS != NULL) deallocate_device<int>(&tileJatomStartGBIS);
00841 
00842   if (tileListVirialEnergy != NULL) deallocate_device<TileListVirialEnergy>(&tileListVirialEnergy);
00843 
00844   if (xyzq != NULL) deallocate_device<float4>(&xyzq);
00845 
00846   if (sortKeys != NULL) deallocate_device<unsigned int>(&sortKeys);
00847   if (minmaxListLen != NULL) deallocate_device<int2>(&minmaxListLen);
00848 
00849   cudaCheck(cudaEventDestroy(tileListStatEvent));
00850 }


Member Function Documentation

void CudaTileListKernel::buildTileLists ( const int  numTileListsPrev,
const int  numPatchesIn,
const int  atomStorageSizeIn,
const int  maxTileListLenIn,
const float3  lata,
const float3  latb,
const float3  latc,
const CudaPatchRecord h_cudaPatches,
const float4 *  h_xyzq,
const float  plcutoff2In,
const size_t  maxShmemPerBlock,
cudaStream_t  stream 
)

Definition at line 986 of file CudaTileListKernel.cu.

References BOUNDINGBOXKERNEL_NUM_WARP, buildTileListsBBKernel_shmem_sizePerThread(), clearTileListStat(), cudaCheck, deviceCUDA, DeviceCUDA::getMaxNumBlocks(), NAMD_die(), TileListStat::numTileLists, OVERALLOC, TILELISTKERNELNEW_NUM_WARP, TileListStat::tilesSizeExceeded, and WARPSIZE.

00991                        {
00992 
00993   numPatches = numPatchesIn;
00994   atomStorageSize = atomStorageSizeIn;
00995   maxTileListLen = maxTileListLenIn;
00996   plcutoff2 = plcutoff2In;
00997 
00998   if (doStreaming) {
00999     // Re-allocate patchNumLists
01000     reallocate_device<int>(&patchNumLists, &patchNumListsSize, numPatches);
01001     reallocate_device<int>(&emptyPatches, &emptyPatchesSize, numPatches+1);
01002     reallocate_host<int>(&h_emptyPatches, &h_emptyPatchesSize, numPatches+1);
01003   }
01004 
01005   // Re-allocate (tileLists1, patchPairs1
01006   reallocate_device<TileList>(&tileLists1, &tileLists1Size, numTileListsPrev, OVERALLOC);
01007   reallocate_device<PatchPairRecord>(&patchPairs1, &patchPairs1Size, numTileListsPrev, OVERALLOC);
01008 
01009   // Copy cudaPatches to device
01010   reallocate_device<CudaPatchRecord>(&cudaPatches, &cudaPatchesSize, numPatches);
01011   copy_HtoD<CudaPatchRecord>(h_cudaPatches, cudaPatches, numPatches, stream);
01012 
01013   // Re-allocate temporary storage
01014   reallocate_device<int>(&tilePos, &tilePosSize, numComputes, OVERALLOC);
01015   // Calculate tile list positions (tilePos)
01016   {
01017     int nthread = 1024;
01018     int nblock = 1;
01019     calcTileListPosKernel<1024> <<< nblock, nthread, 0, stream >>>
01020     (numComputes, cudaComputes, cudaPatches, tilePos);
01021     cudaCheck(cudaGetLastError());
01022   }
01023 
01024   // Build (tileLists1.patchInd, tileLists1.offsetXYZ)
01025   {
01026     int nthread = 512;
01027     int nblock = min(deviceCUDA->getMaxNumBlocks(), (numComputes-1)/(nthread/32)+1);
01028     updatePatchesKernel<32> <<< nblock, nthread, 0, stream >>>
01029     (numComputes, tilePos, cudaComputes, cudaPatches, tileLists1);
01030     cudaCheck(cudaGetLastError());
01031   }
01032 
01033   // ---------------------------------------------------------------------------------------------
01034 
01035 
01036   // NOTE: tileListDepth2 and tileListOrder2 must have at least same size as
01037   // tileListDepth2 and tileListOrder2 since they're used in sorting
01038   reallocate_device<unsigned int>(&tileListDepth2, &tileListDepth2Size, numTileListsPrev + 1, OVERALLOC);
01039   reallocate_device<int>(&tileListOrder2, &tileListOrder2Size, numTileListsPrev, OVERALLOC);
01040 
01041   // Allocate with +1 to include last term in the exclusive sum
01042   reallocate_device<unsigned int>(&tileListDepth1, &tileListDepth1Size, numTileListsPrev + 1, OVERALLOC);
01043 
01044   reallocate_device<int>(&tileListOrder1, &tileListOrder1Size, numTileListsPrev, OVERALLOC);
01045 
01046   reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, OVERALLOC);
01047 
01048   copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
01049 
01050   // Fills in boundingBoxes[0 ... numBoundingBoxes-1]
01051   {
01052     int numBoundingBoxes = atomStorageSize/WARPSIZE;
01053     reallocate_device<BoundingBox>(&boundingBoxes, &boundingBoxesSize, numBoundingBoxes, OVERALLOC);
01054 
01055     int nwarp = BOUNDINGBOXKERNEL_NUM_WARP;
01056     int nthread = WARPSIZE*nwarp;
01057     int nblock = min(deviceCUDA->getMaxNumBlocks(), (atomStorageSize-1)/nthread+1);
01058     buildBoundingBoxesKernel <<< nblock, nthread, 0, stream >>> (atomStorageSize, xyzq, boundingBoxes);
01059     cudaCheck(cudaGetLastError());
01060   }
01061 
01062   {
01063     int nwarp = TILELISTKERNELNEW_NUM_WARP;
01064     int nthread = WARPSIZE*nwarp;
01065     int nblock = min(deviceCUDA->getMaxNumBlocks(), (numTileListsPrev-1)/nthread+1);
01066 
01067     int shmem_size = buildTileListsBBKernel_shmem_sizePerThread(maxTileListLen)*nthread;
01068     if(shmem_size > maxShmemPerBlock){
01069       NAMD_die("CudaTileListKernel::buildTileLists, maximum shared memory allocation exceeded. Too many atoms in a patch");
01070     }
01071 
01072     // NOTE: In the first call numJtiles = 1. buildTileListsBBKernel will return and
01073     //       tell the required size in h_tileListStat->numJtiles. In subsequent calls,
01074     //       re-allocation only happens when the size is exceeded.
01075     h_tileListStat->tilesSizeExceeded = true;
01076     int reallocCount = 0;
01077     while (h_tileListStat->tilesSizeExceeded) {
01078       reallocate_device<int>(&tileJatomStart1, &tileJatomStart1Size, numJtiles, OVERALLOC);
01079 
01080       clearTileListStat(stream);
01081       // clear_device_array<TileListStat>(d_tileListStat, 1, stream);
01082 
01083       buildTileListsBBKernel <<< nblock, nthread, shmem_size, stream >>>
01084       (numTileListsPrev, tileLists1, cudaPatches, tilePos,
01085         lata, latb, latc, plcutoff2, maxTileListLen,
01086         boundingBoxes, tileJatomStart1, tileJatomStart1Size,
01087         tileListDepth1, tileListOrder1, patchPairs1,
01088         d_tileListStat);
01089 
01090       cudaCheck(cudaGetLastError());
01091 
01092       // get (numATileLists, numJtiles, tilesSizeExceeded)
01093       copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1, stream);
01094       cudaCheck(cudaStreamSynchronize(stream));
01095       numJtiles = h_tileListStat->numJtiles;
01096 
01097       if (h_tileListStat->tilesSizeExceeded) {
01098         reallocCount++;
01099         if (reallocCount > 1) {
01100           NAMD_die("CudaTileListKernel::buildTileLists, multiple reallocations detected");
01101         }
01102       }
01103 
01104     }
01105 
01106     numTileLists = h_tileListStat->numTileLists;
01107 
01108     reallocate_device<int>(&jtiles, &jtilesSize, numJtiles, OVERALLOC);
01109   }
01110 
01111   // Re-allocate tileListVirialEnergy.
01112   // NOTE: Since numTileLists here is an upper estimate (since it's based on bounding boxes),
01113   //       we're quaranteed to have enough space
01114   reallocate_device<TileListVirialEnergy>(&tileListVirialEnergy, &tileListVirialEnergySize, numTileLists, OVERALLOC);
01115 
01116   reallocate_device<TileList>(&tileLists2, &tileLists2Size, numTileLists, OVERALLOC);
01117   reallocate_device<PatchPairRecord>(&patchPairs2, &patchPairs2Size, numTileLists, OVERALLOC);
01118   reallocate_device<int>(&tileJatomStart2, &tileJatomStart2Size, numJtiles, OVERALLOC);
01119   reallocate_device<TileExcl>(&tileExcls1, &tileExcls1Size, numJtiles, OVERALLOC);
01120   reallocate_device<TileExcl>(&tileExcls2, &tileExcls2Size, numJtiles, OVERALLOC);
01121 
01122   int numTileListsSrc = numTileListsPrev;
01123   int numJtilesSrc    = numJtiles;
01124   int numTileListsDst = numTileLists;
01125   int numJtilesDst    = numJtiles;
01126 
01127   // Sort tiles
01128   sortTileLists(
01129     false,
01130     0, false,
01131     numTileListsSrc, numJtilesSrc,
01132     PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
01133     PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01134     PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
01135     numTileListsDst, numJtilesDst,
01136     PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01137     PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01138     PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
01139     stream);
01140 
01141   // Set active buffer to 2
01142   setActiveBuffer(2);
01143 
01144   if (doOutputOrder) reallocate_device<int>(&outputOrder, &outputOrderSize, numTileLists, OVERALLOC);
01145 }

void CudaTileListKernel::clearTileListStat ( cudaStream_t  stream  ) 

Definition at line 856 of file CudaTileListKernel.cu.

References getNumEmptyPatches(), and TileListStat::patchReadyQueueCount.

Referenced by buildTileLists(), and CudaComputeNonbondedKernel::nonbondedForce().

00856                                                               {
00857   // clear tileListStat, for patchReadyQueueCount, which is set equal to the number of empty patches
00858   memset(h_tileListStat, 0, sizeof(TileListStat));
00859   h_tileListStat->patchReadyQueueCount = getNumEmptyPatches();
00860   copy_HtoD<TileListStat>(h_tileListStat, d_tileListStat, 1, stream);
00861 }

void CudaTileListKernel::finishTileList ( cudaStream_t  stream  ) 

Definition at line 863 of file CudaTileListKernel.cu.

References cudaCheck.

00863                                                            {
00864   copy_DtoH<TileListStat>(d_tileListStat, h_tileListStat, 1, stream);
00865   cudaCheck(cudaEventRecord(tileListStatEvent, stream));
00866   tileListStatEventRecord = true;
00867 }

float CudaTileListKernel::get_plcutoff2 (  )  [inline]

Definition at line 277 of file CudaTileListKernel.h.

00277 {return plcutoff2;}

float4* CudaTileListKernel::get_xyzq (  )  [inline]
BoundingBox* CudaTileListKernel::getBoundingBoxes (  )  [inline]

Definition at line 281 of file CudaTileListKernel.h.

00281 {return boundingBoxes;}

CudaPatchRecord* CudaTileListKernel::getCudaPatches (  )  [inline]

Definition at line 302 of file CudaTileListKernel.h.

00302 {return cudaPatches;}

int* CudaTileListKernel::getEmptyPatches (  )  [inline]

Definition at line 273 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::launchWork().

00273 {return h_emptyPatches;}

int* CudaTileListKernel::getJtiles (  )  [inline]

Definition at line 282 of file CudaTileListKernel.h.

00282 {return jtiles;}

int CudaTileListKernel::getNumComputes (  )  [inline]

Definition at line 326 of file CudaTileListKernel.h.

00326 {return numComputes;}

int CudaTileListKernel::getNumEmptyPatches (  )  [inline]

Definition at line 272 of file CudaTileListKernel.h.

Referenced by clearTileListStat(), and CudaComputeNonbonded::launchWork().

00272 {return numEmptyPatches;}

int CudaTileListKernel::getNumExcluded (  )  [inline]

Definition at line 275 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbonded::finishReductions().

00275 {return numExcluded;}

int CudaTileListKernel::getNumJtiles (  )  [inline]

Definition at line 280 of file CudaTileListKernel.h.

00280 {return numJtiles;}

int CudaTileListKernel::getNumPatches (  )  [inline]
int CudaTileListKernel::getNumTileLists (  )  [inline]

Definition at line 278 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

00278 {return numTileLists;}

int CudaTileListKernel::getNumTileListsGBIS (  )  [inline]

Definition at line 279 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), CudaComputeGBISKernel::GBISphase2(), and CudaComputeGBISKernel::GBISphase3().

00279 {return numTileListsGBIS;}

int* CudaTileListKernel::getOutputOrder (  )  [inline]

Definition at line 327 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

00327                         {
00328     if (!doStreaming) return NULL;
00329     if (doOutputOrder) {
00330       return outputOrder;
00331     } else {
00332       return NULL;
00333     }
00334   }

PatchPairRecord* CudaTileListKernel::getPatchPairs (  )  [inline]

Definition at line 295 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), and CudaComputeGBISKernel::GBISphase3().

00295 {return ((activeBuffer == 1) ? patchPairs1 : patchPairs2);}

TileExcl* CudaTileListKernel::getTileExcls (  )  [inline]

Definition at line 294 of file CudaTileListKernel.h.

00294 {return ((activeBuffer == 1) ? tileExcls1 : tileExcls2);}

int* CudaTileListKernel::getTileJatomStart (  )  [inline]

Definition at line 288 of file CudaTileListKernel.h.

00288 {return ((activeBuffer == 1) ? tileJatomStart1 : tileJatomStart2);}

int* CudaTileListKernel::getTileJatomStartGBIS (  )  [inline]

Definition at line 297 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), and CudaComputeGBISKernel::GBISphase3().

00297 {return tileJatomStartGBIS;}

unsigned int* CudaTileListKernel::getTileListDepth (  )  [inline]

Definition at line 292 of file CudaTileListKernel.h.

00292 {return ((activeBuffer == 1) ? tileListDepth1 : tileListDepth2);}

int* CudaTileListKernel::getTileListOrder (  )  [inline]

Definition at line 293 of file CudaTileListKernel.h.

00293 {return ((activeBuffer == 1) ? tileListOrder1 : tileListOrder2);}

TileList* CudaTileListKernel::getTileLists (  )  [inline]

Definition at line 289 of file CudaTileListKernel.h.

00289                            {
00290     return ((activeBuffer == 1) ? tileLists1 : tileLists2);
00291   }

TileList* CudaTileListKernel::getTileListsGBIS (  )  [inline]

Definition at line 298 of file CudaTileListKernel.h.

Referenced by CudaComputeGBISKernel::GBISphase1(), and CudaComputeGBISKernel::GBISphase3().

00298 {return tileListsGBIS;}

TileListStat* CudaTileListKernel::getTileListStatDevPtr (  )  [inline]

Definition at line 285 of file CudaTileListKernel.h.

00285 {return d_tileListStat;}

TileListVirialEnergy* CudaTileListKernel::getTileListVirialEnergy (  )  [inline]

Definition at line 300 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00300 {return tileListVirialEnergy;}

int CudaTileListKernel::getTileListVirialEnergyGBISLength (  )  [inline]

Definition at line 322 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00322 {return tileListVirialEnergyGBISLength;}

int CudaTileListKernel::getTileListVirialEnergyLength (  )  [inline]

Definition at line 321 of file CudaTileListKernel.h.

Referenced by CudaComputeNonbondedKernel::reduceVirialEnergy().

00321 {return tileListVirialEnergyLength;}

void CudaTileListKernel::prepareTileList ( cudaStream_t  stream  ) 

Definition at line 852 of file CudaTileListKernel.cu.

00852                                                             {
00853   clear_device_array<int>(jtiles, numJtiles, stream);
00854 }

void CudaTileListKernel::reSortTileLists ( const bool  doGBIS,
cudaStream_t  stream 
)

Definition at line 1558 of file CudaTileListKernel.cu.

References cudaCheck, NAMD_die(), TileListStat::numExcluded, TileListStat::numTileLists, TileListStat::numTileListsGBIS, and OVERALLOC.

01558                                                                                {
01559   // Store previous number of active lists
01560   int numTileListsPrev = numTileLists;
01561 
01562   // Wait for finishTileList() to stop copying
01563   if (!tileListStatEventRecord)
01564     NAMD_die("CudaTileListKernel::reSortTileLists, tileListStatEvent not recorded");
01565   cudaCheck(cudaEventSynchronize(tileListStatEvent));
01566 
01567   // Get numTileLists, numTileListsGBIS, and numExcluded
01568   {
01569     numTileLists     = h_tileListStat->numTileLists;
01570     numTileListsGBIS = h_tileListStat->numTileListsGBIS;
01571     numExcluded      = h_tileListStat->numExcluded;
01572   }
01573 
01574   // Sort {tileLists2, tileJatomStart2, tileExcl2} => {tileLists1, tileJatomStart1, tileExcl1}
01575   // VdW tile list in {tileLists1, tileJatomStart1, tileExcl1}
01576   sortTileLists(true, 0, true,
01577     numTileListsPrev, numJtiles,
01578     PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01579     PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01580     PtrSize<PatchPairRecord>(NULL, 0), PtrSize<TileExcl>(tileExcls2, tileExcls2Size),
01581     numTileLists, numJtiles,
01582     PtrSize<TileList>(tileLists1, tileLists1Size), PtrSize<int>(tileJatomStart1, tileJatomStart1Size),
01583     PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01584     PtrSize<PatchPairRecord>(NULL, 0), PtrSize<TileExcl>(tileExcls1, tileExcls1Size),
01585     stream);
01586 
01587   // fprintf(stderr, "reSortTileLists, writing tile lists to disk...\n");
01588   // writeTileList("tileList.txt", numTileLists, tileLists1, stream);
01589   // writeTileJatomStart("tileJatomStart.txt", numJtiles, tileJatomStart1, stream);
01590 
01591   // markJtileOverlap(4, numTileLists, tileLists1, numJtiles, tileJatomStart1, stream);
01592 
01593   // NOTE:
01594   // Only {tileList1, tileJatomStart1, tileExcl1} are used from here on,
01595   // the rest {tileListDepth1, tileListOrder1, patchPairs1} may be re-used by the GBIS sorting
01596 
01597   if (doGBIS) {
01598     // GBIS is used => produce a second tile list
01599     // GBIS tile list in {tileListGBIS, tileJatomStartGBIS, patchPairs1}
01600     reallocate_device<TileList>(&tileListsGBIS, &tileListsGBISSize, numTileListsGBIS, OVERALLOC);
01601     reallocate_device<int>(&tileJatomStartGBIS, &tileJatomStartGBISSize, numJtiles, OVERALLOC);
01602 
01603     sortTileLists(true, 16, true,
01604       numTileListsPrev, numJtiles,
01605       PtrSize<TileList>(tileLists2, tileLists2Size), PtrSize<int>(tileJatomStart2, tileJatomStart2Size),
01606       PtrSize<unsigned int>(tileListDepth2, tileListDepth2Size), PtrSize<int>(tileListOrder2, tileListOrder2Size),
01607       PtrSize<PatchPairRecord>(patchPairs2, patchPairs2Size), PtrSize<TileExcl>(NULL, 0),
01608       numTileListsGBIS, numJtiles,
01609       PtrSize<TileList>(tileListsGBIS, tileListsGBISSize), PtrSize<int>(tileJatomStartGBIS, tileJatomStartGBISSize),
01610       PtrSize<unsigned int>(tileListDepth1, tileListDepth1Size), PtrSize<int>(tileListOrder1, tileListOrder1Size),
01611       PtrSize<PatchPairRecord>(patchPairs1, patchPairs1Size), PtrSize<TileExcl>(NULL, 0),
01612       stream);
01613   }
01614 
01615   // Set active buffer to be 1
01616   setActiveBuffer(1);
01617 
01618 }

void CudaTileListKernel::setTileListVirialEnergyGBISLength ( int  len  ) 

Definition at line 1656 of file CudaTileListKernel.cu.

References NAMD_die().

Referenced by CudaComputeGBISKernel::GBISphase2().

01656                                                                   {
01657   if (len > tileListVirialEnergySize) {
01658     NAMD_die("CudaTileListKernel::setTileListVirialEnergyGBISLength, size overflow");
01659   }
01660   tileListVirialEnergyGBISLength = len;
01661 }

void CudaTileListKernel::setTileListVirialEnergyLength ( int  len  ) 

Definition at line 1649 of file CudaTileListKernel.cu.

References NAMD_die().

Referenced by CudaComputeNonbondedKernel::nonbondedForce().

01649                                                               {
01650   if (len > tileListVirialEnergySize) {
01651     NAMD_die("CudaTileListKernel::setTileListVirialEnergyLength, size overflow");
01652   }
01653   tileListVirialEnergyLength = len;
01654 }

void CudaTileListKernel::updateComputes ( const int  numComputesIn,
const CudaComputeRecord h_cudaComputes,
cudaStream_t  stream 
)

Definition at line 869 of file CudaTileListKernel.cu.

00870                                                                 {
00871 
00872   numComputes = numComputesIn;
00873 
00874   reallocate_device<CudaComputeRecord>(&cudaComputes, &cudaComputesSize, numComputes);
00875   copy_HtoD<CudaComputeRecord>(h_cudaComputes, cudaComputes, numComputes, stream);
00876 
00877   if (doStreaming) doOutputOrder = true;
00878 }


The documentation for this class was generated from the following files:

Generated on 8 Dec 2019 for NAMD by  doxygen 1.6.1