NAMD
Public Member Functions | List of all members
CudaComputeGBISKernel Class Reference

#include <CudaComputeGBISKernel.h>

Public Member Functions

 CudaComputeGBISKernel (int deviceID)
 
 ~CudaComputeGBISKernel ()
 
void updateIntRad (const int atomStorageSize, float *intRad0H, float *intRadSH, cudaStream_t stream)
 
void updateBornRad (const int atomStorageSize, float *bornRadH, cudaStream_t stream)
 
void update_dHdrPrefix (const int atomStorageSize, float *dHdrPrefixH, cudaStream_t stream)
 
void GBISphase1 (CudaTileListKernel &tlKernel, const int atomStorageSize, const float3 lata, const float3 latb, const float3 latc, const float a_cut, float *h_psiSum, cudaStream_t stream)
 
void GBISphase2 (CudaTileListKernel &tlKernel, const int atomStorageSize, const bool doEnergy, const bool doSlow, const float3 lata, const float3 latb, const float3 latc, const float r_cut, const float scaling, const float kappa, const float smoothDist, const float epsilon_p, const float epsilon_s, float4 *d_forces, float *h_dEdaSum, cudaStream_t stream)
 
void GBISphase3 (CudaTileListKernel &tlKernel, const int atomStorageSize, const float3 lata, const float3 latb, const float3 latc, const float a_cut, float4 *d_forces, cudaStream_t stream)
 

Detailed Description

Definition at line 5 of file CudaComputeGBISKernel.h.

Constructor & Destructor Documentation

CudaComputeGBISKernel::CudaComputeGBISKernel ( int  deviceID)

Definition at line 427 of file CudaComputeGBISKernel.cu.

References cudaCheck.

427  : deviceID(deviceID) {
428  cudaCheck(cudaSetDevice(deviceID));
429 
430  intRad0 = NULL;
431  intRad0Size = 0;
432 
433  intRadS = NULL;
434  intRadSSize = 0;
435 
436  psiSum = NULL;
437  psiSumSize = 0;
438 
439  bornRad = NULL;
440  bornRadSize = 0;
441 
442  dEdaSum = NULL;
443  dEdaSumSize = 0;
444 
445  dHdrPrefix = NULL;
446  dHdrPrefixSize = 0;
447 
448 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
CudaComputeGBISKernel::~CudaComputeGBISKernel ( )

Definition at line 453 of file CudaComputeGBISKernel.cu.

References cudaCheck.

453  {
454  cudaCheck(cudaSetDevice(deviceID));
455  if (intRad0 != NULL) deallocate_device<float>(&intRad0);
456  if (intRadS != NULL) deallocate_device<float>(&intRadS);
457  if (psiSum != NULL) deallocate_device<float>(&psiSum);
458  if (bornRad != NULL) deallocate_device<float>(&bornRad);
459  if (dEdaSum != NULL) deallocate_device<float>(&dEdaSum);
460  if (dHdrPrefix != NULL) deallocate_device<float>(&dHdrPrefix);
461 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79

Member Function Documentation

void CudaComputeGBISKernel::GBISphase1 ( CudaTileListKernel tlKernel,
const int  atomStorageSize,
const float3  lata,
const float3  latb,
const float3  latc,
const float  a_cut,
float *  h_psiSum,
cudaStream_t  stream 
)

Definition at line 495 of file CudaComputeGBISKernel.cu.

References GBISParam< 1 >::a_cut, atomStorageSize, cudaCheck, cutoff2, deviceCUDA, FS_MAX, GBISKERNEL_NUM_WARP, CudaTileListKernel::get_xyzq(), DeviceCUDA::getMaxNumBlocks(), CudaTileListKernel::getNumTileListsGBIS(), CudaTileListKernel::getPatchPairs(), CudaTileListKernel::getTileJatomStartGBIS(), CudaTileListKernel::getTileListsGBIS(), lata, latb, latc, stream, and WARPSIZE.

497  {
498 
499  reallocate_device<float>(&psiSum, &psiSumSize, atomStorageSize, 1.2f);
500  clear_device_array<float>(psiSum, atomStorageSize, stream);
501 
502  int nwarp = GBISKERNEL_NUM_WARP;
503  int nthread = WARPSIZE*nwarp;
504  int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getNumTileListsGBIS()-1)/nwarp+1);
505 
506  GBISParam<1> param;
507  param.a_cut = a_cut;
508 
509  float cutoff2 = (a_cut + FS_MAX)*(a_cut + FS_MAX);
510 
511  GBIS_Kernel<false, false, 1> <<< nblock, nthread, 0, stream >>>
512  (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(),
513  tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), cutoff2,
514  param, intRad0, intRadS, NULL, psiSum, NULL, NULL);
515 
516  cudaCheck(cudaGetLastError());
517 
518  copy_DtoH<float>(psiSum, h_psiSum, atomStorageSize, stream);
519 }
PatchPairRecord * getPatchPairs()
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
TileList * getTileListsGBIS()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
int getMaxNumBlocks()
Definition: DeviceCUDA.C:415
#define FS_MAX
Definition: ComputeGBIS.inl:24
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
#define WARPSIZE
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
#define GBISKERNEL_NUM_WARP
void CudaComputeGBISKernel::GBISphase2 ( CudaTileListKernel tlKernel,
const int  atomStorageSize,
const bool  doEnergy,
const bool  doSlow,
const float3  lata,
const float3  latb,
const float3  latc,
const float  r_cut,
const float  scaling,
const float  kappa,
const float  smoothDist,
const float  epsilon_p,
const float  epsilon_s,
float4 *  d_forces,
float *  h_dEdaSum,
cudaStream_t  stream 
)

Definition at line 524 of file CudaComputeGBISKernel.cu.

References atomStorageSize, CALL, cudaCheck, deviceCUDA, GBISParam< 2 >::epsilon_p_i, GBISParam< 2 >::epsilon_s_i, GBISKERNEL_NUM_WARP, DeviceCUDA::getMaxNumBlocks(), CudaTileListKernel::getNumTileListsGBIS(), GBISParam< 2 >::kappa, GBISParam< 2 >::r_cut2, GBISParam< 2 >::r_cut_2, GBISParam< 2 >::r_cut_4, GBISParam< 2 >::scaling, CudaTileListKernel::setTileListVirialEnergyGBISLength(), GBISParam< 2 >::smoothDist, stream, and WARPSIZE.

529  {
530 
531  reallocate_device<float>(&dEdaSum, &dEdaSumSize, atomStorageSize, 1.2f);
532  clear_device_array<float>(dEdaSum, atomStorageSize, stream);
533 
534  if (doEnergy) {
536  }
537 
538  int nwarp = GBISKERNEL_NUM_WARP;
539  int nthread = WARPSIZE*nwarp;
540 
541  int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getNumTileListsGBIS()-1)/nwarp+1);
542 
543  GBISParam<2> param;
544  param.r_cut2 = r_cut*r_cut;
545  param.r_cut_2 = 1.f / param.r_cut2;
546  param.r_cut_4 = 4.f*param.r_cut_2*param.r_cut_2;
547  param.epsilon_s_i = 1.f / epsilon_s;
548  param.epsilon_p_i = 1.f / epsilon_p;
549  param.scaling = scaling;
550  param.kappa = kappa;
551  param.smoothDist = smoothDist;
552 
553 #define CALL(DOENERGY, DOSLOW) GBIS_Kernel<DOENERGY, DOSLOW, 2> \
554  <<< nblock, nthread, 0, stream >>> \
555  (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(), \
556  tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), param.r_cut2, \
557  param, bornRad, NULL, NULL, dEdaSum, d_forces, tlKernel.getTileListVirialEnergy())
558 
559  if (!doEnergy && !doSlow) CALL(false, false);
560  if (!doEnergy && doSlow) CALL(false, true);
561  if ( doEnergy && !doSlow) CALL(true, false);
562  if ( doEnergy && doSlow) CALL(true, true);
563 
564  cudaCheck(cudaGetLastError());
565 
566  copy_DtoH<float>(dEdaSum, h_dEdaSum, atomStorageSize, stream);
567 }
void setTileListVirialEnergyGBISLength(int len)
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
int getMaxNumBlocks()
Definition: DeviceCUDA.C:415
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
#define WARPSIZE
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
#define CALL(DOENERGY, DOVIRIAL)
#define GBISKERNEL_NUM_WARP
void CudaComputeGBISKernel::GBISphase3 ( CudaTileListKernel tlKernel,
const int  atomStorageSize,
const float3  lata,
const float3  latb,
const float3  latc,
const float  a_cut,
float4 *  d_forces,
cudaStream_t  stream 
)

Definition at line 572 of file CudaComputeGBISKernel.cu.

References GBISParam< 3 >::a_cut, cudaCheck, cutoff2, deviceCUDA, FS_MAX, GBISKERNEL_NUM_WARP, CudaTileListKernel::get_xyzq(), DeviceCUDA::getMaxNumBlocks(), CudaTileListKernel::getNumTileListsGBIS(), CudaTileListKernel::getPatchPairs(), CudaTileListKernel::getTileJatomStartGBIS(), CudaTileListKernel::getTileListsGBIS(), lata, latb, latc, and WARPSIZE.

574  {
575 
576  int nwarp = GBISKERNEL_NUM_WARP;
577  int nthread = WARPSIZE*nwarp;
578  int nblock = min(deviceCUDA->getMaxNumBlocks(), (tlKernel.getNumTileListsGBIS()-1)/nwarp+1);
579 
580  GBISParam<3> param;
581  param.a_cut = a_cut;
582 
583  float cutoff2 = (a_cut + FS_MAX)*(a_cut + FS_MAX);
584 
585  GBIS_Kernel<false, false, 3> <<< nblock, nthread, 0, stream >>>
586  (tlKernel.getNumTileListsGBIS(), tlKernel.getTileListsGBIS(), tlKernel.getTileJatomStartGBIS(),
587  tlKernel.getPatchPairs(), lata, latb, latc, tlKernel.get_xyzq(), cutoff2,
588  param, intRad0, intRadS, dHdrPrefix, NULL, d_forces, NULL);
589 
590  cudaCheck(cudaGetLastError());
591 }
PatchPairRecord * getPatchPairs()
__thread cudaStream_t stream
TileList * getTileListsGBIS()
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 lata
int getMaxNumBlocks()
Definition: DeviceCUDA.C:415
#define FS_MAX
Definition: ComputeGBIS.inl:24
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 latc
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
#define WARPSIZE
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 latb
#define GBISKERNEL_NUM_WARP
void CudaComputeGBISKernel::update_dHdrPrefix ( const int  atomStorageSize,
float *  dHdrPrefixH,
cudaStream_t  stream 
)

Definition at line 487 of file CudaComputeGBISKernel.cu.

References atomStorageSize, dHdrPrefixH, and stream.

487  {
488  reallocate_device<float>(&dHdrPrefix, &dHdrPrefixSize, atomStorageSize, 1.2f);
489  copy_HtoD<float>(dHdrPrefixH, dHdrPrefix, atomStorageSize, stream);
490 }
static __thread float * dHdrPrefixH
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
void CudaComputeGBISKernel::updateBornRad ( const int  atomStorageSize,
float *  bornRadH,
cudaStream_t  stream 
)

Definition at line 479 of file CudaComputeGBISKernel.cu.

References atomStorageSize, bornRadH, and stream.

479  {
480  reallocate_device<float>(&bornRad, &bornRadSize, atomStorageSize, 1.2f);
481  copy_HtoD<float>(bornRadH, bornRad, atomStorageSize, stream);
482 }
static __thread float * bornRadH
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
void CudaComputeGBISKernel::updateIntRad ( const int  atomStorageSize,
float *  intRad0H,
float *  intRadSH,
cudaStream_t  stream 
)

Definition at line 466 of file CudaComputeGBISKernel.cu.

References atomStorageSize, intRad0H, intRadSH, and stream.

467  {
468 
469  reallocate_device<float>(&intRad0, &intRad0Size, atomStorageSize, 1.2f);
470  reallocate_device<float>(&intRadS, &intRadSSize, atomStorageSize, 1.2f);
471 
472  copy_HtoD<float>(intRad0H, intRad0, atomStorageSize, stream);
473  copy_HtoD<float>(intRadSH, intRadS, atomStorageSize, stream);
474 }
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
static __thread float * intRadSH
static __thread float * intRad0H

The documentation for this class was generated from the following files: