NAMD
Classes | Public Types | Public Member Functions | Static Public Member Functions | List of all members
ComputeBondedCUDAKernel Class Reference

#include <ComputeBondedCUDAKernel.h>

Classes

struct  BondedVirial
 

Public Types

enum  {
  energyIndex_BOND =0, energyIndex_ANGLE, energyIndex_DIHEDRAL, energyIndex_IMPROPER,
  energyIndex_ELECT, energyIndex_LJ, energyIndex_ELECT_SLOW, energyIndex_CROSSTERM,
  normalVirialIndex_XX, normalVirialIndex_XY, normalVirialIndex_XZ, normalVirialIndex_YX,
  normalVirialIndex_YY, normalVirialIndex_YZ, normalVirialIndex_ZX, normalVirialIndex_ZY,
  normalVirialIndex_ZZ, nbondVirialIndex_XX, nbondVirialIndex_XY, nbondVirialIndex_XZ,
  nbondVirialIndex_YX, nbondVirialIndex_YY, nbondVirialIndex_YZ, nbondVirialIndex_ZX,
  nbondVirialIndex_ZY, nbondVirialIndex_ZZ, slowVirialIndex_XX, slowVirialIndex_XY,
  slowVirialIndex_XZ, slowVirialIndex_YX, slowVirialIndex_YY, slowVirialIndex_YZ,
  slowVirialIndex_ZX, slowVirialIndex_ZY, slowVirialIndex_ZZ, amdDiheVirialIndex_XX,
  amdDiheVirialIndex_XY, amdDiheVirialIndex_XZ, amdDiheVirialIndex_YX, amdDiheVirialIndex_YY,
  amdDiheVirialIndex_YZ, amdDiheVirialIndex_ZX, amdDiheVirialIndex_ZY, amdDiheVirialIndex_ZZ,
  energies_virials_SIZE
}
 

Public Member Functions

 ComputeBondedCUDAKernel (int deviceID, CudaNonbondedTables &cudaNonbondedTables)
 
 ~ComputeBondedCUDAKernel ()
 
void update (const int numBondsIn, const int numAnglesIn, const int numDihedralsIn, const int numImpropersIn, const int numModifiedExclusionsIn, const int numExclusionsIn, const int numCrosstermsIn, const char *h_tupleData, cudaStream_t stream)
 
void setupBondValues (int numBondValues, CudaBondValue *h_bondValues)
 
void setupAngleValues (int numAngleValues, CudaAngleValue *h_angleValues)
 
void setupDihedralValues (int numDihedralValues, CudaDihedralValue *h_dihedralValues)
 
void setupImproperValues (int numImproperValues, CudaDihedralValue *h_improperValues)
 
void setupCrosstermValues (int numCrosstermValues, CudaCrosstermValue *h_crosstermValues)
 
int getForceStride (const int atomStorageSize)
 
int getForceSize (const int atomStorageSize)
 
int getAllForceSize (const int atomStorageSize, const bool doSlow)
 
void bondedForce (const double scale14, const int atomStorageSize, const bool doEnergy, const bool doVirial, const bool doSlow, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, const float r2_delta, const int r2_delta_expc, const float4 *h_xyzq, FORCE_TYPE *h_forces, double *h_energies, cudaStream_t stream)
 

Static Public Member Functions

static int warpAlign (const int n)
 

Detailed Description

Definition at line 46 of file ComputeBondedCUDAKernel.h.

Member Enumeration Documentation

anonymous enum
Enumerator
energyIndex_BOND 
energyIndex_ANGLE 
energyIndex_DIHEDRAL 
energyIndex_IMPROPER 
energyIndex_ELECT 
energyIndex_LJ 
energyIndex_ELECT_SLOW 
energyIndex_CROSSTERM 
normalVirialIndex_XX 
normalVirialIndex_XY 
normalVirialIndex_XZ 
normalVirialIndex_YX 
normalVirialIndex_YY 
normalVirialIndex_YZ 
normalVirialIndex_ZX 
normalVirialIndex_ZY 
normalVirialIndex_ZZ 
nbondVirialIndex_XX 
nbondVirialIndex_XY 
nbondVirialIndex_XZ 
nbondVirialIndex_YX 
nbondVirialIndex_YY 
nbondVirialIndex_YZ 
nbondVirialIndex_ZX 
nbondVirialIndex_ZY 
nbondVirialIndex_ZZ 
slowVirialIndex_XX 
slowVirialIndex_XY 
slowVirialIndex_XZ 
slowVirialIndex_YX 
slowVirialIndex_YY 
slowVirialIndex_YZ 
slowVirialIndex_ZX 
slowVirialIndex_ZY 
slowVirialIndex_ZZ 
amdDiheVirialIndex_XX 
amdDiheVirialIndex_XY 
amdDiheVirialIndex_XZ 
amdDiheVirialIndex_YX 
amdDiheVirialIndex_YY 
amdDiheVirialIndex_YZ 
amdDiheVirialIndex_ZX 
amdDiheVirialIndex_ZY 
amdDiheVirialIndex_ZZ 
energies_virials_SIZE 

Definition at line 50 of file ComputeBondedCUDAKernel.h.

Constructor & Destructor Documentation

ComputeBondedCUDAKernel::ComputeBondedCUDAKernel ( int  deviceID,
CudaNonbondedTables cudaNonbondedTables 
)

Definition at line 1591 of file ComputeBondedCUDAKernel.cu.

References cudaCheck, and energies_virials_SIZE.

1591  :
1592 deviceID(deviceID), cudaNonbondedTables(cudaNonbondedTables) {
1593 
1594  cudaCheck(cudaSetDevice(deviceID));
1595 
1596  tupleData = NULL;
1597  tupleDataSize = 0;
1598 
1599  numBonds = 0;
1600  numAngles = 0;
1601  numDihedrals = 0;
1602  numImpropers = 0;
1603  numModifiedExclusions = 0;
1604  numExclusions = 0;
1605  numCrossterms = 0;
1606 
1607  bondValues = NULL;
1608  angleValues = NULL;
1609  dihedralValues = NULL;
1610  improperValues = NULL;
1611  crosstermValues = NULL;
1612 
1613  xyzq = NULL;
1614  xyzqSize = 0;
1615 
1616  forces = NULL;
1617  forcesSize = 0;
1618 
1619  allocate_device<double>(&energies_virials, energies_virials_SIZE);
1620 }
static __thread float4 * forces
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
ComputeBondedCUDAKernel::~ComputeBondedCUDAKernel ( )

Definition at line 1625 of file ComputeBondedCUDAKernel.cu.

References cudaCheck.

1625  {
1626  cudaCheck(cudaSetDevice(deviceID));
1627 
1628  deallocate_device<double>(&energies_virials);
1629  // deallocate_device<BondedVirial>(&virial);
1630 
1631  if (tupleData != NULL) deallocate_device<char>(&tupleData);
1632  if (xyzq != NULL) deallocate_device<float4>(&xyzq);
1633  if (forces != NULL) deallocate_device<FORCE_TYPE>(&forces);
1634 
1635  if (bondValues != NULL) deallocate_device<CudaBondValue>(&bondValues);
1636  if (angleValues != NULL) deallocate_device<CudaAngleValue>(&angleValues);
1637  if (dihedralValues != NULL) deallocate_device<CudaDihedralValue>(&dihedralValues);
1638  if (improperValues != NULL) deallocate_device<CudaDihedralValue>(&improperValues);
1639  if (crosstermValues != NULL) deallocate_device<CudaCrosstermValue>(&crosstermValues);
1640 }
static __thread float4 * forces
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
#define cudaCheck(stmt)
Definition: CudaUtils.h:79

Member Function Documentation

void ComputeBondedCUDAKernel::bondedForce ( const double  scale14,
const int  atomStorageSize,
const bool  doEnergy,
const bool  doVirial,
const bool  doSlow,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
const float  r2_delta,
const int  r2_delta_expc,
const float4 *  h_xyzq,
FORCE_TYPE h_forces,
double *  h_energies,
cudaStream_t  stream 
)

Definition at line 1775 of file ComputeBondedCUDAKernel.cu.

References atomStorageSize, BONDEDFORCESKERNEL_NUM_WARP, CALL, cudaCheck, deviceCUDA, energies_virials_SIZE, getAllForceSize(), getForceSize(), getForceStride(), DeviceCUDA::getMaxNumBlocks(), stream, and WARPSIZE.

1782  {
1783 
1784  int forceStorageSize = getAllForceSize(atomStorageSize, true);
1785  int forceCopySize = getAllForceSize(atomStorageSize, doSlow);
1786  int forceStride = getForceStride(atomStorageSize);
1787 
1788  int forceSize = getForceSize(atomStorageSize);
1789  int startNbond = forceSize;
1790  int startSlow = 2*forceSize;
1791 
1792  // Re-allocate coordinate and force arrays if neccessary
1793  reallocate_device<float4>(&xyzq, &xyzqSize, atomStorageSize, 1.4f);
1794  reallocate_device<FORCE_TYPE>(&forces, &forcesSize, forceStorageSize, 1.4f);
1795 
1796  // Copy coordinates to device
1797  copy_HtoD<float4>(h_xyzq, xyzq, atomStorageSize, stream);
1798 
1799  // Clear force array
1800  clear_device_array<FORCE_TYPE>(forces, forceCopySize, stream);
1801  if (doEnergy || doVirial) {
1802  clear_device_array<double>(energies_virials, energies_virials_SIZE, stream);
1803  }
1804 
1805  float one_scale14 = (float)(1.0 - scale14);
1806 
1807  // If doSlow = false, these exclusions are not calculated
1808  int numExclusionsDoSlow = doSlow ? numExclusions : 0;
1809 
1810  int nthread = BONDEDFORCESKERNEL_NUM_WARP * WARPSIZE;
1811 
1812  int numBondsTB = (numBonds + nthread - 1)/nthread;
1813  int numAnglesTB = (numAngles + nthread - 1)/nthread;
1814  int numDihedralsTB = (numDihedrals + nthread - 1)/nthread;
1815  int numImpropersTB = (numImpropers + nthread - 1)/nthread;
1816  int numExclusionsTB= (numExclusionsDoSlow + nthread - 1)/nthread;
1817  int numCrosstermsTB= (numCrossterms + nthread - 1)/nthread;
1818 
1819  int nblock = numBondsTB + numAnglesTB + numDihedralsTB + numImpropersTB +
1820  numExclusionsTB + numCrosstermsTB;
1821  int shmem_size = 0;
1822 
1823  // printf("%d %d %d %d %d %d nblock %d\n",
1824  // numBonds, numAngles, numDihedrals, numImpropers, numModifiedExclusions, numExclusions, nblock);
1825 
1826  int max_nblock = deviceCUDA->getMaxNumBlocks();
1827 
1828  int start = 0;
1829  while (start < nblock)
1830  {
1831  int nleft = nblock - start;
1832  int nblock_use = min(max_nblock, nleft);
1833 
1834 #define CALL(DOENERGY, DOVIRIAL) \
1835  bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> \
1836  <<< nblock_use, nthread, shmem_size, stream >>> \
1837  (start, numBonds, bonds, bondValues, \
1838  numAngles, angles, angleValues, \
1839  numDihedrals, dihedrals, dihedralValues, \
1840  numImpropers, impropers, improperValues, \
1841  numExclusionsDoSlow, exclusions, \
1842  numCrossterms, crossterms, crosstermValues, \
1843  cutoff2, \
1844  r2_delta, r2_delta_expc, \
1845  cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
1846  cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex(), \
1847  xyzq, forceStride, \
1848  lata, latb, latc, \
1849  forces, &forces[startSlow], energies_virials);
1850 
1851  if (!doEnergy && !doVirial) CALL(0, 0);
1852  if (!doEnergy && doVirial) CALL(0, 1);
1853  if (doEnergy && !doVirial) CALL(1, 0);
1854  if (doEnergy && doVirial) CALL(1, 1);
1855 
1856 #undef CALL
1857  cudaCheck(cudaGetLastError());
1858 
1859  start += nblock_use;
1860  }
1861 
1863  nblock = (numModifiedExclusions + nthread - 1)/nthread;
1864 
1865  bool doElect = (one_scale14 == 0.0f) ? false : true;
1866 
1867  start = 0;
1868  while (start < nblock)
1869  {
1870  int nleft = nblock - start;
1871  int nblock_use = min(max_nblock, nleft);
1872 
1873 #define CALL(DOENERGY, DOVIRIAL, DOELECT) \
1874  modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
1875  <<< nblock_use, nthread, shmem_size, stream >>> \
1876  (start, numModifiedExclusions, modifiedExclusions, \
1877  doSlow, one_scale14, cutoff2, \
1878  cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
1879  cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
1880  cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
1881  xyzq, forceStride, lata, latb, latc, \
1882  &forces[startNbond], &forces[startSlow], energies_virials);
1883 
1884  if (!doEnergy && !doVirial && !doElect) CALL(0, 0, 0);
1885  if (!doEnergy && doVirial && !doElect) CALL(0, 1, 0);
1886  if (doEnergy && !doVirial && !doElect) CALL(1, 0, 0);
1887  if (doEnergy && doVirial && !doElect) CALL(1, 1, 0);
1888 
1889  if (!doEnergy && !doVirial && doElect) CALL(0, 0, 1);
1890  if (!doEnergy && doVirial && doElect) CALL(0, 1, 1);
1891  if (doEnergy && !doVirial && doElect) CALL(1, 0, 1);
1892  if (doEnergy && doVirial && doElect) CALL(1, 1, 1);
1893 
1894 #undef CALL
1895  cudaCheck(cudaGetLastError());
1896 
1897  start += nblock_use;
1898  }
1899 
1900  copy_DtoH<FORCE_TYPE>(forces, h_forces, forceCopySize, stream);
1901  if (doEnergy || doVirial) {
1902  copy_DtoH<double>(energies_virials, h_energies_virials, energies_virials_SIZE, stream);
1903  }
1904 
1905 }
int getForceStride(const int atomStorageSize)
static __thread float4 * forces
int getAllForceSize(const int atomStorageSize, const bool doSlow)
#define BONDEDFORCESKERNEL_NUM_WARP
__thread cudaStream_t stream
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
int getForceSize(const int atomStorageSize)
int getMaxNumBlocks()
Definition: DeviceCUDA.C:415
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ xyzq
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
#define WARPSIZE
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
#define CALL(DOENERGY, DOVIRIAL)
int ComputeBondedCUDAKernel::getAllForceSize ( const int  atomStorageSize,
const bool  doSlow 
)

Definition at line 1755 of file ComputeBondedCUDAKernel.cu.

References getForceSize().

Referenced by bondedForce().

1755  {
1756 
1757  int forceSize = getForceSize(atomStorageSize);
1758 
1759  if (numModifiedExclusions > 0 || numExclusions > 0) {
1760  if (doSlow) {
1761  // All three force arrays [normal, nbond, slow]
1762  forceSize *= 3;
1763  } else {
1764  // Two force arrays [normal, nbond]
1765  forceSize *= 2;
1766  }
1767  }
1768 
1769  return forceSize;
1770 }
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
int getForceSize(const int atomStorageSize)
int ComputeBondedCUDAKernel::getForceSize ( const int  atomStorageSize)

Definition at line 1744 of file ComputeBondedCUDAKernel.cu.

References getForceStride().

Referenced by bondedForce(), and getAllForceSize().

1744  {
1745 #ifdef USE_STRIDED_FORCE
1746  return (3*getForceStride(atomStorageSize));
1747 #else
1748  return (3*atomStorageSize);
1749 #endif
1750 }
int getForceStride(const int atomStorageSize)
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
int ComputeBondedCUDAKernel::getForceStride ( const int  atomStorageSize)

Definition at line 1732 of file ComputeBondedCUDAKernel.cu.

References FORCE_TYPE.

Referenced by bondedForce(), and getForceSize().

1732  {
1733 #ifdef USE_STRIDED_FORCE
1734  // Align stride to 256 bytes
1735  return ((atomStorageSize*sizeof(FORCE_TYPE) - 1)/256 + 1)*256/sizeof(FORCE_TYPE);
1736 #else
1737  return 1;
1738 #endif
1739 }
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int atomStorageSize
#define FORCE_TYPE
void ComputeBondedCUDAKernel::setupAngleValues ( int  numAngleValues,
CudaAngleValue h_angleValues 
)

Definition at line 1647 of file ComputeBondedCUDAKernel.cu.

1647  {
1648  allocate_device<CudaAngleValue>(&angleValues, numAngleValues);
1649  copy_HtoD_sync<CudaAngleValue>(h_angleValues, angleValues, numAngleValues);
1650 }
void ComputeBondedCUDAKernel::setupBondValues ( int  numBondValues,
CudaBondValue h_bondValues 
)

Definition at line 1642 of file ComputeBondedCUDAKernel.cu.

1642  {
1643  allocate_device<CudaBondValue>(&bondValues, numBondValues);
1644  copy_HtoD_sync<CudaBondValue>(h_bondValues, bondValues, numBondValues);
1645 }
void ComputeBondedCUDAKernel::setupCrosstermValues ( int  numCrosstermValues,
CudaCrosstermValue h_crosstermValues 
)

Definition at line 1662 of file ComputeBondedCUDAKernel.cu.

1662  {
1663  allocate_device<CudaCrosstermValue>(&crosstermValues, numCrosstermValues);
1664  copy_HtoD_sync<CudaCrosstermValue>(h_crosstermValues, crosstermValues, numCrosstermValues);
1665 }
void ComputeBondedCUDAKernel::setupDihedralValues ( int  numDihedralValues,
CudaDihedralValue h_dihedralValues 
)

Definition at line 1652 of file ComputeBondedCUDAKernel.cu.

1652  {
1653  allocate_device<CudaDihedralValue>(&dihedralValues, numDihedralValues);
1654  copy_HtoD_sync<CudaDihedralValue>(h_dihedralValues, dihedralValues, numDihedralValues);
1655 }
void ComputeBondedCUDAKernel::setupImproperValues ( int  numImproperValues,
CudaDihedralValue h_improperValues 
)

Definition at line 1657 of file ComputeBondedCUDAKernel.cu.

1657  {
1658  allocate_device<CudaDihedralValue>(&improperValues, numImproperValues);
1659  copy_HtoD_sync<CudaDihedralValue>(h_improperValues, improperValues, numImproperValues);
1660 }
void ComputeBondedCUDAKernel::update ( const int  numBondsIn,
const int  numAnglesIn,
const int  numDihedralsIn,
const int  numImpropersIn,
const int  numModifiedExclusionsIn,
const int  numExclusionsIn,
const int  numCrosstermsIn,
const char *  h_tupleData,
cudaStream_t  stream 
)

Definition at line 1670 of file ComputeBondedCUDAKernel.cu.

References stream, and warpAlign().

1679  {
1680 
1681  numBonds = numBondsIn;
1682  numAngles = numAnglesIn;
1683  numDihedrals = numDihedralsIn;
1684  numImpropers = numImpropersIn;
1685  numModifiedExclusions = numModifiedExclusionsIn;
1686  numExclusions = numExclusionsIn;
1687  numCrossterms = numCrosstermsIn;
1688 
1689  int numBondsWA = warpAlign(numBonds);
1690  int numAnglesWA = warpAlign(numAngles);
1691  int numDihedralsWA = warpAlign(numDihedrals);
1692  int numImpropersWA = warpAlign(numImpropers);
1693  int numModifiedExclusionsWA = warpAlign(numModifiedExclusions);
1694  int numExclusionsWA = warpAlign(numExclusions);
1695  int numCrosstermsWA = warpAlign(numCrossterms);
1696 
1697  int sizeTot = numBondsWA*sizeof(CudaBond) + numAnglesWA*sizeof(CudaAngle) +
1698  numDihedralsWA*sizeof(CudaDihedral) + numImpropersWA*sizeof(CudaDihedral) +
1699  numModifiedExclusionsWA*sizeof(CudaExclusion) + numExclusionsWA*sizeof(CudaExclusion) +
1700  numCrosstermsWA*sizeof(CudaCrossterm);
1701 
1702  reallocate_device<char>(&tupleData, &tupleDataSize, sizeTot, 1.4f);
1703  copy_HtoD<char>(h_tupleData, tupleData, sizeTot, stream);
1704 
1705  // Setup pointers
1706  int pos = 0;
1707  bonds = (CudaBond *)&tupleData[pos];
1708  pos += numBondsWA*sizeof(CudaBond);
1709 
1710  angles = (CudaAngle* )&tupleData[pos];
1711  pos += numAnglesWA*sizeof(CudaAngle);
1712 
1713  dihedrals = (CudaDihedral* )&tupleData[pos];
1714  pos += numDihedralsWA*sizeof(CudaDihedral);
1715 
1716  impropers = (CudaDihedral* )&tupleData[pos];
1717  pos += numImpropersWA*sizeof(CudaDihedral);
1718 
1719  modifiedExclusions = (CudaExclusion* )&tupleData[pos];
1720  pos += numModifiedExclusionsWA*sizeof(CudaExclusion);
1721 
1722  exclusions = (CudaExclusion* )&tupleData[pos];
1723  pos += numExclusionsWA*sizeof(CudaExclusion);
1724 
1725  crossterms = (CudaCrossterm* )&tupleData[pos];
1726  pos += numCrosstermsWA*sizeof(CudaCrossterm);
1727 }
static int warpAlign(const int n)
static __thread unsigned int * exclusions
__thread cudaStream_t stream
static int ComputeBondedCUDAKernel::warpAlign ( const int  n)
inlinestatic

Definition at line 145 of file ComputeBondedCUDAKernel.h.

References WARPSIZE.

Referenced by update().

145 {return ((n + WARPSIZE - 1)/WARPSIZE)*WARPSIZE;}
#define WARPSIZE

The documentation for this class was generated from the following files: