ComputeBondedCUDAKernel.cu File Reference

#include <math.h>
#include <cuda.h>
#include <namd_cub/cub.cuh>
#include "ComputeBondedCUDAKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Defines

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL(DOENERGY, DOVIRIAL)
#define CALL(DOENERGY, DOVIRIAL, DOELECT)

Functions

__device__ long long int lliroundf (float f)
__device__ unsigned long long int llitoulli (long long int l)
template<typename T >
__forceinline__ __device__ void convertForces (const float x, const float y, const float z, T &Tx, T &Ty, T &Tz)
template<>
__forceinline__ __device__ void convertForces< long long int > (const float x, const float y, const float z, long long int &Tx, long long int &Ty, long long int &Tz)
template<typename T >
__forceinline__ __device__ void calcComponentForces (float fij, const float dx, const float dy, const float dz, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij, const float dx, const float dy, const float dz, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T >
__forceinline__ __device__ void calcComponentForces (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T >
__forceinline__ __device__ void storeForces (const T fx, const T fy, const T fz, const int ind, const int stride, T *force)
template<>
__forceinline__ __device__ void storeForces< long long int > (const long long int fx, const long long int fy, const long long int fz, const int ind, const int stride, long long int *force)
template<typename T , bool doEnergy, bool doVirial>
__device__ void bondForce (const int index, const CudaBond *__restrict__ bondList, const CudaBondValue *__restrict__ bondValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
template<typename T , bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const bool doSlow, const float one_scale14, const int vdwCoefTableWidth, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, double &energyVdw, T *__restrict__ forceNbond, double &energyNbond, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialNbond, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialSlow)
template<typename T , bool doEnergy, bool doVirial>
__device__ void exclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const float r2_delta, const int r2_delta_expc, cudaTextureObject_t r2_table_tex, cudaTextureObject_t exclusionTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialSlow)
template<typename T , bool doEnergy, bool doVirial>
__device__ void angleForce (const int index, const CudaAngle *__restrict__ angleList, const CudaAngleValue *__restrict__ angleValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
template<bool doEnergy>
__forceinline__ __device__ void diheComp (const CudaDihedralValue *dihedralValues, int ic, const float sin_phi, const float cos_phi, const float scale, float &df, double &e)
template<typename T , bool doEnergy, bool doVirial>
__device__ void diheForce (const int index, const CudaDihedral *__restrict__ diheList, const CudaDihedralValue *__restrict__ dihedralValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
__device__ __forceinline__ float3 cross (const float3 v1, const float3 v2)
__device__ __forceinline__ float dot (const float3 v1, const float3 v2)
template<typename T , bool doEnergy, bool doVirial>
__device__ void crosstermForce (const int index, const CudaCrossterm *__restrict__ crosstermList, const CudaCrosstermValue *__restrict__ crosstermValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)

Variables

__thread DeviceCUDAdeviceCUDA

Define Documentation

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL ( DOENERGY,
DOVIRIAL,
DOELECT   ) 
Value:
modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numModifiedExclusions, modifiedExclusions, \
    doSlow, one_scale14, cutoff2, \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
    cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
    cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
    xyzq, forceStride, lata, latb, latc, \
    &forces[startNbond], &forces[startSlow], energies_virials);
#define CALL ( DOENERGY,
DOVIRIAL   ) 
Value:
bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numBonds, bonds, bondValues, \
    numAngles, angles, angleValues, \
    numDihedrals, dihedrals, dihedralValues, \
    numImpropers, impropers, improperValues, \
    numExclusionsDoSlow, exclusions, \
    numCrossterms, crossterms, crosstermValues, \
    cutoff2, \
    r2_delta, r2_delta_expc, \
    cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
    cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex(), \
    xyzq, forceStride, \
    lata, latb, latc, \
    forces, &forces[startSlow], energies_virials);

Referenced by cuda_nonbonded_forces(), CudaComputeGBISKernel::GBISphase2(), and CudaComputeNonbondedKernel::nonbondedForce().


Function Documentation

template<typename T , bool doEnergy, bool doVirial>
__device__ void angleForce ( const int  index,
const CudaAngle *__restrict__  angleList,
const CudaAngleValue *__restrict__  angleValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 440 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaAngle::i, CudaAngle::ioffsetXYZ, CudaAngle::itype, CudaAngle::j, CudaAngleValue::k, CudaAngle::k, CudaAngleValue::k_ub, CudaAngle::koffsetXYZ, CudaAngleValue::normal, CudaAngleValue::r_ub, CudaAngle::scale, and CudaAngleValue::theta0.

00452     {
00453 
00454   CudaAngle al = angleList[index];
00455 
00456   float ishx = al.ioffsetXYZ.x*lata.x + al.ioffsetXYZ.y*latb.x + al.ioffsetXYZ.z*latc.x;
00457   float ishy = al.ioffsetXYZ.x*lata.y + al.ioffsetXYZ.y*latb.y + al.ioffsetXYZ.z*latc.y;
00458   float ishz = al.ioffsetXYZ.x*lata.z + al.ioffsetXYZ.y*latb.z + al.ioffsetXYZ.z*latc.z;
00459 
00460   float kshx = al.koffsetXYZ.x*lata.x + al.koffsetXYZ.y*latb.x + al.koffsetXYZ.z*latc.x;
00461   float kshy = al.koffsetXYZ.x*lata.y + al.koffsetXYZ.y*latb.y + al.koffsetXYZ.z*latc.y;
00462   float kshz = al.koffsetXYZ.x*lata.z + al.koffsetXYZ.y*latb.z + al.koffsetXYZ.z*latc.z;
00463 
00464   float xij = xyzq[al.i].x + ishx - xyzq[al.j].x;
00465   float yij = xyzq[al.i].y + ishy - xyzq[al.j].y;
00466   float zij = xyzq[al.i].z + ishz - xyzq[al.j].z;
00467 
00468   float xkj = xyzq[al.k].x + kshx - xyzq[al.j].x;
00469   float ykj = xyzq[al.k].y + kshy - xyzq[al.j].y;
00470   float zkj = xyzq[al.k].z + kshz - xyzq[al.j].z;
00471 
00472   float rij_inv = rsqrtf(xij*xij + yij*yij + zij*zij);
00473   float rkj_inv = rsqrtf(xkj*xkj + ykj*ykj + zkj*zkj);
00474 
00475   float xijr = xij*rij_inv;
00476   float yijr = yij*rij_inv;
00477   float zijr = zij*rij_inv;
00478   float xkjr = xkj*rkj_inv;
00479   float ykjr = ykj*rkj_inv;
00480   float zkjr = zkj*rkj_inv;
00481   float cos_theta = xijr*xkjr + yijr*ykjr + zijr*zkjr;
00482 
00483   CudaAngleValue angleValue = angleValues[al.itype];
00484   angleValue.k *= al.scale;
00485 
00486   float diff;
00487   if (angleValue.normal == 1) {
00488     // Restrict values of cos_theta to the interval [-0.999 ... 0.999]
00489     cos_theta = min(0.999f, max(-0.999f, cos_theta));
00490     float theta = acosf(cos_theta);
00491     diff = theta - angleValue.theta0;
00492   } else {
00493     diff = cos_theta - angleValue.theta0;
00494   }
00495 
00496   if (doEnergy) {
00497     energy += (double)(angleValue.k * diff * diff);
00498   }
00499   if (angleValue.normal == 1) {
00500     float inv_sin_theta = rsqrtf(1.0f - cos_theta*cos_theta);
00501     if (inv_sin_theta > 1.0e6) {
00502       diff = (diff < 0.0f) ? 1.0f : -1.0f;
00503     } else {
00504       diff *= -inv_sin_theta;
00505     }
00506   }
00507   diff *= -2.0f*angleValue.k;
00508 
00509   float dtxi = rij_inv*(xkjr - cos_theta*xijr);
00510   float dtxj = rkj_inv*(xijr - cos_theta*xkjr);
00511   float dtyi = rij_inv*(ykjr - cos_theta*yijr);
00512   float dtyj = rkj_inv*(yijr - cos_theta*ykjr);
00513   float dtzi = rij_inv*(zkjr - cos_theta*zijr);
00514   float dtzj = rkj_inv*(zijr - cos_theta*zkjr);
00515 
00516   T T_dtxi, T_dtyi, T_dtzi;
00517   T T_dtxj, T_dtyj, T_dtzj;
00518   calcComponentForces<T>(diff, dtxi, dtyi, dtzi, T_dtxi, T_dtyi, T_dtzi);
00519   calcComponentForces<T>(diff, dtxj, dtyj, dtzj, T_dtxj, T_dtyj, T_dtzj);
00520   T T_dtxk = -T_dtxi - T_dtxj;
00521   T T_dtyk = -T_dtyi - T_dtyj;
00522   T T_dtzk = -T_dtzi - T_dtzj;
00523   storeForces<T>(T_dtxk, T_dtyk, T_dtzk, al.j, stride, force);
00524 
00525   if (angleValue.k_ub) {
00526     float xik = xij - xkj;
00527     float yik = yij - ykj;
00528     float zik = zij - zkj;
00529     float rik_inv = rsqrtf(xik*xik + yik*yik + zik*zik);
00530     float rik = 1.0f/rik_inv;
00531     float diff_ub = rik - angleValue.r_ub;
00532     if (doEnergy) {
00533       energy += (double)(angleValue.k_ub * diff_ub * diff_ub);
00534     }
00535     diff_ub *= -2.0f*angleValue.k_ub*rik_inv;
00536     T T_dxik, T_dyik, T_dzik;
00537     calcComponentForces<T>(diff_ub, xik, yik, zik, T_dxik, T_dyik, T_dzik);
00538     T_dtxi += T_dxik;
00539     T_dtyi += T_dyik;
00540     T_dtzi += T_dzik;
00541     T_dtxj -= T_dxik;
00542     T_dtyj -= T_dyik;
00543     T_dtzj -= T_dzik;
00544   }
00545 
00546   storeForces<T>(T_dtxi, T_dtyi, T_dtzi, al.i, stride, force);
00547   storeForces<T>(T_dtxj, T_dtyj, T_dtzj, al.k, stride, force);
00548 
00549   // Store virial
00550   if (doVirial) {
00551 #ifdef WRITE_FULL_VIRIALS
00552     float fxi = ((float)T_dtxi)*force_to_float;
00553     float fyi = ((float)T_dtyi)*force_to_float;
00554     float fzi = ((float)T_dtzi)*force_to_float;
00555     float fxk = ((float)T_dtxj)*force_to_float;
00556     float fyk = ((float)T_dtyj)*force_to_float;
00557     float fzk = ((float)T_dtzj)*force_to_float;
00558     virial.xx = (fxi*xij) + (fxk*xkj);
00559     virial.xy = (fxi*yij) + (fxk*ykj);
00560     virial.xz = (fxi*zij) + (fxk*zkj);
00561     virial.yx = (fyi*xij) + (fyk*xkj);
00562     virial.yy = (fyi*yij) + (fyk*ykj);
00563     virial.yz = (fyi*zij) + (fyk*zkj);
00564     virial.zx = (fzi*xij) + (fzk*xkj);
00565     virial.zy = (fzi*yij) + (fzk*ykj);
00566     virial.zz = (fzi*zij) + (fzk*zkj);
00567 #endif
00568   }
00569 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void bondForce ( const int  index,
const CudaBond *__restrict__  bondList,
const CudaBondValue *__restrict__  bondValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 145 of file ComputeBondedCUDAKernel.cu.

References CudaBond::i, CudaBond::ioffsetXYZ, CudaBond::itype, CudaBond::j, CudaBondValue::k, CudaBond::scale, CudaBondValue::x0, and CudaBondValue::x1.

00158     {
00159 
00160   CudaBond bl = bondList[index];
00161   CudaBondValue bondValue = bondValues[bl.itype];
00162   if (bondValue.x0 == 0.0f) return;
00163 
00164   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00165   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00166   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00167 
00168   float4 xyzqi = xyzq[bl.i];
00169   float4 xyzqj = xyzq[bl.j];
00170 
00171   float xij = xyzqi.x + shx - xyzqj.x;
00172   float yij = xyzqi.y + shy - xyzqj.y;
00173   float zij = xyzqi.z + shz - xyzqj.z;
00174 
00175   float r = sqrtf(xij*xij + yij*yij + zij*zij);
00176 
00177   float db = r - bondValue.x0;
00178   if (bondValue.x1) {
00179     // in this case, the bond represents a harmonic wall potential
00180     // where x0 is the lower wall and x1 is the upper
00181     db = (r > bondValue.x1 ? r - bondValue.x1 : (r > bondValue.x0 ? 0 : db));
00182   }
00183   float fij = db * bondValue.k * bl.scale;
00184  
00185   if (doEnergy) {
00186     energy += (double)(fij*db);
00187   }
00188   fij *= -2.0f/r;
00189   
00190   T T_fxij, T_fyij, T_fzij;
00191   calcComponentForces<T>(fij, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00192   
00193   // Store forces
00194   storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, force);
00195   storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, force);
00196 
00197   // Store virial
00198   if (doVirial) {
00199 #ifdef WRITE_FULL_VIRIALS
00200     float fxij = fij*xij;
00201     float fyij = fij*yij;
00202     float fzij = fij*zij;
00203     virial.xx = (fxij*xij);
00204     virial.xy = (fxij*yij);
00205     virial.xz = (fxij*zij);
00206     virial.yx = (fyij*xij);
00207     virial.yy = (fyij*yij);
00208     virial.yz = (fyij*zij);
00209     virial.zx = (fzij*xij);
00210     virial.zy = (fzij*yij);
00211     virial.zz = (fzij*zij);
00212 #endif
00213   }
00214 }

template<typename T >
__forceinline__ __device__ void calcComponentForces ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
T &  fxij,
T &  fyij,
T &  fzij 
) [inline]

Definition at line 86 of file ComputeBondedCUDAKernel.cu.

00091                              {
00092 
00093   fxij = (T)(fij1*dx1 + fij2*dx2);
00094   fyij = (T)(fij1*dy1 + fij2*dy2);
00095   fzij = (T)(fij1*dz1 + fij2*dz2);
00096 }

template<typename T >
__forceinline__ __device__ void calcComponentForces ( float  fij,
const float  dx,
const float  dy,
const float  dz,
T &  fxij,
T &  fyij,
T &  fzij 
) [inline]

Definition at line 57 of file ComputeBondedCUDAKernel.cu.

00060                              {
00061 
00062   fxij = (T)(fij*dx);
00063   fyij = (T)(fij*dy);
00064   fzij = (T)(fij*dz);
00065 
00066 }

template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
) [inline]
template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij,
const float  dx,
const float  dy,
const float  dz,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
) [inline]
template<typename T >
__forceinline__ __device__ void convertForces ( const float  x,
const float  y,
const float  z,
T &  Tx,
T &  Ty,
T &  Tz 
) [inline]

Definition at line 37 of file ComputeBondedCUDAKernel.cu.

00038                        {
00039 
00040   Tx = (T)(x);
00041   Ty = (T)(y);
00042   Tz = (T)(z);
00043 }

template<>
__forceinline__ __device__ void convertForces< long long int > ( const float  x,
const float  y,
const float  z,
long long int &  Tx,
long long int &  Ty,
long long int &  Tz 
) [inline]
__device__ __forceinline__ float3 cross ( const float3  v1,
const float3  v2 
)
template<typename T , bool doEnergy, bool doVirial>
__device__ void crosstermForce ( const int  index,
const CudaCrossterm *__restrict__  crosstermList,
const CudaCrosstermValue *__restrict__  crosstermValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]
template<bool doEnergy>
__forceinline__ __device__ void diheComp ( const CudaDihedralValue dihedralValues,
int  ic,
const float  sin_phi,
const float  cos_phi,
const float  scale,
float &  df,
double &  e 
) [inline]

Definition at line 578 of file ComputeBondedCUDAKernel.cu.

References CudaDihedralValue::delta, CudaDihedralValue::k, M_PI, and CudaDihedralValue::n.

00579                                                                                      {
00580 
00581   df = 0.0f;
00582   if (doEnergy) e = 0.0;
00583 
00584   float phi = atan2f(sin_phi, cos_phi);
00585 
00586   bool lrep = true;
00587   while (lrep) {
00588     CudaDihedralValue dihedralValue = dihedralValues[ic];
00589     dihedralValue.k *= scale;
00590 
00591     // Last dihedral has n low bit set to 0
00592     lrep = (dihedralValue.n & 1);
00593     dihedralValue.n >>= 1;
00594     if (dihedralValue.n) {
00595       float nf = dihedralValue.n;
00596       float x = nf*phi - dihedralValue.delta;
00597       if (doEnergy) {
00598         float sin_x, cos_x;
00599         sincosf(x, &sin_x, &cos_x);
00600         e += (double)(dihedralValue.k*(1.0f + cos_x));
00601         df += (double)(nf*dihedralValue.k*sin_x);
00602       } else {
00603         float sin_x = sinf(x);
00604         df += (double)(nf*dihedralValue.k*sin_x);
00605       }
00606     } else {
00607       float diff = phi - dihedralValue.delta;
00608       if (diff < -(float)(M_PI)) diff += (float)(2.0*M_PI);
00609       if (diff >  (float)(M_PI)) diff -= (float)(2.0*M_PI);
00610       if (doEnergy) {
00611         e += (double)(dihedralValue.k*diff*diff);
00612       }
00613       df -= 2.0f*dihedralValue.k*diff;
00614     }
00615     ic++;
00616   }
00617 
00618 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void diheForce ( const int  index,
const CudaDihedral *__restrict__  diheList,
const CudaDihedralValue *__restrict__  dihedralValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 622 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaDihedral::i, CudaDihedral::ioffsetXYZ, CudaDihedral::itype, CudaDihedral::j, CudaDihedral::joffsetXYZ, CudaDihedral::k, CudaDihedral::l, CudaDihedral::loffsetXYZ, and CudaDihedral::scale.

00634     {
00635 
00636   CudaDihedral dl = diheList[index];
00637 
00638   float ishx = dl.ioffsetXYZ.x*lata.x + dl.ioffsetXYZ.y*latb.x + dl.ioffsetXYZ.z*latc.x;
00639   float ishy = dl.ioffsetXYZ.x*lata.y + dl.ioffsetXYZ.y*latb.y + dl.ioffsetXYZ.z*latc.y;
00640   float ishz = dl.ioffsetXYZ.x*lata.z + dl.ioffsetXYZ.y*latb.z + dl.ioffsetXYZ.z*latc.z;
00641 
00642   float jshx = dl.joffsetXYZ.x*lata.x + dl.joffsetXYZ.y*latb.x + dl.joffsetXYZ.z*latc.x;
00643   float jshy = dl.joffsetXYZ.x*lata.y + dl.joffsetXYZ.y*latb.y + dl.joffsetXYZ.z*latc.y;
00644   float jshz = dl.joffsetXYZ.x*lata.z + dl.joffsetXYZ.y*latb.z + dl.joffsetXYZ.z*latc.z;
00645 
00646   float lshx = dl.loffsetXYZ.x*lata.x + dl.loffsetXYZ.y*latb.x + dl.loffsetXYZ.z*latc.x;
00647   float lshy = dl.loffsetXYZ.x*lata.y + dl.loffsetXYZ.y*latb.y + dl.loffsetXYZ.z*latc.y;
00648   float lshz = dl.loffsetXYZ.x*lata.z + dl.loffsetXYZ.y*latb.z + dl.loffsetXYZ.z*latc.z;
00649 
00650   float xij = xyzq[dl.i].x + ishx - xyzq[dl.j].x;
00651   float yij = xyzq[dl.i].y + ishy - xyzq[dl.j].y;
00652   float zij = xyzq[dl.i].z + ishz - xyzq[dl.j].z;
00653 
00654   float xjk = xyzq[dl.j].x + jshx - xyzq[dl.k].x;
00655   float yjk = xyzq[dl.j].y + jshy - xyzq[dl.k].y;
00656   float zjk = xyzq[dl.j].z + jshz - xyzq[dl.k].z;
00657 
00658   float xlk = xyzq[dl.l].x + lshx - xyzq[dl.k].x;
00659   float ylk = xyzq[dl.l].y + lshy - xyzq[dl.k].y;
00660   float zlk = xyzq[dl.l].z + lshz - xyzq[dl.k].z;
00661 
00662   // A=F^G, B=H^G.
00663   float ax = yij*zjk - zij*yjk;
00664   float ay = zij*xjk - xij*zjk;
00665   float az = xij*yjk - yij*xjk;
00666   float bx = ylk*zjk - zlk*yjk;
00667   float by = zlk*xjk - xlk*zjk;
00668   float bz = xlk*yjk - ylk*xjk;
00669 
00670   float ra2 = ax*ax + ay*ay + az*az;
00671   float rb2 = bx*bx + by*by + bz*bz;
00672   float rg = sqrtf(xjk*xjk + yjk*yjk + zjk*zjk);
00673 
00674   //    if((ra2 <= rxmin2) .or. (rb2 <= rxmin2) .or. (rg <= rxmin)) then
00675   //          nlinear = nlinear + 1
00676   //       endif
00677 
00678   float rgr = 1.0f / rg;
00679   float ra2r = 1.0f / ra2;
00680   float rb2r = 1.0f / rb2;
00681   float rabr = sqrtf(ra2r*rb2r);
00682 
00683   float cos_phi = (ax*bx + ay*by + az*bz)*rabr;
00684   //
00685   // Note that sin(phi).G/|G|=B^A/(|A|.|B|)
00686   // which can be simplify to sin(phi)=|G|H.A/(|A|.|B|)
00687   float sin_phi = rg*rabr*(ax*xlk + ay*ylk + az*zlk);
00688   //
00689   //     Energy and derivative contributions.
00690 
00691   float df;
00692   double e;
00693   diheComp<doEnergy>(dihedralValues, dl.itype, sin_phi, cos_phi, dl.scale, df, e);
00694 
00695   if (doEnergy) energy += e;
00696 
00697   //
00698   //     Compute derivatives wrt catesian coordinates.
00699   //
00700   // GAA=dE/dphi.|G|/A^2, GBB=dE/dphi.|G|/B^2, FG=F.G, HG=H.G
00701   //  FGA=dE/dphi*F.G/(|G|A^2), HGB=dE/dphi*H.G/(|G|B^2)
00702 
00703   float fg = xij*xjk + yij*yjk + zij*zjk;
00704   float hg = xlk*xjk + ylk*yjk + zlk*zjk;
00705   ra2r *= df;
00706   rb2r *= df;
00707   float fga = fg*ra2r*rgr;
00708   float hgb = hg*rb2r*rgr;
00709   float gaa = ra2r*rg;
00710   float gbb = rb2r*rg;
00711   // DFi=dE/dFi, DGi=dE/dGi, DHi=dE/dHi.
00712 
00713   // Store forces
00714   T T_fix, T_fiy, T_fiz;
00715   calcComponentForces<T>(-gaa, ax, ay, az, T_fix, T_fiy, T_fiz);
00716   storeForces<T>(T_fix, T_fiy, T_fiz, dl.i, stride, force);
00717 
00718   T dgx, dgy, dgz;
00719   calcComponentForces<T>(fga, ax, ay, az, -hgb, bx, by, bz, dgx, dgy, dgz);
00720   T T_fjx = dgx - T_fix;
00721   T T_fjy = dgy - T_fiy;
00722   T T_fjz = dgz - T_fiz;
00723   storeForces<T>(T_fjx, T_fjy, T_fjz, dl.j, stride, force);
00724 
00725   T dhx, dhy, dhz;
00726   calcComponentForces<T>(gbb, bx, by, bz, dhx, dhy, dhz);
00727   T T_fkx = -dhx - dgx;
00728   T T_fky = -dhy - dgy;
00729   T T_fkz = -dhz - dgz;
00730   storeForces<T>(T_fkx, T_fky, T_fkz, dl.k, stride, force);
00731   storeForces<T>(dhx, dhy, dhz, dl.l, stride, force);
00732 
00733   // Store virial
00734   if (doVirial) {
00735 #ifdef WRITE_FULL_VIRIALS
00736     float fix = ((float)T_fix)*force_to_float;
00737     float fiy = ((float)T_fiy)*force_to_float;
00738     float fiz = ((float)T_fiz)*force_to_float;
00739     float fjx = ((float)dgx)*force_to_float;
00740     float fjy = ((float)dgy)*force_to_float;
00741     float fjz = ((float)dgz)*force_to_float;
00742     float flx = ((float)dhx)*force_to_float;
00743     float fly = ((float)dhy)*force_to_float;
00744     float flz = ((float)dhz)*force_to_float;
00745     virial.xx = (fix*xij) + (fjx*xjk) + (flx*xlk);
00746     virial.xy = (fix*yij) + (fjx*yjk) + (flx*ylk);
00747     virial.xz = (fix*zij) + (fjx*zjk) + (flx*zlk);
00748     virial.yx = (fiy*xij) + (fjy*xjk) + (fly*xlk);
00749     virial.yy = (fiy*yij) + (fjy*yjk) + (fly*ylk);
00750     virial.yz = (fiy*zij) + (fjy*zjk) + (fly*zlk);
00751     virial.zx = (fiz*xij) + (fjz*xjk) + (flz*xlk);
00752     virial.zy = (fiz*yij) + (fjz*yjk) + (flz*ylk);
00753     virial.zz = (fiz*zij) + (fjz*zjk) + (flz*zlk);
00754 #endif
00755   }
00756 
00757 }

__device__ __forceinline__ float dot ( const float3  v1,
const float3  v2 
)

Definition at line 767 of file ComputeBondedCUDAKernel.cu.

Referenced by crosstermForce(), and ARestraint::GetDihe().

00767                                                                        {
00768   return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z);
00769 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void exclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const float  r2_delta,
const int  r2_delta_expc,
cudaTextureObject_t  r2_table_tex,
cudaTextureObject_t  exclusionTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialSlow 
) [inline]

Definition at line 350 of file ComputeBondedCUDAKernel.cu.

References __ldg, diffa, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, and table_i.

00371     {
00372 
00373   CudaExclusion bl = exclusionList[index];
00374 
00375   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00376   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00377   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00378 
00379   float4 xyzqi = xyzq[bl.i];
00380   float4 xyzqj = xyzq[bl.j];
00381 
00382   float xij = xyzqi.x + shx - xyzqj.x;
00383   float yij = xyzqi.y + shy - xyzqj.y;
00384   float zij = xyzqi.z + shz - xyzqj.z;
00385 
00386   float r2 = xij*xij + yij*yij + zij*zij;
00387   if (r2 < cutoff2) {
00388     r2 += r2_delta;
00389 
00390     union { float f; int i; } r2i;
00391     r2i.f = r2;
00392     int table_i = (r2i.i >> 17) + r2_delta_expc;  // table_i >= 0
00393 
00394 #if __CUDA_ARCH__ >= 350
00395     float r2_table_val = __ldg(&r2_table[table_i]);
00396 #else
00397     float r2_table_val = tex1Dfetch<float>(r2_table_tex, table_i);
00398 #endif
00399     float diffa = r2 - r2_table_val;
00400     float qq = xyzqi.w * xyzqj.w;
00401 
00402 #if __CUDA_ARCH__ >= 350
00403     float4 slow = __ldg(&exclusionTable[table_i]);
00404 #else
00405     float4 slow = tex1Dfetch<float4>(exclusionTableTex, table_i);
00406 #endif
00407 
00408     if (doEnergy) {
00409       energySlow += (double)(qq*(((diffa * (1.0f/6.0f)*slow.x + 0.25f*slow.y ) * diffa + 0.5f*slow.z ) * diffa + slow.w));
00410     }
00411 
00412     float fSlow = -qq*((diffa * slow.x + slow.y) * diffa + slow.z);
00413 
00414     T T_fxij, T_fyij, T_fzij;
00415     calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00416     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00417     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00418 
00419     // Store virial
00420     if (doVirial) {
00421 #ifdef WRITE_FULL_VIRIALS
00422       float fxij = fSlow*xij;
00423       float fyij = fSlow*yij;
00424       float fzij = fSlow*zij;
00425       virialSlow.xx = (fxij*xij);
00426       virialSlow.xy = (fxij*yij);
00427       virialSlow.xz = (fxij*zij);
00428       virialSlow.yx = (fyij*xij);
00429       virialSlow.yy = (fyij*yij);
00430       virialSlow.yz = (fyij*zij);
00431       virialSlow.zx = (fzij*xij);
00432       virialSlow.zy = (fzij*yij);
00433       virialSlow.zz = (fzij*zij);
00434 #endif
00435     }
00436   }
00437 }

__device__ long long int lliroundf ( float  f  )  [inline]

Definition at line 21 of file ComputeBondedCUDAKernel.cu.

00022 {
00023     long long int l;
00024     asm("cvt.rni.s64.f32  %0, %1;" : "=l"(l) : "f"(f));
00025     return l;
00026 }

__device__ unsigned long long int llitoulli ( long long int  l  )  [inline]

Definition at line 28 of file ComputeBondedCUDAKernel.cu.

00029 {
00030     unsigned long long int u;
00031     asm("mov.b64    %0, %1;" : "=l"(u) : "l"(l));
00032     return u;
00033 }

template<typename T , bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const bool  doSlow,
const float  one_scale14,
const int  vdwCoefTableWidth,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
double &  energyVdw,
T *__restrict__  forceNbond,
double &  energyNbond,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialNbond,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialSlow 
) [inline]

Definition at line 220 of file ComputeBondedCUDAKernel.cu.

References __ldg, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, CudaExclusion::vdwtypei, CudaExclusion::vdwtypej, float2::x, ComputeBondedCUDAKernel::BondedVirial< T >::xx, ComputeBondedCUDAKernel::BondedVirial< T >::xy, ComputeBondedCUDAKernel::BondedVirial< T >::xz, float2::y, ComputeBondedCUDAKernel::BondedVirial< T >::yx, ComputeBondedCUDAKernel::BondedVirial< T >::yy, ComputeBondedCUDAKernel::BondedVirial< T >::yz, ComputeBondedCUDAKernel::BondedVirial< T >::zx, ComputeBondedCUDAKernel::BondedVirial< T >::zy, and ComputeBondedCUDAKernel::BondedVirial< T >::zz.

00246     {
00247 
00248   CudaExclusion bl = exclusionList[index];
00249 
00250   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00251   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00252   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00253 
00254   float4 xyzqi = xyzq[bl.i];
00255   float4 xyzqj = xyzq[bl.j];
00256 
00257   float xij = xyzqi.x + shx - xyzqj.x;
00258   float yij = xyzqi.y + shy - xyzqj.y;
00259   float zij = xyzqi.z + shz - xyzqj.z;
00260 
00261   float r2 = xij*xij + yij*yij + zij*zij;
00262   if (r2 < cutoff2) {
00263 
00264     float rinv = rsqrtf(r2);
00265 
00266     float qq;
00267     if (doElect) qq = one_scale14 * xyzqi.w * xyzqj.w;
00268 
00269     int vdwIndex = bl.vdwtypei + bl.vdwtypej*vdwCoefTableWidth;
00270 #if __CUDA_ARCH__ >= 350
00271     float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00272 #else
00273     float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
00274 #endif
00275 
00276     float4 fi = tex1D<float4>(forceTableTex, rinv);
00277     float4 ei;
00278     if (doEnergy) {
00279       ei = tex1D<float4>(energyTableTex, rinv);
00280       energyVdw   += (double)(ljab.x * ei.z + ljab.y * ei.y);
00281       if (doElect) {
00282         energyNbond += qq * ei.x;
00283         if (doSlow) energySlow  += qq * ei.w;
00284       }
00285     }
00286 
00287     float fNbond;
00288     if (doElect) {
00289       fNbond = -(ljab.x * fi.z + ljab.y * fi.y + qq * fi.x);
00290     } else {
00291       fNbond = -(ljab.x * fi.z + ljab.y * fi.y);
00292     }
00293     T T_fxij, T_fyij, T_fzij;
00294     calcComponentForces<T>(fNbond, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00295     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceNbond);
00296     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceNbond);
00297 
00298     float fSlow;
00299     if (doSlow && doElect) {
00300       fSlow = -qq * fi.w;
00301       T T_fxij, T_fyij, T_fzij;
00302       calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00303       storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00304       storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00305     }
00306 
00307     // Store virial
00308     if (doVirial) {
00309 #ifdef WRITE_FULL_VIRIALS
00310       float fxij = fNbond*xij;
00311       float fyij = fNbond*yij;
00312       float fzij = fNbond*zij;
00313       virialNbond.xx = (fxij*xij);
00314       virialNbond.xy = (fxij*yij);
00315       virialNbond.xz = (fxij*zij);
00316       virialNbond.yx = (fyij*xij);
00317       virialNbond.yy = (fyij*yij);
00318       virialNbond.yz = (fyij*zij);
00319       virialNbond.zx = (fzij*xij);
00320       virialNbond.zy = (fzij*yij);
00321       virialNbond.zz = (fzij*zij);
00322 #endif
00323     }
00324 
00325     // Store virial
00326     if (doVirial && doSlow && doElect) {
00327 #ifdef WRITE_FULL_VIRIALS
00328       float fxij = fSlow*xij;
00329       float fyij = fSlow*yij;
00330       float fzij = fSlow*zij;
00331       virialSlow.xx = (fxij*xij);
00332       virialSlow.xy = (fxij*yij);
00333       virialSlow.xz = (fxij*zij);
00334       virialSlow.yx = (fyij*xij);
00335       virialSlow.yy = (fyij*yij);
00336       virialSlow.yz = (fyij*zij);
00337       virialSlow.zx = (fzij*xij);
00338       virialSlow.zy = (fzij*yij);
00339       virialSlow.zz = (fzij*zij);
00340 #endif
00341     }
00342 
00343   }
00344 }

template<typename T >
__forceinline__ __device__ void storeForces ( const T  fx,
const T  fy,
const T  fz,
const int  ind,
const int  stride,
T *  force 
) [inline]

Definition at line 122 of file ComputeBondedCUDAKernel.cu.

00124                {
00125   // The generic version can not be used
00126 }

template<>
__forceinline__ __device__ void storeForces< long long int > ( const long long int  fx,
const long long int  fy,
const long long int  fz,
const int  ind,
const int  stride,
long long int *  force 
) [inline]

Variable Documentation


Generated on 16 Jul 2020 for NAMD by  doxygen 1.6.1