ComputeBondedCUDAKernel.cu File Reference

#include <math.h>
#include <cuda.h>
#include <cub/cub.cuh>
#include "ComputeBondedCUDAKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Defines

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL(DOENERGY, DOVIRIAL)
#define CALL(DOENERGY, DOVIRIAL, DOELECT)

Functions

__device__ long long int lliroundf (float f)
__device__ unsigned long long int llitoulli (long long int l)
template<typename T >
__forceinline__ __device__ void convertForces (const float x, const float y, const float z, T &Tx, T &Ty, T &Tz)
template<>
__forceinline__ __device__ void convertForces< long long int > (const float x, const float y, const float z, long long int &Tx, long long int &Ty, long long int &Tz)
template<typename T >
__forceinline__ __device__ void calcComponentForces (float fij, const float dx, const float dy, const float dz, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij, const float dx, const float dy, const float dz, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T >
__forceinline__ __device__ void calcComponentForces (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T >
__forceinline__ __device__ void storeForces (const T fx, const T fy, const T fz, const int ind, const int stride, T *force)
template<>
__forceinline__ __device__ void storeForces< long long int > (const long long int fx, const long long int fy, const long long int fz, const int ind, const int stride, long long int *force)
template<typename T , bool doEnergy, bool doVirial>
__device__ void bondForce (const int index, const CudaBond *__restrict__ bondList, const CudaBondValue *__restrict__ bondValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
template<typename T , bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const bool doSlow, const float one_scale14, const int vdwCoefTableWidth, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, double &energyVdw, T *__restrict__ forceNbond, double &energyNbond, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialNbond, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialSlow)
template<typename T , bool doEnergy, bool doVirial>
__device__ void exclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const float r2_delta, const int r2_delta_expc, cudaTextureObject_t r2_table_tex, cudaTextureObject_t exclusionTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialSlow)
template<typename T , bool doEnergy, bool doVirial>
__device__ void angleForce (const int index, const CudaAngle *__restrict__ angleList, const CudaAngleValue *__restrict__ angleValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
template<bool doEnergy>
__forceinline__ __device__ void diheComp (const CudaDihedralValue *dihedralValues, int ic, const float sin_phi, const float cos_phi, const float scale, float &df, double &e)
template<typename T , bool doEnergy, bool doVirial>
__device__ void diheForce (const int index, const CudaDihedral *__restrict__ diheList, const CudaDihedralValue *__restrict__ dihedralValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
__device__ __forceinline__ float3 cross (const float3 v1, const float3 v2)
__device__ __forceinline__ float dot (const float3 v1, const float3 v2)
template<typename T , bool doEnergy, bool doVirial>
__device__ void crosstermForce (const int index, const CudaCrossterm *__restrict__ crosstermList, const CudaCrosstermValue *__restrict__ crosstermValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)

Variables

__thread DeviceCUDAdeviceCUDA

Define Documentation

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL ( DOENERGY,
DOVIRIAL,
DOELECT   ) 
Value:
modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numModifiedExclusions, modifiedExclusions, \
    doSlow, one_scale14, cutoff2, \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
    cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
    cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
    xyzq, forceStride, lata, latb, latc, \
    &forces[startNbond], &forces[startSlow], energies_virials);
#define CALL ( DOENERGY,
DOVIRIAL   ) 
Value:
bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numBonds, bonds, bondValues, \
    numAngles, angles, angleValues, \
    numDihedrals, dihedrals, dihedralValues, \
    numImpropers, impropers, improperValues, \
    numExclusionsDoSlow, exclusions, \
    numCrossterms, crossterms, crosstermValues, \
    cutoff2, \
    r2_delta, r2_delta_expc, \
    cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
    cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex(), \
    xyzq, forceStride, \
    lata, latb, latc, \
    forces, &forces[startSlow], energies_virials);

Referenced by cuda_nonbonded_forces(), CudaComputeGBISKernel::GBISphase2(), and CudaComputeNonbondedKernel::nonbondedForce().


Function Documentation

template<typename T , bool doEnergy, bool doVirial>
__device__ void angleForce ( const int  index,
const CudaAngle *__restrict__  angleList,
const CudaAngleValue *__restrict__  angleValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 431 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaAngle::i, CudaAngle::ioffsetXYZ, CudaAngle::itype, CudaAngle::j, CudaAngleValue::k, CudaAngle::k, CudaAngleValue::k_ub, CudaAngle::koffsetXYZ, CudaAngleValue::normal, CudaAngleValue::r_ub, CudaAngle::scale, and CudaAngleValue::theta0.

00443     {
00444 
00445   CudaAngle al = angleList[index];
00446 
00447   float ishx = al.ioffsetXYZ.x*lata.x + al.ioffsetXYZ.y*latb.x + al.ioffsetXYZ.z*latc.x;
00448   float ishy = al.ioffsetXYZ.x*lata.y + al.ioffsetXYZ.y*latb.y + al.ioffsetXYZ.z*latc.y;
00449   float ishz = al.ioffsetXYZ.x*lata.z + al.ioffsetXYZ.y*latb.z + al.ioffsetXYZ.z*latc.z;
00450 
00451   float kshx = al.koffsetXYZ.x*lata.x + al.koffsetXYZ.y*latb.x + al.koffsetXYZ.z*latc.x;
00452   float kshy = al.koffsetXYZ.x*lata.y + al.koffsetXYZ.y*latb.y + al.koffsetXYZ.z*latc.y;
00453   float kshz = al.koffsetXYZ.x*lata.z + al.koffsetXYZ.y*latb.z + al.koffsetXYZ.z*latc.z;
00454 
00455   float xij = xyzq[al.i].x + ishx - xyzq[al.j].x;
00456   float yij = xyzq[al.i].y + ishy - xyzq[al.j].y;
00457   float zij = xyzq[al.i].z + ishz - xyzq[al.j].z;
00458 
00459   float xkj = xyzq[al.k].x + kshx - xyzq[al.j].x;
00460   float ykj = xyzq[al.k].y + kshy - xyzq[al.j].y;
00461   float zkj = xyzq[al.k].z + kshz - xyzq[al.j].z;
00462 
00463   float rij_inv = rsqrtf(xij*xij + yij*yij + zij*zij);
00464   float rkj_inv = rsqrtf(xkj*xkj + ykj*ykj + zkj*zkj);
00465 
00466   float xijr = xij*rij_inv;
00467   float yijr = yij*rij_inv;
00468   float zijr = zij*rij_inv;
00469   float xkjr = xkj*rkj_inv;
00470   float ykjr = ykj*rkj_inv;
00471   float zkjr = zkj*rkj_inv;
00472   float cos_theta = xijr*xkjr + yijr*ykjr + zijr*zkjr;
00473 
00474   CudaAngleValue angleValue = angleValues[al.itype];
00475   angleValue.k *= al.scale;
00476 
00477   float diff;
00478   if (angleValue.normal == 1) {
00479     // Restrict values of cos_theta to the interval [-0.999 ... 0.999]
00480     cos_theta = min(0.999f, max(-0.999f, cos_theta));
00481     float theta = acosf(cos_theta);
00482     diff = theta - angleValue.theta0;
00483   } else {
00484     diff = cos_theta - angleValue.theta0;
00485   }
00486 
00487   if (doEnergy) {
00488     energy += (double)(angleValue.k * diff * diff);
00489   }
00490   if (angleValue.normal == 1) {
00491     float inv_sin_theta = rsqrtf(1.0f - cos_theta*cos_theta);
00492     if (inv_sin_theta > 1.0e6) {
00493       diff = (diff < 0.0f) ? 1.0f : -1.0f;
00494     } else {
00495       diff *= -inv_sin_theta;
00496     }
00497   }
00498   diff *= -2.0f*angleValue.k;
00499 
00500   float dtxi = rij_inv*(xkjr - cos_theta*xijr);
00501   float dtxj = rkj_inv*(xijr - cos_theta*xkjr);
00502   float dtyi = rij_inv*(ykjr - cos_theta*yijr);
00503   float dtyj = rkj_inv*(yijr - cos_theta*ykjr);
00504   float dtzi = rij_inv*(zkjr - cos_theta*zijr);
00505   float dtzj = rkj_inv*(zijr - cos_theta*zkjr);
00506 
00507   T T_dtxi, T_dtyi, T_dtzi;
00508   T T_dtxj, T_dtyj, T_dtzj;
00509   calcComponentForces<T>(diff, dtxi, dtyi, dtzi, T_dtxi, T_dtyi, T_dtzi);
00510   calcComponentForces<T>(diff, dtxj, dtyj, dtzj, T_dtxj, T_dtyj, T_dtzj);
00511   T T_dtxk = -T_dtxi - T_dtxj;
00512   T T_dtyk = -T_dtyi - T_dtyj;
00513   T T_dtzk = -T_dtzi - T_dtzj;
00514   storeForces<T>(T_dtxk, T_dtyk, T_dtzk, al.j, stride, force);
00515 
00516   if (angleValue.k_ub) {
00517     float xik = xij - xkj;
00518     float yik = yij - ykj;
00519     float zik = zij - zkj;
00520     float rik_inv = rsqrtf(xik*xik + yik*yik + zik*zik);
00521     float rik = 1.0f/rik_inv;
00522     float diff_ub = rik - angleValue.r_ub;
00523     if (doEnergy) {
00524       energy += (double)(angleValue.k_ub * diff_ub * diff_ub);
00525     }
00526     diff_ub *= -2.0f*angleValue.k_ub*rik_inv;
00527     T T_dxik, T_dyik, T_dzik;
00528     calcComponentForces<T>(diff_ub, xik, yik, zik, T_dxik, T_dyik, T_dzik);
00529     T_dtxi += T_dxik;
00530     T_dtyi += T_dyik;
00531     T_dtzi += T_dzik;
00532     T_dtxj -= T_dxik;
00533     T_dtyj -= T_dyik;
00534     T_dtzj -= T_dzik;
00535   }
00536 
00537   storeForces<T>(T_dtxi, T_dtyi, T_dtzi, al.i, stride, force);
00538   storeForces<T>(T_dtxj, T_dtyj, T_dtzj, al.k, stride, force);
00539 
00540   // Store virial
00541   if (doVirial) {
00542 #ifdef WRITE_FULL_VIRIALS
00543     float fxi = ((float)T_dtxi)*force_to_float;
00544     float fyi = ((float)T_dtyi)*force_to_float;
00545     float fzi = ((float)T_dtzi)*force_to_float;
00546     float fxk = ((float)T_dtxj)*force_to_float;
00547     float fyk = ((float)T_dtyj)*force_to_float;
00548     float fzk = ((float)T_dtzj)*force_to_float;
00549     virial.xx = (fxi*xij) + (fxk*xkj);
00550     virial.xy = (fxi*yij) + (fxk*ykj);
00551     virial.xz = (fxi*zij) + (fxk*zkj);
00552     virial.yx = (fyi*xij) + (fyk*xkj);
00553     virial.yy = (fyi*yij) + (fyk*ykj);
00554     virial.yz = (fyi*zij) + (fyk*zkj);
00555     virial.zx = (fzi*xij) + (fzk*xkj);
00556     virial.zy = (fzi*yij) + (fzk*ykj);
00557     virial.zz = (fzi*zij) + (fzk*zkj);
00558 #endif
00559   }
00560 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void bondForce ( const int  index,
const CudaBond *__restrict__  bondList,
const CudaBondValue *__restrict__  bondValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 141 of file ComputeBondedCUDAKernel.cu.

References CudaBond::i, CudaBond::ioffsetXYZ, CudaBond::itype, CudaBond::j, CudaBondValue::k, CudaBond::scale, and CudaBondValue::x0.

00154     {
00155 
00156   CudaBond bl = bondList[index];
00157   CudaBondValue bondValue = bondValues[bl.itype];
00158   if (bondValue.x0 == 0.0f) return;
00159 
00160   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00161   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00162   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00163 
00164   float4 xyzqi = xyzq[bl.i];
00165   float4 xyzqj = xyzq[bl.j];
00166 
00167   float xij = xyzqi.x + shx - xyzqj.x;
00168   float yij = xyzqi.y + shy - xyzqj.y;
00169   float zij = xyzqi.z + shz - xyzqj.z;
00170 
00171   float r = sqrtf(xij*xij + yij*yij + zij*zij);
00172 
00173   float db = r - bondValue.x0;
00174   float fij = db * bondValue.k * bl.scale;
00175  
00176   if (doEnergy) {
00177     energy += (double)(fij*db);
00178   }
00179   fij *= -2.0f/r;
00180   
00181   T T_fxij, T_fyij, T_fzij;
00182   calcComponentForces<T>(fij, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00183   
00184   // Store forces
00185   storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, force);
00186   storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, force);
00187 
00188   // Store virial
00189   if (doVirial) {
00190 #ifdef WRITE_FULL_VIRIALS
00191     float fxij = fij*xij;
00192     float fyij = fij*yij;
00193     float fzij = fij*zij;
00194     virial.xx = (fxij*xij);
00195     virial.xy = (fxij*yij);
00196     virial.xz = (fxij*zij);
00197     virial.yx = (fyij*xij);
00198     virial.yy = (fyij*yij);
00199     virial.yz = (fyij*zij);
00200     virial.zx = (fzij*xij);
00201     virial.zy = (fzij*yij);
00202     virial.zz = (fzij*zij);
00203 #endif
00204   }
00205 }

template<typename T >
__forceinline__ __device__ void calcComponentForces ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
T &  fxij,
T &  fyij,
T &  fzij 
) [inline]

Definition at line 82 of file ComputeBondedCUDAKernel.cu.

00087                              {
00088 
00089   fxij = (T)(fij1*dx1 + fij2*dx2);
00090   fyij = (T)(fij1*dy1 + fij2*dy2);
00091   fzij = (T)(fij1*dz1 + fij2*dz2);
00092 }

template<typename T >
__forceinline__ __device__ void calcComponentForces ( float  fij,
const float  dx,
const float  dy,
const float  dz,
T &  fxij,
T &  fyij,
T &  fzij 
) [inline]

Definition at line 53 of file ComputeBondedCUDAKernel.cu.

00056                              {
00057 
00058   fxij = (T)(fij*dx);
00059   fyij = (T)(fij*dy);
00060   fzij = (T)(fij*dz);
00061 
00062 }

template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
) [inline]
template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij,
const float  dx,
const float  dy,
const float  dz,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
) [inline]
template<typename T >
__forceinline__ __device__ void convertForces ( const float  x,
const float  y,
const float  z,
T &  Tx,
T &  Ty,
T &  Tz 
) [inline]

Definition at line 33 of file ComputeBondedCUDAKernel.cu.

00034                        {
00035 
00036   Tx = (T)(x);
00037   Ty = (T)(y);
00038   Tz = (T)(z);
00039 }

template<>
__forceinline__ __device__ void convertForces< long long int > ( const float  x,
const float  y,
const float  z,
long long int &  Tx,
long long int &  Ty,
long long int &  Tz 
) [inline]
__device__ __forceinline__ float3 cross ( const float3  v1,
const float3  v2 
)
template<typename T , bool doEnergy, bool doVirial>
__device__ void crosstermForce ( const int  index,
const CudaCrossterm *__restrict__  crosstermList,
const CudaCrosstermValue *__restrict__  crosstermValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]
template<bool doEnergy>
__forceinline__ __device__ void diheComp ( const CudaDihedralValue dihedralValues,
int  ic,
const float  sin_phi,
const float  cos_phi,
const float  scale,
float &  df,
double &  e 
) [inline]

Definition at line 569 of file ComputeBondedCUDAKernel.cu.

References CudaDihedralValue::delta, CudaDihedralValue::k, M_PI, and CudaDihedralValue::n.

00570                                                                                      {
00571 
00572   df = 0.0f;
00573   if (doEnergy) e = 0.0;
00574 
00575   float phi = atan2f(sin_phi, cos_phi);
00576 
00577   bool lrep = true;
00578   while (lrep) {
00579     CudaDihedralValue dihedralValue = dihedralValues[ic];
00580     dihedralValue.k *= scale;
00581 
00582     // Last dihedral has n low bit set to 0
00583     lrep = (dihedralValue.n & 1);
00584     dihedralValue.n >>= 1;
00585     if (dihedralValue.n) {
00586       float nf = dihedralValue.n;
00587       float x = nf*phi - dihedralValue.delta;
00588       if (doEnergy) {
00589         float sin_x, cos_x;
00590         sincosf(x, &sin_x, &cos_x);
00591         e += (double)(dihedralValue.k*(1.0f + cos_x));
00592         df += (double)(nf*dihedralValue.k*sin_x);
00593       } else {
00594         float sin_x = sinf(x);
00595         df += (double)(nf*dihedralValue.k*sin_x);
00596       }
00597     } else {
00598       float diff = phi - dihedralValue.delta;
00599       if (diff < -(float)(M_PI)) diff += (float)(2.0*M_PI);
00600       if (diff >  (float)(M_PI)) diff -= (float)(2.0*M_PI);
00601       if (doEnergy) {
00602         e += (double)(dihedralValue.k*diff*diff);
00603       }
00604       df -= 2.0f*dihedralValue.k*diff;
00605     }
00606     ic++;
00607   }
00608 
00609 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void diheForce ( const int  index,
const CudaDihedral *__restrict__  diheList,
const CudaDihedralValue *__restrict__  dihedralValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 613 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaDihedral::i, CudaDihedral::ioffsetXYZ, CudaDihedral::itype, CudaDihedral::j, CudaDihedral::joffsetXYZ, CudaDihedral::k, CudaDihedral::l, CudaDihedral::loffsetXYZ, and CudaDihedral::scale.

00625     {
00626 
00627   CudaDihedral dl = diheList[index];
00628 
00629   float ishx = dl.ioffsetXYZ.x*lata.x + dl.ioffsetXYZ.y*latb.x + dl.ioffsetXYZ.z*latc.x;
00630   float ishy = dl.ioffsetXYZ.x*lata.y + dl.ioffsetXYZ.y*latb.y + dl.ioffsetXYZ.z*latc.y;
00631   float ishz = dl.ioffsetXYZ.x*lata.z + dl.ioffsetXYZ.y*latb.z + dl.ioffsetXYZ.z*latc.z;
00632 
00633   float jshx = dl.joffsetXYZ.x*lata.x + dl.joffsetXYZ.y*latb.x + dl.joffsetXYZ.z*latc.x;
00634   float jshy = dl.joffsetXYZ.x*lata.y + dl.joffsetXYZ.y*latb.y + dl.joffsetXYZ.z*latc.y;
00635   float jshz = dl.joffsetXYZ.x*lata.z + dl.joffsetXYZ.y*latb.z + dl.joffsetXYZ.z*latc.z;
00636 
00637   float lshx = dl.loffsetXYZ.x*lata.x + dl.loffsetXYZ.y*latb.x + dl.loffsetXYZ.z*latc.x;
00638   float lshy = dl.loffsetXYZ.x*lata.y + dl.loffsetXYZ.y*latb.y + dl.loffsetXYZ.z*latc.y;
00639   float lshz = dl.loffsetXYZ.x*lata.z + dl.loffsetXYZ.y*latb.z + dl.loffsetXYZ.z*latc.z;
00640 
00641   float xij = xyzq[dl.i].x + ishx - xyzq[dl.j].x;
00642   float yij = xyzq[dl.i].y + ishy - xyzq[dl.j].y;
00643   float zij = xyzq[dl.i].z + ishz - xyzq[dl.j].z;
00644 
00645   float xjk = xyzq[dl.j].x + jshx - xyzq[dl.k].x;
00646   float yjk = xyzq[dl.j].y + jshy - xyzq[dl.k].y;
00647   float zjk = xyzq[dl.j].z + jshz - xyzq[dl.k].z;
00648 
00649   float xlk = xyzq[dl.l].x + lshx - xyzq[dl.k].x;
00650   float ylk = xyzq[dl.l].y + lshy - xyzq[dl.k].y;
00651   float zlk = xyzq[dl.l].z + lshz - xyzq[dl.k].z;
00652 
00653   // A=F^G, B=H^G.
00654   float ax = yij*zjk - zij*yjk;
00655   float ay = zij*xjk - xij*zjk;
00656   float az = xij*yjk - yij*xjk;
00657   float bx = ylk*zjk - zlk*yjk;
00658   float by = zlk*xjk - xlk*zjk;
00659   float bz = xlk*yjk - ylk*xjk;
00660 
00661   float ra2 = ax*ax + ay*ay + az*az;
00662   float rb2 = bx*bx + by*by + bz*bz;
00663   float rg = sqrtf(xjk*xjk + yjk*yjk + zjk*zjk);
00664 
00665   //    if((ra2 <= rxmin2) .or. (rb2 <= rxmin2) .or. (rg <= rxmin)) then
00666   //          nlinear = nlinear + 1
00667   //       endif
00668 
00669   float rgr = 1.0f / rg;
00670   float ra2r = 1.0f / ra2;
00671   float rb2r = 1.0f / rb2;
00672   float rabr = sqrtf(ra2r*rb2r);
00673 
00674   float cos_phi = (ax*bx + ay*by + az*bz)*rabr;
00675   //
00676   // Note that sin(phi).G/|G|=B^A/(|A|.|B|)
00677   // which can be simplify to sin(phi)=|G|H.A/(|A|.|B|)
00678   float sin_phi = rg*rabr*(ax*xlk + ay*ylk + az*zlk);
00679   //
00680   //     Energy and derivative contributions.
00681 
00682   float df;
00683   double e;
00684   diheComp<doEnergy>(dihedralValues, dl.itype, sin_phi, cos_phi, dl.scale, df, e);
00685 
00686   if (doEnergy) energy += e;
00687 
00688   //
00689   //     Compute derivatives wrt catesian coordinates.
00690   //
00691   // GAA=dE/dphi.|G|/A^2, GBB=dE/dphi.|G|/B^2, FG=F.G, HG=H.G
00692   //  FGA=dE/dphi*F.G/(|G|A^2), HGB=dE/dphi*H.G/(|G|B^2)
00693 
00694   float fg = xij*xjk + yij*yjk + zij*zjk;
00695   float hg = xlk*xjk + ylk*yjk + zlk*zjk;
00696   ra2r *= df;
00697   rb2r *= df;
00698   float fga = fg*ra2r*rgr;
00699   float hgb = hg*rb2r*rgr;
00700   float gaa = ra2r*rg;
00701   float gbb = rb2r*rg;
00702   // DFi=dE/dFi, DGi=dE/dGi, DHi=dE/dHi.
00703 
00704   // Store forces
00705   T T_fix, T_fiy, T_fiz;
00706   calcComponentForces<T>(-gaa, ax, ay, az, T_fix, T_fiy, T_fiz);
00707   storeForces<T>(T_fix, T_fiy, T_fiz, dl.i, stride, force);
00708 
00709   T dgx, dgy, dgz;
00710   calcComponentForces<T>(fga, ax, ay, az, -hgb, bx, by, bz, dgx, dgy, dgz);
00711   T T_fjx = dgx - T_fix;
00712   T T_fjy = dgy - T_fiy;
00713   T T_fjz = dgz - T_fiz;
00714   storeForces<T>(T_fjx, T_fjy, T_fjz, dl.j, stride, force);
00715 
00716   T dhx, dhy, dhz;
00717   calcComponentForces<T>(gbb, bx, by, bz, dhx, dhy, dhz);
00718   T T_fkx = -dhx - dgx;
00719   T T_fky = -dhy - dgy;
00720   T T_fkz = -dhz - dgz;
00721   storeForces<T>(T_fkx, T_fky, T_fkz, dl.k, stride, force);
00722   storeForces<T>(dhx, dhy, dhz, dl.l, stride, force);
00723 
00724   // Store virial
00725   if (doVirial) {
00726 #ifdef WRITE_FULL_VIRIALS
00727     float fix = ((float)T_fix)*force_to_float;
00728     float fiy = ((float)T_fiy)*force_to_float;
00729     float fiz = ((float)T_fiz)*force_to_float;
00730     float fjx = ((float)dgx)*force_to_float;
00731     float fjy = ((float)dgy)*force_to_float;
00732     float fjz = ((float)dgz)*force_to_float;
00733     float flx = ((float)dhx)*force_to_float;
00734     float fly = ((float)dhy)*force_to_float;
00735     float flz = ((float)dhz)*force_to_float;
00736     virial.xx = (fix*xij) + (fjx*xjk) + (flx*xlk);
00737     virial.xy = (fix*yij) + (fjx*yjk) + (flx*ylk);
00738     virial.xz = (fix*zij) + (fjx*zjk) + (flx*zlk);
00739     virial.yx = (fiy*xij) + (fjy*xjk) + (fly*xlk);
00740     virial.yy = (fiy*yij) + (fjy*yjk) + (fly*ylk);
00741     virial.yz = (fiy*zij) + (fjy*zjk) + (fly*zlk);
00742     virial.zx = (fiz*xij) + (fjz*xjk) + (flz*xlk);
00743     virial.zy = (fiz*yij) + (fjz*yjk) + (flz*ylk);
00744     virial.zz = (fiz*zij) + (fjz*zjk) + (flz*zlk);
00745 #endif
00746   }
00747 
00748 }

__device__ __forceinline__ float dot ( const float3  v1,
const float3  v2 
)

Definition at line 758 of file ComputeBondedCUDAKernel.cu.

Referenced by crosstermForce(), and ARestraint::GetDihe().

00758                                                                        {
00759   return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z);
00760 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void exclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const float  r2_delta,
const int  r2_delta_expc,
cudaTextureObject_t  r2_table_tex,
cudaTextureObject_t  exclusionTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialSlow 
) [inline]

Definition at line 341 of file ComputeBondedCUDAKernel.cu.

References __ldg, diffa, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, and table_i.

00362     {
00363 
00364   CudaExclusion bl = exclusionList[index];
00365 
00366   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00367   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00368   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00369 
00370   float4 xyzqi = xyzq[bl.i];
00371   float4 xyzqj = xyzq[bl.j];
00372 
00373   float xij = xyzqi.x + shx - xyzqj.x;
00374   float yij = xyzqi.y + shy - xyzqj.y;
00375   float zij = xyzqi.z + shz - xyzqj.z;
00376 
00377   float r2 = xij*xij + yij*yij + zij*zij;
00378   if (r2 < cutoff2) {
00379     r2 += r2_delta;
00380 
00381     union { float f; int i; } r2i;
00382     r2i.f = r2;
00383     int table_i = (r2i.i >> 17) + r2_delta_expc;  // table_i >= 0
00384 
00385 #if __CUDA_ARCH__ >= 350
00386     float r2_table_val = __ldg(&r2_table[table_i]);
00387 #else
00388     float r2_table_val = tex1Dfetch<float>(r2_table_tex, table_i);
00389 #endif
00390     float diffa = r2 - r2_table_val;
00391     float qq = xyzqi.w * xyzqj.w;
00392 
00393 #if __CUDA_ARCH__ >= 350
00394     float4 slow = __ldg(&exclusionTable[table_i]);
00395 #else
00396     float4 slow = tex1Dfetch<float4>(exclusionTableTex, table_i);
00397 #endif
00398 
00399     if (doEnergy) {
00400       energySlow += (double)(qq*(((diffa * (1.0f/6.0f)*slow.x + 0.25f*slow.y ) * diffa + 0.5f*slow.z ) * diffa + slow.w));
00401     }
00402 
00403     float fSlow = -qq*((diffa * slow.x + slow.y) * diffa + slow.z);
00404 
00405     T T_fxij, T_fyij, T_fzij;
00406     calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00407     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00408     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00409 
00410     // Store virial
00411     if (doVirial) {
00412 #ifdef WRITE_FULL_VIRIALS
00413       float fxij = fSlow*xij;
00414       float fyij = fSlow*yij;
00415       float fzij = fSlow*zij;
00416       virialSlow.xx = (fxij*xij);
00417       virialSlow.xy = (fxij*yij);
00418       virialSlow.xz = (fxij*zij);
00419       virialSlow.yx = (fyij*xij);
00420       virialSlow.yy = (fyij*yij);
00421       virialSlow.yz = (fyij*zij);
00422       virialSlow.zx = (fzij*xij);
00423       virialSlow.zy = (fzij*yij);
00424       virialSlow.zz = (fzij*zij);
00425 #endif
00426     }
00427   }
00428 }

__device__ long long int lliroundf ( float  f  )  [inline]

Definition at line 17 of file ComputeBondedCUDAKernel.cu.

00018 {
00019     long long int l;
00020     asm("cvt.rni.s64.f32  %0, %1;" : "=l"(l) : "f"(f));
00021     return l;
00022 }

__device__ unsigned long long int llitoulli ( long long int  l  )  [inline]

Definition at line 24 of file ComputeBondedCUDAKernel.cu.

00025 {
00026     unsigned long long int u;
00027     asm("mov.b64    %0, %1;" : "=l"(u) : "l"(l));
00028     return u;
00029 }

template<typename T , bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const bool  doSlow,
const float  one_scale14,
const int  vdwCoefTableWidth,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
double &  energyVdw,
T *__restrict__  forceNbond,
double &  energyNbond,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialNbond,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialSlow 
) [inline]

Definition at line 211 of file ComputeBondedCUDAKernel.cu.

References __ldg, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, CudaExclusion::vdwtypei, CudaExclusion::vdwtypej, float2::x, ComputeBondedCUDAKernel::BondedVirial< T >::xx, ComputeBondedCUDAKernel::BondedVirial< T >::xy, ComputeBondedCUDAKernel::BondedVirial< T >::xz, float2::y, ComputeBondedCUDAKernel::BondedVirial< T >::yx, ComputeBondedCUDAKernel::BondedVirial< T >::yy, ComputeBondedCUDAKernel::BondedVirial< T >::yz, ComputeBondedCUDAKernel::BondedVirial< T >::zx, ComputeBondedCUDAKernel::BondedVirial< T >::zy, and ComputeBondedCUDAKernel::BondedVirial< T >::zz.

00237     {
00238 
00239   CudaExclusion bl = exclusionList[index];
00240 
00241   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00242   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00243   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00244 
00245   float4 xyzqi = xyzq[bl.i];
00246   float4 xyzqj = xyzq[bl.j];
00247 
00248   float xij = xyzqi.x + shx - xyzqj.x;
00249   float yij = xyzqi.y + shy - xyzqj.y;
00250   float zij = xyzqi.z + shz - xyzqj.z;
00251 
00252   float r2 = xij*xij + yij*yij + zij*zij;
00253   if (r2 < cutoff2) {
00254 
00255     float rinv = rsqrtf(r2);
00256 
00257     float qq;
00258     if (doElect) qq = one_scale14 * xyzqi.w * xyzqj.w;
00259 
00260     int vdwIndex = bl.vdwtypei + bl.vdwtypej*vdwCoefTableWidth;
00261 #if __CUDA_ARCH__ >= 350
00262     float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00263 #else
00264     float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
00265 #endif
00266 
00267     float4 fi = tex1D<float4>(forceTableTex, rinv);
00268     float4 ei;
00269     if (doEnergy) {
00270       ei = tex1D<float4>(energyTableTex, rinv);
00271       energyVdw   += (double)(ljab.x * ei.z + ljab.y * ei.y);
00272       if (doElect) {
00273         energyNbond += qq * ei.x;
00274         if (doSlow) energySlow  += qq * ei.w;
00275       }
00276     }
00277 
00278     float fNbond;
00279     if (doElect) {
00280       fNbond = -(ljab.x * fi.z + ljab.y * fi.y + qq * fi.x);
00281     } else {
00282       fNbond = -(ljab.x * fi.z + ljab.y * fi.y);
00283     }
00284     T T_fxij, T_fyij, T_fzij;
00285     calcComponentForces<T>(fNbond, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00286     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceNbond);
00287     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceNbond);
00288 
00289     float fSlow;
00290     if (doSlow && doElect) {
00291       fSlow = -qq * fi.w;
00292       T T_fxij, T_fyij, T_fzij;
00293       calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00294       storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00295       storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00296     }
00297 
00298     // Store virial
00299     if (doVirial) {
00300 #ifdef WRITE_FULL_VIRIALS
00301       float fxij = fNbond*xij;
00302       float fyij = fNbond*yij;
00303       float fzij = fNbond*zij;
00304       virialNbond.xx = (fxij*xij);
00305       virialNbond.xy = (fxij*yij);
00306       virialNbond.xz = (fxij*zij);
00307       virialNbond.yx = (fyij*xij);
00308       virialNbond.yy = (fyij*yij);
00309       virialNbond.yz = (fyij*zij);
00310       virialNbond.zx = (fzij*xij);
00311       virialNbond.zy = (fzij*yij);
00312       virialNbond.zz = (fzij*zij);
00313 #endif
00314     }
00315 
00316     // Store virial
00317     if (doVirial && doSlow && doElect) {
00318 #ifdef WRITE_FULL_VIRIALS
00319       float fxij = fSlow*xij;
00320       float fyij = fSlow*yij;
00321       float fzij = fSlow*zij;
00322       virialSlow.xx = (fxij*xij);
00323       virialSlow.xy = (fxij*yij);
00324       virialSlow.xz = (fxij*zij);
00325       virialSlow.yx = (fyij*xij);
00326       virialSlow.yy = (fyij*yij);
00327       virialSlow.yz = (fyij*zij);
00328       virialSlow.zx = (fzij*xij);
00329       virialSlow.zy = (fzij*yij);
00330       virialSlow.zz = (fzij*zij);
00331 #endif
00332     }
00333 
00334   }
00335 }

template<typename T >
__forceinline__ __device__ void storeForces ( const T  fx,
const T  fy,
const T  fz,
const int  ind,
const int  stride,
T *  force 
) [inline]

Definition at line 118 of file ComputeBondedCUDAKernel.cu.

00120                {
00121   // The generic version can not be used
00122 }

template<>
__forceinline__ __device__ void storeForces< long long int > ( const long long int  fx,
const long long int  fy,
const long long int  fz,
const int  ind,
const int  stride,
long long int *  force 
) [inline]

Variable Documentation


Generated on 7 Dec 2019 for NAMD by  doxygen 1.6.1