ComputeBondedCUDAKernel.cu File Reference

#include <math.h>
#include <cuda.h>
#include <cub/cub.cuh>
#include "ComputeBondedCUDAKernel.h"
#include "DeviceCUDA.h"

Go to the source code of this file.

Defines

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL(DOENERGY, DOVIRIAL)
#define CALL(DOENERGY, DOVIRIAL, DOELECT)

Functions

__device__ long long int lliroundf (float f)
__device__ unsigned long long int llitoulli (long long int l)
template<typename T >
__forceinline__ __device__ void convertForces (const float x, const float y, const float z, T &Tx, T &Ty, T &Tz)
template<>
__forceinline__ __device__ void convertForces< long long int > (const float x, const float y, const float z, long long int &Tx, long long int &Ty, long long int &Tz)
template<typename T >
__forceinline__ __device__ void calcComponentForces (float fij, const float dx, const float dy, const float dz, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij, const float dx, const float dy, const float dz, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T >
__forceinline__ __device__ void calcComponentForces (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, T &fxij, T &fyij, T &fzij)
template<>
__forceinline__ __device__ void calcComponentForces< long long int > (float fij1, const float dx1, const float dy1, const float dz1, float fij2, const float dx2, const float dy2, const float dz2, long long int &fxij, long long int &fyij, long long int &fzij)
template<typename T >
__forceinline__ __device__ void storeForces (const T fx, const T fy, const T fz, const int ind, const int stride, T *force)
template<>
__forceinline__ __device__ void storeForces< long long int > (const long long int fx, const long long int fy, const long long int fz, const int ind, const int stride, long long int *force)
template<typename T , bool doEnergy, bool doVirial>
__device__ void bondForce (const int index, const CudaBond *__restrict__ bondList, const CudaBondValue *__restrict__ bondValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
template<typename T , bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const bool doSlow, const float one_scale14, const int vdwCoefTableWidth, cudaTextureObject_t vdwCoefTableTex, cudaTextureObject_t forceTableTex, cudaTextureObject_t energyTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, double &energyVdw, T *__restrict__ forceNbond, double &energyNbond, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialNbond, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialSlow)
template<typename T , bool doEnergy, bool doVirial>
__device__ void exclusionForce (const int index, const CudaExclusion *__restrict__ exclusionList, const float r2_delta, const int r2_delta_expc, cudaTextureObject_t r2_table_tex, cudaTextureObject_t exclusionTableTex, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, const float cutoff2, T *__restrict__ forceSlow, double &energySlow, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virialSlow)
template<typename T , bool doEnergy, bool doVirial>
__device__ void angleForce (const int index, const CudaAngle *__restrict__ angleList, const CudaAngleValue *__restrict__ angleValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
template<bool doEnergy>
__forceinline__ __device__ void diheComp (const CudaDihedralValue *dihedralValues, int ic, const float sin_phi, const float cos_phi, const float scale, float &df, double &e)
template<typename T , bool doEnergy, bool doVirial>
__device__ void diheForce (const int index, const CudaDihedral *__restrict__ diheList, const CudaDihedralValue *__restrict__ dihedralValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)
__device__ __forceinline__ float3 cross (const float3 v1, const float3 v2)
__device__ __forceinline__ float dot (const float3 v1, const float3 v2)
template<typename T , bool doEnergy, bool doVirial>
__device__ void crosstermForce (const int index, const CudaCrossterm *__restrict__ crosstermList, const CudaCrosstermValue *__restrict__ crosstermValues, const float4 *__restrict__ xyzq, const int stride, const float3 lata, const float3 latb, const float3 latc, T *__restrict__ force, double &energy, ComputeBondedCUDAKernel::BondedVirial *__restrict__ virial)

Variables

__thread DeviceCUDAdeviceCUDA

Define Documentation

#define BONDEDFORCESKERNEL_NUM_WARP   4
#define CALL ( DOENERGY,
DOVIRIAL,
DOELECT   ) 
Value:
modifiedExclusionForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL, DOELECT> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numModifiedExclusions, modifiedExclusions, \
    doSlow, one_scale14, cutoff2, \
    cudaNonbondedTables.getVdwCoefTableWidth(), cudaNonbondedTables.getExclusionVdwCoefTable(), \
    cudaNonbondedTables.getExclusionVdwCoefTableTex(), \
    cudaNonbondedTables.getModifiedExclusionForceTableTex(), cudaNonbondedTables.getModifiedExclusionEnergyTableTex(), \
    xyzq, forceStride, lata, latb, latc, \
    &forces[startNbond], &forces[startSlow], energies_virials);
#define CALL ( DOENERGY,
DOVIRIAL   ) 
Value:
bondedForcesKernel<FORCE_TYPE, DOENERGY, DOVIRIAL> \
  <<< nblock_use, nthread, shmem_size, stream >>> \
  (start, numBonds, bonds, bondValues, \
    numAngles, angles, angleValues, \
    numDihedrals, dihedrals, dihedralValues, \
    numImpropers, impropers, improperValues, \
    numExclusionsDoSlow, exclusions, \
    numCrossterms, crossterms, crosstermValues, \
    cutoff2, \
    r2_delta, r2_delta_expc, \
    cudaNonbondedTables.get_r2_table(), cudaNonbondedTables.getExclusionTable(), \
    cudaNonbondedTables.get_r2_table_tex(), cudaNonbondedTables.getExclusionTableTex(), \
    xyzq, forceStride, \
    lata, latb, latc, \
    forces, &forces[startSlow], energies_virials);

Referenced by cuda_nonbonded_forces(), CudaComputeGBISKernel::GBISphase2(), and CudaComputeNonbondedKernel::nonbondedForce().


Function Documentation

template<typename T , bool doEnergy, bool doVirial>
__device__ void angleForce ( const int  index,
const CudaAngle *__restrict__  angleList,
const CudaAngleValue *__restrict__  angleValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 436 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaAngle::i, CudaAngle::ioffsetXYZ, CudaAngle::itype, CudaAngle::j, CudaAngleValue::k, CudaAngle::k, CudaAngleValue::k_ub, CudaAngle::koffsetXYZ, CudaAngleValue::normal, CudaAngleValue::r_ub, CudaAngle::scale, and CudaAngleValue::theta0.

00448     {
00449 
00450   CudaAngle al = angleList[index];
00451 
00452   float ishx = al.ioffsetXYZ.x*lata.x + al.ioffsetXYZ.y*latb.x + al.ioffsetXYZ.z*latc.x;
00453   float ishy = al.ioffsetXYZ.x*lata.y + al.ioffsetXYZ.y*latb.y + al.ioffsetXYZ.z*latc.y;
00454   float ishz = al.ioffsetXYZ.x*lata.z + al.ioffsetXYZ.y*latb.z + al.ioffsetXYZ.z*latc.z;
00455 
00456   float kshx = al.koffsetXYZ.x*lata.x + al.koffsetXYZ.y*latb.x + al.koffsetXYZ.z*latc.x;
00457   float kshy = al.koffsetXYZ.x*lata.y + al.koffsetXYZ.y*latb.y + al.koffsetXYZ.z*latc.y;
00458   float kshz = al.koffsetXYZ.x*lata.z + al.koffsetXYZ.y*latb.z + al.koffsetXYZ.z*latc.z;
00459 
00460   float xij = xyzq[al.i].x + ishx - xyzq[al.j].x;
00461   float yij = xyzq[al.i].y + ishy - xyzq[al.j].y;
00462   float zij = xyzq[al.i].z + ishz - xyzq[al.j].z;
00463 
00464   float xkj = xyzq[al.k].x + kshx - xyzq[al.j].x;
00465   float ykj = xyzq[al.k].y + kshy - xyzq[al.j].y;
00466   float zkj = xyzq[al.k].z + kshz - xyzq[al.j].z;
00467 
00468   float rij_inv = rsqrtf(xij*xij + yij*yij + zij*zij);
00469   float rkj_inv = rsqrtf(xkj*xkj + ykj*ykj + zkj*zkj);
00470 
00471   float xijr = xij*rij_inv;
00472   float yijr = yij*rij_inv;
00473   float zijr = zij*rij_inv;
00474   float xkjr = xkj*rkj_inv;
00475   float ykjr = ykj*rkj_inv;
00476   float zkjr = zkj*rkj_inv;
00477   float cos_theta = xijr*xkjr + yijr*ykjr + zijr*zkjr;
00478 
00479   CudaAngleValue angleValue = angleValues[al.itype];
00480   angleValue.k *= al.scale;
00481 
00482   float diff;
00483   if (angleValue.normal == 1) {
00484     // Restrict values of cos_theta to the interval [-0.999 ... 0.999]
00485     cos_theta = min(0.999f, max(-0.999f, cos_theta));
00486     float theta = acosf(cos_theta);
00487     diff = theta - angleValue.theta0;
00488   } else {
00489     diff = cos_theta - angleValue.theta0;
00490   }
00491 
00492   if (doEnergy) {
00493     energy += (double)(angleValue.k * diff * diff);
00494   }
00495   if (angleValue.normal == 1) {
00496     float inv_sin_theta = rsqrtf(1.0f - cos_theta*cos_theta);
00497     if (inv_sin_theta > 1.0e6) {
00498       diff = (diff < 0.0f) ? 1.0f : -1.0f;
00499     } else {
00500       diff *= -inv_sin_theta;
00501     }
00502   }
00503   diff *= -2.0f*angleValue.k;
00504 
00505   float dtxi = rij_inv*(xkjr - cos_theta*xijr);
00506   float dtxj = rkj_inv*(xijr - cos_theta*xkjr);
00507   float dtyi = rij_inv*(ykjr - cos_theta*yijr);
00508   float dtyj = rkj_inv*(yijr - cos_theta*ykjr);
00509   float dtzi = rij_inv*(zkjr - cos_theta*zijr);
00510   float dtzj = rkj_inv*(zijr - cos_theta*zkjr);
00511 
00512   T T_dtxi, T_dtyi, T_dtzi;
00513   T T_dtxj, T_dtyj, T_dtzj;
00514   calcComponentForces<T>(diff, dtxi, dtyi, dtzi, T_dtxi, T_dtyi, T_dtzi);
00515   calcComponentForces<T>(diff, dtxj, dtyj, dtzj, T_dtxj, T_dtyj, T_dtzj);
00516   T T_dtxk = -T_dtxi - T_dtxj;
00517   T T_dtyk = -T_dtyi - T_dtyj;
00518   T T_dtzk = -T_dtzi - T_dtzj;
00519   storeForces<T>(T_dtxk, T_dtyk, T_dtzk, al.j, stride, force);
00520 
00521   if (angleValue.k_ub) {
00522     float xik = xij - xkj;
00523     float yik = yij - ykj;
00524     float zik = zij - zkj;
00525     float rik_inv = rsqrtf(xik*xik + yik*yik + zik*zik);
00526     float rik = 1.0f/rik_inv;
00527     float diff_ub = rik - angleValue.r_ub;
00528     if (doEnergy) {
00529       energy += (double)(angleValue.k_ub * diff_ub * diff_ub);
00530     }
00531     diff_ub *= -2.0f*angleValue.k_ub*rik_inv;
00532     T T_dxik, T_dyik, T_dzik;
00533     calcComponentForces<T>(diff_ub, xik, yik, zik, T_dxik, T_dyik, T_dzik);
00534     T_dtxi += T_dxik;
00535     T_dtyi += T_dyik;
00536     T_dtzi += T_dzik;
00537     T_dtxj -= T_dxik;
00538     T_dtyj -= T_dyik;
00539     T_dtzj -= T_dzik;
00540   }
00541 
00542   storeForces<T>(T_dtxi, T_dtyi, T_dtzi, al.i, stride, force);
00543   storeForces<T>(T_dtxj, T_dtyj, T_dtzj, al.k, stride, force);
00544 
00545   // Store virial
00546   if (doVirial) {
00547 #ifdef WRITE_FULL_VIRIALS
00548     float fxi = ((float)T_dtxi)*force_to_float;
00549     float fyi = ((float)T_dtyi)*force_to_float;
00550     float fzi = ((float)T_dtzi)*force_to_float;
00551     float fxk = ((float)T_dtxj)*force_to_float;
00552     float fyk = ((float)T_dtyj)*force_to_float;
00553     float fzk = ((float)T_dtzj)*force_to_float;
00554     virial.xx = (fxi*xij) + (fxk*xkj);
00555     virial.xy = (fxi*yij) + (fxk*ykj);
00556     virial.xz = (fxi*zij) + (fxk*zkj);
00557     virial.yx = (fyi*xij) + (fyk*xkj);
00558     virial.yy = (fyi*yij) + (fyk*ykj);
00559     virial.yz = (fyi*zij) + (fyk*zkj);
00560     virial.zx = (fzi*xij) + (fzk*xkj);
00561     virial.zy = (fzi*yij) + (fzk*ykj);
00562     virial.zz = (fzi*zij) + (fzk*zkj);
00563 #endif
00564   }
00565 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void bondForce ( const int  index,
const CudaBond *__restrict__  bondList,
const CudaBondValue *__restrict__  bondValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 141 of file ComputeBondedCUDAKernel.cu.

References CudaBond::i, CudaBond::ioffsetXYZ, CudaBond::itype, CudaBond::j, CudaBondValue::k, CudaBond::scale, CudaBondValue::x0, and CudaBondValue::x1.

00154     {
00155 
00156   CudaBond bl = bondList[index];
00157   CudaBondValue bondValue = bondValues[bl.itype];
00158   if (bondValue.x0 == 0.0f) return;
00159 
00160   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00161   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00162   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00163 
00164   float4 xyzqi = xyzq[bl.i];
00165   float4 xyzqj = xyzq[bl.j];
00166 
00167   float xij = xyzqi.x + shx - xyzqj.x;
00168   float yij = xyzqi.y + shy - xyzqj.y;
00169   float zij = xyzqi.z + shz - xyzqj.z;
00170 
00171   float r = sqrtf(xij*xij + yij*yij + zij*zij);
00172 
00173   float db = r - bondValue.x0;
00174   if (bondValue.x1) {
00175     // in this case, the bond represents a harmonic wall potential
00176     // where x0 is the lower wall and x1 is the upper
00177     db = (r > bondValue.x1 ? r - bondValue.x1 : (r > bondValue.x0 ? 0 : db));
00178   }
00179   float fij = db * bondValue.k * bl.scale;
00180  
00181   if (doEnergy) {
00182     energy += (double)(fij*db);
00183   }
00184   fij *= -2.0f/r;
00185   
00186   T T_fxij, T_fyij, T_fzij;
00187   calcComponentForces<T>(fij, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00188   
00189   // Store forces
00190   storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, force);
00191   storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, force);
00192 
00193   // Store virial
00194   if (doVirial) {
00195 #ifdef WRITE_FULL_VIRIALS
00196     float fxij = fij*xij;
00197     float fyij = fij*yij;
00198     float fzij = fij*zij;
00199     virial.xx = (fxij*xij);
00200     virial.xy = (fxij*yij);
00201     virial.xz = (fxij*zij);
00202     virial.yx = (fyij*xij);
00203     virial.yy = (fyij*yij);
00204     virial.yz = (fyij*zij);
00205     virial.zx = (fzij*xij);
00206     virial.zy = (fzij*yij);
00207     virial.zz = (fzij*zij);
00208 #endif
00209   }
00210 }

template<typename T >
__forceinline__ __device__ void calcComponentForces ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
T &  fxij,
T &  fyij,
T &  fzij 
) [inline]

Definition at line 82 of file ComputeBondedCUDAKernel.cu.

00087                              {
00088 
00089   fxij = (T)(fij1*dx1 + fij2*dx2);
00090   fyij = (T)(fij1*dy1 + fij2*dy2);
00091   fzij = (T)(fij1*dz1 + fij2*dz2);
00092 }

template<typename T >
__forceinline__ __device__ void calcComponentForces ( float  fij,
const float  dx,
const float  dy,
const float  dz,
T &  fxij,
T &  fyij,
T &  fzij 
) [inline]

Definition at line 53 of file ComputeBondedCUDAKernel.cu.

00056                              {
00057 
00058   fxij = (T)(fij*dx);
00059   fyij = (T)(fij*dy);
00060   fzij = (T)(fij*dz);
00061 
00062 }

template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij1,
const float  dx1,
const float  dy1,
const float  dz1,
float  fij2,
const float  dx2,
const float  dy2,
const float  dz2,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
) [inline]
template<>
__forceinline__ __device__ void calcComponentForces< long long int > ( float  fij,
const float  dx,
const float  dy,
const float  dz,
long long int &  fxij,
long long int &  fyij,
long long int &  fzij 
) [inline]
template<typename T >
__forceinline__ __device__ void convertForces ( const float  x,
const float  y,
const float  z,
T &  Tx,
T &  Ty,
T &  Tz 
) [inline]

Definition at line 33 of file ComputeBondedCUDAKernel.cu.

00034                        {
00035 
00036   Tx = (T)(x);
00037   Ty = (T)(y);
00038   Tz = (T)(z);
00039 }

template<>
__forceinline__ __device__ void convertForces< long long int > ( const float  x,
const float  y,
const float  z,
long long int &  Tx,
long long int &  Ty,
long long int &  Tz 
) [inline]
__device__ __forceinline__ float3 cross ( const float3  v1,
const float3  v2 
)
template<typename T , bool doEnergy, bool doVirial>
__device__ void crosstermForce ( const int  index,
const CudaCrossterm *__restrict__  crosstermList,
const CudaCrosstermValue *__restrict__  crosstermValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]
template<bool doEnergy>
__forceinline__ __device__ void diheComp ( const CudaDihedralValue dihedralValues,
int  ic,
const float  sin_phi,
const float  cos_phi,
const float  scale,
float &  df,
double &  e 
) [inline]

Definition at line 574 of file ComputeBondedCUDAKernel.cu.

References CudaDihedralValue::delta, CudaDihedralValue::k, M_PI, and CudaDihedralValue::n.

00575                                                                                      {
00576 
00577   df = 0.0f;
00578   if (doEnergy) e = 0.0;
00579 
00580   float phi = atan2f(sin_phi, cos_phi);
00581 
00582   bool lrep = true;
00583   while (lrep) {
00584     CudaDihedralValue dihedralValue = dihedralValues[ic];
00585     dihedralValue.k *= scale;
00586 
00587     // Last dihedral has n low bit set to 0
00588     lrep = (dihedralValue.n & 1);
00589     dihedralValue.n >>= 1;
00590     if (dihedralValue.n) {
00591       float nf = dihedralValue.n;
00592       float x = nf*phi - dihedralValue.delta;
00593       if (doEnergy) {
00594         float sin_x, cos_x;
00595         sincosf(x, &sin_x, &cos_x);
00596         e += (double)(dihedralValue.k*(1.0f + cos_x));
00597         df += (double)(nf*dihedralValue.k*sin_x);
00598       } else {
00599         float sin_x = sinf(x);
00600         df += (double)(nf*dihedralValue.k*sin_x);
00601       }
00602     } else {
00603       float diff = phi - dihedralValue.delta;
00604       if (diff < -(float)(M_PI)) diff += (float)(2.0*M_PI);
00605       if (diff >  (float)(M_PI)) diff -= (float)(2.0*M_PI);
00606       if (doEnergy) {
00607         e += (double)(dihedralValue.k*diff*diff);
00608       }
00609       df -= 2.0f*dihedralValue.k*diff;
00610     }
00611     ic++;
00612   }
00613 
00614 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void diheForce ( const int  index,
const CudaDihedral *__restrict__  diheList,
const CudaDihedralValue *__restrict__  dihedralValues,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
T *__restrict__  force,
double &  energy,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virial 
) [inline]

Definition at line 618 of file ComputeBondedCUDAKernel.cu.

References force_to_float, CudaDihedral::i, CudaDihedral::ioffsetXYZ, CudaDihedral::itype, CudaDihedral::j, CudaDihedral::joffsetXYZ, CudaDihedral::k, CudaDihedral::l, CudaDihedral::loffsetXYZ, and CudaDihedral::scale.

00630     {
00631 
00632   CudaDihedral dl = diheList[index];
00633 
00634   float ishx = dl.ioffsetXYZ.x*lata.x + dl.ioffsetXYZ.y*latb.x + dl.ioffsetXYZ.z*latc.x;
00635   float ishy = dl.ioffsetXYZ.x*lata.y + dl.ioffsetXYZ.y*latb.y + dl.ioffsetXYZ.z*latc.y;
00636   float ishz = dl.ioffsetXYZ.x*lata.z + dl.ioffsetXYZ.y*latb.z + dl.ioffsetXYZ.z*latc.z;
00637 
00638   float jshx = dl.joffsetXYZ.x*lata.x + dl.joffsetXYZ.y*latb.x + dl.joffsetXYZ.z*latc.x;
00639   float jshy = dl.joffsetXYZ.x*lata.y + dl.joffsetXYZ.y*latb.y + dl.joffsetXYZ.z*latc.y;
00640   float jshz = dl.joffsetXYZ.x*lata.z + dl.joffsetXYZ.y*latb.z + dl.joffsetXYZ.z*latc.z;
00641 
00642   float lshx = dl.loffsetXYZ.x*lata.x + dl.loffsetXYZ.y*latb.x + dl.loffsetXYZ.z*latc.x;
00643   float lshy = dl.loffsetXYZ.x*lata.y + dl.loffsetXYZ.y*latb.y + dl.loffsetXYZ.z*latc.y;
00644   float lshz = dl.loffsetXYZ.x*lata.z + dl.loffsetXYZ.y*latb.z + dl.loffsetXYZ.z*latc.z;
00645 
00646   float xij = xyzq[dl.i].x + ishx - xyzq[dl.j].x;
00647   float yij = xyzq[dl.i].y + ishy - xyzq[dl.j].y;
00648   float zij = xyzq[dl.i].z + ishz - xyzq[dl.j].z;
00649 
00650   float xjk = xyzq[dl.j].x + jshx - xyzq[dl.k].x;
00651   float yjk = xyzq[dl.j].y + jshy - xyzq[dl.k].y;
00652   float zjk = xyzq[dl.j].z + jshz - xyzq[dl.k].z;
00653 
00654   float xlk = xyzq[dl.l].x + lshx - xyzq[dl.k].x;
00655   float ylk = xyzq[dl.l].y + lshy - xyzq[dl.k].y;
00656   float zlk = xyzq[dl.l].z + lshz - xyzq[dl.k].z;
00657 
00658   // A=F^G, B=H^G.
00659   float ax = yij*zjk - zij*yjk;
00660   float ay = zij*xjk - xij*zjk;
00661   float az = xij*yjk - yij*xjk;
00662   float bx = ylk*zjk - zlk*yjk;
00663   float by = zlk*xjk - xlk*zjk;
00664   float bz = xlk*yjk - ylk*xjk;
00665 
00666   float ra2 = ax*ax + ay*ay + az*az;
00667   float rb2 = bx*bx + by*by + bz*bz;
00668   float rg = sqrtf(xjk*xjk + yjk*yjk + zjk*zjk);
00669 
00670   //    if((ra2 <= rxmin2) .or. (rb2 <= rxmin2) .or. (rg <= rxmin)) then
00671   //          nlinear = nlinear + 1
00672   //       endif
00673 
00674   float rgr = 1.0f / rg;
00675   float ra2r = 1.0f / ra2;
00676   float rb2r = 1.0f / rb2;
00677   float rabr = sqrtf(ra2r*rb2r);
00678 
00679   float cos_phi = (ax*bx + ay*by + az*bz)*rabr;
00680   //
00681   // Note that sin(phi).G/|G|=B^A/(|A|.|B|)
00682   // which can be simplify to sin(phi)=|G|H.A/(|A|.|B|)
00683   float sin_phi = rg*rabr*(ax*xlk + ay*ylk + az*zlk);
00684   //
00685   //     Energy and derivative contributions.
00686 
00687   float df;
00688   double e;
00689   diheComp<doEnergy>(dihedralValues, dl.itype, sin_phi, cos_phi, dl.scale, df, e);
00690 
00691   if (doEnergy) energy += e;
00692 
00693   //
00694   //     Compute derivatives wrt catesian coordinates.
00695   //
00696   // GAA=dE/dphi.|G|/A^2, GBB=dE/dphi.|G|/B^2, FG=F.G, HG=H.G
00697   //  FGA=dE/dphi*F.G/(|G|A^2), HGB=dE/dphi*H.G/(|G|B^2)
00698 
00699   float fg = xij*xjk + yij*yjk + zij*zjk;
00700   float hg = xlk*xjk + ylk*yjk + zlk*zjk;
00701   ra2r *= df;
00702   rb2r *= df;
00703   float fga = fg*ra2r*rgr;
00704   float hgb = hg*rb2r*rgr;
00705   float gaa = ra2r*rg;
00706   float gbb = rb2r*rg;
00707   // DFi=dE/dFi, DGi=dE/dGi, DHi=dE/dHi.
00708 
00709   // Store forces
00710   T T_fix, T_fiy, T_fiz;
00711   calcComponentForces<T>(-gaa, ax, ay, az, T_fix, T_fiy, T_fiz);
00712   storeForces<T>(T_fix, T_fiy, T_fiz, dl.i, stride, force);
00713 
00714   T dgx, dgy, dgz;
00715   calcComponentForces<T>(fga, ax, ay, az, -hgb, bx, by, bz, dgx, dgy, dgz);
00716   T T_fjx = dgx - T_fix;
00717   T T_fjy = dgy - T_fiy;
00718   T T_fjz = dgz - T_fiz;
00719   storeForces<T>(T_fjx, T_fjy, T_fjz, dl.j, stride, force);
00720 
00721   T dhx, dhy, dhz;
00722   calcComponentForces<T>(gbb, bx, by, bz, dhx, dhy, dhz);
00723   T T_fkx = -dhx - dgx;
00724   T T_fky = -dhy - dgy;
00725   T T_fkz = -dhz - dgz;
00726   storeForces<T>(T_fkx, T_fky, T_fkz, dl.k, stride, force);
00727   storeForces<T>(dhx, dhy, dhz, dl.l, stride, force);
00728 
00729   // Store virial
00730   if (doVirial) {
00731 #ifdef WRITE_FULL_VIRIALS
00732     float fix = ((float)T_fix)*force_to_float;
00733     float fiy = ((float)T_fiy)*force_to_float;
00734     float fiz = ((float)T_fiz)*force_to_float;
00735     float fjx = ((float)dgx)*force_to_float;
00736     float fjy = ((float)dgy)*force_to_float;
00737     float fjz = ((float)dgz)*force_to_float;
00738     float flx = ((float)dhx)*force_to_float;
00739     float fly = ((float)dhy)*force_to_float;
00740     float flz = ((float)dhz)*force_to_float;
00741     virial.xx = (fix*xij) + (fjx*xjk) + (flx*xlk);
00742     virial.xy = (fix*yij) + (fjx*yjk) + (flx*ylk);
00743     virial.xz = (fix*zij) + (fjx*zjk) + (flx*zlk);
00744     virial.yx = (fiy*xij) + (fjy*xjk) + (fly*xlk);
00745     virial.yy = (fiy*yij) + (fjy*yjk) + (fly*ylk);
00746     virial.yz = (fiy*zij) + (fjy*zjk) + (fly*zlk);
00747     virial.zx = (fiz*xij) + (fjz*xjk) + (flz*xlk);
00748     virial.zy = (fiz*yij) + (fjz*yjk) + (flz*ylk);
00749     virial.zz = (fiz*zij) + (fjz*zjk) + (flz*zlk);
00750 #endif
00751   }
00752 
00753 }

__device__ __forceinline__ float dot ( const float3  v1,
const float3  v2 
)

Definition at line 763 of file ComputeBondedCUDAKernel.cu.

Referenced by crosstermForce(), and ARestraint::GetDihe().

00763                                                                        {
00764   return (v1.x * v2.x + v1.y * v2.y + v1.z * v2.z);
00765 }

template<typename T , bool doEnergy, bool doVirial>
__device__ void exclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const float  r2_delta,
const int  r2_delta_expc,
cudaTextureObject_t  r2_table_tex,
cudaTextureObject_t  exclusionTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialSlow 
) [inline]

Definition at line 346 of file ComputeBondedCUDAKernel.cu.

References __ldg, diffa, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, and table_i.

00367     {
00368 
00369   CudaExclusion bl = exclusionList[index];
00370 
00371   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00372   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00373   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00374 
00375   float4 xyzqi = xyzq[bl.i];
00376   float4 xyzqj = xyzq[bl.j];
00377 
00378   float xij = xyzqi.x + shx - xyzqj.x;
00379   float yij = xyzqi.y + shy - xyzqj.y;
00380   float zij = xyzqi.z + shz - xyzqj.z;
00381 
00382   float r2 = xij*xij + yij*yij + zij*zij;
00383   if (r2 < cutoff2) {
00384     r2 += r2_delta;
00385 
00386     union { float f; int i; } r2i;
00387     r2i.f = r2;
00388     int table_i = (r2i.i >> 17) + r2_delta_expc;  // table_i >= 0
00389 
00390 #if __CUDA_ARCH__ >= 350
00391     float r2_table_val = __ldg(&r2_table[table_i]);
00392 #else
00393     float r2_table_val = tex1Dfetch<float>(r2_table_tex, table_i);
00394 #endif
00395     float diffa = r2 - r2_table_val;
00396     float qq = xyzqi.w * xyzqj.w;
00397 
00398 #if __CUDA_ARCH__ >= 350
00399     float4 slow = __ldg(&exclusionTable[table_i]);
00400 #else
00401     float4 slow = tex1Dfetch<float4>(exclusionTableTex, table_i);
00402 #endif
00403 
00404     if (doEnergy) {
00405       energySlow += (double)(qq*(((diffa * (1.0f/6.0f)*slow.x + 0.25f*slow.y ) * diffa + 0.5f*slow.z ) * diffa + slow.w));
00406     }
00407 
00408     float fSlow = -qq*((diffa * slow.x + slow.y) * diffa + slow.z);
00409 
00410     T T_fxij, T_fyij, T_fzij;
00411     calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00412     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00413     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00414 
00415     // Store virial
00416     if (doVirial) {
00417 #ifdef WRITE_FULL_VIRIALS
00418       float fxij = fSlow*xij;
00419       float fyij = fSlow*yij;
00420       float fzij = fSlow*zij;
00421       virialSlow.xx = (fxij*xij);
00422       virialSlow.xy = (fxij*yij);
00423       virialSlow.xz = (fxij*zij);
00424       virialSlow.yx = (fyij*xij);
00425       virialSlow.yy = (fyij*yij);
00426       virialSlow.yz = (fyij*zij);
00427       virialSlow.zx = (fzij*xij);
00428       virialSlow.zy = (fzij*yij);
00429       virialSlow.zz = (fzij*zij);
00430 #endif
00431     }
00432   }
00433 }

__device__ long long int lliroundf ( float  f  )  [inline]

Definition at line 17 of file ComputeBondedCUDAKernel.cu.

00018 {
00019     long long int l;
00020     asm("cvt.rni.s64.f32  %0, %1;" : "=l"(l) : "f"(f));
00021     return l;
00022 }

__device__ unsigned long long int llitoulli ( long long int  l  )  [inline]

Definition at line 24 of file ComputeBondedCUDAKernel.cu.

00025 {
00026     unsigned long long int u;
00027     asm("mov.b64    %0, %1;" : "=l"(u) : "l"(l));
00028     return u;
00029 }

template<typename T , bool doEnergy, bool doVirial, bool doElect>
__device__ void modifiedExclusionForce ( const int  index,
const CudaExclusion *__restrict__  exclusionList,
const bool  doSlow,
const float  one_scale14,
const int  vdwCoefTableWidth,
cudaTextureObject_t  vdwCoefTableTex,
cudaTextureObject_t  forceTableTex,
cudaTextureObject_t  energyTableTex,
const float4 *__restrict__  xyzq,
const int  stride,
const float3  lata,
const float3  latb,
const float3  latc,
const float  cutoff2,
double &  energyVdw,
T *__restrict__  forceNbond,
double &  energyNbond,
T *__restrict__  forceSlow,
double &  energySlow,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialNbond,
ComputeBondedCUDAKernel::BondedVirial *__restrict__  virialSlow 
) [inline]

Definition at line 216 of file ComputeBondedCUDAKernel.cu.

References __ldg, CudaExclusion::i, CudaExclusion::ioffsetXYZ, CudaExclusion::j, CudaExclusion::vdwtypei, CudaExclusion::vdwtypej, float2::x, ComputeBondedCUDAKernel::BondedVirial< T >::xx, ComputeBondedCUDAKernel::BondedVirial< T >::xy, ComputeBondedCUDAKernel::BondedVirial< T >::xz, float2::y, ComputeBondedCUDAKernel::BondedVirial< T >::yx, ComputeBondedCUDAKernel::BondedVirial< T >::yy, ComputeBondedCUDAKernel::BondedVirial< T >::yz, ComputeBondedCUDAKernel::BondedVirial< T >::zx, ComputeBondedCUDAKernel::BondedVirial< T >::zy, and ComputeBondedCUDAKernel::BondedVirial< T >::zz.

00242     {
00243 
00244   CudaExclusion bl = exclusionList[index];
00245 
00246   float shx = bl.ioffsetXYZ.x*lata.x + bl.ioffsetXYZ.y*latb.x + bl.ioffsetXYZ.z*latc.x;
00247   float shy = bl.ioffsetXYZ.x*lata.y + bl.ioffsetXYZ.y*latb.y + bl.ioffsetXYZ.z*latc.y;
00248   float shz = bl.ioffsetXYZ.x*lata.z + bl.ioffsetXYZ.y*latb.z + bl.ioffsetXYZ.z*latc.z;
00249 
00250   float4 xyzqi = xyzq[bl.i];
00251   float4 xyzqj = xyzq[bl.j];
00252 
00253   float xij = xyzqi.x + shx - xyzqj.x;
00254   float yij = xyzqi.y + shy - xyzqj.y;
00255   float zij = xyzqi.z + shz - xyzqj.z;
00256 
00257   float r2 = xij*xij + yij*yij + zij*zij;
00258   if (r2 < cutoff2) {
00259 
00260     float rinv = rsqrtf(r2);
00261 
00262     float qq;
00263     if (doElect) qq = one_scale14 * xyzqi.w * xyzqj.w;
00264 
00265     int vdwIndex = bl.vdwtypei + bl.vdwtypej*vdwCoefTableWidth;
00266 #if __CUDA_ARCH__ >= 350
00267     float2 ljab = __ldg(&vdwCoefTable[vdwIndex]);
00268 #else
00269     float2 ljab = tex1Dfetch<float2>(vdwCoefTableTex, vdwIndex);
00270 #endif
00271 
00272     float4 fi = tex1D<float4>(forceTableTex, rinv);
00273     float4 ei;
00274     if (doEnergy) {
00275       ei = tex1D<float4>(energyTableTex, rinv);
00276       energyVdw   += (double)(ljab.x * ei.z + ljab.y * ei.y);
00277       if (doElect) {
00278         energyNbond += qq * ei.x;
00279         if (doSlow) energySlow  += qq * ei.w;
00280       }
00281     }
00282 
00283     float fNbond;
00284     if (doElect) {
00285       fNbond = -(ljab.x * fi.z + ljab.y * fi.y + qq * fi.x);
00286     } else {
00287       fNbond = -(ljab.x * fi.z + ljab.y * fi.y);
00288     }
00289     T T_fxij, T_fyij, T_fzij;
00290     calcComponentForces<T>(fNbond, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00291     storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceNbond);
00292     storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceNbond);
00293 
00294     float fSlow;
00295     if (doSlow && doElect) {
00296       fSlow = -qq * fi.w;
00297       T T_fxij, T_fyij, T_fzij;
00298       calcComponentForces<T>(fSlow, xij, yij, zij, T_fxij, T_fyij, T_fzij);
00299       storeForces<T>(T_fxij, T_fyij, T_fzij, bl.i, stride, forceSlow);
00300       storeForces<T>(-T_fxij, -T_fyij, -T_fzij, bl.j, stride, forceSlow);
00301     }
00302 
00303     // Store virial
00304     if (doVirial) {
00305 #ifdef WRITE_FULL_VIRIALS
00306       float fxij = fNbond*xij;
00307       float fyij = fNbond*yij;
00308       float fzij = fNbond*zij;
00309       virialNbond.xx = (fxij*xij);
00310       virialNbond.xy = (fxij*yij);
00311       virialNbond.xz = (fxij*zij);
00312       virialNbond.yx = (fyij*xij);
00313       virialNbond.yy = (fyij*yij);
00314       virialNbond.yz = (fyij*zij);
00315       virialNbond.zx = (fzij*xij);
00316       virialNbond.zy = (fzij*yij);
00317       virialNbond.zz = (fzij*zij);
00318 #endif
00319     }
00320 
00321     // Store virial
00322     if (doVirial && doSlow && doElect) {
00323 #ifdef WRITE_FULL_VIRIALS
00324       float fxij = fSlow*xij;
00325       float fyij = fSlow*yij;
00326       float fzij = fSlow*zij;
00327       virialSlow.xx = (fxij*xij);
00328       virialSlow.xy = (fxij*yij);
00329       virialSlow.xz = (fxij*zij);
00330       virialSlow.yx = (fyij*xij);
00331       virialSlow.yy = (fyij*yij);
00332       virialSlow.yz = (fyij*zij);
00333       virialSlow.zx = (fzij*xij);
00334       virialSlow.zy = (fzij*yij);
00335       virialSlow.zz = (fzij*zij);
00336 #endif
00337     }
00338 
00339   }
00340 }

template<typename T >
__forceinline__ __device__ void storeForces ( const T  fx,
const T  fy,
const T  fz,
const int  ind,
const int  stride,
T *  force 
) [inline]

Definition at line 118 of file ComputeBondedCUDAKernel.cu.

00120                {
00121   // The generic version can not be used
00122 }

template<>
__forceinline__ __device__ void storeForces< long long int > ( const long long int  fx,
const long long int  fy,
const long long int  fz,
const int  ind,
const int  stride,
long long int *  force 
) [inline]

Variable Documentation


Generated on 21 Jan 2020 for NAMD by  doxygen 1.6.1