NAMD
ComputePme Class Reference

#include <ComputePme.h>

Inheritance diagram for ComputePme (diagram omitted): ComputePme inherits from Compute and ComputePmeUtil.

Public Member Functions

 ComputePme (ComputeID c, PatchID pid)
 
virtual ~ComputePme ()
 
void initialize ()
 
void atomUpdate ()
 
int noWork ()
 
void doWork ()
 
void doQMWork ()
 
void ungridForces ()
 
void setMgr (ComputePmeMgr *mgr)
 
- Public Member Functions inherited from Compute
 Compute (ComputeID)
 
int type ()
 
virtual ~Compute ()
 
void setNumPatches (int n)
 
int getNumPatches ()
 
virtual void patchReady (PatchID, int doneMigration, int seq)
 
virtual void finishPatch (int)
 
int sequence (void)
 
int priority (void)
 
int getGBISPhase (void)
 
virtual void gbisP2PatchReady (PatchID, int seq)
 
virtual void gbisP3PatchReady (PatchID, int seq)
 
- Public Member Functions inherited from ComputePmeUtil
 ComputePmeUtil ()
 
 ~ComputePmeUtil ()
 

Friends

class ComputePmeMgr
 

Additional Inherited Members

- Static Public Member Functions inherited from ComputePmeUtil
static void select (void)
 
- Public Attributes inherited from Compute
const ComputeID cid
 
LDObjHandle ldObjHandle
 
LocalWorkMsg *const localWorkMsg
 
- Static Public Attributes inherited from ComputePmeUtil
static int numGrids
 
static Bool alchOn
 
static Bool alchFepOn
 
static Bool alchThermIntOn
 
static Bool alchDecouple
 
static BigReal alchElecLambdaStart
 
static Bool lesOn
 
static int lesFactor
 
static Bool pairOn
 
static Bool selfOn
 
- Protected Member Functions inherited from Compute
void enqueueWork ()
 
- Protected Attributes inherited from Compute
int computeType
 
int basePriority
 
int gbisPhase
 
int gbisPhasePriority [3]
 

Detailed Description

ComputePme evaluates the reciprocal-space (particle-mesh Ewald) contribution for one patch: doWork() gathers the patch's positions and charges and spreads them onto the PME charge grid (or hands them to the CUDA offload path), and ungridForces() later interpolates the resulting grid forces back onto the atoms and deposits them as slow forces. Per-step coordination across patches is handled by the friend class ComputePmeMgr.

Definition at line 46 of file ComputePme.h.

Constructor & Destructor Documentation

ComputePme::ComputePme ( ComputeID c, PatchID pid )

Definition at line 2667 of file ComputePme.C.

References Compute::basePriority, DebugM, PmeGrid::dim2, PmeGrid::dim3, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, ComputePmeUtil::numGrids, Node::Object(), PmeGrid::order, PME_PRIORITY, SimParameters::PMEGridSizeX, SimParameters::PMEGridSizeY, SimParameters::PMEGridSizeZ, SimParameters::PMEInterpOrder, SimParameters::PMEOffload, SimParameters::qmForcesOn, Compute::setNumPatches(), and Node::simParameters.

2667  : Compute(c), patchID(pid)
2668 {
2669  DebugM(4,"ComputePme created.\n");
2671  setNumPatches(1);
2672 
2673  CProxy_ComputePmeMgr::ckLocalBranch(
2674  CkpvAccess(BOCclass_group).computePmeMgr)->addCompute(this);
2675 
2677 
2678  qmForcesOn = simParams->qmForcesOn;
2679  offload = simParams->PMEOffload;
2680 
2681  numGridsMax = numGrids;
2682 
2683  myGrid.K1 = simParams->PMEGridSizeX;
2684  myGrid.K2 = simParams->PMEGridSizeY;
2685  myGrid.K3 = simParams->PMEGridSizeZ;
2686  myGrid.order = simParams->PMEInterpOrder;
2687  myGrid.dim2 = myGrid.K2;
2688  myGrid.dim3 = 2 * (myGrid.K3/2 + 1);
2689 
2690 #ifdef NAMD_CUDA
2691  cuda_atoms_offset = 0;
2692  f_data_host = 0;
2693  f_data_dev = 0;
2694  if ( ! offload )
2695 #endif
2696  {
2697  for ( int g=0; g<numGrids; ++g ) myRealSpace[g] = new PmeRealSpace(myGrid);
2698  }
2699 
2700  atomsChanged = 0;
2701 
2702  qmLoclIndx = 0;
2703  qmLocalCharges = 0;
2704 }
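
The dim2/dim3 assignments above size the charge grid for an in-place real-to-complex FFT along the third dimension: K3 real values transform into K3/2+1 complex values, which occupy 2*(K3/2+1) real slots. A standalone sketch of that padding rule (not NAMD code):

#include <cstdio>

// Padded length of the fastest-varying grid dimension for an in-place
// real-to-complex FFT -- the convention behind
// myGrid.dim3 = 2 * (myGrid.K3/2 + 1) in the constructor above.
static int paddedDim3(int K3) {
    int complexCount = K3 / 2 + 1;   // complex outputs of an R2C transform
    return 2 * complexCount;         // real slots needed to hold them in place
}

int main() {
    std::printf("K3 = 64  -> dim3 = %d\n", paddedDim3(64));   // 66
    std::printf("K3 = 108 -> dim3 = %d\n", paddedDim3(108));  // 110
    return 0;
}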
ComputePme::~ComputePme ( )
virtual

Definition at line 2940 of file ComputePme.C.

2941 {
2942 #ifdef NAMD_CUDA
2943  if ( ! offload )
2944 #endif
2945  {
2946  for ( int g=0; g<numGridsMax; ++g ) delete myRealSpace[g];
2947  }
2948 }

Member Function Documentation

void ComputePme::atomUpdate ( void  )
virtual

Reimplemented from Compute.

Definition at line 2665 of file ComputePme.C.

2665 { atomsChanged = 1; }
void ComputePme::doQMWork ( )

Definition at line 3033 of file ComputePme.C.

References doWork(), Molecule::get_numQMAtoms(), Molecule::get_qmAtmChrg(), Molecule::get_qmAtmIndx(), Molecule::get_qmAtomGroup(), Patch::getCompAtomExtInfo(), Patch::getNumAtoms(), Node::molecule, and Node::Object().

3033  {
3034 
3035 // iout << CkMyPe() << ") ----> PME doQMWork.\n" << endi ;
3036 
3037 
3038  int numQMAtms = Node::Object()->molecule->get_numQMAtoms();
3039  const Real *qmAtmChrg = Node::Object()->molecule->get_qmAtmChrg() ;
3040  const int *qmAtmIndx = Node::Object()->molecule->get_qmAtmIndx() ;
3041  const Real *qmAtomGroup = Node::Object()->molecule->get_qmAtomGroup() ;
3042 
3043  const CompAtomExt *xExt = patch->getCompAtomExtInfo();
3044 
3045  // Determine number of qm atoms in this patch for the current step.
3046  numLocalQMAtoms = 0;
3047  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3048  if ( qmAtomGroup[xExt[paIter].id] != 0 ) {
3049  numLocalQMAtoms++;
3050  }
3051  }
3052 
3053  // We prepare a charge vector with QM charges for use in the PME calculation.
3054 
3055  // Clears data from last step, if there is any.
3056  if (qmLoclIndx != 0)
3057  delete [] qmLoclIndx;
3058  if (qmLocalCharges != 0)
3059  delete [] qmLocalCharges;
3060 
3061  qmLoclIndx = new int[numLocalQMAtoms] ;
3062  qmLocalCharges = new Real[numLocalQMAtoms] ;
3063 
3064  // I am assuming there will be (in general) more QM atoms among all QM groups
3065  // than MM atoms in a patch.
3066  int procAtms = 0;
3067 
3068  for (int paIter=0; paIter<patch->getNumAtoms(); paIter++) {
3069 
3070  for (int i=0; i<numQMAtms; i++) {
3071 
3072  if (qmAtmIndx[i] == xExt[paIter].id) {
3073 
3074  qmLoclIndx[procAtms] = paIter ;
3075  qmLocalCharges[procAtms] = qmAtmChrg[i];
3076 
3077  procAtms++;
3078  break;
3079  }
3080 
3081  }
3082 
3083  if (procAtms == numLocalQMAtoms)
3084  break;
3085  }
3086 
3087  doWork();
3088  return ;
3089 }
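
The nested loops above match each patch atom against the global QM atom list by id, an O(numPatchAtoms x numQMAtms) scan with an early exit once all local QM atoms have been found. Purely as an illustration (this is not the NAMD code; the struct below and the names ending in an underscore are hypothetical), the same mapping expressed with a hash map:

#include <unordered_map>

struct AtomExtLike { int id; };   // stand-in for the id field of CompAtomExt

// For each patch atom whose global id appears in the QM atom list, record
// its local (patch) index and its QM charge; returns the number found.
int mapQMCharges(int numQMAtms, const int *qmAtmIndx, const float *qmAtmChrg,
                 int numPatchAtoms, const AtomExtLike *xExt,
                 int *qmLoclIndx_, float *qmLocalCharges_) {
    std::unordered_map<int, float> chargeById;
    for (int i = 0; i < numQMAtms; ++i) chargeById[qmAtmIndx[i]] = qmAtmChrg[i];

    int procAtms = 0;
    for (int p = 0; p < numPatchAtoms; ++p) {
        auto it = chargeById.find(xExt[p].id);
        if (it != chargeById.end()) {              // this patch atom is a QM atom
            qmLoclIndx_[procAtms] = p;             // local index within the patch
            qmLocalCharges_[procAtms] = it->second;
            ++procAtms;
        }
    }
    return procAtms;                               // == numLocalQMAtoms
}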
void ComputePme::doWork ( void  )
virtual

Reimplemented from Compute.

Definition at line 3091 of file ComputePme.C.

References ComputePmeMgr::a_data_dev, ComputePmeMgr::a_data_host, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchOn, Compute::basePriority, ResizeArray< T >::begin(), PmeParticle::cg, CompAtom::charge, ComputePmeMgr::chargeGridReady(), Compute::cid, Box< Owner, Data >::close(), COMPUTE_HOME_PRIORITY, COULOMB, ComputePmeMgr::cuda_atoms_alloc, ComputePmeMgr::cuda_atoms_count, ComputePmeMgr::cuda_busy, cuda_errcheck(), ComputePmeMgr::cuda_lock, ComputePmeMgr::cuda_submit_charges(), ComputePmeMgr::cuda_submit_charges_deque, DebugM, deviceCUDA, ComputeNonbondedUtil::dielectric_1, Flags::doMolly, ComputeNonbondedUtil::ewaldcof, PmeRealSpace::fill_charges(), Patch::flags, DeviceCUDA::getDeviceID(), DeviceCUDA::getMasterPe(), Patch::getNumAtoms(), PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Flags::lattice, ComputePmeMgr::cuda_submit_charges_args::lattice, ComputePmeUtil::lesOn, ComputePmeMgr::cuda_submit_charges_args::mgr, NAMD_bug(), ComputePmeUtil::numGrids, Box< Owner, Data >::open(), PmeGrid::order, ComputePmeUtil::pairOn, CompAtom::partition, PATCH_PRIORITY, PME_OFFLOAD_PRIORITY, PME_PRIORITY, ComputePmeMgr::pmeComputes, CompAtom::position, ResizeArray< T >::resize(), scale_coordinates(), ComputeNonbondedUtil::scaling, ComputePmeUtil::selfOn, Compute::sequence(), ComputePmeMgr::cuda_submit_charges_args::sequence, PmeRealSpace::set_num_atoms(), ResizeArray< T >::size(), SQRT_PI, ComputePmeMgr::submitReductions(), TRACE_COMPOBJ_IDOFFSET, ungridForces(), PmeParticle::x, Vector::x, PmeParticle::y, Vector::y, PmeParticle::z, and Vector::z.

Referenced by doQMWork().

3092 {
3093  DebugM(4,"Entering ComputePme::doWork().\n");
3094 
3096 #ifdef NAMD_CUDA
3098 #else
3100 #endif
3101  ungridForces();
3102  // CkPrintf("doWork 2 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3103  if ( ! --(myMgr->ungridForcesCount) && ! myMgr->recipEvirCount ) myMgr->submitReductions();
3104  return;
3105  }
3107  // CkPrintf("doWork 1 pe %d %d %d\n", CkMyPe(), myMgr->ungridForcesCount, myMgr->recipEvirCount);
3108 
3109 #ifdef TRACE_COMPUTE_OBJECTS
3110  double traceObjStartTime = CmiWallTimer();
3111 #endif
3112 
3113 #ifdef NAMD_CUDA
3114  if ( offload ) cudaSetDevice(deviceCUDA->getDeviceID());
3115 #endif
3116 
3117  // allocate storage
3118  numLocalAtoms = patch->getNumAtoms();
3119 
3120  Lattice &lattice = patch->flags.lattice;
3121 
3122  localData_alloc.resize(numLocalAtoms*(numGrids+ ((numGrids>1 || selfOn)?1:0)));
3123  localData = localData_alloc.begin();
3124  localPartition_alloc.resize(numLocalAtoms);
3125  localPartition = localPartition_alloc.begin();
3126 
3127  int g;
3128  for ( g=0; g<numGrids; ++g ) {
3129  localGridData[g] = localData + numLocalAtoms*(g+1);
3130  }
3131 
3132  // get positions and charges
3133  PmeParticle * data_ptr = localData;
3134  unsigned char * part_ptr = localPartition;
3135  const BigReal coulomb_sqrt = sqrt( COULOMB * ComputeNonbondedUtil::scaling
3137 
3138  {
3139  CompAtom *x = positionBox->open();
3140  // CompAtomExt *xExt = patch->getCompAtomExtInfo();
3141  if ( patch->flags.doMolly ) {
3142  positionBox->close(&x);
3143  x = avgPositionBox->open();
3144  }
3145  int numAtoms = patch->getNumAtoms();
3146 
3147  for(int i=0; i<numAtoms; ++i)
3148  {
3149  data_ptr->x = x[i].position.x;
3150  data_ptr->y = x[i].position.y;
3151  data_ptr->z = x[i].position.z;
3152  data_ptr->cg = coulomb_sqrt * x[i].charge;
3153  ++data_ptr;
3154  *part_ptr = x[i].partition;
3155  ++part_ptr;
3156  }
3157 
3158  // QM loop to overwrite charges of QM atoms.
3159  // They are zero for NAMD, but are updated in ComputeQM.
3160  if ( qmForcesOn ) {
3161 
3162  for(int i=0; i<numLocalQMAtoms; ++i)
3163  {
3164  localData[qmLoclIndx[i]].cg = coulomb_sqrt * qmLocalCharges[i];
3165  }
3166 
3167  }
3168 
3169  if ( patch->flags.doMolly ) { avgPositionBox->close(&x); }
3170  else { positionBox->close(&x); }
3171  }
3172 
3173  // copy to other grids if needed
3174  if ( (alchOn && (!alchDecouple)) || lesOn ) {
3175  for ( g=0; g<numGrids; ++g ) {
3176  PmeParticle *lgd = localGridData[g];
3177  if (g < 2) {
3178  int nga = 0;
3179  for(int i=0; i<numLocalAtoms; ++i) {
3180  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3)) {
3181  // for FEP/TI: grid 0 gets non-alch + partition 1 + partition 3;
3182  // grid 1 gets non-alch + partition 2 + partition 4;
3183  lgd[nga++] = localData[i];
3184  }
3185  }
3186  numGridAtoms[g] = nga;
3187  } else {
3188  int nga = 0;
3189  for(int i=0; i<numLocalAtoms; ++i) {
3190  if ( localPartition[i] == 0 ) {
3191  // grid 2 (only if called for with numGrids=3) gets only non-alch
3192  lgd[nga++] = localData[i];
3193  }
3194  }
3195  numGridAtoms[g] = nga;
3196  }
3197  }
3198  } else if ( alchOn && alchDecouple) {
3199  // alchemical decoupling: four grids
3200  // g=0: partition 0 and partition 1
3201  // g=1: partition 0 and partition 2
3202  // g=2: only partition 1 atoms
3203  // g=3: only partition 2 atoms
3204  // plus one grid g=4, only partition 0, if numGrids=5
3205  for ( g=0; g<2; ++g ) { // same as before for first 2
3206  PmeParticle *lgd = localGridData[g];
3207  int nga = 0;
3208  for(int i=0; i<numLocalAtoms; ++i) {
3209  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
3210  lgd[nga++] = localData[i];
3211  }
3212  }
3213  numGridAtoms[g] = nga;
3214  }
3215  for (g=2 ; g<4 ; ++g ) { // only alchemical atoms for these 2
3216  PmeParticle *lgd = localGridData[g];
3217  int nga = 0;
3218  for(int i=0; i<numLocalAtoms; ++i) {
3219  if ( localPartition[i] == (g-1) ) {
3220  lgd[nga++] = localData[i];
3221  }
3222  }
3223  numGridAtoms[g] = nga;
3224  }
3225  for (g=4 ; g<numGrids ; ++g ) { // only non-alchemical atoms
3226  // numGrids=5 only if alchElecLambdaStart > 0
3227  PmeParticle *lgd = localGridData[g];
3228  int nga = 0;
3229  for(int i=0; i<numLocalAtoms; ++i) {
3230  if ( localPartition[i] == 0 ) {
3231  lgd[nga++] = localData[i];
3232  }
3233  }
3234  numGridAtoms[g] = nga;
3235  }
3236  } else if ( selfOn ) {
3237  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 1 failed");
3238  g = 0;
3239  PmeParticle *lgd = localGridData[g];
3240  int nga = 0;
3241  for(int i=0; i<numLocalAtoms; ++i) {
3242  if ( localPartition[i] == 1 ) {
3243  lgd[nga++] = localData[i];
3244  }
3245  }
3246  numGridAtoms[g] = nga;
3247  } else if ( pairOn ) {
3248  if ( numGrids != 3 ) NAMD_bug("ComputePme::doWork assertion 2 failed");
3249  g = 0;
3250  PmeParticle *lgd = localGridData[g];
3251  int nga = 0;
3252  for(int i=0; i<numLocalAtoms; ++i) {
3253  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
3254  lgd[nga++] = localData[i];
3255  }
3256  }
3257  numGridAtoms[g] = nga;
3258  for ( g=1; g<3; ++g ) {
3259  PmeParticle *lgd = localGridData[g];
3260  int nga = 0;
3261  for(int i=0; i<numLocalAtoms; ++i) {
3262  if ( localPartition[i] == g ) {
3263  lgd[nga++] = localData[i];
3264  }
3265  }
3266  numGridAtoms[g] = nga;
3267  }
3268  } else {
3269  if ( numGrids != 1 ) NAMD_bug("ComputePme::doWork assertion 3 failed");
3270  localGridData[0] = localData;
3271  numGridAtoms[0] = numLocalAtoms;
3272  }
3273 
3274  if ( ! myMgr->doWorkCount ) {
3275  myMgr->doWorkCount = myMgr->pmeComputes.size();
3276 
3277 #ifdef NAMD_CUDA
3278  if ( ! offload )
3279 #endif // NAMD_CUDA
3280  {
3281  memset( (void*) myMgr->fz_arr, 0, (myGrid.K3+myGrid.order-1) * sizeof(char) );
3282 
3283  for (int i=0; i<myMgr->q_count; ++i) {
3284  memset( (void*) (myMgr->q_list[i]), 0, (myGrid.K3+myGrid.order-1) * sizeof(float) );
3285  }
3286  }
3287 
3288  for ( g=0; g<numGrids; ++g ) {
3289  myMgr->evir[g] = 0;
3290  }
3291 
3292  myMgr->strayChargeErrors = 0;
3293 
3294  myMgr->compute_sequence = sequence();
3295  }
3296 
3297  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in doWork()");
3298 
3299  int strayChargeErrors = 0;
3300 
3301  // calculate self energy
3303  for ( g=0; g<numGrids; ++g ) {
3304  BigReal selfEnergy = 0;
3305  data_ptr = localGridData[g];
3306  int i;
3307  for(i=0; i<numGridAtoms[g]; ++i)
3308  {
3309  selfEnergy += data_ptr->cg * data_ptr->cg;
3310  ++data_ptr;
3311  }
3312  selfEnergy *= -1. * ewaldcof / SQRT_PI;
3313  myMgr->evir[g][0] += selfEnergy;
3314 
3315  float **q = myMgr->q_arr + g*myMgr->fsize;
3316  char *f = myMgr->f_arr + g*myMgr->fsize;
3317 
3318  scale_coordinates(localGridData[g], numGridAtoms[g], lattice, myGrid);
3319 #ifdef NAMD_CUDA
3320  if ( offload ) {
3321  if ( myMgr->cuda_atoms_alloc == 0 ) { // first call
3322  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3323  cuda_errcheck("before malloc atom data for pme");
3324  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3325  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3326  cuda_errcheck("malloc atom data for pme");
3327  myMgr->cuda_atoms_count = 0;
3328  }
3329  cuda_atoms_offset = myMgr->cuda_atoms_count;
3330  int n = numGridAtoms[g];
3331  myMgr->cuda_atoms_count += n;
3332  if ( myMgr->cuda_atoms_count > myMgr->cuda_atoms_alloc ) {
3333  CkPrintf("Pe %d expanding CUDA PME atoms allocation because %d > %d\n",
3334  CkMyPe(), myMgr->cuda_atoms_count, myMgr->cuda_atoms_alloc);
3335  cuda_errcheck("before malloc expanded atom data for pme");
3336  int na = myMgr->cuda_atoms_alloc = 1.2 * (myMgr->cuda_atoms_count + 1000);
3337  const float *a_data_host_old = myMgr->a_data_host;
3338  cudaMallocHost((void**) &(myMgr->a_data_host), 7*na*sizeof(float));
3339  cuda_errcheck("malloc expanded host atom data for pme");
3340  memcpy(myMgr->a_data_host, a_data_host_old, 7*cuda_atoms_offset*sizeof(float));
3341  cudaFreeHost((void*) a_data_host_old);
3342  cuda_errcheck("free expanded host atom data for pme");
3343  cudaFree(myMgr->a_data_dev);
3344  cuda_errcheck("free expanded dev atom data for pme");
3345  cudaMalloc((void**) &(myMgr->a_data_dev), 7*na*sizeof(float));
3346  cuda_errcheck("malloc expanded dev atom data for pme");
3347  }
3348  float *a_data_host = myMgr->a_data_host + 7 * cuda_atoms_offset;
3349  data_ptr = localGridData[g];
3350  double order_1 = myGrid.order - 1;
3351  double K1 = myGrid.K1;
3352  double K2 = myGrid.K2;
3353  double K3 = myGrid.K3;
3354  int found_negative = 0;
3355  for ( int i=0; i<n; ++i ) {
3356  if ( data_ptr[i].x < 0 || data_ptr[i].y < 0 || data_ptr[i].z < 0 ) {
3357  found_negative = 1;
3358  // CkPrintf("low coord: %f %f %f\n", data_ptr[i].x, data_ptr[i].y, data_ptr[i].z);
3359  }
3360  double x_int = (int) data_ptr[i].x;
3361  double y_int = (int) data_ptr[i].y;
3362  double z_int = (int) data_ptr[i].z;
3363  a_data_host[7*i ] = data_ptr[i].x - x_int; // subtract in double precision
3364  a_data_host[7*i+1] = data_ptr[i].y - y_int;
3365  a_data_host[7*i+2] = data_ptr[i].z - z_int;
3366  a_data_host[7*i+3] = data_ptr[i].cg;
3367  x_int -= order_1; if ( x_int < 0 ) x_int += K1;
3368  y_int -= order_1; if ( y_int < 0 ) y_int += K2;
3369  z_int -= order_1; if ( z_int < 0 ) z_int += K3;
3370  a_data_host[7*i+4] = x_int;
3371  a_data_host[7*i+5] = y_int;
3372  a_data_host[7*i+6] = z_int;
3373  }
3374  if ( found_negative ) NAMD_bug("found negative atom coordinate in ComputePme::doWork");
3375  } else
3376 #endif // NAMD_CUDA
3377  {
3378  myRealSpace[g]->set_num_atoms(numGridAtoms[g]);
3379  myRealSpace[g]->fill_charges(q, myMgr->q_list, myMgr->q_count, strayChargeErrors, f, myMgr->fz_arr, localGridData[g]);
3380  }
3381  }
3382  myMgr->strayChargeErrors += strayChargeErrors;
3383 
3384 #ifdef TRACE_COMPUTE_OBJECTS
3385  traceUserBracketEvent(TRACE_COMPOBJ_IDOFFSET+this->cid, traceObjStartTime, CmiWallTimer());
3386 #endif
3387 
3388  if ( --(myMgr->doWorkCount) == 0 ) {
3389 // cudaDeviceSynchronize(); // XXXX
3390 #ifdef NAMD_CUDA
3391  if ( offload ) {
3393  args.mgr = myMgr;
3394  args.lattice = &lattice;
3395  args.sequence = sequence();
3396  CmiLock(ComputePmeMgr::cuda_lock);
3397  if ( ComputePmeMgr::cuda_busy ) {
3399  } else if ( CkMyPe() == deviceCUDA->getMasterPe() ) {
3400  // avoid adding work to nonbonded data preparation pe
3401  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3402  } else {
3403  ComputePmeMgr::cuda_busy = true;
3404  while ( 1 ) {
3405  CmiUnlock(ComputePmeMgr::cuda_lock);
3406  args.mgr->cuda_submit_charges(*args.lattice, args.sequence);
3407  CmiLock(ComputePmeMgr::cuda_lock);
3411  } else {
3412  ComputePmeMgr::cuda_busy = false;
3413  break;
3414  }
3415  }
3416  }
3417  CmiUnlock(ComputePmeMgr::cuda_lock);
3418  } else
3419 #endif // NAMD_CUDA
3420  {
3421  myMgr->chargeGridReady(lattice,sequence());
3422  }
3423  }
3424  atomsChanged = 0;
3425 }
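
Two details of the listing are worth restating. The per-grid loop accumulates the standard Ewald self-interaction correction before charge spreading; with the scaled charges \tilde{q}_i = coulomb_sqrt \cdot q_i and \kappa = ewaldcof it is

    E_{self}^{(g)} = -\frac{\kappa}{\sqrt{\pi}} \sum_{i \in g} \tilde{q}_i^{\,2}

In the CUDA offload path, each atom is packed into seven floats of a_data_host: the fractional parts of its scaled x, y, z grid coordinates (the integer part is subtracted in double precision), the scaled charge, and the integer grid coordinates shifted down by order-1 and wrapped periodically into [0, K1), [0, K2), [0, K3).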
void ComputePme::initialize ( void  )
virtual

Reimplemented from Compute.

Definition at line 2706 of file ComputePme.C.

References ComputePmeMgr::cuda_atoms_count, Patch::getNumAtoms(), NAMD_bug(), PatchMap::Object(), Patch::registerAvgPositionPickup(), Patch::registerForceDeposit(), and Patch::registerPositionPickup().

2706  {
2707  if (!(patch = PatchMap::Object()->patch(patchID))) {
2708  NAMD_bug("ComputePme used with unknown patch.");
2709  }
2710  positionBox = patch->registerPositionPickup(this);
2711  avgPositionBox = patch->registerAvgPositionPickup(this);
2712  forceBox = patch->registerForceDeposit(this);
2713 #ifdef NAMD_CUDA
2714  if ( offload ) {
2715  myMgr->cuda_atoms_count += patch->getNumAtoms();
2716  }
2717 #endif
2718 }
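
initialize() registers this compute with its patch for position pickup (plus the MOLLY average-position variant) and force deposit. The returned Box objects are then opened and closed once per step, as the doWork() and ungridForces() listings show. A condensed sketch of that cycle, assembled from those listings (MOLLY branch, QM charge override, and error handling omitted):

// In doWork(): read this step's positions and charges.
CompAtom *x = positionBox->open();
for (int i = 0; i < patch->getNumAtoms(); ++i) {
    // copy x[i].position and x[i].charge into localData
}
positionBox->close(&x);

// In ungridForces(): deposit the reciprocal-space ("slow") forces.
Results *r = forceBox->open();
Force *slow = r->f[Results::slow];
for (int i = 0; i < patch->getNumAtoms(); ++i) {
    // slow[i] += interpolated PME force for atom i
}
forceBox->close(&r);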
int ComputePme::noWork ( )
virtual

Reimplemented from Compute.

Definition at line 2994 of file ComputePme.C.

References ResizeArray< T >::add(), Flags::doFullElectrostatics, Patch::flags, ComputePmeMgr::pmeComputes, ResizeArray< T >::size(), Box< Owner, Data >::skip(), and SubmitReduction::submit().

2994  {
2995 
2996  if ( patch->flags.doFullElectrostatics ) {
2997  // In QM/MM simulations, atom charges from QM regions need special treatment.
2998  if ( qmForcesOn ) {
2999  return 1;
3000  }
3001  if ( ! myMgr->ungridForcesCount && ! myMgr->recipEvirCount ) return 0; // work to do, enqueue as usual
3002  myMgr->heldComputes.add(this);
3003  return 1; // don't enqueue yet
3004  }
3005 
3006  positionBox->skip();
3007  forceBox->skip();
3008 
3009  if ( ++(myMgr->noWorkCount) == myMgr->pmeComputes.size() ) {
3010  myMgr->noWorkCount = 0;
3011  myMgr->reduction->submit();
3012  }
3013 
3014  atomsChanged = 0;
3015 
3016  return 1; // no work for this step
3017 }
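
The return value follows the Compute convention spelled out in the comments above: 0 means there is work and doWork() should be enqueued as usual, while nonzero means nothing is enqueued this step (here: the step needs no full electrostatics, QM charge handling takes a separate path, or the compute is parked in myMgr->heldComputes until its ungrid work is ready). A minimal sketch of that contract, assuming a hypothetical caller that dispatches directly instead of going through NAMD's actual work queues:

// Hypothetical dispatcher illustrating the noWork()/doWork() contract.
void dispatch(ComputePme *c) {
    if (c->noWork() == 0) {
        c->doWork();   // 0: work to do this step
    }
    // nonzero: nothing to enqueue now; the compute either skipped the step
    // or arranged to be resumed later (e.g. by ComputePmeMgr).
}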
void ComputePme::setMgr ( ComputePmeMgr mgr)
inline

Definition at line 56 of file ComputePme.h.

56 { myMgr = mgr; }
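
setMgr() stores the back-pointer used as myMgr throughout the other listings. It is presumably invoked from ComputePmeMgr::addCompute(), which the ComputePme constructor calls on the local branch of the manager group; a plausible sketch of that registration (the body of addCompute is an assumption, not shown on this page):

// Hypothetical sketch of the manager-side registration.
void ComputePmeMgr::addCompute(ComputePme *c) {
    c->setMgr(this);       // back-pointer used as myMgr in ComputePme
    pmeComputes.add(c);    // the ResizeArray<ComputePme*> iterated per step
}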
void ComputePme::ungridForces ( )

Definition at line 4012 of file ComputePme.C.

References ADD_VECTOR_OBJECT, ComputePmeUtil::alchDecouple, ComputePmeUtil::alchFepOn, ComputePmeUtil::alchOn, ResizeArray< T >::begin(), Box< Owner, Data >::close(), SimParameters::commOnly, PmeRealSpace::compute_forces(), endi(), Results::f, Patch::flags, SimParameters::getCurrentLambda(), SimParameters::getCurrentLambda2(), SimParameters::getElecLambda(), Patch::getNumAtoms(), iERROR(), iout, Flags::lattice, ComputePmeUtil::lesFactor, ComputePmeUtil::lesOn, NAMD_bug(), ComputePmeUtil::numGrids, Node::Object(), Box< Owner, Data >::open(), ComputePmeUtil::pairOn, ResizeArray< T >::resize(), scale_forces(), ComputePmeUtil::selfOn, Compute::sequence(), Node::simParameters, Results::slow, Flags::step, Vector::x, Vector::y, and Vector::z.

Referenced by doWork().

4012  {
4013 
4014  if ( sequence() != myMgr->compute_sequence ) NAMD_bug("ComputePme sequence mismatch in ungridForces()");
4015 
4017 
4018  localResults_alloc.resize(numLocalAtoms* ((numGrids>1 || selfOn)?2:1));
4019  Vector *localResults = localResults_alloc.begin();
4020  Vector *gridResults;
4021 
4022  if ( alchOn || lesOn || selfOn || pairOn ) {
4023  for(int i=0; i<numLocalAtoms; ++i) { localResults[i] = 0.; }
4024  gridResults = localResults + numLocalAtoms;
4025  } else {
4026  gridResults = localResults;
4027  }
4028 
4029  Vector pairForce = 0.;
4030  Lattice &lattice = patch->flags.lattice;
4031  int g = 0;
4032  if(!simParams->commOnly) {
4033  for ( g=0; g<numGrids; ++g ) {
4034 #ifdef NETWORK_PROGRESS
4035  CmiNetworkProgress();
4036 #endif
4037 
4038 #ifdef NAMD_CUDA
4039  if ( offload ) {
4040  int errfound = 0;
4041  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4042  // Neither isnan() nor x != x worked when testing on Cray; this does.
4043  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { errfound = 1; } // CUDA NaN
4044  gridResults[i].x = f_data_host[3*i];
4045  gridResults[i].y = f_data_host[3*i+1];
4046  gridResults[i].z = f_data_host[3*i+2];
4047  }
4048  if ( errfound ) {
4049  int errcount = 0;
4050  for ( int n=numGridAtoms[g], i=0; i<n; ++i ) {
4051  float f = f_data_host[3*i];
4052  if ( ((int*)f_data_host)[3*i] == 0x7fffffff ) { // CUDA NaN
4053  ++errcount;
4054  gridResults[i] = 0.;
4055  }
4056  }
4057  iout << iERROR << "Stray PME grid charges detected: "
4058  << errcount << " atoms on pe " << CkMyPe() << "\n" << endi;
4059  }
4060  } else
4061 #endif // NAMD_CUDA
4062  {
4063  myRealSpace[g]->compute_forces(myMgr->q_arr+g*myMgr->fsize, localGridData[g], gridResults);
4064  }
4065  scale_forces(gridResults, numGridAtoms[g], lattice);
4066 
4067  if (alchOn) {
4068  float scale = 1.;
4069  BigReal elecLambdaUp, elecLambdaDown;
4070  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4071  myMgr->alchLambda = alchLambda;
4072  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4073  myMgr->alchLambda2 = alchLambda2;
4074  elecLambdaUp = simParams->getElecLambda(alchLambda);
4075  elecLambdaDown = simParams->getElecLambda(1. - alchLambda);
4076 
4077  if ( g == 0 ) scale = elecLambdaUp;
4078  else if ( g == 1 ) scale = elecLambdaDown;
4079  else if ( g == 2 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4080 
4081  if (alchDecouple) {
4082  if ( g == 2 ) scale = 1 - elecLambdaUp;
4083  else if ( g == 3 ) scale = 1 - elecLambdaDown;
4084  else if ( g == 4 ) scale = (elecLambdaUp + elecLambdaDown - 1)*(-1);
4085  }
4086  int nga = 0;
4087  if (!alchDecouple) {
4088  if (g < 2 ) {
4089  for(int i=0; i<numLocalAtoms; ++i) {
4090  if ( localPartition[i] == 0 || localPartition[i] == (g+1) || localPartition[i] == (g+3) ) {
4091  // (g=0: only partition 0 and partition 1 and partition 3)
4092  // (g=1: only partition 0 and partition 2 and partition 4)
4093  localResults[i] += gridResults[nga++] * scale;
4094  }
4095  }
4096  } else {
4097  for(int i=0; i<numLocalAtoms; ++i) {
4098  if ( localPartition[i] == 0 ) {
4099  // (g=2: only partition 0)
4100  localResults[i] += gridResults[nga++] * scale;
4101  }
4102  }
4103  }
4104  } else { // alchDecouple
4105  if ( g < 2 ) {
4106  for(int i=0; i<numLocalAtoms; ++i) {
4107  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4108  // g = 0: partition 0 or partition 1
4109  // g = 1: partition 0 or partition 2
4110  localResults[i] += gridResults[nga++] * scale;
4111  }
4112  }
4113  }
4114  else {
4115  for(int i=0; i<numLocalAtoms; ++i) {
4116  if ( localPartition[i] == (g-1) || localPartition[i] == (g-4)) {
4117  // g = 2: partition 1 only
4118  // g = 3: partition 2 only
4119  // g = 4: partition 0 only
4120  localResults[i] += gridResults[nga++] * scale;
4121  }
4122  }
4123  }
4124  }
4125  } else if ( lesOn ) {
4126  float scale = 1.;
4127  if ( alchFepOn ) {
4128  BigReal alchLambda = simParams->getCurrentLambda(patch->flags.step);
4129  myMgr->alchLambda = alchLambda;
4130  BigReal alchLambda2 = simParams->getCurrentLambda2(patch->flags.step);
4131  myMgr->alchLambda2 = alchLambda2;
4132  if ( g == 0 ) scale = alchLambda;
4133  else if ( g == 1 ) scale = 1. - alchLambda;
4134  } else if ( lesOn ) {
4135  scale = 1.0 / (float)lesFactor;
4136  }
4137  int nga = 0;
4138  for(int i=0; i<numLocalAtoms; ++i) {
4139  if ( localPartition[i] == 0 || localPartition[i] == (g+1) ) {
4140  localResults[i] += gridResults[nga++] * scale;
4141  }
4142  }
4143  } else if ( selfOn ) {
4144  PmeParticle *lgd = localGridData[g];
4145  int nga = 0;
4146  for(int i=0; i<numLocalAtoms; ++i) {
4147  if ( localPartition[i] == 1 ) {
4148  pairForce += gridResults[nga]; // should add up to almost zero
4149  localResults[i] += gridResults[nga++];
4150  }
4151  }
4152  } else if ( pairOn ) {
4153  if ( g == 0 ) {
4154  int nga = 0;
4155  for(int i=0; i<numLocalAtoms; ++i) {
4156  if ( localPartition[i] == 1 ) {
4157  pairForce += gridResults[nga];
4158  }
4159  if ( localPartition[i] == 1 || localPartition[i] == 2 ) {
4160  localResults[i] += gridResults[nga++];
4161  }
4162  }
4163  } else if ( g == 1 ) {
4164  int nga = 0;
4165  for(int i=0; i<numLocalAtoms; ++i) {
4166  if ( localPartition[i] == g ) {
4167  pairForce -= gridResults[nga]; // should add up to almost zero
4168  localResults[i] -= gridResults[nga++];
4169  }
4170  }
4171  } else {
4172  int nga = 0;
4173  for(int i=0; i<numLocalAtoms; ++i) {
4174  if ( localPartition[i] == g ) {
4175  localResults[i] -= gridResults[nga++];
4176  }
4177  }
4178  }
4179  }
4180  }
4181  }
4182 
4183  Vector *results_ptr = localResults;
4184 
4185  // add in forces
4186  {
4187  Results *r = forceBox->open();
4188  Force *f = r->f[Results::slow];
4189  int numAtoms = patch->getNumAtoms();
4190 
4191  if ( ! myMgr->strayChargeErrors && ! simParams->commOnly ) {
4192  for(int i=0; i<numAtoms; ++i) {
4193  f[i].x += results_ptr->x;
4194  f[i].y += results_ptr->y;
4195  f[i].z += results_ptr->z;
4196  ++results_ptr;
4197  }
4198  }
4199  forceBox->close(&r);
4200  }
4201 
4202  if ( pairOn || selfOn ) {
4203  ADD_VECTOR_OBJECT(myMgr->reduction,REDUCTION_PAIR_ELECT_FORCE,pairForce);
4204  }
4205 
4206 }
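
Restating the alchemical branches above: each atom's total reciprocal-space force is a scaled sum over grids, F_i = \sum_g s_g F_i^{(g)} (each grid contributing only to the atoms it contains), with \lambda_{up} = getElecLambda(\lambda) and \lambda_{down} = getElecLambda(1 - \lambda):

    without alchDecouple:  s_0 = \lambda_{up},  s_1 = \lambda_{down},  s_2 = 1 - \lambda_{up} - \lambda_{down}
    with alchDecouple:     s_0 = \lambda_{up},  s_1 = \lambda_{down},  s_2 = 1 - \lambda_{up},  s_3 = 1 - \lambda_{down},  s_4 = 1 - \lambda_{up} - \lambda_{down}

For locally enhanced sampling (lesOn without alchFepOn) every grid is instead scaled by 1/lesFactor, and in pair-interaction mode (pairOn) the grid 1 and grid 2 contributions are subtracted rather than added.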

Friends And Related Function Documentation

friend class ComputePmeMgr
friend

Definition at line 58 of file ComputePme.h.


The documentation for this class was generated from the following files:
ComputePme.h
ComputePme.C