NAMD
Public Member Functions | List of all members
PmeZPencil Class Reference
Inheritance diagram for PmeZPencil:
PmePencil< CBase_PmeZPencil >

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 
 PmeZPencil (CkMigrateMessage *)
 
 ~PmeZPencil ()
 
void fft_init ()
 
void recv_grid (const PmeGridMsg *)
 
void forward_fft ()
 
void send_trans ()
 
void send_subset_trans (int fromIdx, int toIdx)
 
void recv_untrans (const PmeUntransMsg *)
 
void recvNodeAck (PmeAckMsg *)
 
void node_process_untrans (PmeUntransMsg *)
 
void node_process_grid (PmeGridMsg *)
 
void backward_fft ()
 
void send_ungrid (PmeGridMsg *)
 
void send_all_ungrid ()
 
void send_subset_ungrid (int fromIdx, int toIdx)
 
- Public Member Functions inherited from PmePencil< CBase_PmeZPencil >
 PmePencil ()
 
 ~PmePencil ()
 
void base_init (PmePencilInitMsg *msg)
 
void order_init (int nBlocks)
 

Additional Inherited Members

- Public Types inherited from PmePencil< CBase_PmeZPencil >
typedef int AtomicInt
 
- Public Attributes inherited from PmePencil< CBase_PmeZPencil >
PmePencilInitMsgData initdata
 
Lattice lattice
 
PmeReduction evir
 
int sequence
 
AtomicInt imsg
 
AtomicInt imsgb
 
int hasData
 
int offload
 
float * data
 
float * work
 
int * send_order
 
int * needs_reply
 

Detailed Description

Definition at line 4559 of file ComputePme.C.

Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil ( )
inline

Definition at line 4562 of file ComputePme.C.

4562 { __sdag_init(); setMigratable(false); }
PmeZPencil::PmeZPencil ( CkMigrateMessage *  )
inline

Definition at line 4563 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

4563 { __sdag_init(); setMigratable (false); imsg=imsgb=0;}
PmeZPencil::~PmeZPencil ( )
inline

Definition at line 4564 of file ComputePme.C.

4564  {
4565  #ifdef NAMD_FFTW
4566  #ifdef NAMD_FFTW_3
4567  delete [] forward_plans;
4568  delete [] backward_plans;
4569  #endif
4570  #endif
4571  }

Member Function Documentation

void PmeZPencil::backward_fft ( )

Definition at line 6149 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

6149  {
6150 #ifdef NAMD_FFTW
6151 #ifdef MANUAL_DEBUG_FFTW3
6152  dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6153 #endif
6154 #ifdef NAMD_FFTW_3
6155 #if CMK_SMP && USE_CKLOOP
6156  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6157  if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
6158  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6159  //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
6160  //transform the above loop
6161  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
6162  return;
6163  }
6164 #endif
6165  fftwf_execute(backward_plan);
6166 #else
6167  rfftwnd_complex_to_real(backward_plan, nx*ny,
6168  (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
6169 #endif
6170 #ifdef MANUAL_DEBUG_FFTW3
6171  dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
6172 #endif
6173 
6174 #endif
6175 
6176 #if CMK_BLUEGENEL
6177  CmiNetworkProgress();
6178 #endif
6179 
6180 #ifdef FFTCHECK
6181  int dim3 = initdata.grid.dim3;
6182  int K1 = initdata.grid.K1;
6183  int K2 = initdata.grid.K2;
6184  int K3 = initdata.grid.K3;
6185  float scale = 1. / (1. * K1 * K2 * K3);
6186  float maxerr = 0.;
6187  float maxstd = 0.;
6188  int mi, mj, mk; mi = mj = mk = -1;
6189  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
6190  const float *d = data;
6191  for ( int i=0; i<nx; ++i ) {
6192  for ( int j=0; j<ny; ++j, d += dim3 ) {
6193  for ( int k=0; k<K3; ++k ) {
6194  float std = 10. * (10. * (10. * std_base + i) + j) + k;
6195  float err = scale * d[k] - std;
6196  if ( fabsf(err) > fabsf(maxerr) ) {
6197  maxerr = err;
6198  maxstd = std;
6199  mi = i; mj = j; mk = k;
6200  }
6201  }
6202  }
6203  }
6204  CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
6205  thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
6206 #endif
6207 
6208 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5177
int K3
Definition: PmeBase.h:18
#define CKLOOP_CTRL_PME_BACKWARDFFT
Definition: SimParameters.h:97
void PmeZPencil::fft_init ( )

Definition at line 4762 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, SimParameters::FFTWEstimate, fftwf_malloc, SimParameters::FFTWPatient, PmePencilInitMsgData::grid, if(), PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

4762  {
4763  CProxy_Node nd(CkpvAccess(BOCclass_group).node);
4764  Node *node = nd.ckLocalBranch();
4766 
4767 #if USE_NODE_PAR_RECEIVE
4768  ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
4769 #endif
4770 
4771  int K1 = initdata.grid.K1;
4772  int K2 = initdata.grid.K2;
4773  int K3 = initdata.grid.K3;
4774  int dim3 = initdata.grid.dim3;
4775  int block1 = initdata.grid.block1;
4776  int block2 = initdata.grid.block2;
4777 
4778  nx = block1;
4779  if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
4780  ny = block2;
4781  if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
4782 
4783 #ifdef NAMD_FFTW
4785 
4786  data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
4787  work = new float[dim3];
4788 
4790 
4791 #ifdef NAMD_FFTW_3
4792  /* need array of sizes for the how many */
4793 
4794  int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT : simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE ;
4795  int sizeLines=nx*ny;
4796  int planLineSizes[1];
4797  planLineSizes[0]=K3;
4798  int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
4799  int ndimHalf=ndim/2;
4800  forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
4801  (float *) data, NULL, 1,
4802  ndim,
4803  (fftwf_complex *) data, NULL, 1,
4804  ndimHalf,
4805  fftwFlags);
4806 
4807  backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
4808  (fftwf_complex *) data, NULL, 1,
4809  ndimHalf,
4810  (float *) data, NULL, 1,
4811  ndim,
4812  fftwFlags);
4813 #if CMK_SMP && USE_CKLOOP
4814  if(simParams->useCkLoop) {
4815  //How many FFT plans to be created? The grain-size issue!!.
4816  //Currently, I am choosing the min(nx, ny) to be coarse-grain
4817  numPlans = (nx<=ny?nx:ny);
4818  if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
4819  if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
4820  int howmany = sizeLines/numPlans;
4821  forward_plans = new fftwf_plan[numPlans];
4822  backward_plans = new fftwf_plan[numPlans];
4823  for(int i=0; i<numPlans; i++) {
4824  int dimStride = i*ndim*howmany;
4825  int dimHalfStride = i*ndimHalf*howmany;
4826  forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
4827  ((float *)data)+dimStride, NULL, 1,
4828  ndim,
4829  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4830  ndimHalf,
4831  fftwFlags);
4832 
4833  backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
4834  ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
4835  ndimHalf,
4836  ((float *)data)+dimStride, NULL, 1,
4837  ndim,
4838  fftwFlags);
4839  }
4840  }else
4841 #endif
4842  {
4843  forward_plans = NULL;
4844  backward_plans = NULL;
4845  }
4846 #else
4847  forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
4848  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4849  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4850  backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
4851  ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
4852  | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
4853 #endif
4854  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
4855 #else
4856  NAMD_die("Sorry, FFTW must be compiled in to use PME.");
4857 #endif
4858 
4859 #if USE_NODE_PAR_RECEIVE
4860  evir = 0.;
4861  memset(data, 0, sizeof(float) * nx*ny*dim3);
4862 #endif
4863 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:414
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
Definition: Node.h:78
void order_init(int nBlocks)
Definition: ComputePme.C:4523
int K2
Definition: PmeBase.h:18
SimParameters * simParameters
Definition: Node.h:178
int K1
Definition: PmeBase.h:18
int block1
Definition: PmeBase.h:21
if(ComputeNonbondedUtil::goMethod==2)
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:218
int block2
Definition: PmeBase.h:21
void NAMD_die(const char *err_msg)
Definition: common.C:83
#define simParams
Definition: Output.C:127
int K3
Definition: PmeBase.h:18
#define fftwf_malloc
Definition: ComputePme.C:13
void PmeZPencil::forward_fft ( )

Definition at line 5186 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

5186  {
5187  evir = 0.;
5188 #ifdef FFTCHECK
5189  int dim3 = initdata.grid.dim3;
5190  int K3 = initdata.grid.K3;
5191  float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
5192  float *d = data;
5193  for ( int i=0; i<nx; ++i ) {
5194  for ( int j=0; j<ny; ++j, d += dim3 ) {
5195  for ( int k=0; k<dim3; ++k ) {
5196  d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
5197  }
5198  }
5199  }
5200 #endif
5201 #ifdef NAMD_FFTW
5202 #ifdef MANUAL_DEBUG_FFTW3
5203  dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5204 #endif
5205 #ifdef NAMD_FFTW_3
5206 #if CMK_SMP && USE_CKLOOP
5207  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5208  if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
5209  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5210  //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
5211  //transform the above loop
5212  CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
5213  return;
5214  }
5215 #endif
5216  fftwf_execute(forward_plan);
5217 #else
5218  rfftwnd_real_to_complex(forward_plan, nx*ny,
5219  data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
5220 #endif
5221 #ifdef MANUAL_DEBUG_FFTW3
5222  dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
5223 #endif
5224 
5225 #endif
5226 #ifdef ZEROCHECK
5227  int dim3 = initdata.grid.dim3;
5228  int K3 = initdata.grid.K3;
5229  float *d = data;
5230  for ( int i=0; i<nx; ++i ) {
5231  for ( int j=0; j<ny; ++j, d += dim3 ) {
5232  for ( int k=0; k<dim3; ++k ) {
5233  if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
5234  thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
5235  }
5236  }
5237  }
5238 #endif
5239 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
SimParameters * simParameters
Definition: Node.h:178
static void PmeXZPencilFFT(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5177
int K3
Definition: PmeBase.h:18
#define CKLOOP_CTRL_PME_FORWARDFFT
Definition: SimParameters.h:94
void PmeZPencil::node_process_grid ( PmeGridMsg * msg )

Definition at line 6290 of file ComputePme.C.

References ComputePmeMgr::fftw_plan_lock, forward_fft(), PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< T >::size().

Referenced by NodePmeMgr::recvZGrid().

6291 {
6292 #if USE_NODE_PAR_RECEIVE
6294  CmiMemoryReadFence();
6295 #endif
6296  recv_grid(msg);
6297  if(msg->hasData) hasData=msg->hasData;
6298  int limsg;
6299  CmiMemoryAtomicFetchAndInc(imsg,limsg);
6300  grid_msgs[limsg] = msg;
6301  // CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);
6302  if(limsg+1 == grid_msgs.size())
6303  {
6304 
6305  if (hasData)
6306  {
6307  forward_fft();
6308  }
6309  send_trans();
6310  imsg=0;
6311  CmiMemoryWriteFence();
6312  // CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6313  }
6314 #if USE_NODE_PAR_RECEIVE
6315  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6316  CmiMemoryWriteFence();
6317 #endif
6318 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:414
void forward_fft()
Definition: ComputePme.C:5186
int size(void) const
Definition: ResizeArray.h:127
void send_trans()
Definition: ComputePme.C:5299
void recv_grid(const PmeGridMsg *)
Definition: ComputePme.C:5135
void PmeZPencil::node_process_untrans ( PmeUntransMsg * msg )

Definition at line 6325 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, ComputePmeMgr::fftw_plan_lock, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvZUntrans().

6326 {
6327  if ( msg ) {
6328  if ( ! hasData ) NAMD_bug("PmeZPencil::node_process_untrans non-null msg but not hasData");
6329  recv_untrans(msg);
6330  } else if ( hasData ) NAMD_bug("PmeZPencil::node_process_untrans hasData but null msg");
6331 #if USE_NODE_PAR_RECEIVE
6332  CmiMemoryWriteFence();
6334 #endif
6335  int limsg;
6336  CmiMemoryAtomicFetchAndInc(imsgb,limsg);
6337  if(limsg+1 == initdata.zBlocks)
6338  {
6339 #if USE_NODE_PAR_RECEIVE
6340  CmiMemoryReadFence();
6341 #endif
6342  if(hasData) {
6343  backward_fft();
6344  }
6345  send_all_ungrid();
6346  hasData=0;
6347  imsgb=0;
6348  evir = 0;
6349  memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3);
6350  CmiMemoryWriteFence();
6351  // CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
6352  }
6353 #if USE_NODE_PAR_RECEIVE
6354  CmiUnlock(ComputePmeMgr::fftw_plan_lock);
6355 #endif
6356 }
static CmiNodeLock fftw_plan_lock
Definition: ComputePme.C:414
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
void backward_fft()
Definition: ComputePme.C:6149
void send_all_ungrid()
Definition: ComputePme.C:6217
void NAMD_bug(const char *err_msg)
Definition: common.C:123
void recv_untrans(const PmeUntransMsg *)
Definition: ComputePme.C:6121
void PmeZPencil::recv_grid ( const PmeGridMsg * msg )

Definition at line 5135 of file ComputePme.C.

References ResizeArray< T >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

5135  {
5136 
5137  int dim3 = initdata.grid.dim3;
5138  if ( imsg == 0 ) {
5139  lattice = msg->lattice;
5140  sequence = msg->sequence;
5141 #if ! USE_NODE_PAR_RECEIVE
5142  memset(data, 0, sizeof(float)*nx*ny*dim3);
5143 #endif
5144  }
5145 
5146  if ( ! msg->hasData ) return;
5147 
5148  int zlistlen = msg->zlistlen;
5149 #ifdef NAMD_KNL
5150  int * __restrict msg_zlist = msg->zlist;
5151  int * __restrict zlist = work_zlist.begin();
5152  __assume_aligned(zlist,64);
5153  for ( int k=0; k<zlistlen; ++k ) {
5154  zlist[k] = msg_zlist[k];
5155  }
5156 #else
5157  int * __restrict zlist = msg->zlist;
5158 #endif
5159  char * __restrict fmsg = msg->fgrid;
5160  float * __restrict qmsg = msg->qgrid;
5161  float * __restrict d = data;
5162  int numGrids = 1; // pencil FFT doesn't support multiple grids
5163  for ( int g=0; g<numGrids; ++g ) {
5164  for ( int i=0; i<nx; ++i ) {
5165  for ( int j=0; j<ny; ++j, d += dim3 ) {
5166  if( *(fmsg++) ) {
5167  #pragma ivdep
5168  for ( int k=0; k<zlistlen; ++k ) {
5169  d[zlist[k]] += *(qmsg++);
5170  }
5171  }
5172  }
5173  }
5174  }
5175 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
int sequence
Definition: ComputePme.C:116
Lattice lattice
Definition: ComputePme.C:118
float * qgrid
Definition: ComputePme.C:124
int * zlist
Definition: ComputePme.C:122
int zlistlen
Definition: ComputePme.C:121
char * fgrid
Definition: ComputePme.C:123
iterator begin(void)
Definition: ResizeArray.h:36
void PmeZPencil::recv_untrans ( const PmeUntransMsg * msg )

Definition at line 6121 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

6121  {
6122 #if ! USE_NODE_PAR_RECEIVE
6123  if(imsg==0) evir=0.;
6124 #endif
6125 
6126  int block3 = initdata.grid.block3;
6127  int dim3 = initdata.grid.dim3;
6128  int kb = msg->sourceNode;
6129  int nz = msg->ny;
6130  const float *md = msg->qgrid;
6131  float *d = data;
6132  for ( int i=0; i<nx; ++i ) {
6133 #if CMK_BLUEGENEL
6134  CmiNetworkProgress();
6135 #endif
6136  for ( int j=0; j<ny; ++j, d += dim3 ) {
6137  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
6138 #ifdef ZEROCHECK
6139  if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
6140  thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
6141 #endif
6142  d[2*k] = *(md++);
6143  d[2*k+1] = *(md++);
6144  }
6145  }
6146  }
6147 }
float * qgrid
Definition: ComputePme.C:154
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
int block3
Definition: PmeBase.h:21
void PmeZPencil::recvNodeAck ( PmeAckMsg * msg )

Definition at line 6320 of file ComputePme.C.

References node_process_untrans().

6320  {
6321  delete msg;
6323 }
void node_process_untrans(PmeUntransMsg *)
Definition: ComputePme.C:6325
void PmeZPencil::send_all_ungrid ( )

Definition at line 6217 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< T >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

6217  {
6218 
6219 #if CMK_SMP && USE_CKLOOP
6220  int useCkLoop = Node::Object()->simParameters->useCkLoop;
6221  if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
6222  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
6223  //????What's the best value for numChunks?????
6224  CkLoop_Parallelize(PmeZPencilSendUngrid, 1, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
6225  return;
6226  }
6227 #endif
6228  send_subset_ungrid(0, grid_msgs.size()-1);
6229 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
SimParameters * simParameters
Definition: Node.h:178
static void PmeZPencilSendUngrid(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:6210
#define CKLOOP_CTRL_PME_SENDUNTRANS
Definition: SimParameters.h:98
void send_subset_ungrid(int fromIdx, int toIdx)
Definition: ComputePme.C:6231
int size(void) const
Definition: ResizeArray.h:127
void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5247 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

5247  {
5248  int zBlocks = initdata.zBlocks;
5249  int block3 = initdata.grid.block3;
5250  int dim3 = initdata.grid.dim3;
5251  for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
5252  int kb = send_order[isend];
5253  int nz = block3;
5254  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5255  int hd = ( hasData ? 1 : 0 );
5256  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5257  msg->lattice = lattice;
5258  msg->sourceNode = thisIndex.y;
5259  msg->hasData = hasData;
5260  msg->nx = ny;
5261  if ( hasData ) {
5262  float *md = msg->qgrid;
5263  const float *d = data;
5264  for ( int i=0; i<nx; ++i ) {
5265  for ( int j=0; j<ny; ++j, d += dim3 ) {
5266  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5267  *(md++) = d[2*k];
5268  *(md++) = d[2*k+1];
5269  }
5270  }
5271  }
5272  }
5273  msg->sequence = sequence;
5275 
5276  CmiEnableUrgentSend(1);
5277 #if USE_NODE_PAR_RECEIVE
5278  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5279 #if Y_PERSIST
5280  CmiUsePersistentHandle(&trans_handle[isend], 1);
5281 #endif
5282  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5283 #if Y_PERSIST
5284  CmiUsePersistentHandle(NULL, 0);
5285 #endif
5286 #else
5287 #if Y_PERSIST
5288  CmiUsePersistentHandle(&trans_handle[isend], 1);
5289 #endif
5290  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5291 #if Y_PERSIST
5292  CmiUsePersistentHandle(NULL, 0);
5293 #endif
5294 #endif
5295  CmiEnableUrgentSend(0);
5296  }
5297 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:215
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:218
float * qgrid
Definition: ComputePme.C:137
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:131
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:21
CkArrayIndex3D destElem
Definition: ComputePme.C:138
Lattice lattice
Definition: ComputePme.C:134
CProxy_PmePencilMap ym
Definition: ComputePme.C:220
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx 
)

Definition at line 6231 of file ComputePme.C.

References send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

6231  {
6232  for (int limsg=fromIdx; limsg <=toIdx; ++limsg ) {
6233  PmeGridMsg *msg = grid_msgs[limsg];
6234  send_ungrid(msg);
6235  }
6236 }
void send_ungrid(PmeGridMsg *)
Definition: ComputePme.C:6238
void PmeZPencil::send_trans ( )

Definition at line 5299 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, PmeTransMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmeTransMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

5299  {
5300 #if USE_PERSISTENT
5301  if (trans_handle == NULL) setup_persistent();
5302 #endif
5303 #if CMK_SMP && USE_CKLOOP
5304  int useCkLoop = Node::Object()->simParameters->useCkLoop;
5305  if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
5306  && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
5313  //send_subset_trans(0, initdata.zBlocks-1);
5314  CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
5315  return;
5316  }
5317 #endif
5318  int zBlocks = initdata.zBlocks;
5319  int block3 = initdata.grid.block3;
5320  int dim3 = initdata.grid.dim3;
5321  for ( int isend=0; isend<zBlocks; ++isend ) {
5322  int kb = send_order[isend];
5323  int nz = block3;
5324  if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
5325  int hd = ( hasData ? 1 : 0 );
5326  PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
5327  msg->lattice = lattice;
5328  msg->sourceNode = thisIndex.y;
5329  msg->hasData = hasData;
5330  msg->nx = ny;
5331  if ( hasData ) {
5332  float *md = msg->qgrid;
5333  const float *d = data;
5334  for ( int i=0; i<nx; ++i ) {
5335  for ( int j=0; j<ny; ++j, d += dim3 ) {
5336  for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
5337  *(md++) = d[2*k];
5338  *(md++) = d[2*k+1];
5339  }
5340  }
5341  }
5342  }
5343  msg->sequence = sequence;
5345 
5346  CmiEnableUrgentSend(1);
5347 #if USE_NODE_PAR_RECEIVE
5348  msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
5349 #if Y_PERSIST
5350  CmiUsePersistentHandle(&trans_handle[isend], 1);
5351 #endif
5352  initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
5353 #if Y_PERSIST
5354  CmiUsePersistentHandle(NULL, 0);
5355 #endif
5356 #else
5357 #if Y_PERSIST
5358  CmiUsePersistentHandle(&trans_handle[isend], 1);
5359 #endif
5360  initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
5361 #if Y_PERSIST
5362  CmiUsePersistentHandle(NULL, 0);
5363 #endif
5364 #endif
5365  CmiEnableUrgentSend(0);
5366  }
5367 }
static Node * Object()
Definition: Node.h:86
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
int dim3
Definition: PmeBase.h:19
SimParameters * simParameters
Definition: Node.h:178
CProxy_PmeYPencil yPencil
Definition: ComputePme.C:215
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:218
float * qgrid
Definition: ComputePme.C:137
#define PRIORITY_SIZE
Definition: Priorities.h:13
int sourceNode
Definition: ComputePme.C:131
#define CKLOOP_CTRL_PME_SENDTRANS
Definition: SimParameters.h:95
#define PME_TRANS_PRIORITY
Definition: Priorities.h:31
int block3
Definition: PmeBase.h:21
static void PmeZPencilSendTrans(int first, int last, void *result, int paraNum, void *param)
Definition: ComputePme.C:5242
CkArrayIndex3D destElem
Definition: ComputePme.C:138
Lattice lattice
Definition: ComputePme.C:134
CProxy_PmePencilMap ym
Definition: ComputePme.C:220
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
void PmeZPencil::send_ungrid ( PmeGridMsg * msg )

Definition at line 6238 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

6238  {
6239 
6240 #ifdef NAMD_CUDA
6241  const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
6242 #else
6243  const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
6244 #endif
6245 
6246  int pe = msg->sourceNode;
6247  if ( ! msg->hasData ) {
6248  delete msg;
6249  PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
6250  SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
6251  CmiEnableUrgentSend(1);
6252  initdata.pmeProxy[pe].recvAck(ackmsg);
6253  CmiEnableUrgentSend(0);
6254  return;
6255  }
6256  if ( ! hasData ) NAMD_bug("PmeZPencil::send_ungrid msg->hasData but not pencil->hasData");
6257  msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
6258  int dim3 = initdata.grid.dim3;
6259  int zlistlen = msg->zlistlen;
6260  int *zlist = msg->zlist;
6261  char *fmsg = msg->fgrid;
6262  float *qmsg = msg->qgrid;
6263  float *d = data;
6264  int numGrids = 1; // pencil FFT doesn't support multiple grids
6265  for ( int g=0; g<numGrids; ++g ) {
6266 #if CMK_BLUEGENEL
6267  CmiNetworkProgress();
6268 #endif
6269  for ( int i=0; i<nx; ++i ) {
6270  for ( int j=0; j<ny; ++j, d += dim3 ) {
6271  if( *(fmsg++) ) {
6272  for ( int k=0; k<zlistlen; ++k ) {
6273  *(qmsg++) = d[zlist[k]];
6274  }
6275  }
6276  }
6277  }
6278  }
6279  SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
6280  CmiEnableUrgentSend(1);
6281 #ifdef NAMD_CUDA
6282  if ( offload ) {
6283  initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
6284  } else
6285 #endif
6286  initdata.pmeProxy[pe].recvUngrid(msg);
6287  CmiEnableUrgentSend(0);
6288 }
PmePencilInitMsgData initdata
Definition: ComputePme.C:4535
#define PME_UNGRID_PRIORITY
Definition: Priorities.h:74
CProxy_ComputePmeMgr pmeProxy
Definition: ComputePme.C:217
if(ComputeNonbondedUtil::goMethod==2)
#define PME_OFFLOAD_UNGRID_PRIORITY
Definition: Priorities.h:42
CProxy_NodePmeMgr pmeNodeProxy
Definition: ComputePme.C:218
int sourceNode
Definition: ComputePme.C:115
#define PRIORITY_SIZE
Definition: Priorities.h:13
void NAMD_bug(const char *err_msg)
Definition: common.C:123
gridSize y
#define SET_PRIORITY(MSG, SEQ, PRIO)
Definition: Priorities.h:18
gridSize x
void send_ungrid(PmeGridMsg *)
Definition: ComputePme.C:6238
void recvAck(DataMessage *dmsg)
Definition: DataExchanger.C:99
for(int i=0;i< n1;++i)

The documentation for this class was generated from the following file: