PmeZPencil Class Reference

Inheritance diagram for PmeZPencil:
PmePencil< CBase_PmeZPencil > CBase_PmeZPencil

List of all members.

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 PmeZPencil (CkMigrateMessage *)
 ~PmeZPencil ()
void fft_init ()
void recv_grid (const PmeGridMsg *)
void forward_fft ()
void send_trans ()
void send_subset_trans (int fromIdx, int toIdx)
void recv_untrans (const PmeUntransMsg *)
void recvNodeAck (PmeAckMsg *)
void node_process_untrans (PmeUntransMsg *)
void node_process_grid (PmeGridMsg *)
void backward_fft ()
void send_ungrid (PmeGridMsg *)
void send_all_ungrid ()
void send_subset_ungrid (int fromIdx, int toIdx)

Detailed Description

Definition at line 4559 of file ComputePme.C.


Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil (  )  [inline]

Definition at line 4562 of file ComputePme.C.

04562 { __sdag_init(); setMigratable(false); }

PmeZPencil::PmeZPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4563 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

04563 { __sdag_init();  setMigratable (false); imsg=imsgb=0;}

PmeZPencil::~PmeZPencil (  )  [inline]

Definition at line 4564 of file ComputePme.C.

04564                       {
04565         #ifdef NAMD_FFTW
04566         #ifdef NAMD_FFTW_3
04567                 delete [] forward_plans;
04568                 delete [] backward_plans;
04569         #endif
04570         #endif
04571         }


Member Function Documentation

void PmeZPencil::backward_fft (  ) 

Definition at line 6149 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06149                               {
06150 #ifdef NAMD_FFTW
06151 #ifdef MANUAL_DEBUG_FFTW3
06152   dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06153 #endif
06154 #ifdef NAMD_FFTW_3
06155 #if     CMK_SMP && USE_CKLOOP
06156   int useCkLoop = Node::Object()->simParameters->useCkLoop;
06157   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
06158      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06159           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
06160           //transform the above loop
06161           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
06162           return;
06163   }
06164 #endif
06165   fftwf_execute(backward_plan);
06166 #else
06167   rfftwnd_complex_to_real(backward_plan, nx*ny,
06168             (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
06169 #endif
06170 #ifdef MANUAL_DEBUG_FFTW3
06171   dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06172 #endif
06173 
06174 #endif
06175   
06176 #if CMK_BLUEGENEL
06177   CmiNetworkProgress();
06178 #endif
06179 
06180 #ifdef FFTCHECK
06181   int dim3 = initdata.grid.dim3;
06182   int K1 = initdata.grid.K1;
06183   int K2 = initdata.grid.K2;
06184   int K3 = initdata.grid.K3;
06185   float scale = 1. / (1. * K1 * K2 * K3);
06186   float maxerr = 0.;
06187   float maxstd = 0.;
06188   int mi, mj, mk;  mi = mj = mk = -1;
06189   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
06190   const float *d = data;
06191   for ( int i=0; i<nx; ++i ) {
06192    for ( int j=0; j<ny; ++j, d += dim3 ) {
06193     for ( int k=0; k<K3; ++k ) {
06194       float std = 10. * (10. * (10. * std_base + i) + j) + k;
06195       float err = scale * d[k] - std;
06196       if ( fabsf(err) > fabsf(maxerr) ) {
06197         maxerr = err;
06198         maxstd = std;
06199         mi = i;  mj = j;  mk = k;
06200       }
06201     }
06202    }
06203   }
06204   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
06205                 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
06206 #endif
06207 
06208 }

void PmeZPencil::fft_init (  ) 

Definition at line 4762 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, SimParameters::FFTWEstimate, fftwf_malloc, SimParameters::FFTWPatient, PmePencilInitMsgData::grid, if(), PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

04762                           {
04763   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
04764   Node *node = nd.ckLocalBranch();
04765   SimParameters *simParams = node->simParameters;
04766 
04767 #if USE_NODE_PAR_RECEIVE
04768   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
04769 #endif
04770 
04771   int K1 = initdata.grid.K1;
04772   int K2 = initdata.grid.K2;
04773   int K3 = initdata.grid.K3;
04774   int dim3 = initdata.grid.dim3;
04775   int block1 = initdata.grid.block1;
04776   int block2 = initdata.grid.block2;
04777 
04778   nx = block1;
04779   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
04780   ny = block2;
04781   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
04782 
04783 #ifdef NAMD_FFTW
04784   CmiLock(ComputePmeMgr::fftw_plan_lock);
04785 
04786   data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
04787   work = new float[dim3];
04788 
04789   order_init(initdata.zBlocks);
04790 
04791 #ifdef NAMD_FFTW_3
04792   /* need array of sizes for the how many */
04793 
04794   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
04795   int sizeLines=nx*ny;
04796   int planLineSizes[1];
04797   planLineSizes[0]=K3;
04798   int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
04799   int ndimHalf=ndim/2;
04800   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
04801                                          (float *) data, NULL, 1, 
04802                                          ndim,
04803                                          (fftwf_complex *) data, NULL, 1,
04804                                          ndimHalf,
04805                                          fftwFlags);
04806 
04807   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
04808                                           (fftwf_complex *) data, NULL, 1, 
04809                                           ndimHalf,
04810                                           (float *) data, NULL, 1, 
04811                                           ndim,
04812                                           fftwFlags);
04813 #if     CMK_SMP && USE_CKLOOP
04814   if(simParams->useCkLoop) {
04815           //How many FFT plans to be created? The grain-size issue!!.
04816           //Currently, I am choosing the min(nx, ny) to be coarse-grain
04817           numPlans = (nx<=ny?nx:ny);
04818           if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
04819           if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
04820           int howmany = sizeLines/numPlans;
04821           forward_plans = new fftwf_plan[numPlans];
04822           backward_plans = new fftwf_plan[numPlans];
04823           for(int i=0; i<numPlans; i++) {
04824                   int dimStride = i*ndim*howmany;
04825                   int dimHalfStride = i*ndimHalf*howmany;
04826                   forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
04827                                                                                                          ((float *)data)+dimStride, NULL, 1,
04828                                                                                                          ndim,
04829                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04830                                                                                                          ndimHalf,
04831                                                                                                          fftwFlags);
04832 
04833                   backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
04834                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04835                                                                                                          ndimHalf,
04836                                                                                                          ((float *)data)+dimStride, NULL, 1,
04837                                                                                                          ndim,
04838                                                                                                          fftwFlags);
04839           }
04840   }else 
04841 #endif 
04842   {
04843           forward_plans = NULL;
04844           backward_plans = NULL;
04845   }
04846 #else
04847   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
04848         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04849         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04850   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
04851         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04852         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04853 #endif
04854   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
04855 #else
04856   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
04857 #endif
04858 
04859 #if USE_NODE_PAR_RECEIVE
04860     evir = 0.;
04861     memset(data, 0, sizeof(float) * nx*ny*dim3);
04862 #endif
04863 }

void PmeZPencil::forward_fft (  ) 

Definition at line 5186 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

05186                              {
05187   evir = 0.;
05188 #ifdef FFTCHECK
05189   int dim3 = initdata.grid.dim3;
05190   int K3 = initdata.grid.K3;
05191   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05192   float *d = data;
05193   for ( int i=0; i<nx; ++i ) {
05194    for ( int j=0; j<ny; ++j, d += dim3 ) {
05195     for ( int k=0; k<dim3; ++k ) {
05196       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
05197     }
05198    }
05199   }
05200 #endif
05201 #ifdef NAMD_FFTW
05202 #ifdef MANUAL_DEBUG_FFTW3
05203   dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05204 #endif
05205 #ifdef NAMD_FFTW_3
05206 #if     CMK_SMP && USE_CKLOOP
05207   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05208   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05209      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05210           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05211           //transform the above loop
05212           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05213           return;
05214   }
05215 #endif
05216   fftwf_execute(forward_plan);
05217 #else
05218   rfftwnd_real_to_complex(forward_plan, nx*ny,
05219         data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
05220 #endif
05221 #ifdef MANUAL_DEBUG_FFTW3
05222   dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05223 #endif
05224 
05225 #endif
05226 #ifdef ZEROCHECK
05227   int dim3 = initdata.grid.dim3;
05228   int K3 = initdata.grid.K3;
05229   float *d = data;
05230   for ( int i=0; i<nx; ++i ) {
05231    for ( int j=0; j<ny; ++j, d += dim3 ) {
05232     for ( int k=0; k<dim3; ++k ) {
05233       if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
05234         thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
05235     }
05236    }
05237   }
05238 #endif
05239 }

void PmeZPencil::node_process_grid ( PmeGridMsg msg  ) 

Definition at line 6290 of file ComputePme.C.

References forward_fft(), PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

06291 {
06292 #if USE_NODE_PAR_RECEIVE
06293   CmiLock(ComputePmeMgr::fftw_plan_lock);
06294   CmiMemoryReadFence();
06295 #endif
06296   recv_grid(msg);
06297   if(msg->hasData) hasData=msg->hasData;
06298   int limsg;
06299   CmiMemoryAtomicFetchAndInc(imsg,limsg);
06300   grid_msgs[limsg] = msg;
06301   //  CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);      
06302   if(limsg+1 == grid_msgs.size())
06303     {
06304 
06305       if (hasData)
06306         {
06307           forward_fft();
06308         }
06309       send_trans();
06310       imsg=0;
06311       CmiMemoryWriteFence();
06312       //      CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06313     }
06314 #if USE_NODE_PAR_RECEIVE
06315   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06316   CmiMemoryWriteFence();
06317 #endif
06318 }

void PmeZPencil::node_process_untrans ( PmeUntransMsg msg  ) 

Definition at line 6325 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvZUntrans().

06326 {
06327   if ( msg ) {
06328     if ( ! hasData ) NAMD_bug("PmeZPencil::node_process_untrans non-null msg but not hasData");
06329     recv_untrans(msg);
06330   } else if ( hasData ) NAMD_bug("PmeZPencil::node_process_untrans hasData but null msg");
06331 #if USE_NODE_PAR_RECEIVE
06332   CmiMemoryWriteFence();
06333   CmiLock(ComputePmeMgr::fftw_plan_lock);
06334 #endif    
06335   int limsg;
06336   CmiMemoryAtomicFetchAndInc(imsgb,limsg);
06337   if(limsg+1 == initdata.zBlocks)
06338     {
06339 #if USE_NODE_PAR_RECEIVE
06340       CmiMemoryReadFence();
06341 #endif    
06342       if(hasData) {
06343         backward_fft();
06344       }
06345       send_all_ungrid();
06346       hasData=0;
06347       imsgb=0;
06348       evir = 0;
06349       memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3); 
06350       CmiMemoryWriteFence();
06351       //      CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06352     }
06353 #if USE_NODE_PAR_RECEIVE
06354   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06355 #endif
06356 }

void PmeZPencil::recv_grid ( const PmeGridMsg msg  ) 

Definition at line 5135 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

05135                                                 {
05136 
05137   int dim3 = initdata.grid.dim3;
05138   if ( imsg == 0 ) {
05139     lattice = msg->lattice;
05140     sequence = msg->sequence;
05141 #if ! USE_NODE_PAR_RECEIVE
05142     memset(data, 0, sizeof(float)*nx*ny*dim3);
05143 #endif
05144   }
05145 
05146   if ( ! msg->hasData ) return;
05147 
05148   int zlistlen = msg->zlistlen;
05149 #ifdef NAMD_KNL
05150   int * __restrict msg_zlist = msg->zlist;
05151   int * __restrict zlist = work_zlist.begin();
05152   __assume_aligned(zlist,64);
05153   for ( int k=0; k<zlistlen; ++k ) {
05154     zlist[k] = msg_zlist[k];
05155   }
05156 #else
05157   int * __restrict zlist = msg->zlist;
05158 #endif
05159   char * __restrict fmsg = msg->fgrid;
05160   float * __restrict qmsg = msg->qgrid;
05161   float * __restrict d = data;
05162   int numGrids = 1;  // pencil FFT doesn't support multiple grids
05163   for ( int g=0; g<numGrids; ++g ) {
05164     for ( int i=0; i<nx; ++i ) {
05165      for ( int j=0; j<ny; ++j, d += dim3 ) {
05166       if( *(fmsg++) ) {
05167         #pragma ivdep
05168         for ( int k=0; k<zlistlen; ++k ) {
05169           d[zlist[k]] += *(qmsg++);
05170         }
05171       }
05172      }
05173     }
05174   }
05175 }

void PmeZPencil::recv_untrans ( const PmeUntransMsg msg  ) 

Definition at line 6121 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

06121                                                       {
06122 #if ! USE_NODE_PAR_RECEIVE
06123     if(imsg==0) evir=0.;
06124 #endif
06125 
06126   int block3 = initdata.grid.block3;
06127   int dim3 = initdata.grid.dim3;
06128   int kb = msg->sourceNode;
06129   int nz = msg->ny;
06130   const float *md = msg->qgrid;
06131   float *d = data;
06132   for ( int i=0; i<nx; ++i ) {
06133 #if CMK_BLUEGENEL
06134     CmiNetworkProgress();
06135 #endif   
06136     for ( int j=0; j<ny; ++j, d += dim3 ) {
06137       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
06138 #ifdef ZEROCHECK
06139         if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
06140                                     thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
06141 #endif
06142         d[2*k] = *(md++);
06143         d[2*k+1] = *(md++);
06144       }
06145     }
06146   }
06147 }

void PmeZPencil::recvNodeAck ( PmeAckMsg msg  ) 

Definition at line 6320 of file ComputePme.C.

References node_process_untrans().

06320                                            {
06321   delete msg;
06322   node_process_untrans(0);
06323 }

void PmeZPencil::send_all_ungrid (  ) 

Definition at line 6217 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06217                                  {
06218 
06219 #if     CMK_SMP && USE_CKLOOP
06220         int useCkLoop = Node::Object()->simParameters->useCkLoop;
06221         if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
06222            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06223                 //????What's the best value for numChunks?????
06224                 CkLoop_Parallelize(PmeZPencilSendUngrid, 1, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
06225                 return;
06226         }
06227 #endif
06228         send_subset_ungrid(0, grid_msgs.size()-1);
06229 }

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5247 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

05247                                                         {
05248         int zBlocks = initdata.zBlocks;
05249         int block3 = initdata.grid.block3;
05250         int dim3 = initdata.grid.dim3;
05251         for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
05252           int kb = send_order[isend];
05253           int nz = block3;
05254           if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05255           int hd = ( hasData ? 1 : 0 );
05256           PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05257           msg->lattice = lattice;
05258           msg->sourceNode = thisIndex.y;
05259           msg->hasData = hasData;
05260           msg->nx = ny;
05261          if ( hasData ) {
05262           float *md = msg->qgrid;
05263           const float *d = data;
05264           for ( int i=0; i<nx; ++i ) {
05265            for ( int j=0; j<ny; ++j, d += dim3 ) {
05266                 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05267                   *(md++) = d[2*k];
05268                   *(md++) = d[2*k+1];
05269                 }
05270            }
05271           }
05272          }
05273           msg->sequence = sequence;
05274           SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05275 
05276     CmiEnableUrgentSend(1);
05277 #if USE_NODE_PAR_RECEIVE
05278       msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05279 #if Y_PERSIST 
05280       CmiUsePersistentHandle(&trans_handle[isend], 1);
05281 #endif
05282       initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05283 #if Y_PERSIST 
05284       CmiUsePersistentHandle(NULL, 0);
05285 #endif    
05286 #else
05287 #if Y_PERSIST 
05288       CmiUsePersistentHandle(&trans_handle[isend], 1);
05289 #endif
05290       initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05291 #if Y_PERSIST 
05292       CmiUsePersistentHandle(NULL, 0);
05293 #endif    
05294 #endif
05295     CmiEnableUrgentSend(0);
05296     }
05297 }

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx 
)

Definition at line 6231 of file ComputePme.C.

References send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

06231                                                          {
06232         for (int limsg=fromIdx; limsg <=toIdx; ++limsg ) {
06233                 PmeGridMsg *msg = grid_msgs[limsg];
06234                 send_ungrid(msg);
06235         }
06236 }

void PmeZPencil::send_trans (  ) 

Definition at line 5299 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

05299                             {
05300 #if USE_PERSISTENT
05301     if (trans_handle == NULL) setup_persistent();
05302 #endif
05303 #if     CMK_SMP && USE_CKLOOP
05304         int useCkLoop = Node::Object()->simParameters->useCkLoop;
05305         if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
05306            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05313                 //send_subset_trans(0, initdata.zBlocks-1);
05314                 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
05315                 return;
05316         }
05317 #endif
05318   int zBlocks = initdata.zBlocks;
05319   int block3 = initdata.grid.block3;
05320   int dim3 = initdata.grid.dim3;
05321   for ( int isend=0; isend<zBlocks; ++isend ) {
05322     int kb = send_order[isend];
05323     int nz = block3;
05324     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05325     int hd = ( hasData ? 1 : 0 );
05326     PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05327     msg->lattice = lattice;
05328     msg->sourceNode = thisIndex.y;
05329     msg->hasData = hasData;
05330     msg->nx = ny;
05331    if ( hasData ) {
05332     float *md = msg->qgrid;
05333     const float *d = data;
05334     for ( int i=0; i<nx; ++i ) {
05335      for ( int j=0; j<ny; ++j, d += dim3 ) {
05336       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05337         *(md++) = d[2*k];
05338         *(md++) = d[2*k+1];
05339       }
05340      }
05341     }
05342    }
05343     msg->sequence = sequence;
05344     SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05345 
05346     CmiEnableUrgentSend(1);
05347 #if USE_NODE_PAR_RECEIVE
05348     msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05349 #if Y_PERSIST 
05350     CmiUsePersistentHandle(&trans_handle[isend], 1);
05351 #endif
05352     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05353 #if Y_PERSIST 
05354     CmiUsePersistentHandle(NULL, 0);
05355 #endif    
05356 #else
05357 #if Y_PERSIST 
05358     CmiUsePersistentHandle(&trans_handle[isend], 1);
05359 #endif
05360     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05361 #if Y_PERSIST 
05362     CmiUsePersistentHandle(NULL, 0);
05363 #endif    
05364 #endif
05365     CmiEnableUrgentSend(0);
05366   }
05367 }

void PmeZPencil::send_ungrid ( PmeGridMsg msg  ) 

Definition at line 6238 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, NAMD_bug(), PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

06238                                             {
06239 
06240 #ifdef NAMD_CUDA
06241   const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
06242 #else
06243   const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
06244 #endif
06245 
06246   int pe = msg->sourceNode;
06247   if ( ! msg->hasData ) {
06248     delete msg;
06249     PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
06250     SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
06251     CmiEnableUrgentSend(1);
06252     initdata.pmeProxy[pe].recvAck(ackmsg);
06253     CmiEnableUrgentSend(0);
06254     return;
06255   }
06256   if ( ! hasData ) NAMD_bug("PmeZPencil::send_ungrid msg->hasData but not pencil->hasData");
06257   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
06258   int dim3 = initdata.grid.dim3;
06259   int zlistlen = msg->zlistlen;
06260   int *zlist = msg->zlist;
06261   char *fmsg = msg->fgrid;
06262   float *qmsg = msg->qgrid;
06263   float *d = data;
06264   int numGrids = 1;  // pencil FFT doesn't support multiple grids
06265   for ( int g=0; g<numGrids; ++g ) {
06266 #if CMK_BLUEGENEL
06267     CmiNetworkProgress();
06268 #endif    
06269     for ( int i=0; i<nx; ++i ) {
06270       for ( int j=0; j<ny; ++j, d += dim3 ) {
06271         if( *(fmsg++) ) {
06272           for ( int k=0; k<zlistlen; ++k ) {
06273             *(qmsg++) = d[zlist[k]];
06274           }
06275         }
06276       }
06277     }
06278   }
06279   SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
06280     CmiEnableUrgentSend(1);
06281 #ifdef NAMD_CUDA
06282     if ( offload ) {
06283       initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
06284     } else
06285 #endif
06286   initdata.pmeProxy[pe].recvUngrid(msg);
06287     CmiEnableUrgentSend(0);
06288 }


The documentation for this class was generated from the following file:

Generated on 19 Sep 2020 for NAMD by  doxygen 1.6.1