PmeZPencil Class Reference

Inheritance diagram for PmeZPencil:
PmePencil< CBase_PmeZPencil > CBase_PmeZPencil

List of all members.

Public Member Functions

PmeZPencil_SDAG_CODE PmeZPencil ()
 PmeZPencil (CkMigrateMessage *)
 ~PmeZPencil ()
void fft_init ()
void recv_grid (const PmeGridMsg *)
void forward_fft ()
void send_trans ()
void send_subset_trans (int fromIdx, int toIdx)
void recv_untrans (const PmeUntransMsg *)
void recvNodeAck (PmeAckMsg *)
void node_process_untrans (PmeUntransMsg *)
void node_process_grid (PmeGridMsg *)
void backward_fft ()
void send_ungrid (PmeGridMsg *)
void send_all_ungrid ()
void send_subset_ungrid (int fromIdx, int toIdx)

Detailed Description

Definition at line 4535 of file ComputePme.C.


Constructor & Destructor Documentation

PmeZPencil_SDAG_CODE PmeZPencil::PmeZPencil (  )  [inline]

Definition at line 4538 of file ComputePme.C.

04538 { __sdag_init(); setMigratable(false); }

PmeZPencil::PmeZPencil ( CkMigrateMessage *   )  [inline]

Definition at line 4539 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and PmePencil< CBase_PmeZPencil >::imsgb.

04539 { __sdag_init();  setMigratable (false); imsg=imsgb=0;}

PmeZPencil::~PmeZPencil (  )  [inline]

Definition at line 4540 of file ComputePme.C.

04540                       {
04541         #ifdef NAMD_FFTW
04542         #ifdef NAMD_FFTW_3
04543                 delete [] forward_plans;
04544                 delete [] backward_plans;
04545         #endif
04546         #endif
04547         }


Member Function Documentation

void PmeZPencil::backward_fft (  ) 

Definition at line 6123 of file ComputePme.C.

References CKLOOP_CTRL_PME_BACKWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06123                               {
06124 #ifdef NAMD_FFTW
06125 #ifdef MANUAL_DEBUG_FFTW3
06126   dumpMatrixFloat3("bw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06127 #endif
06128 #ifdef NAMD_FFTW_3
06129 #if     CMK_SMP && USE_CKLOOP
06130   int useCkLoop = Node::Object()->simParameters->useCkLoop;
06131   if(useCkLoop>=CKLOOP_CTRL_PME_BACKWARDFFT
06132      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06133           //for(int i=0; i<numPlans; i++) fftwf_execute(backward_plans[i]);
06134           //transform the above loop
06135           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)backward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
06136           return;
06137   }
06138 #endif
06139   fftwf_execute(backward_plan);
06140 #else
06141   rfftwnd_complex_to_real(backward_plan, nx*ny,
06142             (fftw_complex *) data, 1, initdata.grid.dim3/2, work, 1, 0);
06143 #endif
06144 #ifdef MANUAL_DEBUG_FFTW3
06145   dumpMatrixFloat3("bw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
06146 #endif
06147 
06148 #endif
06149   
06150 #if CMK_BLUEGENEL
06151   CmiNetworkProgress();
06152 #endif
06153 
06154 #ifdef FFTCHECK
06155   int dim3 = initdata.grid.dim3;
06156   int K1 = initdata.grid.K1;
06157   int K2 = initdata.grid.K2;
06158   int K3 = initdata.grid.K3;
06159   float scale = 1. / (1. * K1 * K2 * K3);
06160   float maxerr = 0.;
06161   float maxstd = 0.;
06162   int mi, mj, mk;  mi = mj = mk = -1;
06163   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
06164   const float *d = data;
06165   for ( int i=0; i<nx; ++i ) {
06166    for ( int j=0; j<ny; ++j, d += dim3 ) {
06167     for ( int k=0; k<K3; ++k ) {
06168       float std = 10. * (10. * (10. * std_base + i) + j) + k;
06169       float err = scale * d[k] - std;
06170       if ( fabsf(err) > fabsf(maxerr) ) {
06171         maxerr = err;
06172         maxstd = std;
06173         mi = i;  mj = j;  mk = k;
06174       }
06175     }
06176    }
06177   }
06178   CkPrintf("pencil %d %d max error %f at %d %d %d (should be %f)\n",
06179                 thisIndex.x, thisIndex.y, maxerr, mi, mj, mk, maxstd);
06180 #endif
06181 
06182 }

void PmeZPencil::fft_init (  ) 

Definition at line 4736 of file ComputePme.C.

References PmeGrid::block1, PmeGrid::block2, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, SimParameters::FFTWEstimate, fftwf_malloc, SimParameters::FFTWPatient, PmePencilInitMsgData::grid, if(), PmePencil< CBase_PmeZPencil >::initdata, PmeGrid::K1, PmeGrid::K2, PmeGrid::K3, NAMD_die(), PmePencil< CBase_PmeZPencil >::order_init(), PmePencilInitMsgData::pmeNodeProxy, Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, and PmePencilInitMsgData::zBlocks.

04736                           {
04737   CProxy_Node nd(CkpvAccess(BOCclass_group).node);
04738   Node *node = nd.ckLocalBranch();
04739   SimParameters *simParams = node->simParameters;
04740 
04741 #if USE_NODE_PAR_RECEIVE
04742   ((NodePmeMgr *)CkLocalNodeBranch(initdata.pmeNodeProxy))->registerZPencil(thisIndex,this);
04743 #endif
04744 
04745   int K1 = initdata.grid.K1;
04746   int K2 = initdata.grid.K2;
04747   int K3 = initdata.grid.K3;
04748   int dim3 = initdata.grid.dim3;
04749   int block1 = initdata.grid.block1;
04750   int block2 = initdata.grid.block2;
04751 
04752   nx = block1;
04753   if ( (thisIndex.x + 1) * block1 > K1 ) nx = K1 - thisIndex.x * block1;
04754   ny = block2;
04755   if ( (thisIndex.y + 1) * block2 > K2 ) ny = K2 - thisIndex.y * block2;
04756 
04757 #ifdef NAMD_FFTW
04758   CmiLock(ComputePmeMgr::fftw_plan_lock);
04759 
04760   data = (float *) fftwf_malloc( sizeof(float) *nx*ny*dim3);
04761   work = new float[dim3];
04762 
04763   order_init(initdata.zBlocks);
04764 
04765 #ifdef NAMD_FFTW_3
04766   /* need array of sizes for the how many */
04767 
04768   int fftwFlags = simParams->FFTWPatient ? FFTW_PATIENT  : simParams->FFTWEstimate ? FFTW_ESTIMATE  : FFTW_MEASURE ;
04769   int sizeLines=nx*ny;
04770   int planLineSizes[1];
04771   planLineSizes[0]=K3;
04772   int ndim=initdata.grid.dim3; // storage space is initdata.grid.dim3
04773   int ndimHalf=ndim/2;
04774   forward_plan = fftwf_plan_many_dft_r2c(1, planLineSizes, sizeLines,
04775                                          (float *) data, NULL, 1, 
04776                                          ndim,
04777                                          (fftwf_complex *) data, NULL, 1,
04778                                          ndimHalf,
04779                                          fftwFlags);
04780 
04781   backward_plan = fftwf_plan_many_dft_c2r(1, planLineSizes, sizeLines,
04782                                           (fftwf_complex *) data, NULL, 1, 
04783                                           ndimHalf,
04784                                           (float *) data, NULL, 1, 
04785                                           ndim,
04786                                           fftwFlags);
04787 #if     CMK_SMP && USE_CKLOOP
04788   if(simParams->useCkLoop) {
04789           //How many FFT plans to be created? The grain-size issue!!.
04790           //Currently, I am choosing the min(nx, ny) to be coarse-grain
04791           numPlans = (nx<=ny?nx:ny);
04792           if ( numPlans < CkMyNodeSize() ) numPlans = (nx>=ny?nx:ny);
04793           if ( numPlans < CkMyNodeSize() ) numPlans = sizeLines;
04794           int howmany = sizeLines/numPlans;
04795           forward_plans = new fftwf_plan[numPlans];
04796           backward_plans = new fftwf_plan[numPlans];
04797           for(int i=0; i<numPlans; i++) {
04798                   int dimStride = i*ndim*howmany;
04799                   int dimHalfStride = i*ndimHalf*howmany;
04800                   forward_plans[i] = fftwf_plan_many_dft_r2c(1, planLineSizes, howmany,
04801                                                                                                          ((float *)data)+dimStride, NULL, 1,
04802                                                                                                          ndim,
04803                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04804                                                                                                          ndimHalf,
04805                                                                                                          fftwFlags);
04806 
04807                   backward_plans[i] = fftwf_plan_many_dft_c2r(1, planLineSizes, howmany,
04808                                                                                                          ((fftwf_complex *)data)+dimHalfStride, NULL, 1,
04809                                                                                                          ndimHalf,
04810                                                                                                          ((float *)data)+dimStride, NULL, 1,
04811                                                                                                          ndim,
04812                                                                                                          fftwFlags);
04813           }
04814   }else 
04815 #endif 
04816   {
04817           forward_plans = NULL;
04818           backward_plans = NULL;
04819   }
04820 #else
04821   forward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_REAL_TO_COMPLEX,
04822         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04823         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04824   backward_plan = rfftwnd_create_plan_specific(1, &K3, FFTW_COMPLEX_TO_REAL,
04825         ( simParams->FFTWEstimate ? FFTW_ESTIMATE : FFTW_MEASURE )
04826         | FFTW_IN_PLACE | FFTW_USE_WISDOM, data, 1, work, 1);
04827 #endif
04828   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
04829 #else
04830   NAMD_die("Sorry, FFTW must be compiled in to use PME.");
04831 #endif
04832 
04833 #if USE_NODE_PAR_RECEIVE
04834     evir = 0.;
04835     memset(data, 0, sizeof(float) * nx*ny*dim3);
04836 #endif
04837 }

void PmeZPencil::forward_fft (  ) 

Definition at line 5160 of file ComputePme.C.

References CKLOOP_CTRL_PME_FORWARDFFT, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGrid::K3, Node::Object(), PmeXZPencilFFT(), Node::simParameters, SimParameters::useCkLoop, PmePencil< CBase_PmeZPencil >::work, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_grid().

05160                              {
05161   evir = 0.;
05162 #ifdef FFTCHECK
05163   int dim3 = initdata.grid.dim3;
05164   int K3 = initdata.grid.K3;
05165   float std_base = 100. * (thisIndex.x+1.) + 10. * (thisIndex.y+1.);
05166   float *d = data;
05167   for ( int i=0; i<nx; ++i ) {
05168    for ( int j=0; j<ny; ++j, d += dim3 ) {
05169     for ( int k=0; k<dim3; ++k ) {
05170       d[k] = 10. * (10. * (10. * std_base + i) + j) + k;
05171     }
05172    }
05173   }
05174 #endif
05175 #ifdef NAMD_FFTW
05176 #ifdef MANUAL_DEBUG_FFTW3
05177   dumpMatrixFloat3("fw_z_b", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05178 #endif
05179 #ifdef NAMD_FFTW_3
05180 #if     CMK_SMP && USE_CKLOOP
05181   int useCkLoop = Node::Object()->simParameters->useCkLoop;
05182   if(useCkLoop>=CKLOOP_CTRL_PME_FORWARDFFT
05183      && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05184           //for(int i=0; i<numPlans; i++) fftwf_execute(forward_plans[i]);
05185           //transform the above loop
05186           CkLoop_Parallelize(PmeXZPencilFFT, 1, (void *)forward_plans, CkMyNodeSize(), 0, numPlans-1); //sync
05187           return;
05188   }
05189 #endif
05190   fftwf_execute(forward_plan);
05191 #else
05192   rfftwnd_real_to_complex(forward_plan, nx*ny,
05193         data, 1, initdata.grid.dim3, (fftw_complex *) work, 1, 0);
05194 #endif
05195 #ifdef MANUAL_DEBUG_FFTW3
05196   dumpMatrixFloat3("fw_z_a", data, nx, ny, initdata.grid.dim3, thisIndex.x, thisIndex.y, thisIndex.z);
05197 #endif
05198 
05199 #endif
05200 #ifdef ZEROCHECK
05201   int dim3 = initdata.grid.dim3;
05202   int K3 = initdata.grid.K3;
05203   float *d = data;
05204   for ( int i=0; i<nx; ++i ) {
05205    for ( int j=0; j<ny; ++j, d += dim3 ) {
05206     for ( int k=0; k<dim3; ++k ) {
05207       if ( d[k] == 0. ) CkPrintf("0 in Z at %d %d %d %d %d %d %d %d %d\n",
05208         thisIndex.x, thisIndex.y, i, j, k, nx, ny, dim3);
05209     }
05210    }
05211   }
05212 #endif
05213 }

void PmeZPencil::node_process_grid ( PmeGridMsg *  msg  ) 

Definition at line 6264 of file ComputePme.C.

References forward_fft(), PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, recv_grid(), send_trans(), and ResizeArray< Elem >::size().

Referenced by NodePmeMgr::recvZGrid().

06265 {
06266 #if USE_NODE_PAR_RECEIVE
06267   CmiLock(ComputePmeMgr::fftw_plan_lock);
06268   CmiMemoryReadFence();
06269 #endif
06270   recv_grid(msg);
06271   if(msg->hasData) hasData=msg->hasData;
06272   int limsg;
06273   CmiMemoryAtomicFetchAndInc(imsg,limsg);
06274   grid_msgs[limsg] = msg;
06275   //  CkPrintf("[%d] PmeZPencil node_process_grid for %d %d %d has %d of %d imsg %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z, limsg, grid_msgs.size(), imsg);      
06276   if(limsg+1 == grid_msgs.size())
06277     {
06278 
06279       if (hasData)
06280         {
06281           forward_fft();
06282         }
06283       send_trans();
06284       imsg=0;
06285       CmiMemoryWriteFence();
06286       //      CkPrintf("[%d] PmeZPencil grid node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06287     }
06288 #if USE_NODE_PAR_RECEIVE
06289   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06290   CmiMemoryWriteFence();
06291 #endif
06292 }

void PmeZPencil::node_process_untrans ( PmeUntransMsg *  msg  ) 

Definition at line 6299 of file ComputePme.C.

References backward_fft(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::imsgb, PmePencil< CBase_PmeZPencil >::initdata, NAMD_bug(), recv_untrans(), send_all_ungrid(), and PmePencilInitMsgData::zBlocks.

Referenced by recvNodeAck(), and NodePmeMgr::recvZUntrans().

06300 {
06301   if ( msg ) {
06302     if ( ! hasData ) NAMD_bug("PmeZPencil::node_process_untrans non-null msg but not hasData");
06303     recv_untrans(msg);
06304   } else if ( hasData ) NAMD_bug("PmeZPencil::node_process_untrans hasData but null msg");
06305 #if USE_NODE_PAR_RECEIVE
06306   CmiMemoryWriteFence();
06307   CmiLock(ComputePmeMgr::fftw_plan_lock);
06308 #endif    
06309   int limsg;
06310   CmiMemoryAtomicFetchAndInc(imsgb,limsg);
06311   if(limsg+1 == initdata.zBlocks)
06312     {
06313 #if USE_NODE_PAR_RECEIVE
06314       CmiMemoryReadFence();
06315 #endif    
06316       if(hasData) {
06317         backward_fft();
06318       }
06319       send_all_ungrid();
06320       hasData=0;
06321       imsgb=0;
06322       evir = 0;
06323       memset(data, 0, sizeof(float) * nx*ny* initdata.grid.dim3); 
06324       CmiMemoryWriteFence();
06325       //      CkPrintf("[%d] PmeZPencil untrans node_zero imsg for %d %d %d\n",CkMyPe(),thisIndex.x,thisIndex.y,thisIndex.z);
06326     }
06327 #if USE_NODE_PAR_RECEIVE
06328   CmiUnlock(ComputePmeMgr::fftw_plan_lock);
06329 #endif
06330 }

void PmeZPencil::recv_grid ( const PmeGridMsg *  msg  ) 

Definition at line 5109 of file ComputePme.C.

References ResizeArray< Elem >::begin(), PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeGridMsg::lattice, PmePencil< CBase_PmeZPencil >::lattice, PmeGridMsg::qgrid, PmeGridMsg::sequence, PmePencil< CBase_PmeZPencil >::sequence, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by node_process_grid().

05109                                                 {
05110 
05111   int dim3 = initdata.grid.dim3;
05112   if ( imsg == 0 ) {
05113     lattice = msg->lattice;
05114     sequence = msg->sequence;
05115 #if ! USE_NODE_PAR_RECEIVE
05116     memset(data, 0, sizeof(float)*nx*ny*dim3);
05117 #endif
05118   }
05119 
05120   if ( ! msg->hasData ) return;
05121 
05122   int zlistlen = msg->zlistlen;
05123 #ifdef NAMD_KNL
05124   int * __restrict msg_zlist = msg->zlist;
05125   int * __restrict zlist = work_zlist.begin();
05126   __assume_aligned(zlist,64);
05127   for ( int k=0; k<zlistlen; ++k ) {
05128     zlist[k] = msg_zlist[k];
05129   }
05130 #else
05131   int * __restrict zlist = msg->zlist;
05132 #endif
05133   char * __restrict fmsg = msg->fgrid;
05134   float * __restrict qmsg = msg->qgrid;
05135   float * __restrict d = data;
05136   int numGrids = 1;  // pencil FFT doesn't support multiple grids
05137   for ( int g=0; g<numGrids; ++g ) {
05138     for ( int i=0; i<nx; ++i ) {
05139      for ( int j=0; j<ny; ++j, d += dim3 ) {
05140       if( *(fmsg++) ) {
05141         #pragma ivdep
05142         for ( int k=0; k<zlistlen; ++k ) {
05143           d[zlist[k]] += *(qmsg++);
05144         }
05145       }
05146      }
05147     }
05148   }
05149 }

void PmeZPencil::recv_untrans ( const PmeUntransMsg *  msg  ) 

Definition at line 6095 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmePencil< CBase_PmeZPencil >::evir, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::imsg, PmePencil< CBase_PmeZPencil >::initdata, j, PmeUntransMsg::ny, PmeUntransMsg::qgrid, and PmeUntransMsg::sourceNode.

Referenced by node_process_untrans().

06095                                                       {
06096 #if ! USE_NODE_PAR_RECEIVE
06097     if(imsg==0) evir=0.;
06098 #endif
06099 
06100   int block3 = initdata.grid.block3;
06101   int dim3 = initdata.grid.dim3;
06102   int kb = msg->sourceNode;
06103   int nz = msg->ny;
06104   const float *md = msg->qgrid;
06105   float *d = data;
06106   for ( int i=0; i<nx; ++i ) {
06107 #if CMK_BLUEGENEL
06108     CmiNetworkProgress();
06109 #endif   
06110     for ( int j=0; j<ny; ++j, d += dim3 ) {
06111       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
06112 #ifdef ZEROCHECK
06113         if ( (*md) == 0. ) CkPrintf("0 in YZ at %d %d %d %d %d %d %d %d %d\n",
06114                                     thisIndex.x, thisIndex.y, kb, i, j, k, nx, ny, nz);
06115 #endif
06116         d[2*k] = *(md++);
06117         d[2*k+1] = *(md++);
06118       }
06119     }
06120   }
06121 }

void PmeZPencil::recvNodeAck ( PmeAckMsg *  msg  ) 

Definition at line 6294 of file ComputePme.C.

References node_process_untrans().

06294                                            {
06295   delete msg;
06296   node_process_untrans(0);
06297 }

void PmeZPencil::send_all_ungrid (  ) 

Definition at line 6191 of file ComputePme.C.

References CKLOOP_CTRL_PME_SENDUNTRANS, PmePencil< CBase_PmeZPencil >::initdata, Node::Object(), PmeZPencilSendUngrid(), send_subset_ungrid(), Node::simParameters, ResizeArray< Elem >::size(), SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, and PmePencilInitMsgData::yBlocks.

Referenced by node_process_untrans().

06191                                  {
06192 
06193 #if     CMK_SMP && USE_CKLOOP
06194         int useCkLoop = Node::Object()->simParameters->useCkLoop;
06195         if(useCkLoop>=CKLOOP_CTRL_PME_SENDUNTRANS
06196            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
06197                 //????What's the best value for numChunks?????
06198                 CkLoop_Parallelize(PmeZPencilSendUngrid, 1, (void *)this, grid_msgs.size(), 0, grid_msgs.size()-1, 1); //has to sync
06199                 return;
06200         }
06201 #endif
06202         send_subset_ungrid(0, grid_msgs.size()-1);
06203 }

void PmeZPencil::send_subset_trans ( int  fromIdx,
int  toIdx 
)

Definition at line 5221 of file ComputePme.C.

References PmeGrid::block3, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, PmeTransMsg::sourceNode, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by PmeZPencilSendTrans().

05221                                                         {
05222         int zBlocks = initdata.zBlocks;
05223         int block3 = initdata.grid.block3;
05224         int dim3 = initdata.grid.dim3;
05225         for ( int isend=fromIdx; isend<=toIdx; ++isend ) {
05226           int kb = send_order[isend];
05227           int nz = block3;
05228           if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05229           int hd = ( hasData ? 1 : 0 );
05230           PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05231           msg->lattice = lattice;
05232           msg->sourceNode = thisIndex.y;
05233           msg->hasData = hasData;
05234           msg->nx = ny;
05235          if ( hasData ) {
05236           float *md = msg->qgrid;
05237           const float *d = data;
05238           for ( int i=0; i<nx; ++i ) {
05239            for ( int j=0; j<ny; ++j, d += dim3 ) {
05240                 for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05241                   *(md++) = d[2*k];
05242                   *(md++) = d[2*k+1];
05243                 }
05244            }
05245           }
05246          }
05247           msg->sequence = sequence;
05248           SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05249 
05250     CmiEnableUrgentSend(1);
05251 #if USE_NODE_PAR_RECEIVE
05252       msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05253 #if Y_PERSIST 
05254       CmiUsePersistentHandle(&trans_handle[isend], 1);
05255 #endif
05256       initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05257 #if Y_PERSIST 
05258       CmiUsePersistentHandle(NULL, 0);
05259 #endif    
05260 #else
05261 #if Y_PERSIST 
05262       CmiUsePersistentHandle(&trans_handle[isend], 1);
05263 #endif
05264       initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05265 #if Y_PERSIST 
05266       CmiUsePersistentHandle(NULL, 0);
05267 #endif    
05268 #endif
05269     CmiEnableUrgentSend(0);
05270     }
05271 }

void PmeZPencil::send_subset_ungrid ( int  fromIdx,
int  toIdx 
)

Definition at line 6205 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::imsg, and send_ungrid().

Referenced by PmeZPencilSendUngrid(), and send_all_ungrid().

06205                                                          {
06206         for (int imsg=fromIdx; imsg <=toIdx; ++imsg ) {
06207                 PmeGridMsg *msg = grid_msgs[imsg];
06208                 send_ungrid(msg);
06209         }
06210 }

void PmeZPencil::send_trans (  ) 

Definition at line 5273 of file ComputePme.C.

References PmeGrid::block3, CKLOOP_CTRL_PME_SENDTRANS, PmePencil< CBase_PmeZPencil >::data, PmeTransMsg::destElem, PmeGrid::dim3, PmePencilInitMsgData::grid, PmeTransMsg::hasData, PmePencil< CBase_PmeZPencil >::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, PmePencil< CBase_PmeZPencil >::lattice, PmeTransMsg::lattice, PmeTransMsg::nx, Node::Object(), PME_TRANS_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmeZPencilSendTrans(), PRIORITY_SIZE, PmeTransMsg::qgrid, PmePencil< CBase_PmeZPencil >::send_order, PmePencil< CBase_PmeZPencil >::sequence, PmeTransMsg::sequence, SET_PRIORITY, Node::simParameters, PmeTransMsg::sourceNode, SimParameters::useCkLoop, PmePencilInitMsgData::xBlocks, PmePencilInitMsgData::yBlocks, PmePencilInitMsgData::ym, PmePencilInitMsgData::yPencil, and PmePencilInitMsgData::zBlocks.

Referenced by node_process_grid().

05273                             {
05274 #if USE_PERSISTENT
05275     if (trans_handle == NULL) setup_persistent();
05276 #endif
05277 #if     CMK_SMP && USE_CKLOOP
05278         int useCkLoop = Node::Object()->simParameters->useCkLoop;
05279         if(useCkLoop>=CKLOOP_CTRL_PME_SENDTRANS
05280            && CkNumPes() >= 2 * initdata.xBlocks * initdata.yBlocks) {
05287                 //send_subset_trans(0, initdata.zBlocks-1);
05288                 CkLoop_Parallelize(PmeZPencilSendTrans, 1, (void *)this, CkMyNodeSize(), 0, initdata.zBlocks-1, 1); //not sync
05289                 return;
05290         }
05291 #endif
05292   int zBlocks = initdata.zBlocks;
05293   int block3 = initdata.grid.block3;
05294   int dim3 = initdata.grid.dim3;
05295   for ( int isend=0; isend<zBlocks; ++isend ) {
05296     int kb = send_order[isend];
05297     int nz = block3;
05298     if ( (kb+1)*block3 > dim3/2 ) nz = dim3/2 - kb*block3;
05299     int hd = ( hasData ? 1 : 0 );
05300     PmeTransMsg *msg = new (hd*nx*ny*nz*2,PRIORITY_SIZE) PmeTransMsg;
05301     msg->lattice = lattice;
05302     msg->sourceNode = thisIndex.y;
05303     msg->hasData = hasData;
05304     msg->nx = ny;
05305    if ( hasData ) {
05306     float *md = msg->qgrid;
05307     const float *d = data;
05308     for ( int i=0; i<nx; ++i ) {
05309      for ( int j=0; j<ny; ++j, d += dim3 ) {
05310       for ( int k=kb*block3; k<(kb*block3+nz); ++k ) {
05311         *(md++) = d[2*k];
05312         *(md++) = d[2*k+1];
05313       }
05314      }
05315     }
05316    }
05317     msg->sequence = sequence;
05318     SET_PRIORITY(msg,sequence,PME_TRANS_PRIORITY)
05319 
05320     CmiEnableUrgentSend(1);
05321 #if USE_NODE_PAR_RECEIVE
05322     msg->destElem=CkArrayIndex3D(thisIndex.x,0,kb);
05323 #if Y_PERSIST 
05324     CmiUsePersistentHandle(&trans_handle[isend], 1);
05325 #endif
05326     initdata.pmeNodeProxy[CmiNodeOf(initdata.ym.ckLocalBranch()->procNum(0,msg->destElem))].recvYTrans(msg);
05327 #if Y_PERSIST 
05328     CmiUsePersistentHandle(NULL, 0);
05329 #endif    
05330 #else
05331 #if Y_PERSIST 
05332     CmiUsePersistentHandle(&trans_handle[isend], 1);
05333 #endif
05334     initdata.yPencil(thisIndex.x,0,kb).recvTrans(msg);
05335 #if Y_PERSIST 
05336     CmiUsePersistentHandle(NULL, 0);
05337 #endif    
05338 #endif
05339     CmiEnableUrgentSend(0);
05340   }
05341 }

void PmeZPencil::send_ungrid ( PmeGridMsg *  msg  ) 

Definition at line 6212 of file ComputePme.C.

References PmePencil< CBase_PmeZPencil >::data, PmeGrid::dim3, PmeGridMsg::fgrid, PmePencilInitMsgData::grid, PmePencil< CBase_PmeZPencil >::hasData, PmeGridMsg::hasData, PmePencil< CBase_PmeZPencil >::initdata, j, NAMD_bug(), PmePencil< CBase_PmeZPencil >::offload, PME_OFFLOAD_UNGRID_PRIORITY, PME_UNGRID_PRIORITY, PmePencilInitMsgData::pmeNodeProxy, PmePencilInitMsgData::pmeProxy, PRIORITY_SIZE, PmeGridMsg::qgrid, PmePencil< CBase_PmeZPencil >::sequence, SET_PRIORITY, PmeGridMsg::sourceNode, PmePencilInitMsgData::yBlocks, PmeGridMsg::zlist, and PmeGridMsg::zlistlen.

Referenced by send_subset_ungrid().

06212                                             {
06213 
06214 #ifdef NAMD_CUDA
06215   const int UNGRID_PRIORITY = ( offload ? PME_OFFLOAD_UNGRID_PRIORITY : PME_UNGRID_PRIORITY );
06216 #else
06217   const int UNGRID_PRIORITY = PME_UNGRID_PRIORITY ;
06218 #endif
06219 
06220   int pe = msg->sourceNode;
06221   if ( ! msg->hasData ) {
06222     delete msg;
06223     PmeAckMsg *ackmsg = new (PRIORITY_SIZE) PmeAckMsg;
06224     SET_PRIORITY(ackmsg,sequence,UNGRID_PRIORITY)
06225     CmiEnableUrgentSend(1);
06226     initdata.pmeProxy[pe].recvAck(ackmsg);
06227     CmiEnableUrgentSend(0);
06228     return;
06229   }
06230   if ( ! hasData ) NAMD_bug("PmeZPencil::send_ungrid msg->hasData but not pencil->hasData");
06231   msg->sourceNode = thisIndex.x * initdata.yBlocks + thisIndex.y;
06232   int dim3 = initdata.grid.dim3;
06233   int zlistlen = msg->zlistlen;
06234   int *zlist = msg->zlist;
06235   char *fmsg = msg->fgrid;
06236   float *qmsg = msg->qgrid;
06237   float *d = data;
06238   int numGrids = 1;  // pencil FFT doesn't support multiple grids
06239   for ( int g=0; g<numGrids; ++g ) {
06240 #if CMK_BLUEGENEL
06241     CmiNetworkProgress();
06242 #endif    
06243     for ( int i=0; i<nx; ++i ) {
06244       for ( int j=0; j<ny; ++j, d += dim3 ) {
06245         if( *(fmsg++) ) {
06246           for ( int k=0; k<zlistlen; ++k ) {
06247             *(qmsg++) = d[zlist[k]];
06248           }
06249         }
06250       }
06251     }
06252   }
06253   SET_PRIORITY(msg,sequence,UNGRID_PRIORITY)
06254     CmiEnableUrgentSend(1);
06255 #ifdef NAMD_CUDA
06256     if ( offload ) {
06257       initdata.pmeNodeProxy[CkNodeOf(pe)].recvUngrid(msg);
06258     } else
06259 #endif
06260   initdata.pmeProxy[pe].recvUngrid(msg);
06261     CmiEnableUrgentSend(0);
06262 }


The documentation for this class was generated from the following file:

ComputePme.C

Generated on 8 Dec 2019 for NAMD by  doxygen 1.6.1