NAMD
Classes | Macros | Functions | Variables
ComputePme.C File Reference
#include <sfftw.h>
#include <srfftw.h>
#include <vector>
#include <algorithm>
#include <deque>
#include "InfoStream.h"
#include "Node.h"
#include "PatchMap.h"
#include "PatchMap.inl"
#include "AtomMap.h"
#include "ComputePme.h"
#include "ComputePmeMgr.decl.h"
#include "PmeBase.inl"
#include "PmeRealSpace.h"
#include "PmeKSpace.h"
#include "ComputeNonbondedUtil.h"
#include "PatchMgr.h"
#include "Molecule.h"
#include "ReductionMgr.h"
#include "ComputeMgr.h"
#include "ComputeMgr.decl.h"
#include "Debug.h"
#include "SimParameters.h"
#include "WorkDistrib.h"
#include "varsizemsg.h"
#include "Random.h"
#include "ckhashtable.h"
#include "Priorities.h"
#include "ComputeMoa.h"
#include "ComputeMoaMgr.decl.h"
#include "DeviceCUDA.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "ComputePmeCUDAKernel.h"
#include "ComputePmeMgr.def.h"

Go to the source code of this file.

Classes

class  PmeAckMsg
 
class  PmeGridMsg
 
class  PmeTransMsg
 
class  PmeSharedTransMsg
 
class  PmeUntransMsg
 
class  PmeSharedUntransMsg
 
class  PmeEvirMsg
 
class  PmePencilMap
 
struct  PmePencilInitMsgData
 
class  PmePencilInitMsg
 
struct  LocalPmeInfo
 
struct  NodePmeInfo
 
struct  sortop_bit_reversed
 
struct  ijpair
 
struct  ijpair_sortop_bit_reversed
 
class  ComputePmeMgr
 
struct  ComputePmeMgr::cuda_submit_charges_args
 
class  NodePmeMgr
 
class  PmePencil< T >
 
class  PmeZPencil
 
class  PmeYPencil
 
class  PmeXPencil
 

Macros

#define fftwf_malloc   fftw_malloc
 
#define fftwf_free   fftw_free
 
#define MIN_DEBUG_LEVEL   3
 
#define NUM_STREAMS   1
 
#define CUDA_STREAM_CREATE(X)   cudaStreamCreate(X)
 
#define CUDA_EVENT_ID_PME_CHARGES   80
 
#define CUDA_EVENT_ID_PME_FORCES   81
 
#define CUDA_EVENT_ID_PME_TICK   82
 
#define CUDA_EVENT_ID_PME_COPY   83
 
#define CUDA_EVENT_ID_PME_KERNEL   84
 
#define count_limit   1000000
 
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
 
#define EVENT_STRIDE   10
 
#define XCOPY(X)   masterPmeMgr->X = X;
 
#define XCOPY(X)   X = masterPmeMgr->X;
 
#define DEBUG_NODE_PAR_RECV   0
 

Functions

void cuda_errcheck (const char *msg)
 
static int findRecipEvirPe ()
 
void generatePmePeList2 (int *gridPeMap, int numGridPes, int *transPeMap, int numTransPes)
 
int compare_bit_reversed (int a, int b)
 
bool less_than_bit_reversed (int a, int b)
 
ResizeArray< ComputePme * > & getComputes (ComputePmeMgr *mgr)
 
int isPmeProcessor (int p)
 
void Pme_init ()
 
static void PmeSlabSendTrans (int first, int last, void *result, int paraNum, void *param)
 
static void PmeSlabSendUntrans (int first, int last, void *result, int paraNum, void *param)
 
static void PmeSlabSendUngrid (int first, int last, void *result, int paraNum, void *param)
 
void CcdCallBacksReset (void *ignored, double curWallTime)
 
void cudaDie (const char *msg, cudaError_t err=cudaSuccess)
 
void cuda_check_pme_forces (void *arg, double walltime)
 
void cuda_check_pme_charges (void *arg, double walltime)
 
static void PmeXZPencilFFT (int first, int last, void *result, int paraNum, void *param)
 
static void PmeZPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
 
static void PmeYPencilForwardFFT (int first, int last, void *result, int paraNum, void *param)
 
static void PmeYPencilSendTrans (int first, int last, void *result, int paraNum, void *param)
 
static void PmeXPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
 
static void PmeYPencilBackwardFFT (int first, int last, void *result, int paraNum, void *param)
 
static void PmeYPencilSendUntrans (int first, int last, void *result, int paraNum, void *param)
 
static void PmeZPencilSendUngrid (int first, int last, void *result, int paraNum, void *param)
 

Variables

__thread DeviceCUDA * deviceCUDA
 
char * pencilPMEProcessors
 

Macro Definition Documentation

#define count_limit   1000000

Definition at line 2469 of file ComputePme.C.

Referenced by cuda_check_pme_charges(), and cuda_check_pme_forces().

#define CUDA_EVENT_ID_PME_CHARGES   80
#define CUDA_EVENT_ID_PME_COPY   83
#define CUDA_EVENT_ID_PME_FORCES   81
#define CUDA_EVENT_ID_PME_KERNEL   84
#define CUDA_EVENT_ID_PME_TICK   82
#define CUDA_POLL (   FN,
  ARG 
)    CcdCallFnAfter(FN,ARG,0.1)
#define CUDA_STREAM_CREATE (   X)    cudaStreamCreate(X)
#define DEBUG_NODE_PAR_RECV   0

Definition at line 4979 of file ComputePme.C.

#define EVENT_STRIDE   10

Definition at line 2471 of file ComputePme.C.

Referenced by cuda_check_pme_forces(), and ComputePmeMgr::ungridCalc().

#define fftwf_free   fftw_free

Definition at line 14 of file ComputePme.C.

Referenced by PmePencil< CBase_PmeZPencil >::~PmePencil().

#define fftwf_malloc   fftw_malloc

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 13 of file ComputePme.C.

Referenced by PmeZPencil::fft_init(), PmeYPencil::fft_init(), and PmeXPencil::fft_init().

#define MIN_DEBUG_LEVEL   3

Definition at line 47 of file ComputePme.C.

#define NUM_STREAMS   1

Definition at line 514 of file ComputePme.C.

Referenced by ComputePmeMgr::ComputePmeMgr().

#define XCOPY (   X)    masterPmeMgr->X = X;
#define XCOPY (   X)    X = masterPmeMgr->X;

Function Documentation

void CcdCallBacksReset ( void *  ignored,
double  curWallTime 
)
int compare_bit_reversed ( int  a,
int  b 
)

Definition at line 318 of file ComputePme.C.

Referenced by pe_sortop_bit_reversed::operator()().

318  {
319  int d = a ^ b;
320  int c = 1;
321  if ( d ) while ( ! (d & c) ) {
322  c = c << 1;
323  }
324  return (a & c) - (b & c);
325 }
void cuda_check_pme_charges ( void *  arg,
double  walltime 
)

Definition at line 3457 of file ComputePme.C.

References CcdCallBacksReset(), ComputePmeMgr::charges_time, ComputePmeMgr::check_charges_count, count_limit, CUDA_EVENT_ID_PME_CHARGES, CUDA_POLL, cudaDie(), ComputePmeMgr::end_charges, ComputePmeMgr::saved_sequence, and ComputePmeMgr::sendChargeGridReady().

Referenced by ComputePmeMgr::pollChargeGridReady().

3457  {
3458  ComputePmeMgr *argp = (ComputePmeMgr *) arg;
3459 
3460  cudaError_t err = cudaEventQuery(argp->end_charges);
3461  if ( err == cudaSuccess ) {
3462  traceUserBracketEvent(CUDA_EVENT_ID_PME_CHARGES,argp->charges_time,walltime);
3463  argp->charges_time = walltime - argp->charges_time;
3464  argp->sendChargeGridReady();
3465  argp->check_charges_count = 0;
3466  } else if ( err != cudaErrorNotReady ) {
3467  char errmsg[256];
3468  sprintf(errmsg,"in cuda_check_pme_charges after polling %d times over %f s on seq %d",
3469  argp->check_charges_count, walltime - argp->charges_time,
3470  argp->saved_sequence);
3471  cudaDie(errmsg,err);
3472  } else if ( ++(argp->check_charges_count) >= count_limit ) {
3473  char errmsg[256];
3474  sprintf(errmsg,"cuda_check_pme_charges polled %d times over %f s on seq %d",
3475  argp->check_charges_count, walltime - argp->charges_time,
3476  argp->saved_sequence);
3477  cudaDie(errmsg,err);
3478  } else {
3479  CcdCallBacksReset(0,walltime); // fix Charm++
3480  CUDA_POLL(cuda_check_pme_charges, arg);
3481  }
3482 }
#define CUDA_POLL(FN, ARG)
Definition: ComputePme.C:2470
cudaEvent_t end_charges
Definition: ComputePme.C:426
void sendChargeGridReady()
Definition: ComputePme.C:3521
void CcdCallBacksReset(void *ignored, double curWallTime)
int check_charges_count
Definition: ComputePme.C:432
void cuda_check_pme_charges(void *arg, double walltime)
Definition: ComputePme.C:3457
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
#define count_limit
Definition: ComputePme.C:2469
int saved_sequence
Definition: ComputePme.C:448
double charges_time
Definition: ComputePme.C:430
#define CUDA_EVENT_ID_PME_CHARGES
void cuda_check_pme_forces ( void *  arg,
double  walltime 
)

Definition at line 2477 of file ComputePme.C.

References CcdCallBacksReset(), ComputePmeMgr::check_forces_count, count_limit, CUDA_EVENT_ID_PME_FORCES, CUDA_POLL, cudaDie(), ComputePmeMgr::end_forces, EVENT_STRIDE, ComputePmeMgr::forces_count, ComputePmeMgr::forces_done_count, ComputePmeMgr::forces_time, WorkDistrib::messageEnqueueWork(), ComputePmeMgr::pmeComputes, and ComputePmeMgr::saved_sequence.

Referenced by ComputePmeMgr::pollForcesReady().

2477  {
2478  ComputePmeMgr *argp = (ComputePmeMgr *) arg;
2479 
2480  while ( 1 ) { // process multiple events per call
2481  cudaError_t err = cudaEventQuery(argp->end_forces[argp->forces_done_count/EVENT_STRIDE]);
2482  if ( err == cudaSuccess ) {
2483  argp->check_forces_count = 0;
2484  for ( int i=0; i<EVENT_STRIDE; ++i ) {
2485  WorkDistrib::messageEnqueueWork(argp->pmeComputes[argp->forces_done_count]);
2486  if ( ++(argp->forces_done_count) == argp->forces_count ) break;
2487  }
2488  if ( argp->forces_done_count == argp->forces_count ) { // last event
2489  traceUserBracketEvent(CUDA_EVENT_ID_PME_FORCES,argp->forces_time,walltime);
2490  argp->forces_time = walltime - argp->forces_time;
2491  //CkPrintf("cuda_check_pme_forces forces_time == %f\n", argp->forces_time);
2492  return;
2493  } else { // more events
2494  continue; // check next event
2495  }
2496  } else if ( err != cudaErrorNotReady ) {
2497  char errmsg[256];
2498  sprintf(errmsg,"in cuda_check_pme_forces for event %d after polling %d times over %f s on seq %d",
2499  argp->forces_done_count/EVENT_STRIDE,
2500  argp->check_forces_count, walltime - argp->forces_time,
2501  argp->saved_sequence);
2502  cudaDie(errmsg,err);
2503  } else if ( ++(argp->check_forces_count) >= count_limit ) {
2504  char errmsg[256];
2505  sprintf(errmsg,"cuda_check_pme_forces for event %d polled %d times over %f s on seq %d",
2506  argp->forces_done_count/EVENT_STRIDE,
2507  argp->check_forces_count, walltime - argp->forces_time,
2508  argp->saved_sequence);
2509  cudaDie(errmsg,err);
2510  } else {
2511  break; // call again
2512  }
2513  } // while ( 1 )
2514  CcdCallBacksReset(0,walltime); // fix Charm++
2515  CUDA_POLL(cuda_check_pme_forces, arg);
2516 }
#define CUDA_POLL(FN, ARG)
Definition: ComputePme.C:2470
double forces_time
Definition: ComputePme.C:431
#define EVENT_STRIDE
Definition: ComputePme.C:2471
static void messageEnqueueWork(Compute *)
Definition: WorkDistrib.C:2727
void CcdCallBacksReset(void *ignored, double curWallTime)
#define CUDA_EVENT_ID_PME_FORCES
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
void cuda_check_pme_forces(void *arg, double walltime)
Definition: ComputePme.C:2477
#define count_limit
Definition: ComputePme.C:2469
ResizeArray< ComputePme * > pmeComputes
Definition: ComputePme.C:454
cudaEvent_t * end_forces
Definition: ComputePme.C:427
int saved_sequence
Definition: ComputePme.C:448
int forces_done_count
Definition: ComputePme.C:429
int check_forces_count
Definition: ComputePme.C:433
void cuda_errcheck ( const char *  msg)

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputePme::doWork(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

41  {
42  cudaError_t err;
43  if ((err = cudaGetLastError()) != cudaSuccess) {
44  char host[128];
45  gethostname(host, 128); host[127] = 0;
46  char devstr[128] = "";
47  int devnum;
48  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
49  sprintf(devstr, " device %d", devnum);
50  }
51  cudaDeviceProp deviceProp;
52  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
53  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
54  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
55  }
56  char errmsg[1024];
57  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
58  NAMD_die(errmsg);
59  }
60 }
void NAMD_die(const char *err_msg)
Definition: common.C:83
void cudaDie ( const char *  msg,
cudaError_t  err = cudaSuccess 
)

Definition at line 9 of file CudaUtils.C.

Referenced by cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), DeviceCUDA::initialize(), and read_CUDA_ARCH().

9  {
10  char host[128];
11  gethostname(host, 128); host[127] = 0;
12  char devstr[128] = "";
13  int devnum;
14  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
15  sprintf(devstr, " device %d", devnum);
16  }
17  cudaDeviceProp deviceProp;
18  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
19  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
20  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
21  }
22  char errmsg[1024];
23  if (err == cudaSuccess) {
24  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s)", msg, CkMyPe(), host, devstr);
25  } else {
26  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
27  }
28  NAMD_die(errmsg);
29 }
void NAMD_die(const char *err_msg)
Definition: common.C:83
static int findRecipEvirPe ( )
static

Definition at line 241 of file ComputePme.C.

References NAMD_bug(), PatchMap::numPatchesOnNode(), and PatchMap::Object().

Referenced by PmeXPencil::evir_init(), and ComputePmeMgr::initialize().

241  {
242  PatchMap *patchMap = PatchMap::Object();
243  {
244  int mype = CkMyPe();
245  if ( patchMap->numPatchesOnNode(mype) ) {
246  return mype;
247  }
248  }
249  {
250  int node = CmiMyNode();
251  int firstpe = CmiNodeFirst(node);
252  int nodeSize = CmiNodeSize(node);
253  int myrank = CkMyRank();
254  for ( int i=0; i<nodeSize; ++i ) {
255  int pe = firstpe + (myrank+i)%nodeSize;
256  if ( patchMap->numPatchesOnNode(pe) ) {
257  return pe;
258  }
259  }
260  }
261  {
262  int *pelist;
263  int nodeSize;
264  CmiGetPesOnPhysicalNode(CmiPhysicalNodeID(CkMyPe()), &pelist, &nodeSize);
265  int myrank;
266  for ( int i=0; i<nodeSize; ++i ) {
267  if ( pelist[i] == CkMyPe() ) myrank = i;
268  }
269  for ( int i=0; i<nodeSize; ++i ) {
270  int pe = pelist[(myrank+i)%nodeSize];
271  if ( patchMap->numPatchesOnNode(pe) ) {
272  return pe;
273  }
274  }
275  }
276  {
277  int mype = CkMyPe();
278  int npes = CkNumPes();
279  for ( int i=0; i<npes; ++i ) {
280  int pe = (mype+i)%npes;
281  if ( patchMap->numPatchesOnNode(pe) ) {
282  return pe;
283  }
284  }
285  }
286  NAMD_bug("findRecipEvirPe() failed!");
287  return -999; // should never happen
288 }
static PatchMap * Object()
Definition: PatchMap.h:27
void NAMD_bug(const char *err_msg)
Definition: common.C:123
int numPatchesOnNode(int node)
Definition: PatchMap.h:60
void generatePmePeList2 ( int *  gridPeMap,
int  numGridPes,
int *  transPeMap,
int  numTransPes 
)

Definition at line 292 of file ComputePme.C.

References WorkDistrib::peDiffuseOrdering, and sort.

Referenced by ComputePmeMgr::initialize().

292  {
293  int ncpus = CkNumPes();
294 
295  for ( int i=0; i<numGridPes; ++i ) {
296  gridPeMap[i] = WorkDistrib::peDiffuseOrdering[ncpus - numGridPes + i];
297  }
298  std::sort(gridPeMap,gridPeMap+numGridPes);
299  int firstTransPe = ncpus - numGridPes - numTransPes;
300  if ( firstTransPe < 0 ) {
301  firstTransPe = 0;
302  // 0 should be first in list, skip if possible
303  if ( ncpus > numTransPes ) firstTransPe = 1;
304  }
305  for ( int i=0; i<numTransPes; ++i ) {
306  transPeMap[i] = WorkDistrib::peDiffuseOrdering[firstTransPe + i];
307  }
308  std::sort(transPeMap,transPeMap+numTransPes);
309 }
static int * peDiffuseOrdering
Definition: WorkDistrib.h:115
BlockRadixSort::TempStorage sort
ResizeArray<ComputePme*>& getComputes ( ComputePmeMgr *  mgr)

Definition at line 587 of file ComputePme.C.

References ComputePmeMgr::pmeComputes.

Referenced by ComputeQM::saveResults().

587  {
588  return mgr->pmeComputes ;
589 }
ResizeArray< ComputePme * > pmeComputes
Definition: ComputePme.C:454
int isPmeProcessor ( int  p)

Definition at line 598 of file ComputePme.C.

References Node::Object(), pencilPMEProcessors, Node::simParameters, simParams, and SimParameters::usePMECUDA.

598  {
599  SimParameters *simParams = Node::Object()->simParameters;
600  if (simParams->usePMECUDA) {
601  return 0;
602  } else {
603  return pencilPMEProcessors[p];
604  }
605 }
static Node * Object()
Definition: Node.h:86
SimParameters * simParameters
Definition: Node.h:178
#define simParams
Definition: Output.C:127
char * pencilPMEProcessors
Definition: ComputePme.C:107
bool less_than_bit_reversed ( int  a,
int  b 
)
inline

Definition at line 327 of file ComputePme.C.

Referenced by sortop_bit_reversed::operator()(), and ijpair_sortop_bit_reversed::operator()().

327  {
328  int d = a ^ b;
329  int c = 1;
330  if ( d ) while ( ! (d & c) ) {
331  c = c << 1;
332  }
333  return d && (b & c);
334 }
void Pme_init ( )

Definition at line 854 of file ComputePme.C.

855 {
856 #if USE_TOPO_SFC
857  if (CkMyRank() == 0)
858  tmgr_lock = CmiCreateLock();
859 #endif
860 }
static void PmeSlabSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 1934 of file ComputePme.C.

References ComputePmeMgr::sendTransSubset().

Referenced by ComputePmeMgr::sendTrans().

1934  {
1935  ComputePmeMgr *mgr = (ComputePmeMgr *)param;
1936  mgr->sendTransSubset(first, last);
1937 }
void sendTransSubset(int first, int last)
Definition: ComputePme.C:1955
static void PmeSlabSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 2364 of file ComputePme.C.

References ComputePmeMgr::sendUngridSubset().

Referenced by ComputePmeMgr::sendUngrid().

2364  {
2365  ComputePmeMgr *mgr = (ComputePmeMgr *)param;
2366  mgr->sendUngridSubset(first, last);
2367 }
void sendUngridSubset(int first, int last)
Definition: ComputePme.C:2385
static void PmeSlabSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 2178 of file ComputePme.C.

References ComputePmeMgr::sendUntransSubset().

Referenced by ComputePmeMgr::sendUntrans().

2178  {
2179  ComputePmeMgr *mgr = (ComputePmeMgr *)param;
2180  mgr->sendUntransSubset(first, last);
2181 }
void sendUntransSubset(int first, int last)
Definition: ComputePme.C:2210
static void PmeXPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5740 of file ComputePme.C.

References PmeXPencil::send_subset_untrans().

Referenced by PmeXPencil::send_untrans().

5740  {
5741  PmeXPencil *xpencil = (PmeXPencil *)param;
5742  xpencil->send_subset_untrans(first, last);
5743 }
void send_subset_untrans(int fromIdx, int toIdx)
Definition: ComputePme.C:5745
static void PmeXZPencilFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5177 of file ComputePme.C.

Referenced by PmeZPencil::backward_fft(), PmeXPencil::backward_fft(), PmeZPencil::forward_fft(), and PmeXPencil::forward_fft().

5177  {
5178 #ifdef NAMD_FFTW
5179 #ifdef NAMD_FFTW_3
5180  fftwf_plan *plans = (fftwf_plan *)param;
5181  for(int i=first; i<=last; i++) fftwf_execute(plans[i]);
5182 #endif
5183 #endif
5184 }
static void PmeYPencilBackwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5915 of file ComputePme.C.

References PmeYPencil::backward_subset_fft().

Referenced by PmeYPencil::backward_fft().

5915  {
5916  PmeYPencil *ypencil = (PmeYPencil *)param;
5917  ypencil->backward_subset_fft(first, last);
5918 }
void backward_subset_fft(int fromIdx, int toIdx)
Definition: ComputePme.C:5920
static void PmeYPencilForwardFFT ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5406 of file ComputePme.C.

References PmeYPencil::forward_subset_fft().

Referenced by PmeYPencil::forward_fft().

5406  {
5407  PmeYPencil *ypencil = (PmeYPencil *)param;
5408  ypencil->forward_subset_fft(first, last);
5409 }
void forward_subset_fft(int fromIdx, int toIdx)
Definition: ComputePme.C:5410
static void PmeYPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5458 of file ComputePme.C.

References PmeYPencil::send_subset_trans().

Referenced by PmeYPencil::send_trans().

5458  {
5459  PmeYPencil *ypencil = (PmeYPencil *)param;
5460  ypencil->send_subset_trans(first, last);
5461 }
void send_subset_trans(int fromIdx, int toIdx)
Definition: ComputePme.C:5463
static void PmeYPencilSendUntrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5974 of file ComputePme.C.

References PmeYPencil::send_subset_untrans().

Referenced by PmeYPencil::send_untrans().

5974  {
5975  PmeYPencil *ypencil = (PmeYPencil *)param;
5976  ypencil->send_subset_untrans(first, last);
5977 }
void send_subset_untrans(int fromIdx, int toIdx)
Definition: ComputePme.C:5979
static void PmeZPencilSendTrans ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 5242 of file ComputePme.C.

References PmeZPencil::send_subset_trans().

Referenced by PmeZPencil::send_trans().

5242  {
5243  PmeZPencil *zpencil = (PmeZPencil *)param;
5244  zpencil->send_subset_trans(first, last);
5245 }
void send_subset_trans(int fromIdx, int toIdx)
Definition: ComputePme.C:5247
static void PmeZPencilSendUngrid ( int  first,
int  last,
void *  result,
int  paraNum,
void *  param 
)
inline static

Definition at line 6210 of file ComputePme.C.

References PmeZPencil::send_subset_ungrid().

Referenced by PmeZPencil::send_all_ungrid().

6210  {
6211  //to take advantage of the interface which allows 3 user params at most.
6212  //under such situation, no new parameter list needs to be created!! -Chao Mei
6213  PmeZPencil *zpencil = (PmeZPencil *)param;
6214  zpencil->send_subset_ungrid(first, last);
6215 }
void send_subset_ungrid(int fromIdx, int toIdx)
Definition: ComputePme.C:6231

Variable Documentation

__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

char* pencilPMEProcessors

Definition at line 107 of file ComputePme.C.

Referenced by ComputePmeMgr::initialize(), and isPmeProcessor().