NAMD
Classes | Macros | Functions | Variables
ComputeNonbondedCUDA.C File Reference
#include "common.h"
#include "charm++.h"
#include <cuda_runtime.h>
#include <cuda.h>
#include "WorkDistrib.h"
#include "ComputeMgr.h"
#include "ProxyMgr.h"
#include "ComputeNonbondedCUDAKernel.h"
#include "ComputeNonbondedCUDA.h"
#include "LJTable.h"
#include "ObjectArena.h"
#include "SortAtoms.h"
#include "Priorities.h"
#include <algorithm>
#include "NamdTypes.h"
#include "DeviceCUDA.h"
#include "CudaUtils.h"

Go to the source code of this file.

Classes

struct  exlist_sortop
 
struct  pid_sortop_reverse_priority
 
struct  cr_sortop_distance
 
struct  cr_sortop_reverse_priority
 

Macros

#define SET_EXCL(EXCL, BASE, DIFF)   (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))
 
#define CUDA_POLL(FN, ARG)   CcdCallFnAfter(FN,ARG,0.1)
 
#define GBISP(...)
 
#define count_limit   1000000
 

Functions

void cuda_errcheck (const char *msg)
 
static bool sortop_bitreverse (int a, int b)
 
void send_build_cuda_force_table ()
 
void build_cuda_force_table ()
 
void build_cuda_exclusions ()
 
void register_cuda_compute_self (ComputeID c, PatchID pid)
 
void register_cuda_compute_pair (ComputeID c, PatchID pid[], int t[])
 
void unregister_cuda_compute (ComputeID c)
 
void init_arrays ()
 
void CcdCallBacksReset (void *ignored, double curWallTime)
 
void cuda_check_progress (void *arg, double walltime)
 
void cuda_check_remote_progress (void *arg, double walltime)
 
void cuda_check_local_progress (void *arg, double walltime)
 
void cuda_check_remote_calc (void *arg, double walltime)
 
void cuda_check_local_calc (void *arg, double walltime)
 

Variables

__thread int max_grid_size
 
__thread cudaStream_t stream
 
__thread cudaStream_t stream2
 
__thread DeviceCUDA * deviceCUDA
 
static __thread
ComputeNonbondedCUDA * 
cudaCompute = 0
 
static __thread ComputeMgr * computeMgr = 0
 
static __thread int2 * exclusionsByAtom
 
static __thread cudaEvent_t start_calc
 
static __thread cudaEvent_t end_remote_download
 
static __thread cudaEvent_t end_local_download
 
static __thread ResizeArray
< patch_pair > * 
patch_pairs_ptr
 
static __thread ResizeArray
< int > * 
patch_pair_num_ptr
 
static __thread int atom_params_size
 
static __thread atom_param * atom_params
 
static __thread int vdw_types_size
 
static __thread int * vdw_types
 
static __thread int dummy_size
 
static __thread float * dummy_dev
 
static __thread int force_ready_queue_size
 
static __thread int * force_ready_queue
 
static __thread int force_ready_queue_len
 
static __thread int force_ready_queue_next
 
static __thread int block_order_size
 
static __thread int * block_order
 
static __thread int num_atoms
 
static __thread int num_local_atoms
 
static __thread int num_remote_atoms
 
static __thread int virials_size
 
static __thread float * virials
 
static __thread int num_virials
 
static __thread float * slow_virials
 
static __thread int energy_gbis_size
 
static __thread float * energy_gbis
 
static __thread int intRad0H_size
 
static __thread float * intRad0H
 
static __thread int intRadSH_size
 
static __thread float * intRadSH
 
static __thread int bornRadH_size
 
static __thread float * bornRadH
 
static __thread int dHdrPrefixH_size
 
static __thread float * dHdrPrefixH
 
static __thread int cuda_timer_count
 
static __thread double cuda_timer_total
 
static __thread double kernel_time
 
static __thread double remote_submit_time
 
static __thread double local_submit_time
 
static __thread int check_count
 
static __thread int check_remote_count
 
static __thread int check_local_count
 
static __thread int kernel_launch_state = 0
 

Macro Definition Documentation

#define count_limit   1000000
#define CUDA_POLL (   FN,
  ARG 
)    CcdCallFnAfter(FN,ARG,0.1)
#define GBISP (   ...)
#define SET_EXCL (   EXCL,
  BASE,
  DIFF 
)    (EXCL)[((BASE)+(DIFF))>>5] |= (1<<(((BASE)+(DIFF))&31))

Function Documentation

void build_cuda_exclusions ( )

Definition at line 248 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::build_exclusions(), deviceCUDA, and DeviceCUDA::getMasterPe().

Referenced by ComputeMgr::recvBuildCudaExclusions().

248  {
249  if ( deviceCUDA->getMasterPe() != CkMyPe() ) return;
251 }
int getMasterPe()
Definition: DeviceCUDA.h:100
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
void build_cuda_force_table ( )
void CcdCallBacksReset ( void * ignored,
double  curWallTime 
)
void cuda_check_local_calc ( void * arg,
double  walltime 
)

Definition at line 1618 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_local_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

1618  {
1619  // in theory we only need end_local_calc, but overlap isn't reliable
1620  // if ( cudaEventQuery(end_local_calc) == cudaSuccess ) {
1621  if ( cudaEventQuery(end_local_download) == cudaSuccess ) {
1622 // CkPrintf("Pe %d yielding to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
1624 // CkPrintf("Pe %d yielded to %d after local calc\n", CkMyPe(), next_pe_sharing_gpu);
1625  } else {
1626  CcdCallBacksReset(0,walltime); // fix Charm++
1628  }
1629 }
void sendYieldDevice(int pe)
Definition: ComputeMgr.C:1434
static __thread ComputeMgr * computeMgr
void CcdCallBacksReset(void *ignored, double curWallTime)
static __thread cudaEvent_t end_local_download
void cuda_check_local_calc(void *arg, double walltime)
#define CUDA_POLL(FN, ARG)
int getNextPeSharingGpu()
Definition: DeviceCUDA.h:99
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
void cuda_check_local_progress ( void * arg,
double  walltime 
)

Definition at line 995 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_LOCAL, cudaDie(), end_local_download, kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), and NAMD_bug().

Referenced by ComputeNonbondedCUDA::doWork(), and ComputeNonbondedCUDA::recvYieldDevice().

995  {
996 
998  cudaError_t err = cudaEventQuery(end_local_download);
999  if ( err == cudaSuccess ) {
1001  kernel_time = walltime - kernel_time;
1002  check_local_count = 0;
1003  cuda_errcheck("at cuda local stream completed");
1005  } else if ( err != cudaErrorNotReady ) {
1006  char errmsg[256];
1007  sprintf(errmsg,"in cuda_check_local_progress after polling %d times over %f s on step %d",
1009  ((ComputeNonbondedCUDA *) arg)->step);
1010  cudaDie(errmsg,err);
1011  } else if ( ++check_local_count >= count_limit ) {
1012  char errmsg[256];
1013  sprintf(errmsg,"cuda_check_local_progress polled %d times over %f s on step %d",
1015  ((ComputeNonbondedCUDA *) arg)->step);
1016  cudaDie(errmsg,err);
1017  } else if ( check_remote_count ) {
1018  NAMD_bug("nonzero check_remote_count in cuda_check_local_progress");
1019  } else {
1020  CcdCallBacksReset(0,walltime); // fix Charm++
1022  }
1023 }
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:2896
static __thread int check_remote_count
void cuda_check_local_progress(void *arg, double walltime)
#define count_limit
void CcdCallBacksReset(void *ignored, double curWallTime)
static __thread cudaEvent_t end_local_download
#define CUDA_POLL(FN, ARG)
static __thread double kernel_time
void NAMD_bug(const char *err_msg)
Definition: common.C:123
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
static __thread double local_submit_time
#define CUDA_TRACE_POLL_LOCAL
Definition: DeviceCUDA.h:20
void cuda_errcheck(const char *msg)
#define CUDA_TRACE_LOCAL(START, END)
Definition: DeviceCUDA.h:26
static __thread int check_local_count
void cuda_check_progress ( void * arg,
double  walltime 
)

Definition at line 927 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_count, count_limit, CUDA_POLL, CUDA_TRACE_LOCAL, CUDA_TRACE_POLL_REMOTE, cudaDie(), force_ready_queue, force_ready_queue_len, force_ready_queue_next, kernel_time, and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

927  {
929 
930  int flindex;
931  int poll_again = 1;
932  while ( -1 != (flindex = force_ready_queue[force_ready_queue_next]) ) {
933  // CkPrintf("Pe %d forces ready %d is index %d at %lf\n",
934  // CkMyPe(), force_ready_queue_next, flindex, walltime);
937  check_count = 0;
938  if ( force_ready_queue_next == force_ready_queue_len ) {
939  poll_again = 0;
940  CUDA_TRACE_LOCAL(kernel_time,walltime);
941  kernel_time = walltime - kernel_time;
942  // need to guarantee this finishes before the last patch message!
943  ((ComputeNonbondedCUDA *) arg)->workStarted = 0;
944  ((ComputeNonbondedCUDA *) arg)->finishReductions();
945  }
946  ((ComputeNonbondedCUDA *) arg)->messageFinishPatch(flindex);
947  if ( force_ready_queue_next == force_ready_queue_len ) break;
948  }
949  if ( ++check_count >= count_limit ) {
950  char errmsg[256];
951  sprintf(errmsg,"cuda_check_progress polled %d times over %f s on step %d",
952  check_count, walltime - remote_submit_time,
953  ((ComputeNonbondedCUDA *) arg)->step);
954  cudaDie(errmsg,cudaSuccess);
955  }
956  if ( poll_again ) {
957  CcdCallBacksReset(0,walltime); // fix Charm++
959  }
960 }
static __thread int check_count
void cuda_check_progress(void *arg, double walltime)
#define CUDA_TRACE_POLL_REMOTE
Definition: DeviceCUDA.h:17
#define count_limit
void CcdCallBacksReset(void *ignored, double curWallTime)
static __thread int force_ready_queue_next
#define CUDA_POLL(FN, ARG)
static __thread double kernel_time
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
static __thread double remote_submit_time
static __thread int force_ready_queue_len
static __thread int * force_ready_queue
#define CUDA_TRACE_LOCAL(START, END)
Definition: DeviceCUDA.h:26
void cuda_check_remote_calc ( void * arg,
double  walltime 
)

Definition at line 1605 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), computeMgr, CUDA_POLL, deviceCUDA, end_remote_download, DeviceCUDA::getNextPeSharingGpu(), and ComputeMgr::sendYieldDevice().

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

1605  {
1606  // in theory we only need end_remote_calc, but overlap isn't reliable
1607  // if ( cudaEventQuery(end_remote_calc) == cudaSuccess ) {
1608  if ( cudaEventQuery(end_remote_download) == cudaSuccess ) {
1609 // CkPrintf("Pe %d yielding to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
1611 // CkPrintf("Pe %d yielded to %d after remote calc\n", CkMyPe(), next_pe_sharing_gpu);
1612  } else {
1613  CcdCallBacksReset(0,walltime); // fix Charm++
1615  }
1616 }
void sendYieldDevice(int pe)
Definition: ComputeMgr.C:1434
static __thread cudaEvent_t end_remote_download
static __thread ComputeMgr * computeMgr
void CcdCallBacksReset(void *ignored, double curWallTime)
#define CUDA_POLL(FN, ARG)
int getNextPeSharingGpu()
Definition: DeviceCUDA.h:99
void cuda_check_remote_calc(void *arg, double walltime)
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
void cuda_check_remote_progress ( void * arg,
double  walltime 
)

Definition at line 962 of file ComputeNonbondedCUDA.C.

References CcdCallBacksReset(), check_local_count, check_remote_count, count_limit, cuda_errcheck(), CUDA_POLL, CUDA_TRACE_POLL_REMOTE, CUDA_TRACE_REMOTE, cudaDie(), deviceCUDA, end_remote_download, DeviceCUDA::getMergeGrids(), kernel_time, local_submit_time, WorkDistrib::messageFinishCUDA(), NAMD_bug(), and remote_submit_time.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

962  {
963 
965  cudaError_t err = cudaEventQuery(end_remote_download);
966  if ( err == cudaSuccess ) {
967  local_submit_time = walltime;
969  if ( deviceCUDA->getMergeGrids() ) { // no local
971  }
972  check_remote_count = 0;
973  cuda_errcheck("at cuda remote stream completed");
975  } else if ( err != cudaErrorNotReady ) {
976  char errmsg[256];
977  sprintf(errmsg,"in cuda_check_remote_progress after polling %d times over %f s on step %d",
979  ((ComputeNonbondedCUDA *) arg)->step);
980  cudaDie(errmsg,err);
981  } else if ( ++check_remote_count >= count_limit ) {
982  char errmsg[256];
983  sprintf(errmsg,"cuda_check_remote_progress polled %d times over %f s on step %d",
985  ((ComputeNonbondedCUDA *) arg)->step);
986  cudaDie(errmsg,err);
987  } else if ( check_local_count ) {
988  NAMD_bug("nonzero check_local_count in cuda_check_remote_progress");
989  } else {
990  CcdCallBacksReset(0,walltime); // fix Charm++
992  }
993 }
static void messageFinishCUDA(Compute *)
Definition: WorkDistrib.C:2896
static __thread int check_remote_count
#define CUDA_TRACE_POLL_REMOTE
Definition: DeviceCUDA.h:17
static __thread cudaEvent_t end_remote_download
#define CUDA_TRACE_REMOTE(START, END)
Definition: DeviceCUDA.h:23
int getMergeGrids()
Definition: DeviceCUDA.h:95
#define count_limit
void CcdCallBacksReset(void *ignored, double curWallTime)
#define CUDA_POLL(FN, ARG)
static __thread double kernel_time
void NAMD_bug(const char *err_msg)
Definition: common.C:123
void cudaDie(const char *msg, cudaError_t err=cudaSuccess)
Definition: CudaUtils.C:9
static __thread double remote_submit_time
static __thread double local_submit_time
void cuda_check_remote_progress(void *arg, double walltime)
void cuda_errcheck(const char *msg)
__thread DeviceCUDA * deviceCUDA
Definition: DeviceCUDA.C:18
static __thread int check_local_count
void cuda_errcheck ( const char *  msg)

Definition at line 41 of file ComputeNonbondedCUDA.C.

References NAMD_die().

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA(), ComputePmeMgr::ComputePmeMgr(), cuda_bind_atom_params(), cuda_bind_atoms(), cuda_bind_exclusions(), cuda_bind_force_table(), cuda_bind_forces(), cuda_bind_GBIS_bornRad(), cuda_bind_GBIS_dEdaSum(), cuda_bind_GBIS_dHdrPrefix(), cuda_bind_GBIS_energy(), cuda_bind_GBIS_intRad(), cuda_bind_GBIS_psiSum(), cuda_bind_lj_table(), cuda_bind_patch_pairs(), cuda_bind_vdw_types(), cuda_bind_virials(), cuda_check_local_progress(), cuda_check_remote_progress(), cuda_GBIS_P1(), cuda_GBIS_P2(), cuda_GBIS_P3(), cuda_init(), cuda_nonbonded_forces(), ComputePme::doWork(), ComputeNonbondedCUDA::doWork(), ComputeNonbondedCUDA::finishReductions(), ComputePmeMgr::initialize(), ComputePmeMgr::initialize_computes(), and ComputePmeMgr::ungridCalc().

41  {
42  cudaError_t err;
43  if ((err = cudaGetLastError()) != cudaSuccess) {
44  char host[128];
45  gethostname(host, 128); host[127] = 0;
46  char devstr[128] = "";
47  int devnum;
48  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
49  sprintf(devstr, " device %d", devnum);
50  }
51  cudaDeviceProp deviceProp;
52  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
53  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
54  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
55  }
56  char errmsg[1024];
57  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
58  NAMD_die(errmsg);
59  }
60 }
void NAMD_die(const char *err_msg)
Definition: common.C:83
void init_arrays ( )

Definition at line 886 of file ComputeNonbondedCUDA.C.

References atom_params, atom_params_size, block_order, block_order_size, bornRadH, bornRadH_size, dHdrPrefixH, dHdrPrefixH_size, dummy_dev, dummy_size, energy_gbis, energy_gbis_size, force_ready_queue, force_ready_queue_len, force_ready_queue_next, force_ready_queue_size, intRad0H, intRad0H_size, intRadSH, intRadSH_size, num_atoms, num_local_atoms, num_remote_atoms, num_virials, vdw_types, vdw_types_size, virials, and virials_size.

Referenced by ComputeNonbondedCUDA::ComputeNonbondedCUDA().

886  {
887 
888  atom_params_size = 0;
889  atom_params = NULL;
890 
891  vdw_types_size = 0;
892  vdw_types = NULL;
893 
894  dummy_size = 0;
895  dummy_dev = NULL;
896 
898  force_ready_queue = NULL;
901 
902  block_order_size = 0;
903  block_order = NULL;
904 
905  num_atoms = 0;
906  num_local_atoms = 0;
907  num_remote_atoms = 0;
908 
909  virials_size = 0;
910  virials = NULL;
911  num_virials = 0;
912 
913  energy_gbis_size = 0;
914  energy_gbis = NULL;
915 
916  intRad0H_size = 0;
917  intRad0H = NULL;
918  intRadSH_size = 0;
919  intRadSH = NULL;
920  bornRadH_size = 0;
921  bornRadH = NULL;
922  dHdrPrefixH_size = 0;
923  dHdrPrefixH = NULL;
924 
925 }
static __thread int * block_order
static __thread int intRadSH_size
static __thread int dummy_size
static __thread float * bornRadH
static __thread float * dHdrPrefixH
static __thread int dHdrPrefixH_size
static __thread int force_ready_queue_next
static __thread int intRad0H_size
static __thread float * intRadSH
static __thread int force_ready_queue_size
static __thread int num_remote_atoms
static __thread int virials_size
static __thread float * virials
static __thread int bornRadH_size
static __thread int num_virials
static __thread int vdw_types_size
static __thread int force_ready_queue_len
static __thread int energy_gbis_size
static __thread float * dummy_dev
static __thread float * energy_gbis
static __thread atom_param * atom_params
static __thread int block_order_size
static __thread int * vdw_types
static __thread int atom_params_size
static __thread int * force_ready_queue
static __thread int num_atoms
static __thread int num_local_atoms
static __thread float * intRad0H
void register_cuda_compute_pair ( ComputeID  c,
PatchID  pid[],
int  t[] 
)

Definition at line 390 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, PatchMap::center(), cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchMap, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, ComputeNonbondedCUDA::requirePatch(), Vector::x, Vector::y, and Vector::z.

Referenced by ComputeNonbondedPair::initialize().

390  {
391 
392  if ( ! cudaCompute ) NAMD_bug("register_pair called early");
393 
394  cudaCompute->requirePatch(pid[0]);
395  cudaCompute->requirePatch(pid[1]);
396 
398  cr.c = c;
399  cr.pid[0] = pid[0]; cr.pid[1] = pid[1];
400 
401  int t1 = t[0];
402  int t2 = t[1];
403  Vector offset = cudaCompute->patchMap->center(pid[0])
404  - cudaCompute->patchMap->center(pid[1]);
405  offset.x += (t1%3-1) - (t2%3-1);
406  offset.y += ((t1/3)%3-1) - ((t2/3)%3-1);
407  offset.z += (t1/9-1) - (t2/9-1);
408  cr.offset = offset;
409 
410  if ( cudaCompute->patchRecords[pid[0]].isLocal ) {
412  } else {
414  }
415 }
Definition: Vector.h:64
BigReal z
Definition: Vector.h:66
void NAMD_bug(const char *err_msg)
Definition: common.C:123
BigReal x
Definition: Vector.h:66
ResizeArray< compute_record > localComputeRecords
ResizeArray< compute_record > remoteComputeRecords
ScaledPosition center(int pid) const
Definition: PatchMap.h:99
BigReal y
Definition: Vector.h:66
ResizeArray< patch_record > patchRecords
static __thread ComputeNonbondedCUDA * cudaCompute
void register_cuda_compute_self ( ComputeID  c,
PatchID  pid 
)

Definition at line 373 of file ComputeNonbondedCUDA.C.

References ComputeNonbondedCUDA::compute_record::c, cudaCompute, ComputeNonbondedCUDA::localComputeRecords, NAMD_bug(), ComputeNonbondedCUDA::compute_record::offset, ComputeNonbondedCUDA::patchRecords, ComputeNonbondedCUDA::compute_record::pid, ComputeNonbondedCUDA::remoteComputeRecords, and ComputeNonbondedCUDA::requirePatch().

Referenced by ComputeNonbondedSelf::initialize().

373  {
374 
375  if ( ! cudaCompute ) NAMD_bug("register_self called early");
376 
378 
380  cr.c = c;
381  cr.pid[0] = pid; cr.pid[1] = pid;
382  cr.offset = 0.;
383  if ( cudaCompute->patchRecords[pid].isLocal ) {
385  } else {
387  }
388 }
void NAMD_bug(const char *err_msg)
Definition: common.C:123
ResizeArray< compute_record > localComputeRecords
ResizeArray< compute_record > remoteComputeRecords
ResizeArray< patch_record > patchRecords
static __thread ComputeNonbondedCUDA * cudaCompute
void send_build_cuda_force_table ( )

Copyright (c) 1995, 1996, 1997, 1998, 1999, 2000 by The Board of Trustees of the University of Illinois. All rights reserved.

Definition at line 73 of file ComputeNonbondedCUDA.C.

References computeMgr, and ComputeMgr::sendBuildCudaForceTable().

Referenced by ComputeNonbondedUtil::select().

73  {
75 }
void sendBuildCudaForceTable()
Definition: ComputeMgr.C:1467
static __thread ComputeMgr * computeMgr
static bool sortop_bitreverse ( int  a,
int  b 
)
inline static

Definition at line 62 of file ComputeNonbondedCUDA.C.

Referenced by cr_sortop_reverse_priority::pid_compare_priority().

62  {
63  if ( a == b ) return 0;
64  for ( int bit = 1; bit; bit *= 2 ) {
65  if ( (a&bit) != (b&bit) ) return ((a&bit) < (b&bit));
66  }
67  return 0;
68 }
void unregister_cuda_compute ( ComputeID  c)

Definition at line 417 of file ComputeNonbondedCUDA.C.

References NAMD_bug().

417  { // static
418 
419  NAMD_bug("unregister_compute unimplemented");
420 
421 }
void NAMD_bug(const char *err_msg)
Definition: common.C:123

Variable Documentation

__thread atom_param* atom_params
static
__thread int atom_params_size
static

Definition at line 825 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int* block_order
static
__thread int block_order_size
static

Definition at line 839 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* bornRadH
static
__thread int bornRadH_size
static

Definition at line 860 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int check_count
static

Definition at line 882 of file ComputeNonbondedCUDA.C.

Referenced by cuda_check_progress().

__thread int check_local_count
static
__thread int check_remote_count
static
__thread ComputeMgr* computeMgr = 0
static
__thread int cuda_timer_count
static

Definition at line 865 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::finishReductions().

__thread double cuda_timer_total
static
__thread ComputeNonbondedCUDA* cudaCompute = 0
static
__thread DeviceCUDA* deviceCUDA

Definition at line 18 of file DeviceCUDA.C.

__thread float* dHdrPrefixH
static
__thread int dHdrPrefixH_size
static

Definition at line 862 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* dummy_dev
static
__thread int dummy_size
static
__thread cudaEvent_t end_local_download
static
__thread cudaEvent_t end_remote_download
static
__thread float* energy_gbis
static
__thread int energy_gbis_size
static

Definition at line 852 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int2* exclusionsByAtom
static
__thread int* force_ready_queue
static
__thread int force_ready_queue_len
static
__thread int force_ready_queue_next
static
__thread int force_ready_queue_size
static

Definition at line 834 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRad0H
static
__thread int intRad0H_size
static

Definition at line 856 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* intRadSH
static
__thread int intRadSH_size
static

Definition at line 858 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int kernel_launch_state = 0
static

Definition at line 1043 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::recvYieldDevice().

__thread double kernel_time
static
__thread double local_submit_time
static
__thread int max_grid_size
__thread int num_atoms
static
__thread int num_local_atoms
static

Definition at line 843 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_remote_atoms
static

Definition at line 844 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread int num_virials
static
__thread ResizeArray<int>* patch_pair_num_ptr
static
__thread ResizeArray<patch_pair>* patch_pairs_ptr
static
__thread double remote_submit_time
static
__thread float* slow_virials
static
__thread cudaEvent_t start_calc
static
__thread cudaStream_t stream
__thread cudaStream_t stream2
__thread int* vdw_types
static
__thread int vdw_types_size
static

Definition at line 828 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().

__thread float* virials
static
__thread int virials_size
static

Definition at line 846 of file ComputeNonbondedCUDA.C.

Referenced by ComputeNonbondedCUDA::doWork(), and init_arrays().