5 #include <cuda_runtime.h>
// Fixed integer IDs (90-97) naming individual GPU work phases for tracing.
// Only the numeric values are defined here.
// NOTE(review): presumably these are registered with the tracing framework in
// register_user_events() -- confirm against its definition.
7 #define CUDA_PME_SPREADCHARGE_EVENT 90
8 #define CUDA_PME_GATHERFORCE_EVENT 91
9 #define CUDA_BONDED_KERNEL_EVENT 92
10 #define CUDA_DEBUG_EVENT 93
11 #define CUDA_NONBONDED_KERNEL_EVENT 94
12 #define CUDA_GBIS1_KERNEL_EVENT 95
13 #define CUDA_GBIS2_KERNEL_EVENT 96
14 #define CUDA_GBIS3_KERNEL_EVENT 97
// ID 98 and the macro that emits it as a point event via traceUserEvent().
// The macro expands to a bare call, so the caller supplies the trailing ';'.
16 #define CUDA_EVENT_ID_POLL_REMOTE 98
17 #define CUDA_TRACE_POLL_REMOTE \
18 traceUserEvent(CUDA_EVENT_ID_POLL_REMOTE)
// ID 99 and the macro that emits it as a point event via traceUserEvent().
// The macro expands to a bare call, so the caller supplies the trailing ';'.
19 #define CUDA_EVENT_ID_POLL_LOCAL 99
20 #define CUDA_TRACE_POLL_LOCAL \
21 traceUserEvent(CUDA_EVENT_ID_POLL_LOCAL)
// First ID of the per-device range: the bracket-trace macros use
// CUDA_EVENT_ID_BASE + 2 * dev and CUDA_EVENT_ID_BASE + 2 * dev + 1, so the
// fixed IDs above (90-99) stay below this range.
22 #define CUDA_EVENT_ID_BASE 100
// Emits a bracketed user trace event in the "remote" slot of the current
// CUDA device: event ID = CUDA_EVENT_ID_BASE + 2 * <current device index>.
//   START, END - forwarded unchanged to traceUserBracketEvent().
// If cudaGetDevice() does not return cudaSuccess, no event is emitted, because
// the device index (and therefore the event ID) would be meaningless.
// The local uses a deliberately unusual name so that START/END expressions
// that mention a caller variable called `dev` are not captured by the macro.
#define CUDA_TRACE_REMOTE(START,END) \
  do { \
    int cuda_trace_dev_ = 0; \
    if (cudaGetDevice(&cuda_trace_dev_) == cudaSuccess) { \
      traceUserBracketEvent( \
        CUDA_EVENT_ID_BASE + 2 * cuda_trace_dev_, (START), (END)); \
    } \
  } while (0)
// Emits a bracketed user trace event in the "local" slot of the current
// CUDA device: event ID = CUDA_EVENT_ID_BASE + 2 * <current device index> + 1.
//   START, END - forwarded unchanged to traceUserBracketEvent().
// If cudaGetDevice() does not return cudaSuccess, no event is emitted, because
// the device index (and therefore the event ID) would be meaningless.
// The local uses a deliberately unusual name so that START/END expressions
// that mention a caller variable called `dev` are not captured by the macro.
#define CUDA_TRACE_LOCAL(START,END) \
  do { \
    int cuda_trace_dev_ = 0; \
    if (cudaGetDevice(&cuda_trace_dev_) == cudaSuccess) { \
      traceUserBracketEvent( \
        CUDA_EVENT_ID_BASE + 2 * cuda_trace_dev_ + 1, (START), (END)); \
    } \
  } while (0)
// NOTE(review): the declarations below are isolated members of a class whose
// header is not visible here; the descriptions are inferred from names only.
// Presumably the number of devices assigned to each replica -- confirm.
39 int devicesperreplica;
// Presumably the element count of pesSharingDevice -- confirm against the
// code that fills it.
67 int numPesSharingDevice;
// Presumably an array of PE ranks sharing this PE's device; ownership and
// lifetime are not visible here -- TODO confirm.
69 int *pesSharingDevice;
// Pointer to CUDA device property record(s); whether this is one record or an
// array indexed by device ID is not visible here -- TODO confirm.
77 cudaDeviceProp* deviceProps;
// Declared here, defined elsewhere. NOTE(review): presumably registers the
// trace event IDs defined in this header -- confirm.
79 void register_user_events();
117 #endif // DEVICECUDA_H
void setMergeGrids(const int val)
void setGpuIsMine(const int val)
int getMasterPeForDeviceID(int deviceID)
bool device_shared_with_pe(int pe)
int getPesSharingDevice(const int i)
int getNextPeSharingGpu()
int getDeviceIDbyRank(int rank)
bool one_device_per_node()
int getDeviceIDforPe(int pe)
int getNumPesSharingDevice()