NAMD
Functions
CudaUtils.C File Reference
#include <stdio.h>
#include "common.h"
#include "charm++.h"
#include "CudaUtils.h"

Go to the source code of this file.

Functions

void cudaDie (const char *msg, cudaError_t err)
 
void cudaNAMD_bug (const char *msg)
 
void cuda_affinity_initialize ()
 
void clear_device_array_async_T (void *data, const int ndata, cudaStream_t stream, const size_t sizeofT)
 
void clear_device_array_T (void *data, const int ndata, const size_t sizeofT)
 
void allocate_host_T (void **pp, const int len, const size_t sizeofT)
 
void allocate_device_T (void **pp, const int len, const size_t sizeofT)
 
void deallocate_device_T (void **pp)
 
void deallocate_host_T (void **pp)
 
bool reallocate_device_T (void **pp, int *curlen, const int newlen, const float fac, const size_t sizeofT)
 
bool reallocate_host_T (void **pp, int *curlen, const int newlen, const float fac, const unsigned int flag, const size_t sizeofT)
 
void copy_HtoD_async_T (const void *h_array, void *d_array, int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_HtoD_T (const void *h_array, void *d_array, int array_len, const size_t sizeofT)
 
void copy_DtoH_async_T (const void *d_array, void *h_array, const int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_DtoH_T (const void *d_array, void *h_array, const int array_len, const size_t sizeofT)
 
void copy_DtoD_async_T (const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy_DtoD_T (const void *d_src, void *d_dst, const int array_len, const size_t sizeofT)
 
void copy_PeerDtoD_async_T (const int src_dev, const int dst_dev, const void *d_src, void *d_dst, const int array_len, cudaStream_t stream, const size_t sizeofT)
 
void copy3D_HtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
void copy3D_DtoH_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
void copy3D_DtoD_T (void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 
void copy3D_PeerDtoD_T (int src_dev, int dst_dev, void *src_data, void *dst_data, int src_x0, int src_y0, int src_z0, size_t src_xsize, size_t src_ysize, int dst_x0, int dst_y0, int dst_z0, size_t dst_xsize, size_t dst_ysize, size_t width, size_t height, size_t depth, size_t sizeofT, cudaStream_t stream)
 

Function Documentation

void allocate_device_T ( void **  pp,
const int  len,
const size_t  sizeofT 
)

Definition at line 75 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_device().

75  {
76  cudaCheck(cudaMalloc(pp, sizeofT*len));
77 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void allocate_host_T ( void **  pp,
const int  len,
const size_t  sizeofT 
)

Definition at line 65 of file CudaUtils.C.

References cudaCheck.

Referenced by allocate_host().

65  {
66  cudaCheck(cudaMallocHost(pp, sizeofT*len));
67 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void clear_device_array_async_T ( void data,
const int  ndata,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 51 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array().

51  {
52  cudaCheck(cudaMemsetAsync(data, 0, sizeofT*ndata, stream));
53 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void clear_device_array_T ( void data,
const int  ndata,
const size_t  sizeofT 
)

Definition at line 55 of file CudaUtils.C.

References cudaCheck.

Referenced by clear_device_array_sync().

55  {
56  cudaCheck(cudaMemset(data, 0, sizeofT*ndata));
57 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy3D_DtoD_T ( void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 275 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoD().

281  {
282  cudaMemcpy3DParms parms = {0};
283 
284  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
285  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
286 
287  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
288  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
289 
290  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
291  parms.kind = cudaMemcpyDeviceToDevice;
292 
293  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
294 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy3D_DtoH_T ( void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 250 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_DtoH().

256  {
257  cudaMemcpy3DParms parms = {0};
258 
259  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
260  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
261 
262  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
263  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
264 
265  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
266  parms.kind = cudaMemcpyDeviceToHost;
267 
268  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
269 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy3D_HtoD_T ( void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 225 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_HtoD().

231  {
232  cudaMemcpy3DParms parms = {0};
233 
234  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
235  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
236 
237  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
238  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
239 
240  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
241  parms.kind = cudaMemcpyHostToDevice;
242 
243  cudaCheck(cudaMemcpy3DAsync(&parms, stream));
244 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy3D_PeerDtoD_T ( int  src_dev,
int  dst_dev,
void src_data,
void dst_data,
int  src_x0,
int  src_y0,
int  src_z0,
size_t  src_xsize,
size_t  src_ysize,
int  dst_x0,
int  dst_y0,
int  dst_z0,
size_t  dst_xsize,
size_t  dst_ysize,
size_t  width,
size_t  height,
size_t  depth,
size_t  sizeofT,
cudaStream_t  stream 
)

Definition at line 300 of file CudaUtils.C.

References cudaCheck.

Referenced by copy3D_PeerDtoD().

307  {
308  cudaMemcpy3DPeerParms parms = {0};
309 
310  parms.srcDevice = src_dev;
311  parms.dstDevice = dst_dev;
312 
313  parms.srcPos = make_cudaPos(sizeofT*src_x0, src_y0, src_z0);
314  parms.srcPtr = make_cudaPitchedPtr(src_data, sizeofT*src_xsize, src_xsize, src_ysize);
315 
316  parms.dstPos = make_cudaPos(sizeofT*dst_x0, dst_y0, dst_z0);
317  parms.dstPtr = make_cudaPitchedPtr(dst_data, sizeofT*dst_xsize, dst_xsize, dst_ysize);
318 
319  parms.extent = make_cudaExtent(sizeofT*width, height, depth);
320 
321  cudaCheck(cudaMemcpy3DPeerAsync(&parms, stream));
322 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_DtoD_async_T ( const void d_src,
void d_dst,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 202 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD().

203  {
204  cudaCheck(cudaMemcpyAsync(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice, stream));
205 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_DtoD_T ( const void d_src,
void d_dst,
const int  array_len,
const size_t  sizeofT 
)

Definition at line 207 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoD_sync().

207  {
208  cudaCheck(cudaMemcpy(d_dst, d_src, sizeofT*array_len, cudaMemcpyDeviceToDevice));
209 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_DtoH_async_T ( const void d_array,
void h_array,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 189 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH().

190  {
191  cudaCheck(cudaMemcpyAsync(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost, stream));
192 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_DtoH_T ( const void d_array,
void h_array,
const int  array_len,
const size_t  sizeofT 
)

Definition at line 194 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_DtoH_sync().

194  {
195  cudaCheck(cudaMemcpy(h_array, d_array, sizeofT*array_len, cudaMemcpyDeviceToHost));
196 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_HtoD_async_T ( const void h_array,
void d_array,
int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 175 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_HtoD().

176  {
177  cudaCheck(cudaMemcpyAsync(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice, stream));
178 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_HtoD_T ( const void h_array,
void d_array,
int  array_len,
const size_t  sizeofT 
)

Definition at line 180 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_HtoD_sync().

181  {
182  cudaCheck(cudaMemcpy(d_array, h_array, sizeofT*array_len, cudaMemcpyHostToDevice));
183 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void copy_PeerDtoD_async_T ( const int  src_dev,
const int  dst_dev,
const void d_src,
void d_dst,
const int  array_len,
cudaStream_t  stream,
const size_t  sizeofT 
)

Definition at line 215 of file CudaUtils.C.

References cudaCheck.

Referenced by copy_PeerDtoD().

217  {
218  cudaCheck(cudaMemcpyPeerAsync(d_dst, dst_dev, d_src, src_dev, sizeofT*array_len, stream));
219 }
__thread cudaStream_t stream
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void cuda_affinity_initialize ( )

Definition at line 33 of file CudaUtils.C.

Referenced by all_init().

33  {
34  int devcnt = 0;
35  cudaError_t err = cudaGetDeviceCount(&devcnt);
36  if ( devcnt == 1 ) { // only one device so it must be ours
37  int *dummy;
38  if ( err == cudaSuccess ) err = cudaSetDevice(0);
39  if ( err == cudaSuccess ) err = cudaSetDeviceFlags(cudaDeviceMapHost);
40  if ( err == cudaSuccess ) err = cudaMalloc(&dummy, 4);
41  }
42  if ( err != cudaSuccess ) {
43  char host[128];
44  gethostname(host, 128); host[127] = 0;
45  fprintf(stderr,"CUDA initialization error on %s: %s\n", host, cudaGetErrorString(err));
46  }
47 }
void cudaDie ( const char *  msg,
cudaError_t  err 
)

Definition at line 9 of file CudaUtils.C.

References NAMD_die().

Referenced by cuda_check_local_progress(), cuda_check_pme_charges(), cuda_check_pme_forces(), cuda_check_progress(), cuda_check_remote_progress(), DeviceCUDA::initialize(), and read_CUDA_ARCH().

9  {
10  char host[128];
11  gethostname(host, 128); host[127] = 0;
12  char devstr[128] = "";
13  int devnum;
14  if ( cudaGetDevice(&devnum) == cudaSuccess ) {
15  sprintf(devstr, " device %d", devnum);
16  }
17  cudaDeviceProp deviceProp;
18  if ( cudaGetDeviceProperties(&deviceProp, devnum) == cudaSuccess ) {
19  sprintf(devstr, " device %d pci %x:%x:%x", devnum,
20  deviceProp.pciDomainID, deviceProp.pciBusID, deviceProp.pciDeviceID);
21  }
22  char errmsg[1024];
23  if (err == cudaSuccess) {
24  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s)", msg, CkMyPe(), host, devstr);
25  } else {
26  sprintf(errmsg,"CUDA error %s on Pe %d (%s%s): %s", msg, CkMyPe(), host, devstr, cudaGetErrorString(err));
27  }
28  NAMD_die(errmsg);
29 }
void NAMD_die(const char *err_msg)
Definition: common.C:83
void cudaNAMD_bug ( const char *  msg)

Definition at line 31 of file CudaUtils.C.

References NAMD_bug().

Referenced by CudaFFTCompute::backward(), CudaFFTCompute::forward(), gather_force(), and spread_charge().

31 {NAMD_bug(msg);}
void NAMD_bug(const char *err_msg)
Definition: common.C:123
void deallocate_device_T ( void **  pp)

Definition at line 84 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_device().

84  {
85 
86  if (*pp != NULL) {
87  cudaCheck(cudaFree((void *)(*pp)));
88  *pp = NULL;
89  }
90 
91 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
void deallocate_host_T ( void **  pp)

Definition at line 98 of file CudaUtils.C.

References cudaCheck.

Referenced by deallocate_host().

98  {
99 
100  if (*pp != NULL) {
101  cudaCheck(cudaFreeHost((void *)(*pp)));
102  *pp = NULL;
103  }
104 
105 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
bool reallocate_device_T ( void **  pp,
int *  curlen,
const int  newlen,
const float  fac,
const size_t  sizeofT 
)

Definition at line 117 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_device().

117  {
118 
119  if (*pp != NULL && *curlen < newlen) {
120  cudaCheck(cudaFree((void *)(*pp)));
121  *pp = NULL;
122  }
123 
124  if (*pp == NULL) {
125  if (fac > 1.0f) {
126  *curlen = (int)(((double)(newlen))*(double)fac);
127  } else {
128  *curlen = newlen;
129  }
130  cudaCheck(cudaMalloc(pp, sizeofT*(*curlen)));
131  return true;
132  }
133 
134  return false;
135 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
bool reallocate_host_T ( void **  pp,
int *  curlen,
const int  newlen,
const float  fac,
const unsigned int  flag,
const size_t  sizeofT 
)

Definition at line 150 of file CudaUtils.C.

References cudaCheck.

Referenced by reallocate_host().

151  {
152 
153  if (*pp != NULL && *curlen < newlen) {
154  cudaCheck(cudaFreeHost((void *)(*pp)));
155  *pp = NULL;
156  }
157 
158  if (*pp == NULL) {
159  if (fac > 1.0f) {
160  *curlen = (int)(((double)(newlen))*(double)fac);
161  } else {
162  *curlen = newlen;
163  }
164  cudaCheck(cudaHostAlloc(pp, sizeofT*(*curlen), flag));
165  return true;
166  }
167 
168  return false;
169 }
#define cudaCheck(stmt)
Definition: CudaUtils.h:79