00001 /*************************************************************************** 00002 *cr 00003 *cr (C) Copyright 1995-2019 The Board of Trustees of the 00004 *cr University of Illinois 00005 *cr All Rights Reserved 00006 *cr 00007 ***************************************************************************/ 00008 /*************************************************************************** 00009 * RCS INFORMATION: 00010 * 00011 * $RCSfile: CUDAParPrefixOps.h,v $ 00012 * $Author: johns $ $Locker: $ $State: Exp $ 00013 * $Revision: 1.8 $ $Date: 2020/02/26 20:16:56 $ 00014 * 00015 ***************************************************************************/ 00022 #include "ProfileHooks.h" // needed here for GTC profile tests 00023 00024 // force use of either CUB-based back-end implementation instead of 00025 // using Thrust, which is the default. Thrust is shipped with CUDA 00026 // presently, but CUB as-yet, is not. Unless we ship CUB with the 00027 // VMD src, we'll need to retain the ability compile either way 00028 // for a while yet. 00029 #if 0 00030 #define VMDUSECUB 1 00031 #endif 00032 00033 // 00034 // Exclusive prefix sum 00035 // 00036 template <typename T> 00037 long dev_excl_scan_sum_tmpsz(T *in_d, long nitems, T *out_d, T ival); 00038 00039 template <typename T> 00040 void dev_excl_scan_sum(T *in_d, long nitems, T *out_d, 00041 void *scanwork_d, long tsz, T ival); 00042 00043 00044 // 00045 // Inclusive prefix sum 00046 // 00047 template <typename T> 00048 long dev_incl_scan_sum_tmpsz(T *in_d, long nitems, T *out_d); 00049 00050 template <typename T> 00051 void dev_incl_scan_sum(T *in_d, long nitems, T *out_d, 00052 void *scanwork_d, long tsz); 00053