NAMD
CudaNonbondedTables.C
Go to the documentation of this file.
1 #include "charm++.h"
2 #include "NamdTypes.h"
3 #include "ComputeNonbondedUtil.h"
4 #include "LJTable.h"
5 #include "CudaUtils.h"
6 #include "CudaNonbondedTables.h"
7 
8 #ifdef NAMD_CUDA
9 
10 CudaNonbondedTables::CudaNonbondedTables(const int deviceID) : deviceID(deviceID) {
11 
12  vdwCoefTable = NULL;
13  vdwCoefTableWidth = 0;
14  vdwCoefTableTex = 0;
15 
16  forceTableTex = 0;
17  energyTableTex = 0;
18 
19  exclusionTable = NULL;
20  r2_table = NULL;
21  exclusionTableTex = 0;
22  r2_table_tex = 0;
23 
24  modifiedExclusionForceTableTex = 0;
25  modifiedExclusionEnergyTableTex = 0;
26 
27  cudaCheck(cudaSetDevice(deviceID));
28  buildForceAndEnergyTables(4096);
29  buildVdwCoefTable();
30 }
31 
33  cudaCheck(cudaSetDevice(deviceID));
34  if (vdwCoefTable != NULL) deallocate_device<float2>(&vdwCoefTable);
35  if (exclusionTable != NULL) deallocate_device<float4>(&exclusionTable);
36  if (r2_table != NULL) deallocate_device<float>(&r2_table);
37 
38  cudaCheck(cudaFreeArray(forceArray));
39  cudaCheck(cudaFreeArray(energyArray));
40 
41  cudaCheck(cudaDestroyTextureObject(vdwCoefTableTex));
42  cudaCheck(cudaDestroyTextureObject(forceTableTex));
43  cudaCheck(cudaDestroyTextureObject(energyTableTex));
44 
45  cudaCheck(cudaDestroyTextureObject(exclusionTableTex));
46  cudaCheck(cudaDestroyTextureObject(r2_table_tex));
47 }
48 
49 template <typename T>
50 void bindTextureObject(int size, T* h_table, T*& d_table, cudaTextureObject_t& tex, bool update=false) {
51  // Copy to device
52  if ( ! update) {
53  allocate_device<T>(&d_table, size);
54  }
55  else {
56  cudaCheck(cudaDestroyTextureObject(tex));
57  }
58  copy_HtoD_sync<T>(h_table, d_table, size);
59 
60  // Create texture object
61  cudaResourceDesc resDesc;
62  memset(&resDesc, 0, sizeof(resDesc));
63  resDesc.resType = cudaResourceTypeLinear;
64  resDesc.res.linear.devPtr = d_table;
65  resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
66  resDesc.res.linear.desc.x = sizeof(float)*8; // bits per channel
67  if (sizeof(T) >= sizeof(float)*2) resDesc.res.linear.desc.y = sizeof(float)*8; // bits per channel
68  if (sizeof(T) >= sizeof(float)*3) resDesc.res.linear.desc.z = sizeof(float)*8; // bits per channel
69  if (sizeof(T) >= sizeof(float)*4) resDesc.res.linear.desc.w = sizeof(float)*8; // bits per channel
70  resDesc.res.linear.sizeInBytes = size*sizeof(T);
71 
72  cudaTextureDesc texDesc;
73  memset(&texDesc, 0, sizeof(texDesc));
74  texDesc.readMode = cudaReadModeElementType;
75 
76  cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
77 }
78 
79 //
80 // Builds the VdW Lennard-Jones coefficient table. Swiped from ComputeNonbondedCUDA.C
81 // NOTE: Can only be called once
82 //
83 void CudaNonbondedTables::buildVdwCoefTable(bool update/*=false*/) {
84 
85  const LJTable* const ljTable = ComputeNonbondedUtil:: ljTable;
86  const int dim = ljTable->get_table_dim();
87 
88  // round dim up to odd multiple of 16
89  int tsize = (((dim+16+31)/32)*32)-16;
90  if ( tsize < dim ) NAMD_bug("CudaNonbondedTables::buildVdwCoefTable bad tsize");
91 
92  float2 *h_vdwCoefTable = new float2[tsize*tsize];
93  float2 *h_exclusionVdwCoefTable = new float2[tsize*tsize];
94  float2 *row = h_vdwCoefTable;
95  float2 *exclusionRow = h_exclusionVdwCoefTable;
96  for ( int i=0; i<dim; ++i, row += tsize, exclusionRow += tsize ) {
97  for ( int j=0; j<dim; ++j ) {
98  const LJTable::TableEntry *e = ljTable->table_val(i,j);
99  row[j].x = e->A * ComputeNonbondedUtil::scaling;
100  row[j].y = e->B * ComputeNonbondedUtil::scaling;
101  exclusionRow[j].x = ((e+1)->A - e->A)* ComputeNonbondedUtil::scaling;
102  exclusionRow[j].y = ((e+1)->B - e->B)* ComputeNonbondedUtil::scaling;
103  }
104  }
105 
106  vdwCoefTableWidth = tsize;
107 
108  bindTextureObject<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, vdwCoefTableTex, update);
109  bindTextureObject<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, exclusionVdwCoefTableTex, update);
110 
111  delete [] h_vdwCoefTable;
112  delete [] h_exclusionVdwCoefTable;
113 
114  if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
115  CkPrintf("Info: Updated CUDA LJ table with %d x %d elements.\n", dim, dim);
116  }
117 }
118 
119 // Update tables from newer CPU values
121  buildVdwCoefTable(true);
122 }
123 
124 //
125 // Builds force and energy tables. Swiped from ComputeNonbondedCUDA.C
126 //
127 template <typename T>
128 void buildForceAndEnergyTable(const int tableSize, const double* r2list, const BigReal* src_table, const bool flip,
129  const BigReal prefac, const int dst_stride, T* dst_force, T* dst_energy) {
130 
131  const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
132  const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
133  const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
134 
135  union { double f; int32 i[2]; } byte_order_test;
136  byte_order_test.f = 1.0; // should occupy high-order bits only
137  int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
138 
139  for ( int i=1; i<tableSize; ++i ) {
140  double r = ((double) tableSize) / ( (double) i + 0.5 );
141  int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc; // table_i >= 0
142 
143  if ( r > ComputeNonbondedUtil::cutoff ) {
144  dst_force[i*dst_stride] = 0.;
145  dst_energy[i*dst_stride] = 0.;
146  continue;
147  }
148 
149  BigReal diffa = r2list[i] - ComputeNonbondedUtil::r2_table[table_i];
150 
151  BigReal table_a, table_b, table_c, table_d;
152  if (flip) {
153  table_a = src_table[4*table_i+3];
154  table_b = src_table[4*table_i+2];
155  table_c = src_table[4*table_i+1];
156  table_d = src_table[4*table_i];
157  } else {
158  table_a = src_table[4*table_i];
159  table_b = src_table[4*table_i+1];
160  table_c = src_table[4*table_i+2];
161  table_d = src_table[4*table_i+3];
162  }
163 
164  BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
165  dst_force[i*dst_stride] = prefac * 2. * grad;
166  BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
167  dst_energy[i*dst_stride] = prefac * ener;
168  }
169 
170  dst_force[0] = 0.;
171  dst_energy[0] = dst_energy[1*dst_stride];
172 }
173 
174 template <typename T>
175 void bindTextureObject(int tableSize, int tableWidth, T* h_table,
176  cudaArray_t& array, cudaTextureObject_t& tableTex) {
177 
178  cudaChannelFormatDesc desc;
179  memset(&desc, 0, sizeof(desc));
180  desc.x = sizeof(T)*8;
181  if (tableWidth >= 2) desc.y = sizeof(T)*8;
182  if (tableWidth >= 3) desc.z = sizeof(T)*8;
183  if (tableWidth >= 4) desc.w = sizeof(T)*8;
184  desc.f = cudaChannelFormatKindFloat;
185  cudaCheck(cudaMallocArray(&array, &desc, tableSize, 1));
186  cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
187 
188  cudaResourceDesc resDesc;
189  memset(&resDesc, 0, sizeof(resDesc));
190  resDesc.resType = cudaResourceTypeArray;
191  resDesc.res.array.array = array;
192 
193  cudaTextureDesc texDesc;
194  memset(&texDesc, 0, sizeof(texDesc));
195  texDesc.addressMode[0] = cudaAddressModeClamp;
196  texDesc.filterMode = cudaFilterModeLinear;
197  texDesc.normalizedCoords = 1;
198 
199  cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
200 }
201 
202 void CudaNonbondedTables::buildForceAndEnergyTables(int tableSize) {
203 
204  // Build r2list
205  const BigReal r2_delta = ComputeNonbondedUtil:: r2_delta;
206  const int r2_delta_exp = ComputeNonbondedUtil:: r2_delta_exp;
207  const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
208 
209  double* r2list = new double[tableSize]; // double to match cpu code
210  for ( int i=1; i<tableSize; ++i ) {
211  double r = ((double) tableSize) / ( (double) i + 0.5 );
212  r2list[i] = r*r + r2_delta;
213  }
214 
215  // Non-bonded
216  {
217  float4* t = new float4[tableSize];
218  float4* et = new float4[tableSize];
219 
220  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::fast_table, false, 1.0,
221  4, &t[0].x, &et[0].x);
222 
223  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwb_table, false, -1.0,
224  4, &t[0].y, &et[0].y);
225 
226  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwa_table, false, 1.0,
227  4, &t[0].z, &et[0].z);
228 
229  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::scor_table, false, 1.0,
230  4, &t[0].w, &et[0].w);
231 
232  bindTextureObject<float>(tableSize, 4, (float *)t, forceArray, forceTableTex);
233  bindTextureObject<float>(tableSize, 4, (float *)et, energyArray, energyTableTex);
234  delete [] t;
235  delete [] et;
236  }
237 
238  // Modified exclusions
239  {
240  float4* t = new float4[tableSize];
241  float4* et = new float4[tableSize];
242 
243  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::fast_table, false, -1.0,
244  4, &t[0].x, &et[0].x);
245 
246  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwb_table, false, -1.0,
247  4, &t[0].y, &et[0].y);
248 
249  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::vdwa_table, false, 1.0,
250  4, &t[0].z, &et[0].z);
251 
252  buildForceAndEnergyTable<float>(tableSize, r2list, ComputeNonbondedUtil::slow_table, true, -1.0,
253  4, &t[0].w, &et[0].w);
254  // delete [] flip_slow_table;
255 
256  bindTextureObject<float>(tableSize, 4, (float *)t, modifiedExclusionForceArray, modifiedExclusionForceTableTex);
257  bindTextureObject<float>(tableSize, 4, (float *)et, modifiedExclusionEnergyArray, modifiedExclusionEnergyTableTex);
258  delete [] t;
259  delete [] et;
260  }
261 
262  // Exclusions
263  {
264  BigReal* corr_full_table = new BigReal[ComputeNonbondedUtil::table_length*4];
265  for (int i=0;i < ComputeNonbondedUtil::table_length*4;i++) {
267  }
268 
269  float4* h_exclusionTable = new float4[ComputeNonbondedUtil::table_length];
270  float* h_r2_table = new float[ComputeNonbondedUtil::table_length];
271  for (int i=0;i < ComputeNonbondedUtil::table_length;i++) {
272  h_exclusionTable[i].x = 6.0*corr_full_table[4*i + 3];
273  h_exclusionTable[i].y = 4.0*corr_full_table[4*i + 2];
274  h_exclusionTable[i].z = 2.0*corr_full_table[4*i + 1];
275  h_exclusionTable[i].w = 1.0*corr_full_table[4*i + 0];
276  h_r2_table[i] = ComputeNonbondedUtil::r2_table[i];
277  }
278 
279  bindTextureObject<float>(ComputeNonbondedUtil::table_length, h_r2_table, r2_table, r2_table_tex);
280  bindTextureObject<float4>(ComputeNonbondedUtil::table_length, h_exclusionTable, exclusionTable, exclusionTableTex);
281 
282  delete [] h_exclusionTable;
283  delete [] h_r2_table;
284 
285  }
286 
287  if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
288  CkPrintf("Info: Updated CUDA force table with %d elements.\n", tableSize);
289  }
290 
291  delete [] r2list;
292 }
293 
294 #endif // NAMD_CUDA
static BigReal * fast_table
static BigReal * scor_table
short int32
Definition: dumpdcd.c:24
const BigReal A
static BigReal * vdwa_table
int get_table_dim() const
Definition: LJTable.h:44
void bindTextureObject(int size, T *h_table, T *&d_table, cudaTextureObject_t &tex, bool update=false)
static BigReal * full_table
const TableEntry * table_val(unsigned int i, unsigned int j) const
Definition: LJTable.h:35
void buildForceAndEnergyTable(const int tableSize, const double *r2list, const BigReal *src_table, const bool flip, const BigReal prefac, const int dst_stride, T *dst_force, T *dst_energy)
void NAMD_bug(const char *err_msg)
Definition: common.C:123
static BigReal * slow_table
static BigReal * vdwb_table
static const LJTable * ljTable
const BigReal B
static BigReal * corr_table
CudaNonbondedTables(const int deviceID)
#define cudaCheck(stmt)
Definition: CudaUtils.h:79
double BigReal
Definition: common.h:112