13 vdwCoefTableWidth = 0;
19 exclusionTable = NULL;
21 exclusionTableTex = 0;
24 modifiedExclusionForceTableTex = 0;
25 modifiedExclusionEnergyTableTex = 0;
28 buildForceAndEnergyTables(4096);
34 if (vdwCoefTable != NULL) deallocate_device<float2>(&vdwCoefTable);
35 if (exclusionTable != NULL) deallocate_device<float4>(&exclusionTable);
36 if (r2_table != NULL) deallocate_device<float>(&r2_table);
41 cudaCheck(cudaDestroyTextureObject(vdwCoefTableTex));
42 cudaCheck(cudaDestroyTextureObject(forceTableTex));
43 cudaCheck(cudaDestroyTextureObject(energyTableTex));
45 cudaCheck(cudaDestroyTextureObject(exclusionTableTex));
46 cudaCheck(cudaDestroyTextureObject(r2_table_tex));
50 void bindTextureObject(
int size, T* h_table, T*& d_table, cudaTextureObject_t& tex,
bool update=
false) {
53 allocate_device<T>(&d_table, size);
58 copy_HtoD_sync<T>(h_table, d_table, size);
61 cudaResourceDesc resDesc;
62 memset(&resDesc, 0,
sizeof(resDesc));
63 resDesc.resType = cudaResourceTypeLinear;
64 resDesc.res.linear.devPtr = d_table;
65 resDesc.res.linear.desc.f = cudaChannelFormatKindFloat;
66 resDesc.res.linear.desc.x =
sizeof(float)*8;
67 if (
sizeof(T) >=
sizeof(float)*2) resDesc.res.linear.desc.y =
sizeof(float)*8;
68 if (
sizeof(T) >=
sizeof(float)*3) resDesc.res.linear.desc.z =
sizeof(float)*8;
69 if (
sizeof(T) >=
sizeof(float)*4) resDesc.res.linear.desc.w =
sizeof(float)*8;
70 resDesc.res.linear.sizeInBytes = size*
sizeof(T);
72 cudaTextureDesc texDesc;
73 memset(&texDesc, 0,
sizeof(texDesc));
74 texDesc.readMode = cudaReadModeElementType;
76 cudaCheck(cudaCreateTextureObject(&tex, &resDesc, &texDesc, NULL));
83 void CudaNonbondedTables::buildVdwCoefTable(
bool update) {
89 int tsize = (((dim+16+31)/32)*32)-16;
90 if ( tsize < dim )
NAMD_bug(
"CudaNonbondedTables::buildVdwCoefTable bad tsize");
93 float2 *h_exclusionVdwCoefTable =
new float2[tsize*tsize];
94 float2 *row = h_vdwCoefTable;
95 float2 *exclusionRow = h_exclusionVdwCoefTable;
96 for (
int i=0; i<dim; ++i, row += tsize, exclusionRow += tsize ) {
97 for (
int j=0; j<dim; ++j ) {
106 vdwCoefTableWidth = tsize;
108 bindTextureObject<float2>(tsize*tsize, h_vdwCoefTable, vdwCoefTable, vdwCoefTableTex, update);
109 bindTextureObject<float2>(tsize*tsize, h_exclusionVdwCoefTable, exclusionVdwCoefTable, exclusionVdwCoefTableTex, update);
111 delete [] h_vdwCoefTable;
112 delete [] h_exclusionVdwCoefTable;
114 if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
115 CkPrintf(
"Info: Updated CUDA LJ table with %d x %d elements.\n", dim, dim);
121 buildVdwCoefTable(
true);
127 template <
typename T>
129 const BigReal prefac,
const int dst_stride, T* dst_force, T* dst_energy) {
133 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
135 union {
double f;
int32 i[2]; } byte_order_test;
136 byte_order_test.f = 1.0;
137 int32 *r2iilist = (
int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
139 for (
int i=1; i<tableSize; ++i ) {
140 double r = ((double) tableSize) / ( (double) i + 0.5 );
141 int table_i = (r2iilist[2*i] >> 14) + r2_delta_expc;
144 dst_force[i*dst_stride] = 0.;
145 dst_energy[i*dst_stride] = 0.;
151 BigReal table_a, table_b, table_c, table_d;
153 table_a = src_table[4*table_i+3];
154 table_b = src_table[4*table_i+2];
155 table_c = src_table[4*table_i+1];
156 table_d = src_table[4*table_i];
158 table_a = src_table[4*table_i];
159 table_b = src_table[4*table_i+1];
160 table_c = src_table[4*table_i+2];
161 table_d = src_table[4*table_i+3];
164 BigReal grad = ( 3. * table_d * diffa + 2. * table_c ) * diffa + table_b;
165 dst_force[i*dst_stride] = prefac * 2. * grad;
166 BigReal ener = table_a + diffa * ( ( table_d * diffa + table_c ) * diffa + table_b);
167 dst_energy[i*dst_stride] = prefac * ener;
171 dst_energy[0] = dst_energy[1*dst_stride];
174 template <
typename T>
176 cudaArray_t& array, cudaTextureObject_t& tableTex) {
178 cudaChannelFormatDesc desc;
179 memset(&desc, 0,
sizeof(desc));
180 desc.x =
sizeof(T)*8;
181 if (tableWidth >= 2) desc.y =
sizeof(T)*8;
182 if (tableWidth >= 3) desc.z =
sizeof(T)*8;
183 if (tableWidth >= 4) desc.w =
sizeof(T)*8;
184 desc.f = cudaChannelFormatKindFloat;
185 cudaCheck(cudaMallocArray(&array, &desc, tableSize, 1));
186 cudaCheck(cudaMemcpyToArray(array, 0, 0, h_table, tableSize*
sizeof(T)*tableWidth, cudaMemcpyHostToDevice));
188 cudaResourceDesc resDesc;
189 memset(&resDesc, 0,
sizeof(resDesc));
190 resDesc.resType = cudaResourceTypeArray;
191 resDesc.res.array.array = array;
193 cudaTextureDesc texDesc;
194 memset(&texDesc, 0,
sizeof(texDesc));
195 texDesc.addressMode[0] = cudaAddressModeClamp;
196 texDesc.filterMode = cudaFilterModeLinear;
197 texDesc.normalizedCoords = 1;
199 cudaCheck(cudaCreateTextureObject(&tableTex, &resDesc, &texDesc, NULL));
202 void CudaNonbondedTables::buildForceAndEnergyTables(
int tableSize) {
207 const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
209 double* r2list =
new double[tableSize];
210 for (
int i=1; i<tableSize; ++i ) {
211 double r = ((double) tableSize) / ( (double) i + 0.5 );
212 r2list[i] = r*r + r2_delta;
217 float4* t =
new float4[tableSize];
218 float4* et =
new float4[tableSize];
221 4, &t[0].x, &et[0].x);
224 4, &t[0].y, &et[0].y);
227 4, &t[0].z, &et[0].z);
230 4, &t[0].w, &et[0].w);
232 bindTextureObject<float>(tableSize, 4, (
float *)t, forceArray, forceTableTex);
233 bindTextureObject<float>(tableSize, 4, (
float *)et, energyArray, energyTableTex);
240 float4* t =
new float4[tableSize];
241 float4* et =
new float4[tableSize];
244 4, &t[0].x, &et[0].x);
247 4, &t[0].y, &et[0].y);
250 4, &t[0].z, &et[0].z);
253 4, &t[0].w, &et[0].w);
256 bindTextureObject<float>(tableSize, 4, (
float *)t, modifiedExclusionForceArray, modifiedExclusionForceTableTex);
257 bindTextureObject<float>(tableSize, 4, (
float *)et, modifiedExclusionEnergyArray, modifiedExclusionEnergyTableTex);
272 h_exclusionTable[i].x = 6.0*corr_full_table[4*i + 3];
273 h_exclusionTable[i].y = 4.0*corr_full_table[4*i + 2];
274 h_exclusionTable[i].z = 2.0*corr_full_table[4*i + 1];
275 h_exclusionTable[i].w = 1.0*corr_full_table[4*i + 0];
282 delete [] h_exclusionTable;
283 delete [] h_r2_table;
287 if ( ! CmiPhysicalNodeID(CkMyPe()) ) {
288 CkPrintf(
"Info: Updated CUDA force table with %d elements.\n", tableSize);
static BigReal * fast_table
static BigReal * scor_table
static BigReal * vdwa_table
int get_table_dim() const
void bindTextureObject(int size, T *h_table, T *&d_table, cudaTextureObject_t &tex, bool update=false)
static BigReal * full_table
static BigReal * r2_table
const TableEntry * table_val(unsigned int i, unsigned int j) const
void buildForceAndEnergyTable(const int tableSize, const double *r2list, const BigReal *src_table, const bool flip, const BigReal prefac, const int dst_stride, T *dst_force, T *dst_energy)
void NAMD_bug(const char *err_msg)
static BigReal * slow_table
static BigReal * vdwb_table
static const LJTable * ljTable
static BigReal * corr_table
CudaNonbondedTables(const int deviceID)