6 #if __MIC_PAD_PLGEN_CTRL != 0
9 #if MIC_HANDCODE_FORCE_SINGLE != 0
10 const int _plI_fs_outer_step = 16;
12 const int _plI_fs_outer_step = 8;
18 for (
int _plI_fs_outer = 0; _plI_fs_outer < plSize; _plI_fs_outer += _plI_fs_outer_step) {
21 const int i = (plArray[_plI_fs_outer] >> 16) & 0xFFFF;
24 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
25 const CALC_TYPE p_i_x = ((CALC_TYPE)p_0[i].
x) + ((CALC_TYPE)params.offset.x);
26 const CALC_TYPE p_i_y = ((CALC_TYPE)p_0[i].
y) + ((CALC_TYPE)params.offset.y);
27 const CALC_TYPE p_i_z = ((CALC_TYPE)p_0[i].
z) + ((CALC_TYPE)params.offset.z);
28 const CALC_TYPE p_i_q = (CALC_TYPE)(p_0[i].
charge);
29 const int p_i_vdwType = pExt_0[i].vdw_type;
31 const CALC_TYPE p_i_x = ((CALC_TYPE)p_0_x[i]) + ((CALC_TYPE)params.offset.x);
32 const CALC_TYPE p_i_y = ((CALC_TYPE)p_0_y[i]) + ((CALC_TYPE)params.offset.y);
33 const CALC_TYPE p_i_z = ((CALC_TYPE)p_0_z[i]) + ((CALC_TYPE)params.offset.z);
34 const CALC_TYPE p_i_q = (CALC_TYPE)(p_0_q[i]);
35 const int p_i_vdwType = pExt_0_vdwType[i];
39 double tmp_x_i_sum = 0.0;
40 double tmp_y_i_sum = 0.0;
41 double tmp_z_i_sum = 0.0;
42 double tmp_w_i_sum = 0.0;
43 double fulltmp_x_i_sum = 0.0;
44 double fulltmp_y_i_sum = 0.0;
45 double fulltmp_z_i_sum = 0.0;
47 #if MIC_EXCL_CHECKSUM_FULL != 0
49 #define EXCL_CHECKSUM_CLAUSE reduction(+ : exclusionSum)
51 #define EXCL_CHECKSUM_CLAUSE
55 #pragma omp simd vectorlength(16) \
56 reduction(+ : tmp_x_i_sum, tmp_y_i_sum, tmp_z_i_sum, tmp_w_i_sum, \
57 fulltmp_x_i_sum, fulltmp_y_i_sum, fulltmp_z_i_sum ) \
59 for (
int _plI_fs_inner = 0; _plI_fs_inner < _plI_fs_outer_step; _plI_fs_inner++) {
60 const int plI = _plI_fs_outer + _plI_fs_inner;
61 if ((plArray[plI] & 0xFFFF) != 0xFFFF) {
68 #pragma loop_count (1000)
70 #pragma loop_count (10000)
72 for (
int plI = 0; plI < plSize; plI++) {
77 const int ij = plArray[plI];
78 #if __MIC_PAD_PLGEN_CTRL != 0
81 const int i = (ij >> 16) & 0xFFFF;
83 const int j = (ij ) & 0xFFFF;
87 #if MIC_PREFETCH_DISTANCE > 0
88 const int pfIJ = plArray[plI + MIC_PREFETCH_DISTANCE];
89 const int pfI = (pfIJ >> 16) & 0xFFFF;
90 const int pfJ = (pfIJ ) & 0xFFFF;
91 _mm_prefetch((
char*)(p_0_x + pfI), MIC_PREFETCH_HINT);
92 _mm_prefetch((
char*)(p_0_y + pfI), MIC_PREFETCH_HINT);
93 _mm_prefetch((
char*)(p_0_z + pfI), MIC_PREFETCH_HINT);
94 _mm_prefetch((
char*)(p_0_q + pfI), MIC_PREFETCH_HINT);
95 _mm_prefetch((
char*)(f_0_x + pfI), MIC_PREFETCH_HINT);
96 _mm_prefetch((
char*)(f_0_y + pfI), MIC_PREFETCH_HINT);
97 _mm_prefetch((
char*)(f_0_z + pfI), MIC_PREFETCH_HINT);
98 _mm_prefetch((
char*)(p_1_x + pfJ), MIC_PREFETCH_HINT);
99 _mm_prefetch((
char*)(p_1_y + pfJ), MIC_PREFETCH_HINT);
100 _mm_prefetch((
char*)(p_1_z + pfJ), MIC_PREFETCH_HINT);
101 _mm_prefetch((
char*)(p_1_q + pfJ), MIC_PREFETCH_HINT);
102 _mm_prefetch((
char*)(f_1_x + pfJ), MIC_PREFETCH_HINT);
103 _mm_prefetch((
char*)(f_1_y + pfJ), MIC_PREFETCH_HINT);
104 _mm_prefetch((
char*)(f_1_z + pfJ), MIC_PREFETCH_HINT);
108 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
109 #if __MIC_PAD_PLGEN_CTRL != 0
112 const CALC_TYPE p_i_x = ((CALC_TYPE)p_0[i].x) + ((CALC_TYPE)params.offset.x);
113 const CALC_TYPE p_i_y = ((CALC_TYPE)p_0[i].y) + ((CALC_TYPE)params.offset.y);
114 const CALC_TYPE p_i_z = ((CALC_TYPE)p_0[i].z) + ((CALC_TYPE)params.offset.z);
116 const CALC_TYPE p_j_x = (CALC_TYPE)(p_1[j].x);
117 const CALC_TYPE p_j_y = (CALC_TYPE)(p_1[j].y);
118 const CALC_TYPE p_j_z = (CALC_TYPE)(p_1[j].z);
120 #if __MIC_PAD_PLGEN_CTRL != 0
123 const CALC_TYPE p_i_x = ((CALC_TYPE)p_0_x[i]) + ((CALC_TYPE)params.offset.x);
124 const CALC_TYPE p_i_y = ((CALC_TYPE)p_0_y[i]) + ((CALC_TYPE)params.offset.y);
125 const CALC_TYPE p_i_z = ((CALC_TYPE)p_0_z[i]) + ((CALC_TYPE)params.offset.z);
127 const CALC_TYPE p_j_x = (CALC_TYPE)(p_1_x[j]);
128 const CALC_TYPE p_j_y = (CALC_TYPE)(p_1_y[j]);
129 const CALC_TYPE p_j_z = (CALC_TYPE)(p_1_z[j]);
133 CALC_TYPE
p_ij_x = p_i_x - p_j_x;
134 CALC_TYPE
p_ij_y = p_i_y - p_j_y;
135 CALC_TYPE
p_ij_z = p_i_z - p_j_z;
137 #if REFINE_PAIRLISTS != 0
138 CALC_TYPE r2 = (CALC_TYPE)(r2Array[plI]);
140 CALC_TYPE r2 = (p_ij_x *
p_ij_x) + (p_ij_y * p_ij_y) + (p_ij_z *
p_ij_z) + r2_delta;
141 if (r2 < cutoff2_delta) {
144 #if (MIC_EXCL_CHECKSUM_FULL != 0) && (0 EXCLUDED(+1) MODIFIED(+1))
145 #if __MIC_PAD_PLGEN_CTRL != 0
148 params.exclusionSum += 1;
153 #if MIC_HANDCODE_FORCE_SINGLE != 0
154 const unsigned int table_i = ((int)((__intel_castf32_u32(r2)) >> 17)) + r2_delta_expc;
156 const unsigned int table_i = ((int)((__intel_castf64_u64(r2)) >> 46)) + r2_delta_expc;
159 #if MIC_HANDCODE_FORCE_CALCR2TABLE != 0
167 CALC_TYPE r2_base = r2_delta * (1 << (table_i >> 6));
168 CALC_TYPE r2_del = r2_base * ((CALC_TYPE)0.015625f);
169 CALC_TYPE r2_table_i = r2_base + r2_del * (table_i & 0x3F);
171 CALC_TYPE r2_table_i = r2_table[table_i];
173 CALC_TYPE diffa = r2 - r2_table_i;
174 const CALC_TYPE *
const table_four_ptr =
SHORT(table_short)
NOSHORT(table_noshort);
175 const
int table_four_idx = 16 * table_i;
179 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
180 #if __MIC_PAD_PLGEN_CTRL != 0
183 const CALC_TYPE p_i_q = (CALC_TYPE)(p_0[i].charge);
185 const CALC_TYPE p_j_q = (CALC_TYPE)(p_1[j].charge);
187 #if __MIC_PAD_PLGEN_CTRL != 0
190 const CALC_TYPE p_i_q = (CALC_TYPE)(p_0_q[i]);
192 const CALC_TYPE p_j_q = (CALC_TYPE)(p_1_q[j]);
194 CALC_TYPE kqq = p_i_q * p_j_q;
198 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
199 #if __MIC_PAD_PLGEN_CTRL != 0
202 int p_i_vdwType = pExt_0[i].vdw_type;
204 int p_j_vdwType = pExt_1[j].vdw_type;
206 #if __MIC_PAD_PLGEN_CTRL != 0
209 int p_i_vdwType = pExt_0_vdwType[i];
211 int p_j_vdwType = pExt_1_vdwType[j];
215 const int lj_pars_offset = (4 * (p_i_vdwType * lj_table_dim + p_j_vdwType))
MODIFIED(+ 2);
216 CALC_TYPE
A = scaling * lj_table_base_ptr[lj_pars_offset ];
217 CALC_TYPE
B = scaling * lj_table_base_ptr[lj_pars_offset + 1];
224 CALC_TYPE
vdw_d = A * table_four_ptr[table_four_idx + 0] - B * table_four_ptr[table_four_idx + 4];
225 CALC_TYPE
vdw_c = A * table_four_ptr[table_four_idx + 1] - B * table_four_ptr[table_four_idx + 5];
226 CALC_TYPE
vdw_b = A * table_four_ptr[table_four_idx + 2] - B * table_four_ptr[table_four_idx + 6];
227 CALC_TYPE
vdw_a = A * table_four_ptr[table_four_idx + 3] - B * table_four_ptr[table_four_idx + 7];
230 CALC_TYPE
vdw_val = ((diffa * vdw_d * (1/6.0) +
vdw_c * (1/4.0)) * diffa +
vdw_b * (1/2.0)) * diffa +
vdw_a;
238 CALC_TYPE fast_d = kqq * table_four_ptr[table_four_idx + 8];
239 CALC_TYPE
fast_c = kqq * table_four_ptr[table_four_idx + 9];
240 CALC_TYPE
fast_b = kqq * table_four_ptr[table_four_idx + 10];
241 CALC_TYPE
fast_a = kqq * table_four_ptr[table_four_idx + 11];
244 CALC_TYPE modfckqq = (1.0 - modf_mod) * kqq;
245 CALC_TYPE fast_d = modfckqq * table_four_ptr[table_four_idx + 8];
246 CALC_TYPE fast_c = modfckqq * table_four_ptr[table_four_idx + 9];
247 CALC_TYPE fast_b = modfckqq * table_four_ptr[table_four_idx + 10];
248 CALC_TYPE fast_a = modfckqq * table_four_ptr[table_four_idx + 11];
252 CALC_TYPE fast_val = ((diffa * fast_d * (1/6.0) +
fast_c * (1/4.0)) * diffa +
fast_b * (1/2.0)) * diffa +
fast_a;
253 #if (0 NOT_ALCHPAIR(+1))
259 #if (0 NOT_ALCHPAIR(+1))
269 CALC_TYPE tmp_x = force_r *
p_ij_x;
273 #
if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
274 #
if __MIC_PAD_PLGEN_CTRL != 0
275 tmp_x_i_sum += tmp_x;
281 #if __MIC_PAD_PLGEN_CTRL != 0
282 tmp_x_i_sum += tmp_x;
289 CALC_TYPE tmp_y = force_r *
p_ij_y;
292 #
if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
293 #
if __MIC_PAD_PLGEN_CTRL != 0
294 tmp_y_i_sum += tmp_y;
300 #if __MIC_PAD_PLGEN_CTRL != 0
301 tmp_y_i_sum += tmp_y;
308 CALC_TYPE tmp_z = force_r *
p_ij_z;
310 #
if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
311 #
if __MIC_PAD_PLGEN_CTRL != 0
312 tmp_z_i_sum += tmp_z;
318 #if __MIC_PAD_PLGEN_CTRL != 0
319 tmp_z_i_sum += tmp_z;
331 CALC_TYPE slow_d = table_four_ptr[table_four_idx + 8
SHORT(+ 4)];
332 CALC_TYPE slow_c = table_four_ptr[table_four_idx + 9
SHORT(+ 4)];
333 CALC_TYPE slow_b = table_four_ptr[table_four_idx + 10
SHORT(+ 4)];
334 CALC_TYPE slow_a = table_four_ptr[table_four_idx + 11
SHORT(+ 4)];
336 #if (0 SHORT( EXCLUDED(+1) MODIFIED(+1) ))
337 const int slow_idx = 4 * table_i;
345 slow_a += 1.0 * slow_table[slow_idx + 3];
346 slow_b += 2.0 * slow_table[slow_idx + 2];
347 slow_c += 4.0 * slow_table[slow_idx + 1];
348 slow_d += 6.0 * slow_table[slow_idx + 0];
351 slow_d -= table_four_ptr[table_four_idx + 12];
352 slow_c -= table_four_ptr[table_four_idx + 13];
353 slow_b -= table_four_ptr[table_four_idx + 14];
354 slow_a -= table_four_ptr[table_four_idx + 15];
363 slow_a += 1.0 * modf_mod * slow_table[slow_idx + 3];
364 slow_b += 2.0 * modf_mod * slow_table[slow_idx + 2];
365 slow_c += 4.0 * modf_mod * slow_table[slow_idx + 1];
366 slow_d += 6.0 * modf_mod * slow_table[slow_idx + 0];
369 slow_d -= modf_mod * table_four_ptr[table_four_idx + 12];
370 slow_c -= modf_mod * table_four_ptr[table_four_idx + 13];
371 slow_b -= modf_mod * table_four_ptr[table_four_idx + 14];
372 slow_a -= modf_mod * table_four_ptr[table_four_idx + 15];
381 CALC_TYPE slow_val = ((diffa * slow_d * (1/6.0) + slow_c * (1/4.0)) * diffa + slow_b * (1/2.0)) * diffa + slow_a;
382 #if (0 NOT_ALCHPAIR(+1))
383 fullElectEnergy -= slow_val;
388 #if (0 NOT_ALCHPAIR(FAST(NOSHORT(+1))))
395 CALC_TYPE slow_dir = (diffa * slow_d + slow_c) * diffa + slow_b;
396 CALC_TYPE fullforce_r = slow_dir;
398 CALC_TYPE fulltmp_x = fullforce_r *
p_ij_x;
399 PAIR( fullElectVirial_xx += fulltmp_x * p_ij_x; )
400 PAIR( fullElectVirial_xy += fulltmp_x * p_ij_y; )
401 PAIR( fullElectVirial_xz += fulltmp_x * p_ij_z; )
402 #
if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
403 #
if __MIC_PAD_PLGEN_CTRL != 0
404 fulltmp_x_i_sum += fulltmp_x;
406 fullf_0[i].x += fulltmp_x;
408 fullf_1[j].x -= fulltmp_x;
410 #if __MIC_PAD_PLGEN_CTRL != 0
411 fulltmp_x_i_sum += fulltmp_x;
413 fullf_0_x[i] += fulltmp_x;
415 fullf_1_x[j] -= fulltmp_x;
418 CALC_TYPE fulltmp_y = fullforce_r *
p_ij_y;
419 PAIR( fullElectVirial_yy += fulltmp_y * p_ij_y; )
420 PAIR( fullElectVirial_yz += fulltmp_y * p_ij_z; )
421 #
if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
422 #
if __MIC_PAD_PLGEN_CTRL != 0
423 fulltmp_y_i_sum += fulltmp_y;
425 fullf_0[i].y += fulltmp_y;
427 fullf_1[j].y -= fulltmp_y;
429 #if __MIC_PAD_PLGEN_CTRL != 0
430 fulltmp_y_i_sum += fulltmp_y;
432 fullf_0_y[i] += fulltmp_y;
434 fullf_1_y[j] -= fulltmp_y;
437 CALC_TYPE fulltmp_z = fullforce_r *
p_ij_z;
438 PAIR( fullElectVirial_zz += fulltmp_z * p_ij_z; )
439 #
if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
440 #
if __MIC_PAD_PLGEN_CTRL != 0
441 fulltmp_z_i_sum += fulltmp_z;
443 fullf_0[i].z += fulltmp_z;
445 fullf_1[j].z -= fulltmp_z;
447 #if __MIC_PAD_PLGEN_CTRL != 0
448 fulltmp_z_i_sum += fulltmp_z;
450 fullf_0_z[i] += fulltmp_z;
452 fullf_1_z[j] -= fulltmp_z;
457 #if REFINE_PAIRLISTS == 0
462 #if __MIC_PAD_PLGEN_CTRL != 0
467 #if MIC_EXCL_CHECKSUM_FULL != 0
468 params.exclusionSum += exclusionSum;
470 #undef EXCL_CHECKSUM_CLAUSE
472 #if (0 FAST(SHORT(+1)))
473 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
474 f_0[i].x += tmp_x_i_sum;
475 f_0[i].y += tmp_y_i_sum;
476 f_0[i].z += tmp_z_i_sum;
478 f_0_x[i] += tmp_x_i_sum;
479 f_0_y[i] += tmp_y_i_sum;
480 f_0_z[i] += tmp_z_i_sum;
485 #if MIC_HANDCODE_FORCE_SOA_VS_AOS != 0
486 fullf_0[i].x += fulltmp_x_i_sum;
487 fullf_0[i].y += fulltmp_y_i_sum;
488 fullf_0[i].z += fulltmp_z_i_sum;
490 fullf_0_x[i] += fulltmp_x_i_sum;
491 fullf_0_y[i] += fulltmp_y_i_sum;
492 fullf_0_z[i] += fulltmp_z_i_sum;
register BigReal virial_xy
register BigReal virial_xz
register BigReal fast_dir
register BigReal virial_yz
register BigReal electEnergy
register const BigReal p_ij_z
register BigReal virial_yy
register BigReal virial_zz
register const BigReal p_ij_x
register BigReal virial_xx
k< npairi;++k){TABENERGY(const int numtypes=simParams->tableNumTypes;const float table_spacing=simParams->tableSpacing;const int npertype=(int)(namdnearbyint(simParams->tableMaxDist/simParams->tableSpacing)+1);) int table_i=(r2iilist[2 *k] >> 14)+r2_delta_expc;const int j=pairlisti[k];#define p_j BigReal diffa=r2list[k]-r2_table[table_i];#define table_four_i TABENERGY(register const int tabtype=-1-(lj_pars->A< 0?lj_pars->A:0);) BigReal kqq=kq_i *p_j-> charge
register const BigReal p_ij_y