/* NAMD -- ComputeNonbondedBase.h
 * (text recovered from the generated documentation page; consult the
 * NAMD source tree for the authoritative copy of this file) */
// Several special cases are defined:
//   NBTYPE: exclusion method (NBPAIR, NBSELF -- mutually exclusive)
//   FULLELECT full electrostatics calculation?

#ifdef ARCH_POWERPC
#include <builtins.h>
#endif

// __align(X): portable spelling of an X-byte alignment qualifier for the
// SSE2 code paths further down (needed for aligned _mm_store_pd targets).
// NOTE(review): "__align" is a reserved identifier (leading double
// underscore); kept as-is since downstream code in this file uses it.
#if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
#include <emmintrin.h> // We're using SSE2 intrinsics
#if defined(__INTEL_COMPILER)
#define __align(X) __declspec(align(X) )
#elif defined(__GNUC__) || defined(__PGI)
#define __align(X) __attribute__((aligned(X) ))
#else
// fallback: assume an MSVC-style compiler
#define __align(X) __declspec(align(X) )
#endif
#endif
25 
// Project headers pulled in only on the DEFINITION pass of this
// multiply-included template (the "(" and ")" comments visually pair the
// #ifdef with its #endif).
#ifdef DEFINITION // (
  #include "LJTable.h"
  #include "Molecule.h"
  #include "ComputeNonbondedUtil.h"
#endif // )
  // NOTE(review): Parameters.h is included unconditionally although its
  // indentation suggests it may once have lived inside the guard -- confirm.
  #include "Parameters.h"
#if NAMD_ComputeNonbonded_SortAtoms != 0
  #include "PatchMap.h"
#endif
35 
// determining class name
// Token-pasting machinery: NAME expands through a chain of *NAME macros
// (CLASSNAME -> ENERGYNAME -> ... -> LAST) so that one function body,
// compiled repeatedly under different flags, yields distinctly named
// specializations such as calc_pair_energy_fullelect.
#undef NAME
#undef CLASS
#undef CLASSNAME
#define NAME CLASSNAME(calc)

// PAIR(X) emits X only in the pair-compute specialization; it appends
// "_pair" to the generated name and selects the ComputeNonbondedPair class.
#undef PAIR
#if NBTYPE == NBPAIR
  #define PAIR(X) X
  #define CLASS ComputeNonbondedPair
  #define CLASSNAME(X) ENERGYNAME( X ## _pair )
#else
  #define PAIR(X)
#endif

// SELF(X) emits X only in the self-compute specialization (same pattern).
#undef SELF
#if NBTYPE == NBSELF
  #define SELF(X) X
  #define CLASS ComputeNonbondedSelf
  #define CLASSNAME(X) ENERGYNAME( X ## _self )
#else
  #define SELF(X)
#endif
59 
// ENERGY(X)/NOENERGY(X): emit X only when energies are / are not being
// accumulated; also appends "_energy" to the generated function name.
#undef ENERGYNAME
#undef ENERGY
#undef NOENERGY
#ifdef CALCENERGY
  #define ENERGY(X) X
  #define NOENERGY(X)
  #define ENERGYNAME(X) SLOWONLYNAME( X ## _energy )
#else
  #define ENERGY(X)
  #define NOENERGY(X) X
  #define ENERGYNAME(X) SLOWONLYNAME( X )
#endif

// FAST(X)/NOFAST(X): short-range (fast) nonbonded work enabled or not;
// SLOWONLY builds append "_slow" to the name.
#undef SLOWONLYNAME
#undef FAST
#undef NOFAST
#ifdef SLOWONLY
  #define FAST(X)
  #define NOFAST(X) X
  #define SLOWONLYNAME(X) MERGEELECTNAME( X ## _slow )
#else
  #define FAST(X) X
  #define NOFAST(X)
  #define SLOWONLYNAME(X) MERGEELECTNAME( X )
#endif

// SHORT(X)/NOSHORT(X): MERGEELECT builds (suffix "_merge") disable the
// separate short-range path; presumably short-range electrostatics are
// folded into the full-electrostatics arrays -- confirm against callers.
#undef MERGEELECTNAME
#undef SHORT
#undef NOSHORT
#ifdef MERGEELECT
  #define SHORT(X)
  #define NOSHORT(X) X
  #define MERGEELECTNAME(X) FULLELECTNAME( X ## _merge )
#else
  #define SHORT(X) X
  #define NOSHORT(X)
  #define MERGEELECTNAME(X) FULLELECTNAME( X )
#endif

// FULL(X)/NOFULL(X): full electrostatics contribution enabled or not
// (suffix "_fullelect").
#undef FULLELECTNAME
#undef FULL
#undef NOFULL
#ifdef FULLELECT
  #define FULLELECTNAME(X) TABENERGYNAME( X ## _fullelect )
  #define FULL(X) X
  #define NOFULL(X)
#else
  #define FULLELECTNAME(X) TABENERGYNAME( X )
  #define FULL(X)
  #define NOFULL(X) X
#endif

// TABENERGY(X)/NOTABENERGY(X): tabulated-energy variant enabled or not
// (suffix "_tabener").
#undef TABENERGYNAME
#undef TABENERGY
#undef NOTABENERGY
#ifdef TABENERGYFLAG
  #define TABENERGYNAME(X) FEPNAME( X ## _tabener )
  #define TABENERGY(X) X
  #define NOTABENERGY(X)
#else
  #define TABENERGYNAME(X) FEPNAME( X )
  #define TABENERGY(X)
  #define NOTABENERGY(X) X
#endif
124 
// Here are what these macros stand for:
// FEP/NOT_FEP: FEP free energy perturbation is active/inactive
//   (does NOT use LAM)
// LES: locally-enhanced sampling is active
// LAM: scale the potential by a factor lambda (except FEP)
// INT: measure interaction energies
// PPROF: pressure profiling

// Defaults: every feature selector expands to nothing; the FLAG blocks
// below re-enable the ones chosen for this compilation of the template.
#undef FEPNAME
#undef FEP
#undef LES
#undef INT
#undef PPROF
#undef LAM
#undef CUDA
#undef ALCH
#undef TI
#undef GO
#define FEPNAME(X) LAST( X )
#define FEP(X)
// NOTE(review): ALCHPAIR/NOT_ALCHPAIR are not #undef'd first like the
// others -- confirm no -Wmacro-redefined warnings on repeated inclusion.
#define ALCHPAIR(X)
#define NOT_ALCHPAIR(X) X
#define LES(X)
#define INT(X)
#define PPROF(X)
#define LAM(X)
#define CUDA(X)
#define ALCH(X)
#define TI(X)
#define GO(X)
// FEP and TI are the two alchemical modes; each also turns on ALCH.
#ifdef FEPFLAG
  #undef FEPNAME
  #undef FEP
  #undef ALCH
  #define FEPNAME(X) LAST( X ## _fep )
  #define FEP(X) X
  #define ALCH(X) X
#endif
#ifdef TIFLAG
  #undef FEPNAME
  #undef TI
  #undef ALCH
  #define FEPNAME(X) LAST( X ## _ti )
  #define TI(X) X
  #define ALCH(X) X
#endif
#ifdef LESFLAG
  #undef FEPNAME
  #undef LES
  #undef LAM
  #define FEPNAME(X) LAST( X ## _les )
  #define LES(X) X
  #define LAM(X) X
#endif
#ifdef INTFLAG
  #undef FEPNAME
  #undef INT
  #define FEPNAME(X) LAST( X ## _int )
  #define INT(X) X
#endif
// Pressure profiling implies interaction-energy bookkeeping (INT).
#ifdef PPROFFLAG
  #undef FEPNAME
  #undef INT
  #undef PPROF
  #define FEPNAME(X) LAST( X ## _pprof )
  #define INT(X) X
  #define PPROF(X) X
#endif
#ifdef GOFORCES
  #undef FEPNAME
  #undef GO
  #define FEPNAME(X) LAST( X ## _go )
  #define GO(X) X
#endif
#ifdef NAMD_CUDA
  #undef CUDA
  #define CUDA(X) X
#endif

// LAST terminates the name-mangling chain: the accumulated token becomes
// the final function name.
#define LAST(X) X
205 
// see if things are really messed up: each of the following lines must
// expand to nothing.  If two mutually exclusive option macros are both
// active, a line expands to the bare tokens "foo bar" and compilation
// fails, flagging the inconsistent flag combination at build time.
SELF( PAIR( foo bar ) )
LES( FEP( foo bar ) )
LES( INT( foo bar ) )
FEP( INT( foo bar ) )
LAM( INT( foo bar ) )
FEP( NOENERGY( foo bar ) )
ENERGY( NOENERGY( foo bar ) )
TABENERGY(NOTABENERGY( foo bar ) )

// Bracket used during dependency generation.  NOTE(review): the scraped
// source drops a line here (original line 217), likely an #include guarded
// by this macro -- confirm against the repository copy.
#define KNL_MAKE_DEPENDS_INCLUDE
#undef KNL_MAKE_DEPENDS_INCLUDE

// KNL(X)/NOKNL(X): emit X only when (not) taking the KNL-specialized path.
#undef KNL
#undef NOKNL
#ifdef NAMD_KNL
  // The KNL specialization exists only for the plain fast path: if any of
  // these feature selectors is active, each active one contributes "1+"
  // and the expression below evaluates nonzero, selecting NOKNL.
  #if ( TABENERGY(1+) FEP(1+) TI(1+) INT(1+) LES(1+) GO(1+) PPROF(1+) NOFAST(1+) 0 )
  #define KNL(X)
  #define NOKNL(X) X
  #else
  #define KNL(X) X
  #define NOKNL(X)
  #endif
#else
  #define KNL(X)
  #define NOKNL(X) X
#endif

// Dot product of two vectors spelled as <name>_x/_y/_z scalar triples;
// only defined for the pair compute when atom sorting is enabled, which
// is where the sort-value projection code below uses it.
#if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
  #define COMPONENT_DOTPRODUCT(A,B) ((A##_x * B##_x) + (A##_y * B##_y) + (A##_z * B##_z))
#endif
238 
239 
240 // ************************************************************
241 // function header
243  ( nonbonded *params )
244 
245 // function body
246 {
247  // int NAME; // to track errors via unused variable warnings
248 
249  if ( ComputeNonbondedUtil::commOnly ) return;
250 
251 #ifdef NAMD_CUDA
252  NOENERGY(return;)
253 #endif
254 
255  // speedup variables
256  BigReal *reduction = params->reduction;
258 
259  PPROF(
260  BigReal *pressureProfileReduction = params->pressureProfileReduction;
261  const BigReal invThickness = 1.0 / pressureProfileThickness;
262  )
263 
264  Pairlists &pairlists = *(params->pairlists);
265 #ifdef NAMD_CUDA
266  int savePairlists = 0;
267  int usePairlists = 0;
268 #else
269  int savePairlists = params->savePairlists;
270  int usePairlists = params->usePairlists;
271 #endif
272  pairlists.reset();
273  // PAIR(iout << "--------\n" << endi;)
274 
275  // BEGIN LA
276  const int doLoweAndersen = params->doLoweAndersen;
277  // END LA
278 
279  // local variables
280  int exclChecksum = 0;
281  FAST
282  (
283  // JLai
284  ENERGY( BigReal vdwEnergy = 0;
285  GO( BigReal groLJEnergy = 0;
286  BigReal groGaussEnergy = 0;
287  BigReal goEnergyNative = 0;
288  BigReal goEnergyNonnative = 0; ) )
289  SHORT
290  (
291  ENERGY( BigReal electEnergy = 0; )
292  Vector f_net = 0.;
293  )
294 
295  FEP
296  (
297  ENERGY( BigReal vdwEnergy_s = 0; )
298  SHORT
299  (
300  ENERGY( BigReal electEnergy_s = 0; )
301  )
302  )
303  )
304 
305 #ifndef A2_QPX
306  FULL
307  (
308  ENERGY( BigReal fullElectEnergy = 0; )
309  Vector fullf_net = 0.;
310  FEP
311  (
312  ENERGY( BigReal fullElectEnergy_s = 0; )
313  )
314  )
315 #else
316  Vector fullf_net = 0.;
317  BigReal fullElectEnergy = 0;
318  BigReal fullElectEnergy_s = 0;
319 #endif
320 
321  // Bringing stuff into local namespace for speed.
322  const BigReal offset_x = params->offset.x;
323  const BigReal offset_y = params->offset.y;
324  const BigReal offset_z = params->offset.z;
325 
326  // Note that offset_f assumes positions are relative to patch centers
327  const float offset_x_f = params->offset_f.x;
328  const float offset_y_f = params->offset_f.y;
329  const float offset_z_f = params->offset_f.z;
330 
331  register const BigReal plcutoff2 = \
332  params->plcutoff * params->plcutoff;
333  register const BigReal groupplcutoff2 = \
334  params->groupplcutoff * params->groupplcutoff;
337  LJTable::TableEntry ljNull; ljNull.A = 0; ljNull.B = 0;
338  const LJTable::TableEntry* const lj_null_pars = &ljNull;
339  const Molecule* const mol = ComputeNonbondedUtil:: mol;
340  SHORT
341  (
342  const BigReal* const table_four = ComputeNonbondedUtil:: table_short;
343  )
344  FULL
345  (
346  SHORT
347  (
349  )
350  NOSHORT
351  (
352 //#if 1 ALCH(-1)
353  const BigReal* const table_four = ComputeNonbondedUtil:: table_noshort;
354 //#else // have to switch this for ALCH
355 // BigReal* table_four = ComputeNonbondedUtil:: table_noshort;
356 //#endif
357  )
358  )
360  const float scaling_f = scaling;
361 #ifdef A2_QPX
362  vector4double scalingv = vec_splats(scaling);
363 #endif
364  const BigReal modf_mod = 1.0 - scale14;
365  FAST
366  (
368  const float switchOn2_f = switchOn2;
372  const float c1_f = c1;
373  const float c3_f = c3;
374  )
377  // const int r2_delta_expc = 64 * (r2_delta_exp - 127);
378  const int r2_delta_expc = 64 * (r2_delta_exp - 1023);
379 
380  ALCH(
381  const BigReal switchdist2 = ComputeNonbondedUtil::switchOn2;
383  const BigReal diff = cutoff2 - switchdist2;
384  const BigReal switchfactor = 1./(diff*diff*diff);
388 
389  /* BKR - Enable dynamic lambda by passing the timestep to simParams.
390  The SimParameters object now has all knowledge of the lambda schedule
391  and all operations should ask _it_ for lambda, rather than recomputing
392  it.
393  */
394  const BigReal alchLambda = simParams->getCurrentLambda(params->step);
395 
396  /*lambda values 'up' are for atoms scaled up with lambda (partition 1)*/
397  BigReal lambdaUp = alchLambda; // needed in ComputeNonbondedBase2.h!
398  BigReal elecLambdaUp = simParams->getElecLambda(lambdaUp);
399  BigReal vdwLambdaUp = simParams->getVdwLambda(lambdaUp);
400  BigReal repLambdaUp = simParams->getRepLambda(lambdaUp);
401  BigReal vdwShiftUp = alchVdwShiftCoeff*(1 - vdwLambdaUp);
402  FEP(BigReal lambda2Up = simParams->getCurrentLambda2(params->step);)
403  FEP(BigReal elecLambda2Up = simParams->getElecLambda(lambda2Up);)
404  FEP(BigReal vdwLambda2Up = simParams->getVdwLambda(lambda2Up);)
405  FEP(BigReal repLambda2Up = simParams->getRepLambda(lambda2Up);)
406  FEP(BigReal vdwShift2Up = alchVdwShiftCoeff*(1 - vdwLambda2Up);)
407 
408  /*lambda values 'down' are for atoms scaled down with lambda (partition 2)*/
409  BigReal lambdaDown = 1 - alchLambda; // needed in ComputeNonbondedBase2.h!
410  BigReal elecLambdaDown = simParams->getElecLambda(lambdaDown);
411  BigReal vdwLambdaDown = simParams->getVdwLambda(lambdaDown);
412  BigReal repLambdaDown = simParams->getRepLambda(lambdaDown);
413  BigReal vdwShiftDown = alchVdwShiftCoeff*(1 - vdwLambdaDown);
414  FEP(BigReal lambda2Down = 1 - simParams->getCurrentLambda2(params->step);)
415  FEP(BigReal elecLambda2Down = simParams->getElecLambda(lambda2Down);)
416  FEP(BigReal vdwLambda2Down = simParams->getVdwLambda(lambda2Down);)
417  FEP(BigReal repLambda2Down = simParams->getRepLambda(lambda2Down);)
418  FEP(BigReal vdwShift2Down = alchVdwShiftCoeff*(1 - vdwLambda2Down);)
419 
420  // Thermodynamic Integration Notes:
421  // Separation of atom pairs into different pairlist according to lambda
422  // coupling, for alchemical free energy calculations. Normal pairlists
423  // (pairlist[nxm]_save) are re-used for non-lambda-coupled pairs; new ones
424  // (pairlist[nxm][12]_save are created for atoms switched up or down with
425  // lambda respectively.
426  // This makes TI coding far easier and more readable, since it's necessary
427  // to store dU/dlambda in different variables depending on whether atoms are
428  // being switched up or down. Further, it allows the prior explicit coding of
429  // the separation-shifted vdW potential to stay in place without a big
430  // performance hit, since it's no longer necessary to calculate vdW potentials
431  // explicity for the bulk (non-alchemical) interactions - so that part of the
432  // free energy code stays readable and easy to modify. Finally there should
433  // be some performance gain over the old FEP implementation as we no
434  // longer have to check the partitions of each atom pair and switch
435  // parameters accordingly for every single NonbondedBase2.h loop - this is
436  // done at the pairlist level
437 
438  // XXX pswitchTable is repeatedly calculated each time
439  // since its value is fixed, it should be calculated once up front
440  int pswitchTable[5*5];
441  // determines which pairlist alchemical pairings get sent to
442  // 0: non-alchemical pairs, partition 0 <-> partition 0
443  // 1 and 3: atoms scaled up as lambda increases, p0<->p1, p0<->p3
444  // 2 and 4: atoms scaled down as lambda increases, p0<->p2, p0<->p4
445  // all p1<->p2, p1<->p4, p3<->p2, p3<->p4 interactions to be dropped (99)
446  // in general, 'up' refers to 1 and 3, 'down' refers to 2 and 4
447  for (int ip=0; ip<5; ++ip) {
448  for (int jp=0; jp<5; ++jp ) {
449  pswitchTable[ip+5*jp] = 0;
450  if ((ip==1 && jp==0) || (ip==0 && jp==1)) pswitchTable[ip+5*jp] = 1;
451  if ((ip==2 && jp==0) || (ip==0 && jp==2)) pswitchTable[ip+5*jp] = 2;
452  if ((ip==3 && jp==0) || (ip==0 && jp==3)) pswitchTable[ip+5*jp] = 3;
453  if ((ip==4 && jp==0) || (ip==0 && jp==4)) pswitchTable[ip+5*jp] = 4;
454 
455  if (ip && jp && (abs(ip - jp) != 2)) pswitchTable[ip+5*jp] = 99; // no interaction
457  // no decoupling: interactions within an alchemical moiety are treated like
458  // normal alchemical pairs
459  if ((ip == 1 && jp == 1) || (ip == 1 && jp == 3) || (ip == 3 && jp == 1)) pswitchTable[ip+5*jp] = 1;
460  if ((ip == 2 && jp == 2) || (ip == 2 && jp == 4) || (ip == 4 && jp == 2)) pswitchTable[ip+5*jp] = 2;
461  if (ip == 3 && jp == 3) pswitchTable[ip+5*jp] = 3;
462  if (ip == 4 && jp == 4) pswitchTable[ip+5*jp] = 4;
463  }
465  // decoupling: PME calculates extra grids so that while PME
466  // interaction with the full system is switched off, a new PME grid
467  // containing only alchemical atoms is switched on. Full interactions
468  // between alchemical atoms are maintained; potentials within one
469  // partition need not be scaled here.
470  if (ip == 1 && jp == 1) pswitchTable[ip+5*jp] = 0; //Single topology only supports alchDecouple off!
471  if (ip == 2 && jp == 2) pswitchTable[ip+5*jp] = 0; //So 'alchDecouple on' only works for dual topology!
472  }
473  }
474  }
475 
476  // dU/dlambda variables for thermodynamic integration
477  TI(
478  BigReal vdwEnergy_ti_1 = 0;
479  BigReal vdwEnergy_ti_2 = 0;
480  SHORT(BigReal electEnergy_ti_1 = 0;
481  BigReal electEnergy_ti_2 = 0;)
482  FULL(BigReal fullElectEnergy_ti_1 = 0;
483  BigReal fullElectEnergy_ti_2 = 0;)
484  )
485  )
486 
487 
488  const int i_upper = params->numAtoms[0];
489  register const int j_upper = params->numAtoms[1];
490  register int j;
491  register int i;
492  const CompAtom *p_0 = params->p[0];
493  const CompAtom *p_1 = params->p[1];
494  KNL( const CompAtomFlt *pFlt_0 = params->pFlt[0]; )
495  KNL( const CompAtomFlt *pFlt_1 = params->pFlt[1]; )
496  const CompAtomExt *pExt_0 = params->pExt[0];
497  const CompAtomExt *pExt_1 = params->pExt[1];
498 
499  char * excl_flags_buff = 0;
500  const int32 * full_excl = 0;
501  const int32 * mod_excl = 0;
502 
503  plint *pairlistn_save; int npairn;
504  plint *pairlistx_save; int npairx;
505  plint *pairlistm_save; int npairm;
506 
507  ALCH(
508  // n = normal, x = excluded, m = modified
509  // A[1-4] = alchemical partition 1-4
510  plint *pairlistnA1_save; int npairnA1;
511  plint *pairlistxA1_save; int npairxA1;
512  plint *pairlistmA1_save; int npairmA1;
513  plint *pairlistnA2_save; int npairnA2;
514  plint *pairlistxA2_save; int npairxA2;
515  plint *pairlistmA2_save; int npairmA2;
516  plint *pairlistnA3_save; int npairnA3;
517  plint *pairlistxA3_save; int npairxA3;
518  plint *pairlistmA3_save; int npairmA3;
519  plint *pairlistnA4_save; int npairnA4;
520  plint *pairlistxA4_save; int npairxA4;
521  plint *pairlistmA4_save; int npairmA4;
522  )
523 
524  NBWORKARRAYSINIT(params->workArrays);
525 
526  int arraysize = j_upper+5;
527 
528  NBWORKARRAY(int,pairlisti,arraysize)
529  NBWORKARRAY(BigReal,r2list,arraysize)
530  KNL( NBWORKARRAY(float,r2list_f,arraysize) )
531  KNL( NBWORKARRAY(float,xlist,arraysize) )
532  KNL( NBWORKARRAY(float,ylist,arraysize) )
533  KNL( NBWORKARRAY(float,zlist,arraysize) )
534 
535  union { double f; int32 i[2]; } byte_order_test;
536  byte_order_test.f = 1.0; // should occupy high-order bits only
537  int32 *r2iilist = (int32*)r2list + ( byte_order_test.i[0] ? 0 : 1 );
538 
539  if ( ! ( savePairlists || ! usePairlists ) ) arraysize = 0;
540 
541 
542  // DMK - Atom Sort
543  //
544  // Basic Idea: Determine the line between the center of masses of
545  // the two patches. Project and then sort the lists of atoms
546  // along this line. Then, as the pairlists are being generated
547  // for the atoms in the first atom list, use the sorted
548  // list to only add atoms from the second list that are between
549  // +/- ~cutoff from the atoms position on the line.
550  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
551 
552  // NOTE: For the first atom list, just the sort values themselves need to be
553  // calculated (a BigReal vs. SortEntry data structure). However, a second
554  // SortEntry array is needed to perform the merge sort on the second list of
555  // atoms. Use the atomSort_[0/1]_sortValues__ arrays to sort the second
556  // list of atoms and then use the left over array to make a version of the
557  // list that only includes non-fixed groups (fixg).
558 
559  NBWORKARRAY(SortEntry, atomSort_0_sortValues__, arraysize);
560  NBWORKARRAY(SortEntry, atomSort_1_sortValues__, arraysize);
561  NBWORKARRAY(BigReal, p_0_sortValues, arraysize);
562 
563  register SortEntry* p_1_sortValues = atomSort_0_sortValues__;
564  register SortEntry* p_1_sortValues_fixg = atomSort_1_sortValues__;
565 
566  int p_0_sortValues_len = 0;
567  int p_1_sortValues_len = 0;
568  int p_1_sortValues_fixg_len = 0;
569 
570  // Calculate the distance between to projected points on the line that
571  // represents the cutoff distance.
572  BigReal atomSort_windowRadius = sqrt(groupplcutoff2);
573 
574  if (savePairlists || !usePairlists) {
575 
576  register const BigReal projLineVec_x = params->projLineVec.x;
577  register const BigReal projLineVec_y = params->projLineVec.y;
578  register const BigReal projLineVec_z = params->projLineVec.z;
579 
580  // Calculate the sort values for the atoms in patch 1
581  {
582  register int nbgs = p_1->nonbondedGroupSize;
583  register BigReal p_x = p_1->position.x;
584  register BigReal p_y = p_1->position.y;
585  register BigReal p_z = p_1->position.z;
586  register int index = 0;
587 
588  for (register int j = nbgs; j < j_upper; j += nbgs) {
589 
590  // Set p_j_next to point to the atom for the next iteration and begin
591  // loading the 'nbgs' value for that atom.
592  register const CompAtom* p_j_next = p_1 + j;
593  nbgs = p_j_next->nonbondedGroupSize;
594 
595  // Calculate the distance along the projection vector
596  // NOTE: If the vector from the origin to the point is 'A' and the vector
597  // between the patches is 'B' then to project 'A' onto 'B' we take the dot
598  // product of the two vectors 'A dot B' divided by the length of 'B'.
599  // So... projection of A onto B = (A dot B) / length(B), however, note
600  // that length(B) == 1, therefore the sort value is simply (A dot B)
601  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
602 
603  // Start loading the next iteration's atom's position
604  p_x = p_j_next->position.x;
605  p_y = p_j_next->position.y;
606  p_z = p_j_next->position.z;
607 
608  // Store the caclulated sort value into the array of sort values
609  register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
610  p_1_sortValStorePtr->index = index;
611  p_1_sortValStorePtr->sortValue = sortVal;
612  p_1_sortValues_len++;
613 
614  // Update index for the next iteration
615  index = j;
616 
617  } // end while (j < j_upper)
618 
619  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
620 
621  register SortEntry* p_1_sortValStorePtr = p_1_sortValues + p_1_sortValues_len;
622  p_1_sortValStorePtr->index = index;
623  p_1_sortValStorePtr->sortValue = sortVal;
624  p_1_sortValues_len++;
625  }
626 
627  // NOTE: This list and another version of it with only non-fixed
628  // atoms will be used in place of grouplist and fixglist.
629  #if 0 // Selection Sort
630  sortEntries_selectionSort(p_1_sortValues, p_1_sortValues_len);
631  #elif 0 // Bubble Sort
632  sortEntries_bubbleSort(p_1_sortValues, p_1_sortValues_len);
633  #else // Merge Sort
634  #if NAMD_ComputeNonbonded_SortAtoms_LessBranches == 0
635  sortEntries_mergeSort_v1(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
636  #else
637  sortEntries_mergeSort_v2(p_1_sortValues, p_1_sortValues_fixg, p_1_sortValues_len);
638  #endif
639  #endif
640 
641  // Calculate the sort values for the atoms in patch 0
642  {
643  register int nbgs = p_0->nonbondedGroupSize;
644  register BigReal p_x = p_0->position.x + offset_x;
645  register BigReal p_y = p_0->position.y + offset_y;
646  register BigReal p_z = p_0->position.z + offset_z;
647  register int index = 0;
648 
649  for (register int i = nbgs; i < i_upper; i += nbgs) {
650 
651  // Set p_i_next to point to the atom for the next iteration and begin
652  // loading the 'nbgs' value for that atom.
653  register const CompAtom* p_i_next = p_0 + i;
654  nbgs = p_i_next->nonbondedGroupSize;
655 
656  // Calculate the distance along the projection vector
657  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
658 
659  // Start loading the next iteration's atom's position
660  p_x = p_i_next->position.x + offset_x;
661  p_y = p_i_next->position.y + offset_y;
662  p_z = p_i_next->position.z + offset_z;
663 
664  // Store the calculated sort value into the array of sort values
665  register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
666  *p_0_sortValStorePtr = sortVal;
667 
668  // Update index for the next iteration
669  index = i;
670  }
671 
672  register BigReal sortVal = COMPONENT_DOTPRODUCT(p,projLineVec);
673 
674  register BigReal* p_0_sortValStorePtr = p_0_sortValues + index;
675  *p_0_sortValStorePtr = sortVal;
676 
677  p_0_sortValues_len = i_upper;
678  }
679 
680  } // end if (savePairlists || !usePairlists)
681 
682  #endif // NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) )
683 
684  // Atom Sort : The grouplist and fixglist arrays are not needed when the
685  // the atom sorting code is in use.
686  #if ! (NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR( + 1 ) ) )
687  NBWORKARRAY(int,grouplist,arraysize);
688  NBWORKARRAY(int,fixglist,arraysize);
689  #endif
690 
691  NBWORKARRAY(int,goodglist,arraysize);
692  NBWORKARRAY(plint,pairlistx,arraysize);
693  NBWORKARRAY(plint,pairlistm,arraysize);
694  NBWORKARRAY(int,pairlist,arraysize);
695  NBWORKARRAY(int,pairlist2,arraysize);
696  ALCH(
697  NBWORKARRAY(plint,pairlistnA1,arraysize);
698  NBWORKARRAY(plint,pairlistxA1,arraysize);
699  NBWORKARRAY(plint,pairlistmA1,arraysize);
700  NBWORKARRAY(plint,pairlistnA2,arraysize);
701  NBWORKARRAY(plint,pairlistxA2,arraysize);
702  NBWORKARRAY(plint,pairlistmA2,arraysize);
703  NBWORKARRAY(plint,pairlistnA3,arraysize);
704  NBWORKARRAY(plint,pairlistxA3,arraysize);
705  NBWORKARRAY(plint,pairlistmA3,arraysize);
706  NBWORKARRAY(plint,pairlistnA4,arraysize);
707  NBWORKARRAY(plint,pairlistxA4,arraysize);
708  NBWORKARRAY(plint,pairlistmA4,arraysize);
709  )
710 
711  int fixg_upper = 0;
712  int g_upper = 0;
713 
714  if ( savePairlists || ! usePairlists ) {
715 
716  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
717 
718  // Create a sorted list of non-fixed groups
719  register int fixg = 0;
720  for (int tmpI = 0; tmpI < p_1_sortValues_len; tmpI++) {
721  register SortEntry* p_1_sortEntry = p_1_sortValues + tmpI;
722  register int p_1_index = p_1_sortEntry->index;
723  if (!pExt_1[p_1_index].groupFixed) {
724  register SortEntry* p_1_sortEntry_fixg = p_1_sortValues_fixg + p_1_sortValues_fixg_len;
725  p_1_sortEntry_fixg->index = p_1_sortEntry->index;
726  p_1_sortEntry_fixg->sortValue = p_1_sortEntry->sortValue;
727  p_1_sortValues_fixg_len++;
728  }
729  }
730 
731  #else
732 
733  register int g = 0;
734  for ( j = 0; j < j_upper; ++j ) {
735  if ( p_1[j].nonbondedGroupSize ) {
736  grouplist[g++] = j;
737  }
738  }
739  g_upper = g;
740  if ( g_upper ) grouplist[g_upper] = grouplist[g_upper-1];
741  int fixg = 0;
742 
743  if ( fixedAtomsOn ) {
744  for ( g = 0; g < g_upper; ++g ) {
745  j = grouplist[g];
746  if ( ! pExt_1[j].groupFixed ) {
747  fixglist[fixg++] = j;
748  }
749  }
750  }
751 
752  fixg_upper = fixg;
753  if ( fixg_upper ) fixglist[fixg_upper] = fixglist[fixg_upper-1];
754 
755  #endif // NAMD_ComputeNonbonded_SortAtoms != 0
756 
757  pairlists.addIndex();
758  pairlists.setIndexValue(i_upper);
759 
760  } else { // if ( savePairlists || ! usePairlists )
761 
762  if ( pairlists.getIndexValue() != i_upper )
763  NAMD_bug("pairlist i_upper mismatch!");
764 
765  } // if ( savePairlists || ! usePairlists )
766 
767  SELF(
768  int j_hgroup = 0;
769  int g_lower = 0;
770  int fixg_lower = 0;
771  )
772  int pairlistindex=0;
773  int pairlistoffset=0;
774 
775 
776 
777 #if ( SHORT( FAST( 1+ ) ) 0 )
778  Force *f_0 = params->ff[0];
779 #if ( PAIR( 1+ ) 0 )
780  Force *f_1 = params->ff[1];
781 #else
782 #define f_1 f_0
783 #endif
784 #endif
785 #if ( FULL( 1+ ) 0 )
786  Force *fullf_0 = params->fullf[0];
787 #if ( PAIR( 1+ ) 0 )
788  Force *fullf_1 = params->fullf[1];
789 #else
790 #define fullf_1 fullf_0
791 #endif
792 #endif
793 
794 
795  int maxPart = params->numParts - 1;
796  int groupCount = params->minPart;
797  PAIR(
798  if ( savePairlists || ! usePairlists ) {
799  i = 0;
800  pairlists.addIndex();
801  } else {
802  i = pairlists.getIndexValue();
803  }
804  )
805  PAIR(for ( ; i < (i_upper);)) SELF(for ( i=0; i < (i_upper- 1);i++))
806  {
807  const CompAtom &p_i = p_0[i];
808  KNL( const CompAtomFlt &pFlt_i = pFlt_0[i]; )
809  const CompAtomExt &pExt_i = pExt_0[i];
810 
811  PAIR(if (savePairlists || ! usePairlists){)
812  if ( p_i.hydrogenGroupSize ) {
813  if ( groupCount ) { // skip this group
814  --groupCount;
815  i += p_i.hydrogenGroupSize;
816 #ifdef ARCH_POWERPC
817  __dcbt((void *) &(p_0[i]));
818 #endif
819  SELF(--i;)
820  continue;
821  } else { // compute this group
822  groupCount = maxPart;
823  }
824  }
825  PAIR(})
826 
827  register const BigReal p_i_x = p_i.position.x + offset_x;
828  register const BigReal p_i_y = p_i.position.y + offset_y;
829  register const BigReal p_i_z = p_i.position.z + offset_z;
830 #if KNL(1)+0
831  register const BigReal p_i_x_f = pFlt_i.position.x + offset_x_f;
832  register const BigReal p_i_y_f = pFlt_i.position.y + offset_y_f;
833  register const BigReal p_i_z_f = pFlt_i.position.z + offset_z_f;
834 #endif
835 #ifdef A2_QPX
836  vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.0};
837 #endif
838 
839  ALCH(const int p_i_partition = p_i.partition;)
840 
841  PPROF(
842  const int p_i_partition = p_i.partition;
843  int n1 = (int)floor((p_i.position.z-pressureProfileMin)*invThickness);
845  )
846 
847  SELF ( if ( p_i.hydrogenGroupSize ) j_hgroup = i + p_i.hydrogenGroupSize; )
848 
849  if ( savePairlists || ! usePairlists ) {
850 
851  #ifdef MEM_OPT_VERSION
852  const ExclusionCheck *exclcheck = mol->get_excl_check_for_idx(pExt_i.exclId);
853  const int excl_min = pExt_i.id + exclcheck->min;
854  const int excl_max = pExt_i.id + exclcheck->max;
855  #else
856  const ExclusionCheck *exclcheck = mol->get_excl_check_for_atom(pExt_i.id);
857  const int excl_min = exclcheck->min;
858  const int excl_max = exclcheck->max;
859  #endif
860  const char * excl_flags_var;
861  if ( exclcheck->flags ) excl_flags_var = exclcheck->flags - excl_min;
862  else { // need to build list on the fly
863 
864  //TODO: Should change later!!!!!!!!!! --Chao Mei
865  //Now just for the sake of passing compilation
866  #ifndef MEM_OPT_VERSION
867  if ( excl_flags_buff ) {
868  int nl,l;
869  nl = full_excl[0] + 1;
870  for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = 0;
871  nl = mod_excl[0] + 1;
872  for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = 0;
873  } else {
874  excl_flags_buff = new char[mol->numAtoms];
875  memset( (void*) excl_flags_buff, 0, mol->numAtoms);
876  }
877  int nl,l;
878  full_excl = mol->get_full_exclusions_for_atom(pExt_i.id);
879  nl = full_excl[0] + 1;
880  for ( l=1; l<nl; ++l ) excl_flags_buff[full_excl[l]] = EXCHCK_FULL;
881  mod_excl = mol->get_mod_exclusions_for_atom(pExt_i.id);
882  nl = mod_excl[0] + 1;
883  for ( l=1; l<nl; ++l ) excl_flags_buff[mod_excl[l]] = EXCHCK_MOD;
884  excl_flags_var = excl_flags_buff;
885  #endif
886 
887  }
888  const char * const excl_flags = excl_flags_var;
889 
890  if ( p_i.nonbondedGroupSize ) {
891 
892  pairlistindex = 0; // initialize with 0 elements
893  pairlistoffset=0;
894  const int groupfixed = ( fixedAtomsOn && pExt_i.groupFixed );
895 
896  // If patch divisions are not made by hydrogen groups, then
897  // hydrogenGroupSize is set to 1 for all atoms. Thus we can
898  // carry on as if we did have groups - only less efficiently.
899  // An optimization in this case is to not rebuild the temporary
900  // pairlist but to include every atom in it. This should be a
901  // a very minor expense.
902 
903  SELF
904  (
905  if ( p_i.hydrogenGroupSize ) {
906  // exclude child hydrogens of i
907  // j_hgroup = i + p_i.hydrogenGroupSize; (moved above)
908  while ( g_lower < g_upper &&
909  grouplist[g_lower] < j_hgroup ) ++g_lower;
910  while ( fixg_lower < fixg_upper &&
911  fixglist[fixg_lower] < j_hgroup ) ++fixg_lower;
912  }
913  // add all child or sister hydrogens of i
914  for ( j = i + 1; j < j_hgroup; ++j ) {
915  pairlist[pairlistindex++] = j;
916  }
917  )
918 
919  // add remaining atoms to pairlist via hydrogen groups
920  register int *pli = pairlist + pairlistindex;
921 
922  {
923  // Atom Sort : Modify the values of g and gu based on the added information
924  // of the linear projections (sort values) information.
925  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
926 
927  register int g = 0;
928  register BigReal p_i_sortValue = p_0_sortValues[i];
929  const BigReal p_i_sortValue_plus_windowRadius = p_i_sortValue + atomSort_windowRadius;
930  register SortEntry* sortValues = ( groupfixed ? p_1_sortValues_fixg : p_1_sortValues );
931 
932  // Find the actual gu (upper bound in sorted list for this outer-loop atom) based on the sort values
933  register int lower = 0;
934  register int upper = (groupfixed ? p_1_sortValues_fixg_len : p_1_sortValues_len);
935  while ((upper - lower) > 1) {
936  register int j = ((lower + upper) >> 1);
937  register BigReal jSortVal = sortValues[j].sortValue;
938  if (jSortVal < p_i_sortValue_plus_windowRadius) {
939  lower = j;
940  } else {
941  upper = j;
942  }
943  }
944  const int gu = (sortValues[lower].sortValue >= p_i_sortValue_plus_windowRadius) ? lower : upper;
945 
946  #else
947 
948  register int *gli = goodglist;
949  const int *glist = ( groupfixed ? fixglist : grouplist );
950  SELF( const int gl = ( groupfixed ? fixg_lower : g_lower ); )
951  const int gu = ( groupfixed ? fixg_upper : g_upper );
952  register int g = PAIR(0) SELF(gl);
953 
954  #endif
955 
956 
957  if ( g < gu ) {
958  int hu = 0;
959 #ifndef NAMD_KNL
960 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
961  if ( gu - g > 6 ) {
962 
963  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
964  register SortEntry* sortEntry0 = sortValues + g;
965  register SortEntry* sortEntry1 = sortValues + g + 1;
966  register int jprev0 = sortEntry0->index;
967  register int jprev1 = sortEntry1->index;
968  #else
969  register int jprev0 = glist[g ];
970  register int jprev1 = glist[g + 1];
971  #endif
972 
973  register int j0;
974  register int j1;
975 
976  __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
977  __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
978  __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
979 
980  // these don't change here, so we could move them into outer scope
981  const __m128d P_I_X = _mm_set1_pd(p_i_x);
982  const __m128d P_I_Y = _mm_set1_pd(p_i_y);
983  const __m128d P_I_Z = _mm_set1_pd(p_i_z);
984 
985  g += 2;
986  for ( ; g < gu - 2; g +=2 ) {
987  // compute 1d distance, 2-way parallel
988  j0 = jprev0;
989  j1 = jprev1;
990 
991  __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
992  __m128d R2_01 = _mm_mul_pd(T_01, T_01);
993  T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
994  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
995  T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
996  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
997 
998  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
999  sortEntry0 = sortValues + g;
1000  sortEntry1 = sortValues + g + 1;
1001  jprev0 = sortEntry0->index;
1002  jprev1 = sortEntry1->index;
1003  #else
1004  jprev0 = glist[g ];
1005  jprev1 = glist[g+1];
1006  #endif
1007 
1008  PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1009  PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1010  PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1011 
1012  __align(16) double r2_01[2];
1013  _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
1014 
1015  // XXX these could possibly benefit from SSE-based conditionals
1016  bool test0 = ( r2_01[0] < groupplcutoff2 );
1017  bool test1 = ( r2_01[1] < groupplcutoff2 );
1018 
1019  //removing ifs benefits on many architectures
1020  //as the extra stores will only warm the cache up
1021  goodglist [ hu ] = j0;
1022  goodglist [ hu + test0 ] = j1;
1023 
1024  hu += test0 + test1;
1025  }
1026  g-=2;
1027  }
1028 #elif defined (A2_QPX)
1029  if ( gu - g > 6 ) {
1030 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1031  register SortEntry* sortEntry0 = sortValues + g;
1032  register SortEntry* sortEntry1 = sortValues + g + 1;
1033  register int jprev0 = sortEntry0->index;
1034  register int jprev1 = sortEntry1->index;
1035  #else
1036  register int jprev0 = glist[g ];
1037  register int jprev1 = glist[g + 1];
1038  #endif
1039 
1040  __dcbt ((void*)(p_1 + jprev0));
1041  register int j0;
1042  register int j1;
1043  vector4double pj_v_0, pj_v_1;
1044  vector4double v_0, v_1;
1045  register BigReal r2_0, r2_1;
1046 
1047  pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
1048  pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);
1049 
1050  g += 2;
1051  for ( ; g < gu - 2; g +=2 ) {
1052  // compute 1d distance, 2-way parallel
1053  j0 = jprev0;
1054  j1 = jprev1;
1055 
1056 #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1057  sortEntry0 = sortValues + g;
1058  sortEntry1 = sortValues + g + 1;
1059  jprev0 = sortEntry0->index;
1060  jprev1 = sortEntry1->index;
1061  #else
1062  jprev0 = glist[g ];
1063  jprev1 = glist[g+1];
1064  #endif
1065 
1066  v_0 = vec_sub (p_i_v, pj_v_0);
1067  v_1 = vec_sub (p_i_v, pj_v_1);
1068  v_0 = vec_mul (v_0, v_0);
1069  v_1 = vec_mul (v_1, v_1);
1070 
1071  r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
1072  r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
1073 
1074  pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
1075  pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);
1076 
1077  size_t test0 = ( groupplcutoff2 > r2_0 );
1078  size_t test1 = ( groupplcutoff2 > r2_1 );
1079 
1080  //removing ifs benefits on many architectures
1081  //as the extra stores will only warm the cache up
1082  goodglist [ hu ] = j0;
1083  goodglist [ hu + test0 ] = j1;
1084 
1085  hu += test0 + test1;
1086  }
1087  g-=2;
1088  }
1089 #else
1090  if ( gu - g > 6 ) {
1091 
1092  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1093  register SortEntry* sortEntry0 = sortValues + g;
1094  register SortEntry* sortEntry1 = sortValues + g + 1;
1095  register int jprev0 = sortEntry0->index;
1096  register int jprev1 = sortEntry1->index;
1097  #else
1098  register int jprev0 = glist[g ];
1099  register int jprev1 = glist[g + 1];
1100  #endif
1101 
1102  register int j0;
1103  register int j1;
1104 
1105  register BigReal pj_x_0, pj_x_1;
1106  register BigReal pj_y_0, pj_y_1;
1107  register BigReal pj_z_0, pj_z_1;
1108  register BigReal t_0, t_1, r2_0, r2_1;
1109 
1110  pj_x_0 = p_1[jprev0].position.x;
1111  pj_x_1 = p_1[jprev1].position.x;
1112 
1113  pj_y_0 = p_1[jprev0].position.y;
1114  pj_y_1 = p_1[jprev1].position.y;
1115 
1116  pj_z_0 = p_1[jprev0].position.z;
1117  pj_z_1 = p_1[jprev1].position.z;
1118 
1119  g += 2;
1120  for ( ; g < gu - 2; g +=2 ) {
1121  // compute 1d distance, 2-way parallel
1122  j0 = jprev0;
1123  j1 = jprev1;
1124 
1125  t_0 = p_i_x - pj_x_0;
1126  t_1 = p_i_x - pj_x_1;
1127  r2_0 = t_0 * t_0;
1128  r2_1 = t_1 * t_1;
1129 
1130  t_0 = p_i_y - pj_y_0;
1131  t_1 = p_i_y - pj_y_1;
1132  r2_0 += t_0 * t_0;
1133  r2_1 += t_1 * t_1;
1134 
1135  t_0 = p_i_z - pj_z_0;
1136  t_1 = p_i_z - pj_z_1;
1137  r2_0 += t_0 * t_0;
1138  r2_1 += t_1 * t_1;
1139 
1140  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1141  sortEntry0 = sortValues + g;
1142  sortEntry1 = sortValues + g + 1;
1143  jprev0 = sortEntry0->index;
1144  jprev1 = sortEntry1->index;
1145  #else
1146  jprev0 = glist[g ];
1147  jprev1 = glist[g+1];
1148  #endif
1149 
1150  pj_x_0 = p_1[jprev0].position.x;
1151  pj_x_1 = p_1[jprev1].position.x;
1152  pj_y_0 = p_1[jprev0].position.y;
1153  pj_y_1 = p_1[jprev1].position.y;
1154  pj_z_0 = p_1[jprev0].position.z;
1155  pj_z_1 = p_1[jprev1].position.z;
1156 
1157  bool test0 = ( r2_0 < groupplcutoff2 );
1158  bool test1 = ( r2_1 < groupplcutoff2 );
1159 
1160  //removing ifs benefits on many architectures
1161  //as the extra stores will only warm the cache up
1162  goodglist [ hu ] = j0;
1163  goodglist [ hu + test0 ] = j1;
1164 
1165  hu += test0 + test1;
1166  }
1167  g-=2;
1168  }
1169 #endif
1170 #endif // NAMD_KNL
1171 #pragma omp simd simdlen(16)
1172  for (; g < gu; g++) {
1173 
1174  #if NAMD_ComputeNonbonded_SortAtoms != 0 && ( 0 PAIR ( + 1 ) )
1175  register SortEntry* sortEntry = sortValues + g;
1176  register int j = sortEntry->index;
1177  #else
1178  int j = glist[g];
1179  #endif
1180 
1181  BigReal p_j_x = p_1[j].position.x;
1182  BigReal p_j_y = p_1[j].position.y;
1183  BigReal p_j_z = p_1[j].position.z;
1184 
1185  BigReal r2 = p_i_x - p_j_x;
1186  r2 *= r2;
1187  BigReal t2 = p_i_y - p_j_y;
1188  r2 += t2 * t2;
1189  t2 = p_i_z - p_j_z;
1190  r2 += t2 * t2;
1191 
1192 #pragma omp ordered simd monotonic(hu:1)
1193  if ( r2 <= groupplcutoff2 )
1194  goodglist[hu ++] = j;
1195  }
1196 
1197  for ( int h=0; h<hu; ++h ) {
1198  int j = goodglist[h];
1199  int nbgs = p_1[j].nonbondedGroupSize;
1200  pli[0] = j; // copy over the next three in any case
1201  pli[1] = j+1;
1202  pli[2] = j+2;
1203  if ( nbgs & 4 ) { // if nbgs > 3, assume nbgs <= 5
1204  pli[3] = j+3;
1205  pli[4] = j+4;
1206  }
1207  pli += nbgs;
1208  }
1209  }
1210  }
1211 
1212  pairlistindex = pli - pairlist;
1213  // make sure padded element on pairlist points to real data
1214  if ( pairlistindex ) {
1215  pairlist[pairlistindex] = pairlist[pairlistindex-1];
1216  } PAIR( else { // skip empty loops if no pairs were found
1217  i += p_i.nonbondedGroupSize;
1218  continue;
1219  } )
1220  } // if i is nonbonded group parent
1221  SELF
1222  (
 1223  // self-comparisons require list to be incremented
 1224  // pair-comparisons use entire list (pairlistoffset is 0)
1225  else pairlistoffset++;
1226  )
1227 
1228  const int atomfixed = ( fixedAtomsOn && pExt_i.atomFixed );
1229 
1230  register int *pli = pairlist2;
1231 
1232  plint *pairlistn = pairlists.newlist(j_upper + 5 + 5 ALCH( + 20 ));
1233  register plint *plin = pairlistn;
1234 
1235 
1236  if ( qmForcesOn ) {
1237 
1238  Real qmGroup_i = mol->get_qmAtomGroup(pExt_i.id);
1239 
1240  for (int k=pairlistoffset; k<pairlistindex; k++) {
1241  j = pairlist[k];
1242 
1243  Real qmGroup_j = mol->get_qmAtomGroup(pExt_1[j].id);
1244 
1245  // There can be no non-bonded interaction between an QM-QM pair of the same QM group
1246  if (qmGroup_i > 0) {
1247  if (qmGroup_i == qmGroup_j) {
1248  continue;
1249  } else {
1250  *(pli++) = j;
1251  }
1252  } else {
1253  *(pli++) = j;
1254  }
1255  }
1256 
1257  int npair2_int = pli - pairlist2;
1258  pli = pairlist2;
1259  for (int k=0; k<npair2_int; k++) {
1260 
1261  j = pairlist2[k];
1262 
1263  BigReal p_j_x = p_1[j].position.x;
1264  BigReal r2 = p_i_x - p_j_x;
1265  r2 *= r2;
1266  BigReal p_j_y = p_1[j].position.y;
1267  BigReal t2 = p_i_y - p_j_y;
1268  r2 += t2 * t2;
1269  BigReal p_j_z = p_1[j].position.z;
1270  t2 = p_i_z - p_j_z;
1271  r2 += t2 * t2;
1272 
1273  if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1274  int atom2 = pExt_1[j].id;
1275  if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1276  else *(plin++) = j;
1277  }
1278  }
1279  } else
1280 
1281 
1282 
1283  INT(
1284  if ( pairInteractionOn ) {
1285  const int ifep_type = p_i.partition;
1286  if (pairInteractionSelf) {
1287  if (ifep_type != 1) { PAIR(++i;) continue; }
1288  for (int k=pairlistoffset; k<pairlistindex; k++) {
1289  j = pairlist[k];
1290  const int jfep_type = p_1[j].partition;
1291  // for pair-self, both atoms must be in group 1.
1292  if (jfep_type == 1) {
1293  *(pli++) = j;
1294  }
1295  }
1296  } else {
1297  if (ifep_type != 1 && ifep_type != 2) { PAIR(++i;) continue; }
1298  for (int k=pairlistoffset; k<pairlistindex; k++) {
1299  j = pairlist[k];
1300  const int jfep_type = p_1[j].partition;
1301  // for pair, must have one from each group.
1302  if (ifep_type + jfep_type == 3) {
1303  *(pli++) = j;
1304  }
1305  }
1306  }
1307  int npair2_int = pli - pairlist2;
1308  pli = pairlist2;
1309  for (int k=0; k<npair2_int; k++) {
1310  j = pairlist2[k];
1311  BigReal p_j_x = p_1[j].position.x;
1312  BigReal r2 = p_i_x - p_j_x;
1313  r2 *= r2;
1314  BigReal p_j_y = p_1[j].position.y;
1315  BigReal t2 = p_i_y - p_j_y;
1316  r2 += t2 * t2;
1317  BigReal p_j_z = p_1[j].position.z;
1318  t2 = p_i_z - p_j_z;
1319  r2 += t2 * t2;
1320  if ( ( ! (atomfixed && pExt_1[j].atomFixed) ) && (r2 <= plcutoff2) ) {
1321  int atom2 = pExt_1[j].id;
1322  if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1323  else *(plin++) = j;
1324  }
1325  }
1326  } else
1327  )
1328  if ( atomfixed ) {
1329  for (int k=pairlistoffset; k<pairlistindex; k++) {
1330  j = pairlist[k];
1331  BigReal p_j_x = p_1[j].position.x;
1332  BigReal r2 = p_i_x - p_j_x;
1333  r2 *= r2;
1334  BigReal p_j_y = p_1[j].position.y;
1335  BigReal t2 = p_i_y - p_j_y;
1336  r2 += t2 * t2;
1337  BigReal p_j_z = p_1[j].position.z;
1338  t2 = p_i_z - p_j_z;
1339  r2 += t2 * t2;
1340  if ( (! pExt_1[j].atomFixed) && (r2 <= plcutoff2) ) {
1341  int atom2 = pExt_1[j].id;
1342  if ( atom2 >= excl_min && atom2 <= excl_max ) *(pli++) = j;
1343  else *(plin++) = j;
1344  }
1345  }
1346  } else {
1347  int k = pairlistoffset;
1348  int ku = pairlistindex;
1349  if ( k < ku ) {
1350 #ifndef NAMD_KNL
1351 #if defined(__SSE2__) && ! defined(NAMD_DISABLE_SSE)
1352  if ( ku - k > 6 ) {
1353  register int jprev0 = pairlist [k ];
1354  register int jprev1 = pairlist [k + 1];
1355 
1356  register int j0;
1357  register int j1;
1358 
1359  __m128d PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1360  __m128d PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1361  __m128d PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1362 
1363  // these don't change here, so we could move them into outer scope
1364  const __m128d P_I_X = _mm_set1_pd(p_i_x);
1365  const __m128d P_I_Y = _mm_set1_pd(p_i_y);
1366  const __m128d P_I_Z = _mm_set1_pd(p_i_z);
1367 
1368  int atom2_0 = pExt_1[jprev0].id;
1369  int atom2_1 = pExt_1[jprev1].id;
1370 
1371  k += 2;
1372  for ( ; k < ku - 2; k +=2 ) {
1373  // compute 1d distance, 2-way parallel
1374  j0 = jprev0;
1375  j1 = jprev1;
1376 
1377  __m128d T_01 = _mm_sub_pd(P_I_X, PJ_X_01);
1378  __m128d R2_01 = _mm_mul_pd(T_01, T_01);
1379  T_01 = _mm_sub_pd(P_I_Y, PJ_Y_01);
1380  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1381  T_01 = _mm_sub_pd(P_I_Z, PJ_Z_01);
1382  R2_01 = _mm_add_pd(R2_01, _mm_mul_pd(T_01, T_01));
1383 
1384  jprev0 = pairlist[k];
1385  jprev1 = pairlist[k+1];
1386 
1387  PJ_X_01 = _mm_set_pd(p_1[jprev1].position.x, p_1[jprev0].position.x);
1388  PJ_Y_01 = _mm_set_pd(p_1[jprev1].position.y, p_1[jprev0].position.y);
1389  PJ_Z_01 = _mm_set_pd(p_1[jprev1].position.z, p_1[jprev0].position.z);
1390 
1391  __align(16) double r2_01[2];
1392  _mm_store_pd(r2_01, R2_01); // 16-byte-aligned store
1393 
1394  if (r2_01[0] <= plcutoff2) {
1395  if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
1396  *(pli++) = j0;
1397  else
1398  *(plin++) = j0;
1399  }
1400  atom2_0 = pExt_1[jprev0].id;
1401 
1402  if (r2_01[1] <= plcutoff2) {
1403  if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
1404  *(pli++) = j1;
1405  else
1406  *(plin++) = j1;
1407  }
1408  atom2_1 = pExt_1[jprev1].id;
1409  }
1410  k-=2;
1411  }
1412 #elif defined(A2_QPX)
1413  if ( ku - k > 6 ) {
1414  register int jprev0 = pairlist [k];
1415  register int jprev1 = pairlist [k + 1];
1416 
1417  register int j0;
1418  register int j1;
1419  vector4double pj_v_0, pj_v_1;
1420  vector4double v_0, v_1;
1421  BigReal r2_0, r2_1;
1422 
1423  pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
1424  pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);
1425 
1426  int atom2_0 = pExt_1[jprev0].id;
1427  int atom2_1 = pExt_1[jprev1].id;
1428 
1429  k += 2;
1430  for ( ; k < ku - 2; k +=2 ) {
1431  // compute 1d distance, 2-way parallel
1432  j0 = jprev0;
1433  j1 = jprev1;
1434 
1435  v_0 = vec_sub (p_i_v, pj_v_0);
1436  v_1 = vec_sub (p_i_v, pj_v_1);
1437  v_0 = vec_mul (v_0, v_0);
1438  v_1 = vec_mul (v_1, v_1);
1439 
1440  r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
1441  r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
1442 
1443  jprev0 = pairlist[k];
1444  jprev1 = pairlist[k+1];
1445 
1446  pj_v_0 = vec_ld(jprev0 * sizeof(CompAtom), (BigReal *)p_1);
1447  pj_v_1 = vec_ld(jprev1 * sizeof(CompAtom), (BigReal *)p_1);
1448 
1449  if (r2_0 <= plcutoff2) {
1450  if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
1451  *(pli++) = j0;
1452  else
1453  *(plin++) = j0;
1454  }
1455  atom2_0 = pExt_1[jprev0].id;
1456 
1457  if (r2_1 <= plcutoff2) {
1458  if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
1459  *(pli++) = j1;
1460  else
1461  *(plin++) = j1;
1462  }
1463  atom2_1 = pExt_1[jprev1].id;
1464  }
1465  k-=2;
1466  }
1467 #else
1468  if ( ku - k > 6 ) {
1469  register int jprev0 = pairlist [k];
1470  register int jprev1 = pairlist [k + 1];
1471 
1472  register int j0;
1473  register int j1;
1474 
1475  register BigReal pj_x_0, pj_x_1;
1476  register BigReal pj_y_0, pj_y_1;
1477  register BigReal pj_z_0, pj_z_1;
1478  register BigReal t_0, t_1, r2_0, r2_1;
1479 
1480  pj_x_0 = p_1[jprev0].position.x;
1481  pj_x_1 = p_1[jprev1].position.x;
1482 
1483  pj_y_0 = p_1[jprev0].position.y;
1484  pj_y_1 = p_1[jprev1].position.y;
1485 
1486  pj_z_0 = p_1[jprev0].position.z;
1487  pj_z_1 = p_1[jprev1].position.z;
1488 
1489  int atom2_0 = pExt_1[jprev0].id;
1490  int atom2_1 = pExt_1[jprev1].id;
1491 
1492  k += 2;
1493  for ( ; k < ku - 2; k +=2 ) {
1494  // compute 1d distance, 2-way parallel
1495  j0 = jprev0;
1496  j1 = jprev1;
1497 
1498  t_0 = p_i_x - pj_x_0;
1499  t_1 = p_i_x - pj_x_1;
1500  r2_0 = t_0 * t_0;
1501  r2_1 = t_1 * t_1;
1502 
1503  t_0 = p_i_y - pj_y_0;
1504  t_1 = p_i_y - pj_y_1;
1505  r2_0 += t_0 * t_0;
1506  r2_1 += t_1 * t_1;
1507 
1508  t_0 = p_i_z - pj_z_0;
1509  t_1 = p_i_z - pj_z_1;
1510  r2_0 += t_0 * t_0;
1511  r2_1 += t_1 * t_1;
1512 
1513  jprev0 = pairlist[k];
1514  jprev1 = pairlist[k+1];
1515 
1516  pj_x_0 = p_1[jprev0].position.x;
1517  pj_x_1 = p_1[jprev1].position.x;
1518  pj_y_0 = p_1[jprev0].position.y;
1519  pj_y_1 = p_1[jprev1].position.y;
1520  pj_z_0 = p_1[jprev0].position.z;
1521  pj_z_1 = p_1[jprev1].position.z;
1522 
1523  if (r2_0 <= plcutoff2) {
1524  if ( atom2_0 >= excl_min && atom2_0 <= excl_max )
1525  *(pli++) = j0;
1526  else
1527  *(plin++) = j0;
1528  }
1529  atom2_0 = pExt_1[jprev0].id;
1530 
1531  if (r2_1 <= plcutoff2) {
1532  if ( atom2_1 >= excl_min && atom2_1 <= excl_max )
1533  *(pli++) = j1;
1534  else
1535  *(plin++) = j1;
1536  }
1537  atom2_1 = pExt_1[jprev1].id;
1538  }
1539  k-=2;
1540  }
1541 #endif
1542 #endif // NAMD_KNL
1543 
1544 #pragma omp simd simdlen(16)
1545  for (; k < ku; k++) {
1546  int j = pairlist[k];
1547  int atom2 = pExt_1[j].id;
1548 
1549  BigReal p_j_x = p_1[j].position.x;
1550  BigReal p_j_y = p_1[j].position.y;
1551  BigReal p_j_z = p_1[j].position.z;
1552 
1553  BigReal r2 = p_i_x - p_j_x;
1554  r2 *= r2;
1555  BigReal t2 = p_i_y - p_j_y;
1556  r2 += t2 * t2;
1557  t2 = p_i_z - p_j_z;
1558  r2 += t2 * t2;
1559 
1560 #pragma omp ordered simd monotonic(plin:1, pli:1)
1561  if (r2 <= plcutoff2) {
1562  if ( atom2 >= excl_min && atom2 <= excl_max )
1563  *(pli++) = j;
1564  else
1565  *(plin++) = j;
1566  }
1567  }
1568  }
1569  }
1570 
1571  PAIR(
1572  if ( plin == pairlistn && pli == pairlist2 ) {
1573  ++i;
1574  continue;
1575  }
1576  pairlists.setIndexValue(i);
1577  )
1578 
1579  plint *plix = pairlistx;
1580  plint *plim = pairlistm;
1581  ALCH(
1582  plint *plinA1 = pairlistnA1;
1583  plint *plixA1 = pairlistxA1;
1584  plint *plimA1 = pairlistmA1;
1585  plint *plinA2 = pairlistnA2;
1586  plint *plixA2 = pairlistxA2;
1587  plint *plimA2 = pairlistmA2;
1588  plint *plinA3 = pairlistnA3;
1589  plint *plixA3 = pairlistxA3;
1590  plint *plimA3 = pairlistmA3;
1591  plint *plinA4 = pairlistnA4;
1592  plint *plixA4 = pairlistxA4;
1593  plint *plimA4 = pairlistmA4;
1594  )
1595 
1596  int k;
1597 
1598 #if 0 ALCH(+1)
1599  int unsortedNpairn = plin - pairlistn;
1600  plin = pairlistn;
1601  for ( k=0; k<unsortedNpairn; ++k ) {
1602  int j = pairlistn[k];
1603  switch(pswitchTable[p_i_partition + 5*(p_1[j].partition)]) {
1604  case 0: *(plin++) = j; break;
1605  case 1: *(plinA1++) = j; break;
1606  case 2: *(plinA2++) = j; break;
1607  case 3: *(plinA3++) = j; break;
1608  case 4: *(plinA4++) = j; break;
1609  }
1610  }
1611 #endif
1612 
1613  int npair2 = pli - pairlist2;
1614  // if ( npair2 ) pairlist2[npair2] = pairlist2[npair2-1];
1615  // removed code for implicit exclusions within hydrogen groups -JCP
1616  for (k=0; k < npair2; ++k ) {
1617  int j = pairlist2[k];
1618  int atom2 = pExt_1[j].id;
1619  int excl_flag = excl_flags[atom2];
1620  ALCH(int pswitch = pswitchTable[p_i_partition + 5*(p_1[j].partition)];)
1621  switch ( excl_flag ALCH( + 3 * pswitch)) {
1622  case 0: *(plin++) = j; break;
1623  case 1: *(plix++) = j; break;
1624  case 2: *(plim++) = j; break;
1625  ALCH(
1626  case 3: *(plinA1++) = j; break;
1627  case 6: *(plinA2++) = j; break;
1628  case 9: *(plinA3++) = j; break;
1629  case 12: *(plinA4++) = j; break;
1630  case 5: *(plimA1++) = j; break;
1631  case 8: *(plimA2++) = j; break;
1632  case 11: *(plimA3++) = j; break;
1633  case 14: *(plimA4++) = j; break;
1634  case 4: *(plixA1++) = j; break;
1635  case 7: *(plixA2++) = j; break;
1636  case 10: *(plixA3++) = j; break;
1637  case 13: *(plixA4++) = j; break;
1638  )
1639  }
1640  }
1641 
1642  npairn = plin - pairlistn;
1643  pairlistn_save = pairlistn;
1644  pairlistn_save[npairn] = npairn ? pairlistn_save[npairn-1] : -1;
1645  pairlists.newsize(npairn + 1);
1646 
1647  npairx = plix - pairlistx;
1648  pairlistx_save = pairlists.newlist();
1649  for ( k=0; k<npairx; ++k ) {
1650  pairlistx_save[k] = pairlistx[k];
1651  }
1652  pairlistx_save[k] = k ? pairlistx_save[k-1] : -1;
1653  pairlists.newsize(npairx + 1);
1654 
1655  npairm = plim - pairlistm;
1656  pairlistm_save = pairlists.newlist();
1657  for ( k=0; k<npairm; ++k ) {
1658  pairlistm_save[k] = pairlistm[k];
1659  }
1660  pairlistm_save[k] = k ? pairlistm_save[k-1] : -1;
1661  pairlists.newsize(npairm + 1);
1662 
1663 
1664 #if 0 ALCH(+1)
1665 #define PAIRLISTFROMARRAY(NPAIRS,PL1,PL2,PLSAVE) \
1666 { \
1667  (NPAIRS) = (PL2) - (PL1); \
1668  (PLSAVE) = pairlists.newlist(); \
1669  for ( k=0; k<(NPAIRS); ++k ) { \
1670  (PLSAVE)[k] = (PL1)[k]; \
1671  } \
1672  (PLSAVE)[k] = k ? (PLSAVE)[k-1] : -1; \
1673  pairlists.newsize((NPAIRS) + 1); \
1674 }
1675 
1676  PAIRLISTFROMARRAY(npairnA1,pairlistnA1,plinA1,pairlistnA1_save);
1677  PAIRLISTFROMARRAY(npairxA1,pairlistxA1,plixA1,pairlistxA1_save);
1678  PAIRLISTFROMARRAY(npairmA1,pairlistmA1,plimA1,pairlistmA1_save);
1679  PAIRLISTFROMARRAY(npairnA2,pairlistnA2,plinA2,pairlistnA2_save);
1680  PAIRLISTFROMARRAY(npairxA2,pairlistxA2,plixA2,pairlistxA2_save);
1681  PAIRLISTFROMARRAY(npairmA2,pairlistmA2,plimA2,pairlistmA2_save);
1682  PAIRLISTFROMARRAY(npairnA3,pairlistnA3,plinA3,pairlistnA3_save);
1683  PAIRLISTFROMARRAY(npairxA3,pairlistxA3,plixA3,pairlistxA3_save);
1684  PAIRLISTFROMARRAY(npairmA3,pairlistmA3,plimA3,pairlistmA3_save);
1685  PAIRLISTFROMARRAY(npairnA4,pairlistnA4,plinA4,pairlistnA4_save);
1686  PAIRLISTFROMARRAY(npairxA4,pairlistxA4,plixA4,pairlistxA4_save);
1687  PAIRLISTFROMARRAY(npairmA4,pairlistmA4,plimA4,pairlistmA4_save);
1688 #undef PAIRLISTFROMARRAY
1689 
1690 #endif
1691 
1692  if ( ! savePairlists ) pairlists.reset(); // limit space usage
1693  PAIR( pairlists.addIndex(); )
1694 
1695  // PAIR( iout << i << " " << i_upper << " save\n" << endi;)
1696  } else { // if ( savePairlists || ! usePairlists )
1697  // PAIR( iout << i << " " << i_upper << " use\n" << endi;)
1698 
1699  pairlists.nextlist(&pairlistn_save,&npairn); --npairn;
1700  pairlists.nextlist(&pairlistx_save,&npairx); --npairx;
1701  pairlists.nextlist(&pairlistm_save,&npairm); --npairm;
1702  ALCH(
1703  pairlists.nextlist(&pairlistnA1_save,&npairnA1); --npairnA1;
1704  pairlists.nextlist(&pairlistxA1_save,&npairxA1); --npairxA1;
1705  pairlists.nextlist(&pairlistmA1_save,&npairmA1); --npairmA1;
1706  pairlists.nextlist(&pairlistnA2_save,&npairnA2); --npairnA2;
1707  pairlists.nextlist(&pairlistxA2_save,&npairxA2); --npairxA2;
1708  pairlists.nextlist(&pairlistmA2_save,&npairmA2); --npairmA2;
1709  pairlists.nextlist(&pairlistnA3_save,&npairnA3); --npairnA3;
1710  pairlists.nextlist(&pairlistxA3_save,&npairxA3); --npairxA3;
1711  pairlists.nextlist(&pairlistmA3_save,&npairmA3); --npairmA3;
1712  pairlists.nextlist(&pairlistnA4_save,&npairnA4); --npairnA4;
1713  pairlists.nextlist(&pairlistxA4_save,&npairxA4); --npairxA4;
1714  pairlists.nextlist(&pairlistmA4_save,&npairmA4); --npairmA4;
1715  )
1716 
1717  } // if ( savePairlists || ! usePairlists )
1718 
1719  LES( BigReal *lambda_table_i =
1720  lambda_table + (lesFactor+1) * p_i.partition; )
1721 
1722  INT(
1723  const BigReal force_sign = (
1725  ( ( p_i.partition == 1 ) ? 1. : -1. ) : 0.
1726  );
1727 
1728  )
1729 
1730  const BigReal kq_i = COULOMB * p_i.charge * scaling * dielectric_1;
1731  const LJTable::TableEntry * const lj_row =
1732  ljTable->table_row(p_i.vdwType);
1733 
1734 #ifdef A2_QPX
1735  vector4double kq_iv = (vector4double)(0.0);
1736  vector4double f_i_v = (vector4double)(0.0);
1737  vector4double fullf_i_v = (vector4double)(0.0);
1738  vector4double full_cnst = (vector4double)(0.);
1739 #define f_i_x vec_extract(f_i_v, 0)
1740 #define f_i_y vec_extract(f_i_v, 1)
1741 #define f_i_z vec_extract(f_i_v, 2)
1742 #define fullf_i_x vec_extract(fullf_i_v, 0)
1743 #define fullf_i_y vec_extract(fullf_i_v, 1)
1744 #define fullf_i_z vec_extract(fullf_i_v, 2)
1745 #else
1746  SHORT( FAST( BigReal f_i_x = 0.; ) )
1747  SHORT( FAST( BigReal f_i_y = 0.; ) )
1748  SHORT( FAST( BigReal f_i_z = 0.; ) )
1749  FULL( BigReal fullf_i_x = 0.; )
1750  FULL( BigReal fullf_i_y = 0.; )
1751  FULL( BigReal fullf_i_z = 0.; )
1752 #endif
1753 
1754  int npairi;
1755  int k;
1756 
1757 #if KNL(1)+0
1758  const float kq_i_f = kq_i;
1759 
1760  npairi = pairlist_from_pairlist_knl(ComputeNonbondedUtil::cutoff2_f,
1761  p_i_x_f, p_i_y_f, p_i_z_f, pFlt_1, pairlistn_save, npairn, pairlisti,
1762  r2list_f, xlist, ylist, zlist);
1763 #else
1765  p_i_x, p_i_y, p_i_z, p_1, pairlistn_save, npairn, pairlisti,
1766  r2_delta, r2list);
1767 #endif
1768 
1769  if ( npairi ) {
1770 
1771 // BEGIN NBTHOLE OF DRUDE MODEL
1772 #if (FAST(1+)0)
1773  if (drudeNbthole && p_i.hydrogenGroupSize > 1) {
1774 
1775  Parameters *parameters = params->parameters;
1776  const NbtholePairValue * const nbthole_array = parameters->nbthole_array;
1777  const int NumNbtholePairParams = parameters->NumNbtholePairParams;
1778  BigReal drudeNbtholeCut = simParams -> drudeNbtholeCut;
1779  NOKNL( BigReal drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut) + r2_delta; )
1780  KNL( float drudeNbtholeCut2 = (drudeNbtholeCut * drudeNbtholeCut); )
1781  BigReal CC = COULOMB * scaling * dielectric_1;
1782  int kk;
1783 
1784  for (k = 0; k < npairi; k++) {
1785  NOKNL( if (r2list[k] > drudeNbtholeCut2) { continue; } )
1786  KNL( if (r2list_f[k] > drudeNbtholeCut2) { continue; } )
1787 
1788  const int j = pairlisti[k];
1789  const CompAtom& p_j = p_1[j];
1790 
1791  if ( p_j.hydrogenGroupSize < 2 ) { continue; }
1792 
1793  for (kk = 0;kk < NumNbtholePairParams; kk++) {
1794 
1795  if (((nbthole_array[kk].ind1 == p_i.vdwType) &&
1796  (nbthole_array[kk].ind2 == p_j.vdwType)) ||
1797  ((nbthole_array[kk].ind2 == p_i.vdwType) &&
1798  (nbthole_array[kk].ind1 == p_j.vdwType))) {
1799  break;
1800  }
1801  }
1802  if ( kk < NumNbtholePairParams ) {
1803 
1804  BigReal aprod = mol->GetAtomAlpha(pExt_0[i].id) * mol->GetAtomAlpha(pExt_1[j].id);
1805  const BigReal aa = nbthole_array[kk].tholeij * powf(aprod, -1.f/6);
1806  const BigReal qqaa = CC * p_0[i].charge * p_1[j].charge;
1807  const BigReal qqad = CC * p_0[i].charge * p_1[j+1].charge;
1808  const BigReal qqda = CC * p_0[i+1].charge * p_1[j].charge;
1809  const BigReal qqdd = CC * p_0[i+1].charge * p_1[j+1].charge;
1810 
1811  Vector raa = (p_0[i].position + params->offset) - p_1[j].position;
1812  Vector rad = (p_0[i].position + params->offset) - p_1[j+1].position;
1813  Vector rda = (p_0[i+1].position + params->offset) - p_1[j].position;
1814  Vector rdd = (p_0[i+1].position + params->offset) - p_1[j+1].position;
1815 
1816  BigReal raa_invlen = raa.rlength(); // reciprocal of length
1817  BigReal rad_invlen = rad.rlength();
1818  BigReal rda_invlen = rda.rlength();
1819  BigReal rdd_invlen = rdd.rlength();
1820 
1821  BigReal auaa = aa / raa_invlen;
1822  BigReal auad = aa / rad_invlen;
1823  BigReal auda = aa / rda_invlen;
1824  BigReal audd = aa / rdd_invlen;
1825 
1826  BigReal expauaa = exp(-auaa);
1827  BigReal expauad = exp(-auad);
1828  BigReal expauda = exp(-auda);
1829  BigReal expaudd = exp(-audd);
1830 
1831  BigReal polyauaa = 1 + 0.5*auaa;
1832  BigReal polyauad = 1 + 0.5*auad;
1833  BigReal polyauda = 1 + 0.5*auda;
1834  BigReal polyaudd = 1 + 0.5*audd;
1835 
1836  BigReal ethole = 0;
1837  ethole += qqaa * raa_invlen * (- polyauaa * expauaa);
1838  ethole += qqad * rad_invlen * (- polyauad * expauad);
1839  ethole += qqda * rda_invlen * (- polyauda * expauda);
1840  ethole += qqdd * rdd_invlen * (- polyaudd * expaudd);
1841 
1842  polyauaa = 1 + auaa*polyauaa;
1843  polyauad = 1 + auad*polyauad;
1844  polyauda = 1 + auda*polyauda;
1845  polyaudd = 1 + audd*polyaudd;
1846 
1847  BigReal raa_invlen3 = raa_invlen * raa_invlen * raa_invlen;
1848  BigReal rad_invlen3 = rad_invlen * rad_invlen * rad_invlen;
1849  BigReal rda_invlen3 = rda_invlen * rda_invlen * rda_invlen;
1850  BigReal rdd_invlen3 = rdd_invlen * rdd_invlen * rdd_invlen;
1851 
1852  BigReal dfaa = qqaa * raa_invlen3 * (polyauaa*expauaa);
1853  BigReal dfad = qqad * rad_invlen3 * (polyauad*expauad);
1854  BigReal dfda = qqda * rda_invlen3 * (polyauda*expauda);
1855  BigReal dfdd = qqdd * rdd_invlen3 * (polyaudd*expaudd);
1856 
1857  Vector faa = -dfaa * raa;
1858  Vector fad = -dfad * rad;
1859  Vector fda = -dfda * rda;
1860  Vector fdd = -dfdd * rdd;
1861 
1862  SHORT(f_net) NOSHORT(fullf_net) += (faa + fad) + (fda + fdd);
1863  params->ff[0][i] += faa + fad;
1864  params->ff[0][i+1] += fda + fdd;
1865  params->ff[1][j] -= faa + fda;
1866  params->ff[1][j+1] -= fad + fdd;
1867 
1868  reduction[electEnergyIndex] += ethole;
1869  }
1870  }
1871  }
1872 #endif
1873 // END NBTHOLE OF DRUDE MODEL
1874 
1875  // BEGIN LA
1876 #if (FAST(1+)0)
1877  if (doLoweAndersen && p_i.hydrogenGroupSize) {
1878  BigReal loweAndersenCutoff = simParams->loweAndersenCutoff;
1879  NOKNL( BigReal loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff) + r2_delta; )
1880  KNL( float loweAndersenCutoff2 = (loweAndersenCutoff * loweAndersenCutoff); )
1881  BigReal loweAndersenProb = simParams->loweAndersenRate * (simParams->dt * simParams->nonbondedFrequency) * 0.001; // loweAndersenRate is in 1/ps
1882  const bool loweAndersenUseCOMvelocity = (simParams->rigidBonds != RIGID_NONE);
1883  const BigReal kbT = BOLTZMANN * (simParams->loweAndersenTemp);
1884  const BigReal dt_inv = TIMEFACTOR / (simParams->dt * simParams->nonbondedFrequency);
1885  //const BigReal dt_inv = 1.0 / simParams->dt;
1886  //BigReal kbT = 8.3145e-7 * (simParams->loweAndersenTemp); // in A^2/fs^2 * K
1887 
1888  const CompAtom* v_0 = params->v[0];
1889  const CompAtom* v_1 = params->v[1];
1890  const CompAtom& v_i = v_0[i];
1891  Mass mass_i = v_i.charge;
1892  Velocity vel_i = v_i.position;
1893  Position pos_i = p_i.position;
1894  int atom_i = pExt_0[i].id;
1895 
1896  if (loweAndersenUseCOMvelocity) {
1897  Mass mass = 0;
1898  Velocity vel = 0;
1899  Position pos = 0;
1900  for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
1901  vel += v_0[i+l].charge * v_0[i+l].position;
1902  pos += v_0[i+l].charge * p_0[i+l].position;
1903  mass += v_0[i+l].charge;
1904  }
1905  vel_i = vel / mass;
1906  pos_i = pos / mass;
1907  mass_i = mass;
1908  }
1909 
1910  // Note v[0].charge is actually mass
1911  //Random rand(CkMyPe()); // ?? OK ?? NO!!!!
1912  Random *rand = params->random;
1913  for (k = 0; k < npairi; k++) {
1914  NOKNL( if (r2list[k] > loweAndersenCutoff2) { continue; } )
1915  KNL( if (r2list_f[k] > loweAndersenCutoff2) { continue; } )
1916 
1917  const int j = pairlisti[k];
1918  const CompAtom& v_j = v_1[j];
1919  const CompAtom& p_j = p_1[j];
1920 
1921  if (!p_j.hydrogenGroupSize) { continue; }
1922  if (rand->uniform() > loweAndersenProb) { continue; }
1923 
1924  Mass mass_j = v_j.charge;
1925  Velocity vel_j = v_j.position;
1926  Position pos_j = p_j.position;
1927  int atom_j = pExt_1[j].id;
1928 
1929  if (loweAndersenUseCOMvelocity) {
1930  Mass mass = 0;
1931  Velocity vel = 0;
1932  Position pos = 0;
1933  for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
1934  vel += v_1[j+l].charge * v_1[j+l].position;
1935  pos += v_1[j+l].charge * p_1[j+l].position;
1936  mass += v_1[j+l].charge;
1937  }
1938  vel_j = vel / mass;
1939  pos_j = pos / mass;
1940  mass_j = mass;
1941  }
1942 
1943  //Velocity deltaV = v_i.position - v_j.position;
1944  Velocity deltaV = vel_i - vel_j;
1945  Mass mu_ij = (mass_i * mass_j)/(mass_i + mass_j);
1946  //Vector sep = (p_i.position + params->offset) - p_j.position;
1947  Vector sep = (pos_i + params->offset) - pos_j;
1948  Vector sigma_ij = sep.unit();
1949  BigReal lambda = rand->gaussian() * sqrt(kbT / mu_ij);
1950  Force force = mu_ij * dt_inv * (lambda - (deltaV * sigma_ij)) * sigma_ij;
1951 
1952  //DebugM(1, "atom1 atom2 = " << atom1 << " " << atom2 << " lambda = " << lambda << " force = " << force << " deltaP = " << sep.length() << " sqrt(r2) = " << sqrt(r2list[k]) << "\n");
1953  //DebugM(1, "atom1 atom2 = " << atom_i << " " << atom_j << " mass1 mass2 = " << mass_i << " " << mass_j << " hgrp1 hgrp2 " << p_i.hydrogenGroupSize << " " << p_j.hydrogenGroupSize << "\n");
1954 
1955  if (loweAndersenUseCOMvelocity) {
1956  BigReal inv_mass_i = 1.0 / mass_i;
1957  BigReal inv_mass_j = 1.0 / mass_j;
1958  for (int l = 0; l < p_i.hydrogenGroupSize; l++) {
1959  params->ff[0][i+l] += (v_0[i+l].charge * inv_mass_i) * force;
1960  }
1961  for (int l = 0; l < p_j.hydrogenGroupSize; l++) {
1962  params->ff[1][j+l] -= (v_1[j+l].charge * inv_mass_j) * force;
1963  }
1964  } else {
1965  params->ff[0][i] += force;
1966  params->ff[1][j] -= force;
1967  }
1968  SHORT(f_net) NOSHORT(fullf_net) += force;
1969  }
1970  }
1971 #endif
1972  // END LA
1973 
1974 #define NORMAL(X) X
1975 #define EXCLUDED(X)
1976 #define MODIFIED(X)
1977 #define PRAGMA_SIMD
1978 #if KNL(1+)0
1979  switch ( vdw_switch_mode ) {
1980 
1981 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_ENERGY
1982  case VDW_SWITCH_MODE:
1983 #include "ComputeNonbondedBase2KNL.h"
1984  break;
1985 #undef VDW_SWITCH_MODE
1986 
1987 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_MARTINI
1988  case VDW_SWITCH_MODE:
1989 #include "ComputeNonbondedBase2KNL.h"
1990  break;
1991 #undef VDW_SWITCH_MODE
1992 
1993 #define VDW_SWITCH_MODE VDW_SWITCH_MODE_FORCE
1994  case VDW_SWITCH_MODE:
1995 #include "ComputeNonbondedBase2KNL.h"
1996  break;
1997 #undef VDW_SWITCH_MODE
1998 
1999  }
2000 #else
2001 #include "ComputeNonbondedBase2.h"
2002 #endif
2003 #undef PRAGMA_SIMD
2004 #undef NORMAL
2005 #undef EXCLUDED
2006 #undef MODIFIED
2007 
2008  } // if ( npairi )
2009 
2011  p_i_x, p_i_y, p_i_z, p_1, pairlistm_save, npairm, pairlisti,
2012  r2_delta, r2list);
2013  exclChecksum += npairi;
2014 
2015 #define NORMAL(X)
2016 #define EXCLUDED(X)
2017 #define MODIFIED(X) X
2018 #include "ComputeNonbondedBase2.h"
2019 #undef NORMAL
2020 #undef EXCLUDED
2021 #undef MODIFIED
2022 
2023 #ifdef FULLELECT
2025  p_i_x, p_i_y, p_i_z, p_1, pairlistx_save, npairx, pairlisti,
2026  r2_delta, r2list);
2027  exclChecksum += npairi;
2028 
2029 #undef FAST
2030 #define FAST(X)
2031 #define NORMAL(X)
2032 #define EXCLUDED(X) X
2033 #define MODIFIED(X)
2034 #include "ComputeNonbondedBase2.h"
2035 #undef FAST
2036 #ifdef SLOWONLY
2037  #define FAST(X)
2038 #else
2039  #define FAST(X) X
2040 #endif
2041 #undef NORMAL
2042 #undef EXCLUDED
2043 #undef MODIFIED
2044 #else
2045  exclChecksum += npairx;
2046 #endif
2047 
 2048 #if 0 ALCH(+1) // nonbondedbase2 for alchemical-separated pairlists
2049 
2050  #undef ALCHPAIR
2051  #define ALCHPAIR(X) X
2052  #undef NOT_ALCHPAIR
2053  #define NOT_ALCHPAIR(X)
2054  BigReal myLambda; FEP(BigReal myLambda2;)
2055  BigReal myElecLambda; FEP(BigReal myElecLambda2;)
2056  BigReal myVdwLambda; FEP(BigReal myVdwLambda2;)
2057  BigReal myRepLambda; FEP(BigReal myRepLambda2;)
2058  BigReal myVdwShift; FEP(BigReal myVdwShift2;)
2059  BigReal alch_vdw_energy; BigReal alch_vdw_force;
2060  FEP(BigReal alch_vdw_energy_2; ) TI(BigReal alch_vdw_dUdl;)
2061  BigReal shiftedElec; BigReal shiftedElecForce;
2062 
2063  /********************************************************************/
2064  /*******NONBONDEDBASE2 FOR NORMAL INTERACTIONS SCALED BY LAMBDA******/
2065  /********************************************************************/
2066  #define NORMAL(X) X
2067  #define EXCLUDED(X)
2068  #define MODIFIED(X)
2069 
2070  #define ALCH1(X) X
2071  #define ALCH2(X)
2072  #define ALCH3(X)
2073  #define ALCH4(X)
2075  p_i_x, p_i_y, p_i_z, p_1, pairlistnA1_save, npairnA1, pairlisti,
2076  r2_delta, r2list);
2077  #include "ComputeNonbondedBase2.h" // normal, direction 'up'
2078  #undef ALCH1
2079  #undef ALCH2
2080  #undef ALCH3
2081  #undef ALCH4
2082 
2083  #define ALCH1(X)
2084  #define ALCH2(X) X
2085  #define ALCH3(X)
2086  #define ALCH4(X)
2088  p_i_x, p_i_y, p_i_z, p_1, pairlistnA2_save, npairnA2, pairlisti,
2089  r2_delta, r2list);
2090  #include "ComputeNonbondedBase2.h" // normal, direction 'down'
2091  #undef ALCH1
2092  #undef ALCH2
2093  #undef ALCH3
2094  #undef ALCH4
2095 
2096  #define ALCH3(X) X
2097  #define ALCH1(X)
2098  #define ALCH2(X)
2099  #define ALCH4(X)
2101  p_i_x, p_i_y, p_i_z, p_1, pairlistnA3_save, npairnA3, pairlisti,
2102  r2_delta, r2list);
2103  #include "ComputeNonbondedBase2.h" // normal, direction 'up'
2104  #undef ALCH1
2105  #undef ALCH2
2106  #undef ALCH3
2107  #undef ALCH4
2108 
2109  #define ALCH4(X) X
2110  #define ALCH1(X)
2111  #define ALCH2(X)
2112  #define ALCH3(X)
2114  p_i_x, p_i_y, p_i_z, p_1, pairlistnA4_save, npairnA4, pairlisti,
2115  r2_delta, r2list);
2116  #include "ComputeNonbondedBase2.h" // normal, direction 'down'
2117  #undef ALCH1
2118  #undef ALCH2
2119  #undef ALCH3
2120  #undef ALCH4
2121 
2122  #undef NORMAL
2123  #undef EXCLUDED
2124  #undef MODIFIED
2125  /********************************************************************/
2126  /******NONBONDEDBASE2 FOR MODIFIED INTERACTIONS SCALED BY LAMBDA*****/
2127  /********************************************************************/
2128  #define NORMAL(X)
2129  #define EXCLUDED(X)
2130  #define MODIFIED(X) X
2131 
2132  #define ALCH1(X) X
2133  #define ALCH2(X)
2134  #define ALCH3(X)
2135  #define ALCH4(X)
2137  p_i_x, p_i_y, p_i_z, p_1, pairlistmA1_save, npairmA1, pairlisti,
2138  r2_delta, r2list);
2139  exclChecksum += npairi;
2140  #include "ComputeNonbondedBase2.h" // modified, direction 'up'
2141  #undef ALCH1
2142  #undef ALCH2
2143  #undef ALCH3
2144  #undef ALCH4
2145 
2146  #define ALCH1(X)
2147  #define ALCH2(X) X
2148  #define ALCH3(X)
2149  #define ALCH4(X)
2151  p_i_x, p_i_y, p_i_z, p_1, pairlistmA2_save, npairmA2, pairlisti,
2152  r2_delta, r2list);
2153  exclChecksum += npairi;
2154  #include "ComputeNonbondedBase2.h" // modified, direction 'down'
2155  #undef ALCH1
2156  #undef ALCH2
2157  #undef ALCH3
2158  #undef ALCH4
2159 
2160  #define ALCH1(X)
2161  #define ALCH2(X)
2162  #define ALCH3(X) X
2163  #define ALCH4(X)
2165  p_i_x, p_i_y, p_i_z, p_1, pairlistmA3_save, npairmA3, pairlisti,
2166  r2_delta, r2list);
2167  exclChecksum += npairi;
2168  #include "ComputeNonbondedBase2.h" // modified, direction 'up'
2169  #undef ALCH1
2170  #undef ALCH2
2171  #undef ALCH3
2172  #undef ALCH4
2173 
2174  #define ALCH1(X)
2175  #define ALCH2(X)
2176  #define ALCH3(X)
2177  #define ALCH4(X) X
2179  p_i_x, p_i_y, p_i_z, p_1, pairlistmA4_save, npairmA4, pairlisti,
2180  r2_delta, r2list);
2181  exclChecksum += npairi;
2182  #include "ComputeNonbondedBase2.h" // modified, direction 'down'
2183  #undef ALCH1
2184  #undef ALCH2
2185  #undef ALCH3
2186  #undef ALCH4
2187 
2188  #undef NORMAL
2189  #undef EXCLUDED
2190  #undef MODIFIED
2191 
2192  /********************************************************************/
2193  /******NONBONDEDBASE2 FOR EXCLUDED INTERACTIONS SCALED BY LAMBDA*****/
2194  /********************************************************************/
2195  #ifdef FULLELECT
2196  #undef FAST
2197  #define FAST(X)
2198  #define NORMAL(X)
2199  #define EXCLUDED(X) X
2200  #define MODIFIED(X)
2201 
2202  #define ALCH1(X) X
2203  #define ALCH2(X)
2204  #define ALCH3(X)
2205  #define ALCH4(X)
2207  p_i_x, p_i_y, p_i_z, p_1, pairlistxA1_save, npairxA1, pairlisti,
2208  r2_delta, r2list);
2209  exclChecksum += npairi;
2210  #include "ComputeNonbondedBase2.h" //excluded, direction 'up'
2211  #undef ALCH1
2212  #undef ALCH2
2213  #undef ALCH3
2214  #undef ALCH4
2215 
2216  #define ALCH1(X)
2217  #define ALCH2(X) X
2218  #define ALCH3(X)
2219  #define ALCH4(X)
2221  p_i_x, p_i_y, p_i_z, p_1, pairlistxA2_save, npairxA2, pairlisti,
2222  r2_delta, r2list);
2223  exclChecksum += npairi;
2224  #include "ComputeNonbondedBase2.h" //excluded, direction 'down'
2225  #undef ALCH1
2226  #undef ALCH2
2227  #undef ALCH3
2228  #undef ALCH4
2229 
2230  #define ALCH1(X)
2231  #define ALCH2(X)
2232  #define ALCH3(X) X
2233  #define ALCH4(X)
2235  p_i_x, p_i_y, p_i_z, p_1, pairlistxA3_save, npairxA3, pairlisti,
2236  r2_delta, r2list);
2237  exclChecksum += npairi;
2238  #include "ComputeNonbondedBase2.h"
2239  #undef ALCH1
2240  #undef ALCH2
2241  #undef ALCH3
2242  #undef ALCH4
2243 
2244  #define ALCH1(X)
2245  #define ALCH2(X)
2246  #define ALCH3(X)
2247  #define ALCH4(X) X
2249  p_i_x, p_i_y, p_i_z, p_1, pairlistxA4_save, npairxA4, pairlisti,
2250  r2_delta, r2list);
2251  exclChecksum += npairi;
2252  #include "ComputeNonbondedBase2.h"
2253  #undef ALCH1
2254  #undef ALCH2
2255  #undef ALCH3
2256  #undef ALCH4
2257 
2258  #undef FAST
2259  #ifdef SLOWONLY
2260  #define FAST(X)
2261  #else
2262  #define FAST(X) X
2263  #endif
2264  #undef NORMAL
2265  #undef EXCLUDED
2266  #undef MODIFIED
2267  #else
2268  exclChecksum += npairxA1 + npairxA2 + npairxA3 + npairxA4;
2269  #endif
2270 
2271  #undef ALCHPAIR
2272  #define ALCHPAIR(X)
2273  #undef NOT_ALCHPAIR
2274  #define NOT_ALCHPAIR(X) X
2275 
2276 #endif // end nonbondedbase2.h includes for alchemical pairlists
2277 
2278 #ifdef NETWORK_PROGRESS
2279  CkNetworkProgress();
2280 #endif
2281 
2282 #ifdef ARCH_POWERPC
2283  //data cache block touch the position structure
2284  __dcbt ((void *) &(p_0[i+1]));
2285  //__prefetch_by_load ((void *)&(groupCount));
2286 #endif
2287 
2288 #ifndef NAMD_CUDA
2289  SHORT( FAST( f_net.x += f_i_x; ) )
2290  SHORT( FAST( f_net.y += f_i_y; ) )
2291  SHORT( FAST( f_net.z += f_i_z; ) )
2292  FULL( fullf_net.x += fullf_i_x; )
2293  FULL( fullf_net.y += fullf_i_y; )
2294  FULL( fullf_net.z += fullf_i_z; )
2295  SHORT( FAST( f_0[i].x += f_i_x; ) )
2296  SHORT( FAST( f_0[i].y += f_i_y; ) )
2297  SHORT( FAST( f_0[i].z += f_i_z; ) )
2298  FULL( fullf_0[i].x += fullf_i_x; )
2299  FULL( fullf_0[i].y += fullf_i_y; )
2300  FULL( fullf_0[i].z += fullf_i_z; )
2301 #endif
2302 PAIR(
2303  if ( savePairlists || ! usePairlists ) {
2304  i++;
2305  } else {
2306  i = pairlists.getIndexValue();
2307  }
2308 )
2309 
2310  // PAIR( iout << i << " " << i_upper << " end\n" << endi;)
2311  } // for i
2312 
2313  // PAIR(iout << "++++++++\n" << endi;)
2314  PAIR( if ( savePairlists ) { pairlists.setIndexValue(i); } )
2315 
2316 #ifdef f_1
2317 #undef f_1
2318 #endif
2319 #if ( SHORT( FAST( 1+ ) ) 0 )
2320 #if ( PAIR( 1+ ) 0 )
2321  {
2322 #ifndef NAMD_CUDA
2323  BigReal virial_xx = f_net.x * params->offset_f.x;
2324  BigReal virial_xy = f_net.x * params->offset_f.y;
2325  BigReal virial_xz = f_net.x * params->offset_f.z;
2326  BigReal virial_yy = f_net.y * params->offset_f.y;
2327  BigReal virial_yz = f_net.y * params->offset_f.z;
2328  BigReal virial_zz = f_net.z * params->offset_f.z;
2329 
2330  reduction[virialIndex_XX] += virial_xx;
2331  reduction[virialIndex_XY] += virial_xy;
2332  reduction[virialIndex_XZ] += virial_xz;
2333  reduction[virialIndex_YX] += virial_xy;
2334  reduction[virialIndex_YY] += virial_yy;
2335  reduction[virialIndex_YZ] += virial_yz;
2336  reduction[virialIndex_ZX] += virial_xz;
2337  reduction[virialIndex_ZY] += virial_yz;
2338  reduction[virialIndex_ZZ] += virial_zz;
2339 #endif
2340  }
2341 #endif
2342 #endif
2343 
2344 #ifdef fullf_1
2345 #undef fullf_1
2346 #endif
2347 #if ( FULL( 1+ ) 0 )
2348 #if ( PAIR( 1+ ) 0 )
2349  {
2350 #ifndef NAMD_CUDA
2351  BigReal fullElectVirial_xx = fullf_net.x * params->offset_f.x;
2352  BigReal fullElectVirial_xy = fullf_net.x * params->offset_f.y;
2353  BigReal fullElectVirial_xz = fullf_net.x * params->offset_f.z;
2354  BigReal fullElectVirial_yy = fullf_net.y * params->offset_f.y;
2355  BigReal fullElectVirial_yz = fullf_net.y * params->offset_f.z;
2356  BigReal fullElectVirial_zz = fullf_net.z * params->offset_f.z;
2357 
2358  reduction[fullElectVirialIndex_XX] += fullElectVirial_xx;
2359  reduction[fullElectVirialIndex_XY] += fullElectVirial_xy;
2360  reduction[fullElectVirialIndex_XZ] += fullElectVirial_xz;
2361  reduction[fullElectVirialIndex_YX] += fullElectVirial_xy;
2362  reduction[fullElectVirialIndex_YY] += fullElectVirial_yy;
2363  reduction[fullElectVirialIndex_YZ] += fullElectVirial_yz;
2364  reduction[fullElectVirialIndex_ZX] += fullElectVirial_xz;
2365  reduction[fullElectVirialIndex_ZY] += fullElectVirial_yz;
2366  reduction[fullElectVirialIndex_ZZ] += fullElectVirial_zz;
2367 #endif
2368  }
2369 #endif
2370 #endif
2371 
2372 #ifndef NAMD_CUDA
2373  reduction[exclChecksumIndex] += exclChecksum;
2374 #endif
2375  FAST
2376  (
2377  // JLai
2378  ENERGY( reduction[vdwEnergyIndex] += vdwEnergy;
2379  GO( reduction[groLJEnergyIndex] += groLJEnergy;
2380  reduction[groGaussEnergyIndex] += groGaussEnergy;
2381  reduction[goNativeEnergyIndex] += goEnergyNative;
2382  reduction[goNonnativeEnergyIndex] += goEnergyNonnative; ) )
2383  SHORT
2384  (
2385  ENERGY( reduction[electEnergyIndex] += electEnergy; )
2386  )
2387  ALCH
2388  (
2389  TI(reduction[vdwEnergyIndex_ti_1] += vdwEnergy_ti_1;)
2390  TI(reduction[vdwEnergyIndex_ti_2] += vdwEnergy_ti_2;)
2391  FEP( reduction[vdwEnergyIndex_s] += vdwEnergy_s; )
2392  SHORT
2393  (
2394  FEP( reduction[electEnergyIndex_s] += electEnergy_s; )
2395  TI(reduction[electEnergyIndex_ti_1] += electEnergy_ti_1;)
2396  TI(reduction[electEnergyIndex_ti_2] += electEnergy_ti_2;)
2397  )
2398  )
2399  )
2400  FULL
2401  (
2402  ENERGY( reduction[fullElectEnergyIndex] += fullElectEnergy; )
2403  ALCH
2404  (
2405  FEP( reduction[fullElectEnergyIndex_s] += fullElectEnergy_s; )
2406  TI(reduction[fullElectEnergyIndex_ti_1] += fullElectEnergy_ti_1;)
2407  TI(reduction[fullElectEnergyIndex_ti_2] += fullElectEnergy_ti_2;)
2408  )
2409  )
2410 
2411  delete [] excl_flags_buff;
2412 
2413 }
2414 
register BigReal virial_xy
Pairlists * pairlists
unsigned char partition
Definition: NamdTypes.h:56
void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
register BigReal virial_xz
#define PAIR(X)
register BigReal virial_yz
BigReal uniform(void)
Definition: Random.h:109
Parameters * parameters
unsigned int hydrogenGroupSize
Definition: NamdTypes.h:58
#define BOLTZMANN
Definition: common.h:47
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
short int32
Definition: dumpdcd.c:24
int index
Definition: NamdTypes.h:211
#define LAM(X)
BigReal gaussian(void)
Definition: Random.h:116
#define NOKNL(X)
static void partition(int *order, const FullAtom *atoms, int begin, int end)
Definition: SortAtoms.C:41
#define PPROF(X)
CompAtom * p[2]
Definition: Vector.h:64
const int32 * get_mod_exclusions_for_atom(int anum) const
Definition: Molecule.h:1159
register BigReal electEnergy
static const Molecule * mol
Definition: NamdTypes.h:210
char * flags
Definition: Molecule.h:71
float Real
Definition: common.h:109
#define COULOMB
Definition: common.h:46
CompAtom * v[2]
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
BigReal z
Definition: Vector.h:66
#define NBWORKARRAY(TYPE, NAME, SIZE)
Position position
Definition: NamdTypes.h:53
#define NOSHORT(X)
BigReal * reduction
static BigReal pressureProfileThickness
#define p_j
SimParameters * simParameters
#define EXCHCK_MOD
Definition: Molecule.h:86
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
#define TI(X)
#define INT(X)
Charge charge
Definition: NamdTypes.h:54
#define NBWORKARRAYSINIT(ARRAYS)
#define NOTABENERGY(X)
register BigReal virial_yy
const Real * get_qmAtomGroup() const
Definition: Molecule.h:819
unsigned short plint
void pp_clamp(int &n, int nslabs)
BigReal rlength(void)
Definition: Vector.h:177
Definition: Random.h:37
plint getIndexValue()
void nextlist(plint **list, int *list_size)
int NumNbtholePairParams
Definition: Parameters.h:272
static BigReal * table_noshort
void NAMD_bug(const char *err_msg)
Definition: common.C:123
static BigReal pressureProfileMin
#define NAME
#define EXCHCK_FULL
Definition: Molecule.h:85
#define SHORT(X)
BigReal sortValue
Definition: NamdTypes.h:212
gridSize z
int Bool
Definition: common.h:133
#define SELF(X)
#define ALCH(X)
CompAtomExt * pExt[2]
register BigReal virial_zz
BigReal x
Definition: Vector.h:66
int numAtoms
Definition: Molecule.h:556
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cudaTextureObject_t cudaTextureObject_t cudaTextureObject_t const int const float plcutoff2
#define FAST(X)
NbtholePairValue * nbthole_array
Definition: Parameters.h:250
#define FEP(X)
static BigReal * lambda_table
static BigReal * slow_table
#define simParams
Definition: Output.C:127
short vdwType
Definition: NamdTypes.h:55
#define KNL(X)
BigReal y
Definition: Vector.h:66
static const LJTable * ljTable
#define ENERGY(X)
register BigReal virial_xx
#define FULL(X)
void newsize(int list_size)
static BigReal * table_short
#define TIMEFACTOR
Definition: common.h:48
const ExclusionCheck * get_excl_check_for_atom(int anum) const
Definition: Molecule.h:1173
void setIndexValue(plint i)
float Mass
Definition: ComputeGBIS.inl:20
gridSize y
unsigned int nonbondedGroupSize
Definition: NamdTypes.h:57
#define COMPONENT_DOTPRODUCT(A, B)
#define LES(X)
Force * fullf[2]
static BigReal alchVdwShiftCoeff
BigReal GetAtomAlpha(int i) const
Definition: Molecule.h:482
gridSize x
BigReal * pressureProfileReduction
plint * newlist(int max_size)
#define NOENERGY(X)
#define RIGID_NONE
Definition: SimParameters.h:77
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)
const int32 * get_full_exclusions_for_atom(int anum) const
Definition: Molecule.h:1157
Vector unit(void) const
Definition: Vector.h:182
const TableEntry * table_row(unsigned int i) const
Definition: LJTable.h:31
#define GO(X)
ComputeNonbondedWorkArrays * workArrays
double BigReal
Definition: common.h:114
#define TABENERGY(X)