NAMD
ComputeNonbondedInl.h
Go to the documentation of this file.
1 
7 /*
8  Common operations for ComputeNonbonded classes
9 */
10 
11 #ifndef COMPUTENONBONDEDINL_H
12 #define COMPUTENONBONDEDINL_H
13 
14 #ifndef NAMD_RESTRICT
15 #define restrict
16 #endif
17 
18 #include "ComputeNonbondedUtil.h"
19 #include "SimParameters.h"
20 #include "Molecule.h"
21 #include "LJTable.h"
22 #include "ReductionMgr.h"
23 #include "ReserveArray.h"
24 
25 #include "PressureProfile.h"
26 
27 // BEGIN LA
28 #include "Random.h"
29 // END LA
30 
31 #include <algorithm>
32 
33 #ifdef NAMD_KNL
34 inline int pairlist_from_pairlist_knl(float cutoff2,
35  float p_i_x, float p_i_y, float p_i_z,
36  const CompAtomFlt *p_j,
37  const plint *list, int list_size, int *newlist,
38  float *r2list, float *xlist, float *ylist, float *zlist) {
39 
40  int *nli = newlist;
41  float *r2i = r2list;
42  float *xli = xlist;
43  float *yli = ylist;
44  float *zli = zlist;
45 
46  if ( list_size <= 0) return 0;
47 
48  int g = 0;
49 
50  float p_j_x, p_j_y, p_j_z;
51  float x2, y2, z2, r2;
52 #pragma vector aligned
53 #pragma ivdep
54  for ( g = 0 ; g < list_size; ++g ) {
55  int gi=list[g];
56  p_j_x = p_j[ gi ].position.x;
57  p_j_y = p_j[ gi ].position.y;
58  p_j_z = p_j[ gi ].position.z;
59 
60  x2 = p_i_x - p_j_x;
61  r2 = x2 * x2;
62  y2 = p_i_y - p_j_y;
63  r2 += y2 * y2;
64  z2 = p_i_z - p_j_z;
65  r2 += z2 * z2;
66 
67  if ( r2 <= cutoff2 ) {
68  *nli = gi; ++nli;
69  *r2i = r2; ++r2i;
70  *xli = x2; ++xli;
71  *yli = y2; ++yli;
72  *zli = z2; ++zli;
73  }
74  }
75 
76  return nli - newlist;
77 }
78 #endif// NAMD_KNL
79 
81  BigReal p_i_x, BigReal p_i_y, BigReal p_i_z,
82  const CompAtom *p_j,
83  const plint *list, int list_size, int *newlist,
84  BigReal r2_delta, BigReal *r2list) {
85 
86  BigReal cutoff2_delta = cutoff2 + r2_delta;
87  int *nli = newlist;
88  BigReal *r2i = r2list;
89 
90  if ( list_size <= 0) return 0;
91 
92  int g = 0;
93 
94 #ifdef NAMD_KNL
95 
96  BigReal p_j_x, p_j_y, p_j_z;
97  BigReal x2, y2, z2, r2;
98 #pragma vector aligned
99 #pragma ivdep
100  for ( g = 0 ; g < list_size; ++g ) {
101  int gi=list[g];
102  p_j_x = p_j[ gi ].position.x;
103  p_j_y = p_j[ gi ].position.y;
104  p_j_z = p_j[ gi ].position.z;
105 
106  x2 = p_i_x - p_j_x;
107  r2 = x2 * x2 + r2_delta;
108  y2 = p_i_y - p_j_y;
109  r2 += y2 * y2;
110  z2 = p_i_z - p_j_z;
111  r2 += z2 * z2;
112 
113  if ( r2 <= cutoff2_delta ) {
114  *nli = gi; ++nli;
115  *r2i = r2; ++r2i;
116  }
117  }
118 
119 #else // NAMD_KNL
120 #ifndef SIMPLE_PAIRLIST
121 #ifdef A2_QPX
122  //***************************************************************
123  //* 4-way unrolled and software-pipelined and optiized via QPX
124  //***************************************************************
125 
126  int jout = 0;
127  if ( list_size > 16) {
128  // prefetch
129  int jcur0 = list[g];
130  int jcur1 = list[g + 1];
131  int jcur2 = list[g + 2];
132  int jcur3 = list[g + 3];
133 
134  int j0, j1, j2, j3;
135 
136  vector4double pj_v_0, pj_v_1, pj_v_2, pj_v_3;
137  vector4double v_0, v_1, v_2, v_3;
138  register BigReal r2_0, r2_1, r2_2, r2_3;
139 
140  vector4double p_i_v = {p_i_x, p_i_y, p_i_z, 0.};
141  vector4double r2_delta_v = {r2_delta};
142 
143  pj_v_0 = vec_ld(jcur0 * sizeof(CompAtom), (BigReal *)p_j);
144  pj_v_1 = vec_ld(jcur1 * sizeof(CompAtom), (BigReal *)p_j);
145  pj_v_2 = vec_ld(jcur2 * sizeof(CompAtom), (BigReal *)p_j);
146  pj_v_3 = vec_ld(jcur3 * sizeof(CompAtom), (BigReal *)p_j);
147 
148  for ( g = 4 ; g < list_size - 4; g += 4 ) {
149  // compute 1d distance, 4-way parallel
150  //Save the previous iterations values, gives more flexibility
151  //to the compiler to schedule the loads and the computation
152  j0 = jcur0; j1 = jcur1;
153  j2 = jcur2; j3 = jcur3;
154 
155  jcur0 = list[g ]; jcur1 = list[g + 1];
156  jcur2 = list[g + 2]; jcur3 = list[g + 3];
157 
158  __dcbt((void*)(p_j + jcur0));
159 
160  v_0 = vec_sub (p_i_v, pj_v_0);
161  v_1 = vec_sub (p_i_v, pj_v_1);
162  v_2 = vec_sub (p_i_v, pj_v_2);
163  v_3 = vec_sub (p_i_v, pj_v_3);
164 
165  v_0 = vec_madd (v_0, v_0, r2_delta_v);
166  v_1 = vec_madd (v_1, v_1, r2_delta_v);
167  v_2 = vec_madd (v_2, v_2, r2_delta_v);
168  v_3 = vec_madd (v_3, v_3, r2_delta_v);
169 
170  pj_v_0 = vec_ld(jcur0 * sizeof(CompAtom), (BigReal *)p_j);
171  pj_v_1 = vec_ld(jcur1 * sizeof(CompAtom), (BigReal *)p_j);
172  pj_v_2 = vec_ld(jcur2 * sizeof(CompAtom), (BigReal *)p_j);
173  pj_v_3 = vec_ld(jcur3 * sizeof(CompAtom), (BigReal *)p_j);
174 
175  r2_0 = vec_extract(v_0, 0) + vec_extract(v_0, 1) + vec_extract(v_0, 2);
176  r2_1 = vec_extract(v_1, 0) + vec_extract(v_1, 1) + vec_extract(v_1, 2);
177  r2_2 = vec_extract(v_2, 0) + vec_extract(v_2, 1) + vec_extract(v_2, 2);
178  r2_3 = vec_extract(v_3, 0) + vec_extract(v_3, 1) + vec_extract(v_3, 2);
179 
180  size_t test0, test1, test2, test3;
181  size_t jout0, jout1, jout2, jout3;
182 
183  test0 = ( r2_0 < cutoff2_delta );
184  test1 = ( r2_1 < cutoff2_delta );
185  test2 = ( r2_2 < cutoff2_delta );
186  test3 = ( r2_3 < cutoff2_delta );
187 
188  jout0 = jout;
189  nli[ jout0 ] = j0; r2i[ jout0 ] = r2_0;
190  jout += test0; jout1 = jout;
191 
192  nli[ jout1 ] = j1; r2i[ jout1 ] = r2_1;
193  jout += test1; jout2 = jout;
194 
195  nli[ jout2 ] = j2; r2i[ jout2 ] = r2_2;
196  jout += test2; jout3 = jout;
197 
198  nli[ jout3 ] = j3; r2i[ jout3 ] = r2_3;
199  jout += test3;
200  }
201  g -= 4;
202  }
203 
204  nli += jout;
205  r2i += jout;
206 #else
207  //***************************************************************
208  //* 4-way unrolled and software-pipelined
209  //***************************************************************
210 
211  int jout = 0;
212  if ( list_size > 16) {
213  // prefetch
214  int jcur0 = list[g];
215  int jcur1 = list[g + 1];
216  int jcur2 = list[g + 2];
217  int jcur3 = list[g + 3];
218 
219  int j0, j1, j2, j3;
220 
221  register BigReal pj_x_0, pj_x_1, pj_x_2, pj_x_3;
222  register BigReal pj_y_0, pj_y_1, pj_y_2, pj_y_3;
223  register BigReal pj_z_0, pj_z_1, pj_z_2, pj_z_3;
224 
225  register BigReal t_0, t_1, t_2, t_3, r2_0, r2_1, r2_2, r2_3;
226 
227  pj_x_0 = p_j[jcur0].position.x;
228  pj_x_1 = p_j[jcur1].position.x;
229  pj_x_2 = p_j[jcur2].position.x;
230  pj_x_3 = p_j[jcur3].position.x;
231  pj_y_0 = p_j[jcur0].position.y;
232  pj_y_1 = p_j[jcur1].position.y;
233  pj_y_2 = p_j[jcur2].position.y;
234  pj_y_3 = p_j[jcur3].position.y;
235  pj_z_0 = p_j[jcur0].position.z;
236  pj_z_1 = p_j[jcur1].position.z;
237  pj_z_2 = p_j[jcur2].position.z;
238  pj_z_3 = p_j[jcur3].position.z;
239 
240  for ( g = 4 ; g < list_size - 4; g += 4 ) {
241  // compute 1d distance, 4-way parallel
242 
243  //Save the previous iterations values, gives more flexibility
244  //to the compiler to schedule the loads and the computation
245  j0 = jcur0; j1 = jcur1;
246  j2 = jcur2; j3 = jcur3;
247 
248  jcur0 = list[g ]; jcur1 = list[g + 1];
249  jcur2 = list[g + 2]; jcur3 = list[g + 3];
250 
251 #ifdef ARCH_POWERPC
252  __dcbt ((void *) &p_j[jcur0]);
253 #endif
254 
255  //Compute X distance
256  t_0 = p_i_x - pj_x_0; t_1 = p_i_x - pj_x_1;
257  t_2 = p_i_x - pj_x_2; t_3 = p_i_x - pj_x_3;
258 
259  r2_0 = t_0 * t_0 + r2_delta;
260  r2_1 = t_1 * t_1 + r2_delta;
261  r2_2 = t_2 * t_2 + r2_delta;
262  r2_3 = t_3 * t_3 + r2_delta;
263 
264  //Compute y distance
265  t_0 = p_i_y - pj_y_0; t_1 = p_i_y - pj_y_1;
266  t_2 = p_i_y - pj_y_2; t_3 = p_i_y - pj_y_3;
267  r2_0 += t_0 * t_0; r2_1 += t_1 * t_1;
268  r2_2 += t_2 * t_2; r2_3 += t_3 * t_3;
269 
270  //compute z distance
271  t_0 = p_i_z - pj_z_0; t_1 = p_i_z - pj_z_1;
272  t_2 = p_i_z - pj_z_2; t_3 = p_i_z - pj_z_3;
273  r2_0 += t_0 * t_0; r2_1 += t_1 * t_1;
274  r2_2 += t_2 * t_2; r2_3 += t_3 * t_3;
275 
276  pj_x_0 = p_j[jcur0].position.x;
277  pj_x_1 = p_j[jcur1].position.x;
278  pj_x_2 = p_j[jcur2].position.x;
279  pj_x_3 = p_j[jcur3].position.x;
280  pj_y_0 = p_j[jcur0].position.y;
281  pj_y_1 = p_j[jcur1].position.y;
282  pj_y_2 = p_j[jcur2].position.y;
283  pj_y_3 = p_j[jcur3].position.y;
284  pj_z_0 = p_j[jcur0].position.z;
285  pj_z_1 = p_j[jcur1].position.z;
286  pj_z_2 = p_j[jcur2].position.z;
287  pj_z_3 = p_j[jcur3].position.z;
288 
289  bool test0, test1, test2, test3;
290 
291  test0 = ( r2_0 < cutoff2_delta );
292  test1 = ( r2_1 < cutoff2_delta );
293  test2 = ( r2_2 < cutoff2_delta );
294  test3 = ( r2_3 < cutoff2_delta );
295 
296  int jout0, jout1, jout2, jout3;
297 
298  jout0 = jout;
299  nli[ jout0 ] = j0; r2i[ jout0 ] = r2_0;
300  jout += test0; jout1 = jout;
301  nli[ jout1 ] = j1; r2i[ jout1 ] = r2_1;
302  jout += test1; jout2 = jout;
303  nli[ jout2 ] = j2; r2i[ jout2 ] = r2_2;
304  jout += test2; jout3 = jout;
305  nli[ jout3 ] = j3; r2i[ jout3 ] = r2_3;
306 
307  jout += test3;
308  }
309  g -= 4;
310  }
311 
312  nli += jout;
313  r2i += jout;
314 #endif
315 #endif
316 
317  int j2 = list[g];
318  BigReal p_j_x = p_j[j2].position.x;
319  BigReal p_j_y = p_j[j2].position.y;
320  BigReal p_j_z = p_j[j2].position.z;
321  while ( g < list_size ) {
322  int j = j2;
323  j2 = list[++g];
324  BigReal t2 = p_i_x - p_j_x;
325  BigReal r2 = t2 * t2 + r2_delta;
326  p_j_x = p_j[j2].position.x;
327  t2 = p_i_y - p_j_y;
328  r2 += t2 * t2;
329  p_j_y = p_j[j2].position.y;
330  t2 = p_i_z - p_j_z;
331  r2 += t2 * t2;
332  p_j_z = p_j[j2].position.z;
333  if ( r2 <= cutoff2_delta ) {
334  *nli= j; ++nli;
335  *r2i = r2; ++r2i;
336  }
337  }
338 
339 #endif // NAMD_KNL
340 
341  return nli - newlist;
342 }
343 
344 // clear all
345 // define interaction type (pair or self)
346 #define NBPAIR 1
347 #define NBSELF 2
348 
349 
350 
351 // Various atom sorting functions
352 #if NAMD_ComputeNonbonded_SortAtoms != 0
353 
354 inline void sortEntries_selectionSort(SortEntry * const se, const int seLen) {
355 
356  register int i;
357 
358  for (i = 0; i < seLen; i++) {
359 
360  // Search through the remaining elements, finding the lowest
361  // value, and then swap it with the first remaining element.
362  // Start by assuming the first element is the smallest.
363  register int smallestIndex = i;
364  register BigReal smallestValue = se[i].sortValue;
365  register int j;
366  for (j = i + 1; j < seLen; j++) {
367  register BigReal currentValue = se[j].sortValue;
368  if (currentValue < smallestValue) {
369  smallestIndex = j;
370  smallestValue = currentValue;
371  }
372  }
373 
374  // Swap the first remaining element with the smallest element
375  if (smallestIndex != i) {
376  register SortEntry* entryA = se + i;
377  register SortEntry* entryB = se + smallestIndex;
378  register unsigned int tmpIndex = entryA->index;
379  register BigReal tmpSortValue = entryA->sortValue;
380  entryA->index = entryB->index;
381  entryA->sortValue = entryB->sortValue;
382  entryB->index = tmpIndex;
383  entryB->sortValue = tmpSortValue;
384  }
385  }
386 }
387 
388 inline void sortEntries_bubbleSort(SortEntry * const se, const int seLen) {
389 
390  register int keepSorting = 0;
391 
392  do {
393 
394  // Reset the keepSorting flag (assume no swaps will occur)
395  keepSorting = 0;
396 
397  // Loop through the pairs and swap if needed
398  register SortEntry* sortEntry1 = se;
399  for (int i = 1; i < seLen; i++) {
400 
401  register SortEntry* sortEntry0 = sortEntry1;
402  sortEntry1 = se + i;
403  register BigReal sortEntry0_sortValue = sortEntry0->sortValue;
404  register BigReal sortEntry1_sortValue = sortEntry1->sortValue;
405 
406  if (sortEntry0_sortValue > sortEntry1_sortValue) {
407  register int sortEntry0_index = sortEntry0->index;
408  register int sortEntry1_index = sortEntry1->index;
409  sortEntry0->index = sortEntry1_index;
410  sortEntry0->sortValue = sortEntry1_sortValue;
411  sortEntry1->index = sortEntry0_index;
412  sortEntry1->sortValue = sortEntry0_sortValue;
413  keepSorting = 1;
414  }
415  }
416 
417  } while (keepSorting != 0); // Loop again if at least one set of
418  // elements was swapped.
419 }
420 
421 // NOTE: The buf parameter should point to a buffer that this function
422 // can use as a temp storage location (scratch pad).
423 // NOTE: This function may swap the values of se and buf, but will not
424 // return any other value (nor will it set either to NULL).
425 inline void sortEntries_mergeSort_v1(SortEntry * &se, SortEntry * &buf, int seLen) {
426 
427  register SortEntry* srcArray = se;
428  register SortEntry* dstArray = buf;
429 
430  // Start with each element being a separate list. Start
431  // merging the "lists" into larger lists.
432  register int subListSize = 1;
433  while (subListSize < seLen) {
434 
435  // NOTE: This iteration consumes sublists of length
436  // subListSize and produces sublists of length
437  // (2*subListSize). So, keep looping while the length of a
438  // single sorted sublist is not the size of the entire array.
439 
440  // Iterate through the lists, merging each consecutive pair of lists.
441  register int firstListOffset = 0;
442  while (firstListOffset < seLen) {
443 
445 
446  register int numElements = std::min(2 * subListSize, seLen - firstListOffset);
447  register int list0len;
448  register int list1len;
449  if (numElements > subListSize) {
450  list0len = subListSize; // First list full
451  list1len = numElements - subListSize; // 1+ elements in second list
452  } else {
453  list0len = numElements; // 1+ elements in first list
454  list1len = 0; // Zero elements in second list
455  }
456 
457  register SortEntry* list0ptr = srcArray + firstListOffset;
458  register SortEntry* list1ptr = list0ptr + subListSize;
459  register SortEntry* dstptr = dstArray + firstListOffset;
460 
462 
463  // While there are elements in both lists, pick from one
464  while (list0len > 0 && list1len > 0) {
465 
466  register BigReal sortValue0 = list0ptr->sortValue;
467  register BigReal sortValue1 = list1ptr->sortValue;
468 
469  if (sortValue0 < sortValue1) { // choose first list (list0)
470 
471  // Copy the value from srcArray to dstArray
472  register int index0 = list0ptr->index;
473  dstptr->sortValue = sortValue0;
474  dstptr->index = index0;
475 
476  // Move the pointers forward for the sublists
477  dstptr++;
478  list0ptr++;
479  list0len--;
480 
481  } else { // choose second list (list1)
482 
483  // Copy the value from srcArray to dstArray
484  register int index1 = list1ptr->index;
485  dstptr->sortValue = sortValue1;
486  dstptr->index = index1;
487 
488  // Move the pointers forward for the sublists
489  dstptr++;
490  list1ptr++;
491  list1len--;
492  }
493 
494  } // end while (list0len > 0 && list1len > 0)
495 
496  // NOTE: Either list0len or list1len is zero at this point
497  // so only one of the following loops should execute.
498 
499  // Drain remaining elements from the first list (list0)
500  while (list0len > 0) {
501 
502  // Copy the value from srcArray to dstArray
503  register BigReal sortValue0 = list0ptr->sortValue;
504  register int index0 = list0ptr->index;
505  dstptr->sortValue = sortValue0;
506  dstptr->index = index0;
507 
508  // Move the pointers forward for the sublists
509  dstptr++;
510  list0ptr++;
511  list0len--;
512 
513  } // end while (list0len > 0)
514 
515  // Drain remaining elements from the first list (list1)
516  while (list1len > 0) {
517 
518  // Copy the value from srcArray to dstArray
519  register BigReal sortValue1 = list1ptr->sortValue;
520  register int index1 = list1ptr->index;
521  dstptr->sortValue = sortValue1;
522  dstptr->index = index1;
523 
524  // Move the pointers forward for the sublists
525  dstptr++;
526  list1ptr++;
527  list1len--;
528 
529  } // end while (list1len > 0)
530 
531  // Move forward to the next pair of sub-lists
532  firstListOffset += (2 * subListSize);
533 
534  } // end while (firstListOffset < seLen) {
535 
536  // Swap the dstArray and srcArray pointers
537  register SortEntry* tmpPtr = dstArray;
538  dstArray = srcArray;
539  srcArray = tmpPtr;
540 
541  // Double the subListSize
542  subListSize <<= 1;
543 
544  } // end while (subListSize < seLen)
545 
546  // Set the sort values pointers (NOTE: srcArray and dstArray are
547  // swapped at the end of each iteration of the merge sort outer-loop).
548  buf = dstArray;
549  se = srcArray;
550 }
551 
552 // NOTE: The buf parameter should point to a buffer that this function
553 // can use as a temp storage location (scratch pad).
554 // NOTE: This function may swap the values of se and buf, but will not
555 // return any other value (nor will it set either to NULL).
556 inline void sortEntries_mergeSort_v2(SortEntry * &se, SortEntry * &buf, int seLen) {
557 
558  // NOTE: This macro "returns" either val0 (if test == 0) or val1 (if
559  // test == 1). It expects test to be either 0 or 1 (no other values).
560  #define __TERNARY_ASSIGN(test, val0, val1) ((test * val0) + ((1 - test) * val1))
561 
562  register SortEntry* srcArray = se;
563  register SortEntry* dstArray = buf;
564 
565  // Start with each element being a separate list. Start
566  // merging the "lists" into larger lists.
567  register int subListSize = 1;
568  while (subListSize < seLen) {
569 
570  // NOTE: This iteration consumes sublists of length
571  // subListSize and produces sublists of length
572  // (2*subListSize). So, keep looping while the length of a
573  // single sorted sublist is not the size of the entire array.
574 
575  // Iterate through the lists, merging each consecutive pair of lists.
576  register int firstListOffset = 0;
577  while (firstListOffset < seLen) {
578 
580 
581  // Calculate the number of elements for both sublists...
582  // min(2 * subListSize, seLen - firstListOffset);
583  register int numElements;
584  {
585  register int numElements_val0 = 2 * subListSize;
586  register int numElements_val1 = seLen - firstListOffset;
587  register bool numElements_test = (numElements_val0 < numElements_val1);
588  numElements = __TERNARY_ASSIGN(numElements_test, numElements_val0, numElements_val1);
589  }
590 
591  // Setup the pointers for the source and destination arrays
592  register SortEntry* dstptr = dstArray + firstListOffset; // destination array pointer
593  register SortEntry* list0ptr = srcArray + firstListOffset; // source list 0 pointer
594  register SortEntry* list1ptr = list0ptr + subListSize; // source list 1 pointer
595  register SortEntry* list0ptr_end; // pointer to end of source list0's elements (element after last)
596  register SortEntry* list1ptr_end; // pointer to end of source list1's elements (element after last)
597  {
598  register bool lenTest = (numElements > subListSize);
599  register int list0len_val0 = subListSize;
600  register int list1len_val0 = numElements - subListSize;
601  register int list0len_val1 = numElements; // NOTE: list1len_val1 = 0
602  register int list0len = __TERNARY_ASSIGN(lenTest, list0len_val0, list0len_val1);
603  register int list1len = __TERNARY_ASSIGN(lenTest, list1len_val0, 0);
604  // avoid pre-load of sortValue1 from past end of array
605  if ( ! lenTest ) list1ptr = list0ptr;
606  list0ptr_end = list0ptr + list0len;
607  list1ptr_end = list1ptr + list1len;
608  }
609 
610  // The firstListOffset variable won't be used again until the next
611  // iteration, so go ahead and update it now...
612  // Move forward to the next pair of sub-lists
613  firstListOffset += (2 * subListSize);
614 
616 
617  // Pre-load values from both source arrays
618  register BigReal sortValue0 = list0ptr->sortValue;
619  register BigReal sortValue1 = list1ptr->sortValue;
620  register int index0 = list0ptr->index;
621  register int index1 = list1ptr->index;
622 
623  // While both lists have at least one element in them, compare the
624  // heads of each list and place the smaller of the two in the
625  // destination array.
626  while (list0ptr < list0ptr_end && list1ptr < list1ptr_end) {
627 
628  // Compare the values
629  register bool test = (sortValue0 < sortValue1);
630 
631  // Place the "winner" in the destination array
632  dstptr->sortValue = __TERNARY_ASSIGN(test, sortValue0, sortValue1);
633  dstptr->index = __TERNARY_ASSIGN(test, index0, index1);
634  dstptr++;
635 
636  // Update the pointers
637  list0ptr += __TERNARY_ASSIGN(test, 1, 0);
638  list1ptr += __TERNARY_ASSIGN(test, 0, 1);
639 
640  // Refill the sortValue and index register
641  // NOTE: These memory locations are likely to be in cache
642  sortValue0 = list0ptr->sortValue;
643  sortValue1 = list1ptr->sortValue;
644  index0 = list0ptr->index;
645  index1 = list1ptr->index;
646 
647  } // end while (list0ptr < list0ptr_end && list1ptr < list1ptr_end)
648 
649  // NOTE: At this point, at least one of the lists is empty so no
650  // more than one of the loops will be executed.
651 
652  // Drain the remaining elements from list0
653  while (list0ptr < list0ptr_end) {
654 
655  // Place the value into the destination array
656  dstptr->sortValue = sortValue0;
657  dstptr->index = index0;
658  dstptr++;
659 
660  // Load the next entry in list0
661  list0ptr++;
662  sortValue0 = list0ptr->sortValue;
663  index0 = list0ptr->index;
664 
665  } // end while (list0ptr < list0ptr_end)
666 
667  // Drain the remaining elements from list1
668  while (list1ptr < list1ptr_end) {
669 
670  // Place the value into the destination array
671  dstptr->sortValue = sortValue1;
672  dstptr->index = index1;
673  dstptr++;
674 
675  // Load the next entry in list1
676  list1ptr++;
677  sortValue1 = list1ptr->sortValue;
678  index1 = list1ptr->index;
679 
680  } // end while (list1ptr < list1ptr_end)
681 
682  } // end while (firstListOffset < seLen) {
683 
684  // Swap the dstArray and srcArray pointers
685  register SortEntry* tmpPtr = dstArray;
686  dstArray = srcArray;
687  srcArray = tmpPtr;
688 
689  // Double the subListSize
690  subListSize <<= 1;
691 
692  } // end while (subListSize < seLen)
693 
694  // Set the sort values pointers (NOTE: srcArray and dstArray are
695  // swapped at the end of each iteration of the merge sort outer-loop).
696  buf = dstArray;
697  se = srcArray;
698 
699  #undef __TERNARY_ASSIGN
700 }
701 
702 #endif // NAMD_ComputeNonbonded_SortAtoms != 0
703 
704 
705 #endif // COMPUTENONBONDEDINL_H
706 
void sortEntries_mergeSort_v1(SortEntry *&se, SortEntry *&buf, int seLen)
#define __TERNARY_ASSIGN(test, val0, val1)
void sortEntries_selectionSort(SortEntry *const se, const int seLen)
int index
Definition: NamdTypes.h:211
Definition: NamdTypes.h:210
int pairlist_from_pairlist(BigReal cutoff2, BigReal p_i_x, BigReal p_i_y, BigReal p_i_z, const CompAtom *p_j, const plint *list, int list_size, int *newlist, BigReal r2_delta, BigReal *r2list)
BigReal z
Definition: Vector.h:66
Position position
Definition: NamdTypes.h:53
#define p_j
void sortEntries_bubbleSort(SortEntry *const se, const int seLen)
unsigned short plint
BigReal sortValue
Definition: NamdTypes.h:212
BigReal x
Definition: Vector.h:66
BigReal y
Definition: Vector.h:66
__global__ void const int const TileList *__restrict__ TileExcl *__restrict__ const int *__restrict__ const int const float2 *__restrict__ const int *__restrict__ const float3 const float3 const float3 const float4 *__restrict__ const float cutoff2
void sortEntries_mergeSort_v2(SortEntry *&se, SortEntry *&buf, int seLen)
double BigReal
Definition: common.h:112