19 int nPatches,
int nPes) :
21 processorArray, nComps,
29 void AlgRecBisection::rec_divide(
int n, Partition &p)
34 double load1, currentload;
39 CmiPrintf(
"rec_divide: partition n:%d count:%d load:%f (%d %d %d, %d %d %d)\n", n, p.count, p.load, p.origin[0], p.origin[1], p.origin[2], p.corner[0], p.corner[1], p.corner[2]);
43 partitions[currentp++] = p;
54 load1 = (1.0*n1/n) * p.load;
64 for (i=XDIR; i<=ZDIR; i++) {
65 int myspan = p.corner[i] - p.origin[i];
66 if (myspan > maxSpan) {
73 int dir2 = (maxdir+1)%3;
74 int dir3 = (maxdir+2)%3;
78 midpos = p.origin[maxdir];
81 if (computeLoad[vArray[maxdir][i].
id].refno != p.refno)
continue;
82 if (vArray[maxdir][i].v<p.origin[maxdir])
continue;
83 if (vArray[maxdir][i].v>p.corner[maxdir])
break;
85 int cid = vArray[maxdir][i].id;
87 if ( computeLoad[cid].v[dir2] >= p.origin[dir2] &&
88 computeLoad[cid].v[dir2] <= p.corner[dir2] &&
89 computeLoad[cid].v[dir3] >= p.origin[dir3] &&
90 computeLoad[cid].v[dir3] <= p.corner[dir3] ) {
92 if (currentload <= load1) {
93 computeLoad[cid].refno = p1.refno;
94 currentload += computeLoad[cid].load;
96 midpos = computeLoad[cid].v[maxdir];
109 computeLoad[cid].refno = p2.refno;
115 CmiPrintf(
"DIR:%d %d load:%f\n", maxdir, midpos, currentload);
119 p1.corner[maxdir] = midpos;
120 p2.origin[maxdir] = midpos;
122 p1.load = currentload;
124 p2.load = p.load - p1.load;
125 p2.count = p.count - p1.count;
127 CmiPrintf(
"p1: n:%d count:%d load:%f\n", n1, p1.count, p1.load);
128 CmiPrintf(
"p2: n:%d count:%d load:%f\n", n2, p2.count, p2.load);
130 CmiAssert(! (p1 == p2));
135 void AlgRecBisection::setVal(
int x,
int y,
int z)
139 computeLoad[i].tv = 1000000*computeLoad[i].v[
x]+
140 1000*computeLoad[i].v[
y]+
144 CmiPrintf(
"original:%d\n", x);
146 CmiPrintf(
"%d ", computeLoad[i].tv);
151 int AlgRecBisection::sort_partition(
int x,
int p,
int r)
153 int mid = computeLoad[vArray[
x][p].id].tv;
157 while (computeLoad[vArray[x][j].
id].tv > mid && j>i) j--;
158 while (computeLoad[vArray[x][i].
id].tv < mid && i<j) i++;
160 if (computeLoad[vArray[x][i].
id].tv == computeLoad[vArray[x][j].
id].tv)
162 if (computeLoad[vArray[x][i].
id].tv != mid)
NAMD_die(
"my god!\n");
167 VecArray tmp = vArray[
x][i];
168 vArray[
x][i] = vArray[
x][j];
176 void AlgRecBisection::qsort(
int x,
int p,
int r)
179 int q = sort_partition(x, p, r);
186 void AlgRecBisection::quicksort(
int x)
191 qsort(x, 0, numComputes-1);
194 CmiPrintf(
"result:%d\n", x);
196 CmiPrintf(
"%d ", computeLoad[vArray[x][i].
id].tv);
201 void AlgRecBisection::mapPartitionsToNodes()
205 for (i=0; i<
P; i++) partitions[i].node = i;
209 int **pool =
new int *[
P];
210 for (i=0; i<
P; i++) pool[i] =
new int[P];
211 for (i=0; i<
P; i++)
for (j=0; j<
P; j++) pool[i][j] = 0;
217 if (computeLoad[i].refno == partitions[j].refno)
228 for (i=0; i<
P; i++) {
229 for (j=0; j<
P; j++) CmiPrintf(
"%d ", pool[i][j]);
235 int index=-1, node=0, eager=-1;
236 for (j=0; j<
P; j++) {
237 if (partitions[j].node != -1)
continue;
238 int wantmost=-1, maxnodes=-1;
239 for (k=0; k<P; k++) if (pool[j][k] > maxnodes && !partitions[k].mapped) {wantmost=k; maxnodes = pool[j][k];}
240 if (maxnodes > eager) {
241 index = j; node = wantmost; eager = maxnodes;
244 if (eager == -1)
break;
245 partitions[index].node = node;
246 partitions[node].mapped = 1;
249 for (i=0; i<
P; i++)
delete [] pool[i];
253 CmiPrintf(
"partitions to nodes mapping: ");
254 for (i=0; i<
P; i++) CmiPrintf(
"%d ", partitions[i].node);
258 void AlgRecBisection::strategy()
266 for (i=XDIR; i<=ZDIR; i++) vArray[i] =
new VecArray[numComputes];
268 CmiPrintf(
"AlgRecBisection: numComputes:%d\n", numComputes);
279 computeLoad[i].id = i;
280 int a1 = patchMap->
index_a(pid1);
281 int b1 = patchMap->
index_b(pid1);
282 int c1 = patchMap->
index_c(pid1);
283 int a2 = patchMap->
index_a(pid2);
284 int b2 = patchMap->
index_b(pid2);
285 int c2 = patchMap->
index_c(pid1);
286 computeLoad[i].v[0] = a1 + a2;
287 computeLoad[i].v[1] = b1 + b2;
288 computeLoad[i].v[2] = c1 + c2;
291 computeLoad[i].refno = 0;
293 for (j=XDIR; j<=ZDIR; j++) {
295 vArray[j][i].v = computeLoad[i].v[j];
300 double t = CmiWallTimer();
305 CmiPrintf(
"qsort time: %f\n", CmiWallTimer() - t);
308 partitions =
new Partition[npartition];
310 top_partition.origin[XDIR] = 0;
311 top_partition.origin[YDIR] = 0;
312 top_partition.origin[ZDIR] = 0;
313 top_partition.corner[XDIR] = 2*(size_x-1);
314 top_partition.corner[YDIR] = 2*(size_y-1);
315 top_partition.corner[ZDIR] = 2*(size_z-1);
317 top_partition.refno = 0;
318 top_partition.load = 0.0;
326 rec_divide(npartition, top_partition);
328 CmiPrintf(
"After partitioning: \n");
329 for (i=0; i<
P; i++) {
330 CmiPrintf(
"[%d] (%d,%d,%d) (%d,%d,%d) load:%f count:%d\n", i, partitions[i].origin[0], partitions[i].origin[1], partitions[i].origin[2], partitions[i].corner[0], partitions[i].corner[1], partitions[i].corner[2], partitions[i].
load, partitions[i].count);
336 mapPartitionsToNodes();
339 int *num =
new int[
P];
340 for (i=0; i<
P; i++) num[i] = 0;
345 if (computeLoad[i].refno == partitions[j].refno)
350 if (num[i] != partitions[i].count)
351 NAMD_die(
"AlgRecBisection: Compute counts don't agree!\n");
360 delete [] computeLoad;
361 for (i=0; i<3; i++)
delete [] vArray[i];
362 delete [] partitions;
379 CmiPrintf(
"AlgRecBisection finished time: %f\n", CmiWallTimer() - t);
BlockLoad::TempStorage load
int gridsize_c(void) const
static PatchMap * Object()
int index_a(int pid) const
void assign(computeInfo *c, processorInfo *pRec)
processorInfo * processors
void printLoads(int phase=0)
int gridsize_a(void) const
void multirefine(double overload_start=1.02)
void NAMD_bug(const char *err_msg)
int index_b(int pid) const
void NAMD_die(const char *err_msg)
int index_c(int pid) const
AlgRecBisection(computeInfo *computeArray, patchInfo *patchArray, processorInfo *processorArray, int nComps, int nPatches, int nPes)
int gridsize_b(void) const
const char * strategyName