16 #include "ComputeMsmMsaMgr.decl.h"
21 #include "ComputeMgr.decl.h"
23 #define MIN_DEBUG_LEVEL 3
35 void MsmMsaData::pup(PUP::er &p)
37 p|ispx, p|ispy, p|ispz;
38 p|hx_1, p|hy_1, p|hz_1;
40 p|origin_x, p|origin_y, p|origin_z;
41 p|nlevels, p|maxlevels, p|toplevel;
53 p|num_clients_qh, p|num_clients_eh;
54 p|num_anterpolation_chares;
55 p|num_interpolation_chares;
56 p|num_restriction_chares;
57 p|num_prolongation_chares;
58 p|num_gridcutoff_chares;
59 p|dim_gridcutoff_chares;
60 p|dim_gridtransfer_chares;
61 p|num_total_restriction_chares;
62 p|num_total_prolongation_chares;
63 p|num_total_gridcutoff_chares;
65 p|num_points_per_chare;
69 void MsmMsaData::print()
72 printf(
"MSM data:\n");
73 printf(
"ispx=%d ispy=%d ispz=%d\n", ispx, ispy, ispz);
74 printf(
"hx=%g hy=%g hz=%g\n", 1/hx_1, 1/hy_1, 1/hz_1);
76 printf(
"origin = %g %g %g\n", origin_x, origin_y, origin_z);
77 printf(
"nlevels=%d maxlevels=%d toplevel=%d\n", nlevels, maxlevels, toplevel);
78 printf(
"approx=%d split=%d\n", approx,
split);
82 struct ComputeMsmMsaAtom {
88 class MsmMsaCoordMsg :
public CMessage_MsmMsaCoordMsg {
92 ComputeMsmMsaAtom *coord;
95 class ComputeMsmMsaMgr :
public CBase_ComputeMsmMsaMgr {
100 void initialize(CkQdMsg *);
102 void setCompute(ComputeMsmMsa *c) { msmCompute = c; }
104 void recvMsmMsaData(
const MsmMsaData &);
106 void initWorkers(CkQdMsg *);
107 void startWorkers(CkQdMsg *);
109 void anterpolate(MsmMsaCoordMsg *);
110 void interpolate(CkQdMsg *);
112 const MsmMsaData &getMsmMsaData()
const {
return msmData; }
115 CProxy_ComputeMsmMsaMgr msmProxy;
116 ComputeMsmMsa *msmCompute;
118 MsmMsaCoordMsg *coordMsg;
125 CProxy_MsmMsaLevel msmLevelProxy;
126 CProxy_MsmMsaEnergy msmEnergyProxy;
128 MsmMsaGrid::Accum qhacc;
129 MsmMsaGrid::Read qhread;
130 MsmMsaGrid::Accum ehacc;
131 MsmMsaGrid::Read ehread;
132 int msa_setup_anterpolation;
133 int msa_setup_interpolation;
136 class MsmMsaLevel :
public CBase_MsmMsaLevel {
138 MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh, MsmMsaGrid &q2h, MsmMsaGrid &e2h);
139 MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh);
140 MsmMsaLevel(CkMigrateMessage *m) { }
149 class MsmMsaGridCutoff :
public CBase_MsmMsaGridCutoff {
151 MsmMsaGridCutoff(
int level, MsmMsaGrid &qh, MsmMsaGrid &eh);
152 MsmMsaGridCutoff(CkMigrateMessage *m) { }
156 MsmMsaGrid::Accum qhacc, ehacc;
157 MsmMsaGrid::Read qhread, ehread;
164 class MsmMsaRestriction :
public CBase_MsmMsaRestriction {
166 MsmMsaRestriction(
int level, MsmMsaGrid &qh, MsmMsaGrid &q2h);
167 MsmMsaRestriction(CkMigrateMessage *m) { }
171 MsmMsaGrid::Accum qhacc, q2hacc;
172 MsmMsaGrid::Read qhread, q2hread;
179 class MsmMsaProlongation :
public CBase_MsmMsaProlongation {
181 MsmMsaProlongation(
int level, MsmMsaGrid &eh, MsmMsaGrid &e2h);
182 MsmMsaProlongation(CkMigrateMessage *m) { }
186 MsmMsaGrid::Accum ehacc, e2hacc;
187 MsmMsaGrid::Read ehread, e2hread;
194 class MsmMsaEnergy :
public CBase_MsmMsaEnergy {
196 MsmMsaEnergy(MsmMsaGrid &qh, MsmMsaGrid &eh);
197 MsmMsaEnergy(CkMigrateMessage *m) { }
202 MsmMsaGrid::Accum qhacc, ehacc;
203 MsmMsaGrid::Read qhread, ehread;
213 ComputeMsmMsa::ComputeMsmMsa(
ComputeID c) :
216 CProxy_ComputeMsmMsaMgr::ckLocalBranch(
217 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->setCompute(
this);
223 ComputeMsmMsa::~ComputeMsmMsa()
227 void ComputeMsmMsa::doWork()
232 if ( ! patchList[0].p->flags.doFullElectrostatics ) {
233 for (ap = ap.begin(); ap != ap.end(); ap++) {
235 Results *r = (*ap).forceBox->open();
236 (*ap).positionBox->close(&x);
237 (*ap).forceBox->close(&r);
244 int numLocalAtoms = 0;
245 for (ap = ap.begin(); ap != ap.end(); ap++) {
246 numLocalAtoms += (*ap).p->getNumAtoms();
249 MsmMsaCoordMsg *msg =
new (numLocalAtoms, 0) MsmMsaCoordMsg;
250 msg->numAtoms = numLocalAtoms;
251 msg->lattice = patchList[0].p->flags.lattice;
252 ComputeMsmMsaAtom *data_ptr = msg->coord;
255 for (ap = ap.begin(); ap != ap.end(); ap++) {
256 CompAtom *x = (*ap).positionBox->open();
258 if ( patchList[0].p->flags.doMolly ) {
259 (*ap).positionBox->close(&x);
260 x = (*ap).avgPositionBox->open();
262 int numAtoms = (*ap).p->getNumAtoms();
264 for (
int i=0; i < numAtoms; i++) {
266 data_ptr->msmcharge = qscaling * x[i].
charge;
267 data_ptr->id = xExt[i].
id;
271 if ( patchList[0].p->flags.doMolly ) { (*ap).avgPositionBox->close(&x); }
272 else { (*ap).positionBox->close(&x); }
275 CProxy_ComputeMsmMsaMgr msmProxy(
276 CkpvAccess(BOCclass_group).computeMsmMsaMgr);
277 msmProxy[CkMyPe()].anterpolate(msg);
278 msmProxy[CkMyPe()].interpolate(
new CkQdMsg);
282 void ComputeMsmMsa::saveResults(
int n,
const MsmMsaForce force[],
double self_energy)
288 for (ap = ap.begin(); ap != ap.end(); ap++) {
289 Results *r = (*ap).forceBox->open();
291 int numAtoms = (*ap).p->getNumAtoms();
292 for (
int i=0; i < numAtoms; i++, j++) {
293 f[i].
x += force[j].x;
294 f[i].
y += force[j].y;
295 f[i].
z += force[j].z;
297 (*ap).forceBox->close(&r);
304 ComputeMsmMsaMgr::ComputeMsmMsaMgr() :
305 msmProxy(thisgroup), msmCompute(0),
306 coordMsg(0), numAtoms(0), force(0), numForces(0),
307 msa_setup_anterpolation(0), msa_setup_interpolation(0)
309 CkpvAccess(BOCclass_group).computeMsmMsaMgr = thisgroup;
312 ComputeMsmMsaMgr::~ComputeMsmMsaMgr()
322 double padding,
double gridspacing,
int isperiodic)
324 const double ZERO_TOLERANCE = 1e-4;
325 double xlen = 1, inv_xlen = 1;
326 double ylen = 1, inv_ylen = 1;
327 double zlen = 1, inv_zlen = 1;
331 double rpadx = padding * ulen_1;
332 double rpady = padding * vlen_1;
333 double rpadz = padding * wlen_1;
334 double rhx = gridspacing * ulen_1;
335 double rhy = gridspacing * vlen_1;
336 double rhz = gridspacing * wlen_1;
337 double xmin = smin.
x, xmax = smax.
x;
338 double ymin = smin.
y, ymax = smax.
y;
339 double zmin = smin.
z, zmax = smax.
z;
349 d = coord[0].position - c;
357 d = coord[i].position - c;
361 if (s.x < xmin) xmin = s.
x;
362 else if (s.x > xmax) xmax = s.x;
363 if (s.y < ymin) ymin = s.y;
364 else if (s.y > ymax) ymax = s.y;
365 if (s.z < zmin) zmin = s.z;
366 else if (s.z > zmax) zmax = s.z;
369 printf(
"*** xmin=%.4f xmax=%.4f\n", xmin, xmax);
370 printf(
"*** ymin=%.4f ymax=%.4f\n", ymin, ymax);
371 printf(
"*** zmin=%.4f zmax=%.4f\n", zmin, zmax);
375 if ( ! is_periodic_x) {
379 double mupper = ceil(xmax / (2*rhx));
380 double mlower = floor(xmin / (2*rhx));
384 rc.
x = 0.5*(xmin + xmax);
386 if (xlen < ZERO_TOLERANCE) {
394 if ( ! is_periodic_y) {
398 double mupper = ceil(ymax / (2*rhy));
399 double mlower = floor(ymin / (2*rhy));
403 rc.
y = 0.5*(ymin + ymax);
405 if (ylen < ZERO_TOLERANCE) {
413 if ( ! is_periodic_z) {
417 double mupper = ceil(zmax / (2*rhz));
418 double mlower = floor(zmin / (2*rhz));
422 rc.
z = 0.5*(zmin + zmax);
424 if (zlen < ZERO_TOLERANCE) {
433 printf(
"xmin=%g xmax=%g\n", xmin, xmax);
434 printf(
"ymin=%g ymax=%g\n", ymin, ymax);
435 printf(
"zmin=%g zmax=%g\n", zmin, zmax);
436 printf(
"xlen=%g ylen=%g zlen=%g\n", xlen, ylen, zlen);
437 printf(
"rc_x=%g rc_y=%g rc_z=%g\n", rc.
x, rc.
y, rc.
z);
441 c.
x = (u.
x*rc.
x + v.
x*rc.
y + w.
x*rc.
z) + c.
x;
442 c.
y = (u.
y*rc.
x + v.
y*rc.
y + w.
y*rc.
z) + c.
y;
443 c.
z = (u.
z*rc.
x + v.
z*rc.
y + w.
z*rc.
z) + c.
z;
446 printf(
"c_x=%g c_y=%g c_z=%g\n", c.
x, c.
y, c.
z);
459 struct MsmMsaInterpParams {
489 const int nu = InterpParams[approx].nu;
493 const double hmin = (4./5) * gridspacing;
494 const double hmax = 1.5 * hmin;
517 double h = gridspacing;
518 int n = (int) floorf(len / h) + 1;
528 void ComputeMsmMsaMgr::initialize(CkQdMsg *msg)
532 if (CkMyPe() != 0)
return;
536 const double a = simParams->
cutoff;
548 Vector u(lattice.
a()), v(lattice.
b()), w(lattice.
c());
551 if ( (msmflags & allperiodic) != allperiodic ) {
557 padding, gridspacing, msmflags);
560 if (u.
x <= 0 || u.
y != 0 || u.
z != 0 ||
561 v.
x != 0 || v.
y <= 0 || v.
z != 0 ||
562 w.
x != 0 || w.
y != 0 || w.
z <= 0) {
563 NAMD_die(
"MSM requires cell basis be along x, y, and z coordinate axes.");
567 MsmMsaData &p = msmData;
569 const int nu = InterpParams[approx].nu;
570 const int omega = InterpParams[approx].omega;
576 const double xlen = u.
x;
577 const double ylen = v.
y;
578 const double zlen = w.
z;
583 int ia, ib, ja, jb, ka, kb, ni, nj, nk;
591 int level, toplevel, maxlevels;
603 rc =
setup_hgrid_1d(xlen, gridspacing, hx, nx, ia, ib, ispx, approx);
604 if (rc)
NAMD_die(
"MSM failed to setup grid along x dimension.");
606 rc =
setup_hgrid_1d(ylen, gridspacing, hy, ny, ja, jb, ispy, approx);
607 if (rc)
NAMD_die(
"MSM failed to setup grid along y dimension.");
609 rc =
setup_hgrid_1d(zlen, gridspacing, hz, nz, ka, kb, ispz, approx);
610 if (rc)
NAMD_die(
"MSM failed to setup grid along z dimension.");
614 p.hx_1 = (float) (1/hx);
615 p.hy_1 = (float) (1/hy);
616 p.hz_1 = (float) (1/hz);
619 gx = c.
x - ((nx >> 1) * hx);
620 gy = c.
y - ((ny >> 1) * hy);
621 gz = c.
z - ((nz >> 1) * hz);
623 p.origin_x = (float) gx;
624 p.origin_y = (float) gy;
625 p.origin_z = (float) gz;
639 for (maxlevels = 1; n > 0; n >>= 1) maxlevels++;
642 int omega3 = omega * omega * omega;
643 int nhalf = (int) sqrtf(ni*nj*nk);
644 lastnelems = (nhalf > omega3 ? nhalf : omega3);
653 p.maxlevels = maxlevels;
654 p.grid_len.resize(maxlevels);
655 p.grid_idstart.resize(maxlevels);
656 p.scaling.resize(maxlevels);
660 if (pm->maxlevels < maxlevels) {
661 void *vqh, *veh, *vgc;
663 vqh = realloc(pm->qh, maxlevels *
sizeof(NL_MsmMsagrid_float));
665 veh = realloc(pm->eh, maxlevels *
sizeof(NL_MsmMsagrid_float));
667 vgc = realloc(pm->gc, maxlevels *
sizeof(NL_MsmMsagrid_float));
669 pm->qh_f = (NL_MsmMsagrid_float *) vqh;
670 pm->eh_f = (NL_MsmMsagrid_float *) veh;
671 pm->gc_f = (NL_MsmMsagrid_float *) vgc;
673 for (level = pm->maxlevels; level < maxlevels; level++) {
680 vqh = realloc(pm->qh, maxlevels *
sizeof(NL_MsmMsagrid_double));
682 veh = realloc(pm->eh, maxlevels *
sizeof(NL_MsmMsagrid_double));
684 vgc = realloc(pm->gc, maxlevels *
sizeof(NL_MsmMsagrid_double));
686 pm->qh = (NL_MsmMsagrid_double *) vqh;
687 pm->eh = (NL_MsmMsagrid_double *) veh;
688 pm->gc = (NL_MsmMsagrid_double *) vgc;
690 for (level = pm->maxlevels; level < maxlevels; level++) {
696 pm->maxlevels = maxlevels;
707 GRID_RESIZE( &(pm->qh_f[level]),
float, ia, ni, ja, nj, ka, nk);
708 GRID_RESIZE( &(pm->eh_f[level]),
float, ia, ni, ja, nj, ka, nk);
711 GRID_RESIZE( &(pm->qh[level]),
double, ia, ni, ja, nj, ka, nk);
712 GRID_RESIZE( &(pm->eh[level]),
double, ia, ni, ja, nj, ka, nk);
715 p.grid_len[level] =
Int3(ni,nj,nk);
716 p.grid_idstart[level] =
Int3(ia,ja,ka);
717 p.scaling[level] = (float) scaling;
720 if (++level == nlevels) done |= 0x07;
722 alldone = (done == 0x07);
725 int nelems = ni * nj * nk;
726 if (nelems <= lastnelems) done |= 0x07;
732 if (ni & 1) done |= 0x07;
733 else if (ni == 2) done |= 0x01;
736 ia = -((-ia+1)/2) - nu;
739 if (ni <= omega) done |= 0x01;
745 if (nj & 1) done |= 0x07;
746 else if (nj == 2) done |= 0x02;
749 ja = -((-ja+1)/2) - nu;
752 if (nj <= omega) done |= 0x02;
758 if (nk & 1) done |= 0x07;
759 else if (nk == 2) done |= 0x04;
762 ka = -((-ka+1)/2) - nu;
765 if (nk <= omega) done |= 0x04;
768 }
while ( ! alldone );
772 toplevel = (ispany ? nlevels : nlevels - 1);
776 ni = (int) ceil(2*a/hx) - 1;
777 nj = (int) ceil(2*a/hy) - 1;
778 nk = (int) ceil(2*a/hz) - 1;
780 p.gc_len =
Int3(2*ni+1, 2*nj+1, 2*nk+1);
781 p.gc_idstart =
Int3(-ni, -nj, -nk);
782 p.gc.resize( (2*ni+1)*(2*nj+1)*(2*nk+1) );
785 for (k = -nk; k <= nk; k++) {
786 for (j = -nj; j <= nj; j++) {
787 for (i = -ni; i <= ni; i++) {
788 double s, t, gs, gt, g;
789 s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
796 SPOLY(>, &d, t, split);
797 g = (gs - 0.5 * gt) / a;
800 SPOLY(&gs, &d, s, split);
801 SPOLY(>, &d, t, split);
802 g = (gs - 0.5 * gt) / a;
804 p.gc[index] = (float) g;
810 if (toplevel < nlevels) {
814 CkPrintf(
"MSM setting top level weights\n");
816 ni = p.grid_len[toplevel].nx;
817 nj = p.grid_len[toplevel].ny;
818 nk = p.grid_len[toplevel].nz;
820 p.toplevel = toplevel;
821 p.gctop_len =
Int3(2*ni+1, 2*nj+1, 2*nk+1);
822 p.gctop_idstart =
Int3(-ni, -nj, -nk);
823 p.gctop.resize( (2*ni+1)*(2*nj+1)*(2*nk+1) );
826 for (k = -nk; k <= nk; k++) {
827 for (j = -nj; j <= nj; j++) {
828 for (i = -ni; i <= ni; i++) {
830 s = sqrt((i*hx)*(i*hx) + (j*hy)*(j*hy) + (k*hz)*(k*hz)) / a;
835 SPOLY(&gs, &d, s, split);
837 p.gctop[index] = (float) (gs / a);
847 SPOLY(&gs, &d, s, split);
848 p.self_energy_const = gs / a;
851 for (n = 0; n < natoms; n++) {
852 double q = atom[4*n + 3];
855 self_energy *= 0.5*s;
863 p.num_restriction_chares.resize(nlevels-1);
864 p.num_prolongation_chares.resize(nlevels-1);
865 p.dim_gridtransfer_chares.resize(nlevels-1);
867 p.num_gridcutoff_chares.resize(nlevels);
868 p.dim_gridcutoff_chares.resize(nlevels);
869 p.num_clients_qh.resize(nlevels);
870 p.num_clients_eh.resize(nlevels);
873 p.num_interpolation_chares = p.num_anterpolation_chares;
874 p.num_total_gridcutoff_chares = 0;
875 p.num_total_restriction_chares = 0;
876 p.num_total_prolongation_chares = 0;
877 for (n = 0; n < nlevels; n++) {
878 int ni = p.grid_len[n].nx;
879 int nj = p.grid_len[n].ny;
880 int nk = p.grid_len[n].nz;
884 int nc = nci * ncj * nck;
886 CkPrintf(
"n=%d nc=%d nci=%d ncj=%d nck=%d\n", n, nc, nci, ncj, nck);
888 p.num_gridcutoff_chares[n] = nc;
889 p.dim_gridcutoff_chares[n].nx = nci;
890 p.dim_gridcutoff_chares[n].ny = ncj;
891 p.dim_gridcutoff_chares[n].nz = nck;
892 p.num_total_gridcutoff_chares += nc;
894 p.num_restriction_chares[n-1] = nc;
895 p.num_prolongation_chares[n-1] = nc;
896 p.dim_gridtransfer_chares[n-1].nx = nci;
897 p.dim_gridtransfer_chares[n-1].ny = ncj;
898 p.dim_gridtransfer_chares[n-1].nz = nck;
899 p.num_total_restriction_chares += nc;
900 p.num_total_prolongation_chares += nc;
903 p.num_energy_chares = nc;
907 p.num_clients_qh[0] = p.num_anterpolation_chares +
908 p.num_gridcutoff_chares[0] + p.num_energy_chares;
909 p.num_clients_eh[0] = p.num_interpolation_chares +
910 p.num_gridcutoff_chares[0] + p.num_energy_chares;
912 p.num_clients_qh[0] += p.num_restriction_chares[0];
913 p.num_clients_eh[0] += p.num_prolongation_chares[0];
915 for (n = 1; n < nlevels; n++) {
916 p.num_clients_qh[n] = p.num_gridcutoff_chares[n] +
917 p.num_restriction_chares[n-1];
918 p.num_clients_eh[n] = p.num_gridcutoff_chares[n] +
919 p.num_prolongation_chares[n-1];
921 p.num_clients_qh[n] += p.num_restriction_chares[n];
922 p.num_clients_eh[n] += p.num_prolongation_chares[n];
927 CkPrintf(
"nlevels = %d\n", nlevels);
928 for (n = 0; n < nlevels; n++) {
929 CkPrintf(
"num clients qh[%d] = %d\n", n, p.num_clients_qh[n]);
930 CkPrintf(
"num clients eh[%d] = %d\n", n, p.num_clients_eh[n]);
932 CkPrintf(
"num anterpolation chares = %d\n", p.num_anterpolation_chares);
933 CkPrintf(
"num interpolation chares = %d\n", p.num_interpolation_chares);
934 for (n = 0; n < nlevels; n++) {
935 CkPrintf(
"num grid cutoff chares[%d] = %d\n",
936 n, p.num_gridcutoff_chares[n]);
938 for (n = 0; n < nlevels-1; n++) {
939 CkPrintf(
"num restriction chares[%d] = %d\n",
940 n, p.num_restriction_chares[n]);
942 for (n = 0; n < nlevels-1; n++) {
943 CkPrintf(
"num prolongation chares[%d] = %d\n",
944 n, p.num_prolongation_chares[n]);
951 CkAbort(
"attempted to reallocate MSAs");
953 char *qh_memory =
new char[nlevels *
sizeof(MsmMsaGrid)];
954 qh =
static_cast<MsmMsaGrid *
>((
void *)qh_memory);
955 char *eh_memory =
new char[nlevels *
sizeof(MsmMsaGrid)];
956 eh =
static_cast<MsmMsaGrid *
>((
void *)eh_memory);
958 p.qh.resize(nlevels);
959 p.eh.resize(nlevels);
961 for (n = 0; n < nlevels; n++) {
962 ia = p.grid_idstart[n].nx;
963 ib = p.grid_len[n].nx + ia - 1;
964 ja = p.grid_idstart[n].ny;
965 jb = p.grid_len[n].ny + ja - 1;
966 ka = p.grid_idstart[n].nz;
967 kb = p.grid_len[n].nz + ka - 1;
971 new(qh_memory + n*
sizeof(MsmMsaGrid))MsmMsaGrid(ia, ib, ja, jb, ka, kb,
972 p.num_clients_qh[n]);
973 new(eh_memory + n*
sizeof(MsmMsaGrid))MsmMsaGrid(ia, ib, ja, jb, ka, kb,
974 p.num_clients_eh[n]);
976 p.qh[n] = MsmMsaGrid(ia, ib, ja, jb, ka, kb, p.num_clients_qh[n]);
977 p.eh[n] = MsmMsaGrid(ia, ib, ja, jb, ka, kb, p.num_clients_eh[n]);
980 msmProxy.recvMsmMsaData(msmData);
984 void ComputeMsmMsaMgr::recvMsmMsaData(
const MsmMsaData &m)
996 void ComputeMsmMsaMgr::initWorkers(CkQdMsg *msg)
1000 if (CkMyPe() != 0)
return;
1003 MsmMsaData &p = msmData;
1005 msmLevelProxy = CProxy_MsmMsaLevel::ckNew();
1006 for (n = 0; n < p.nlevels-1; n++) {
1007 msmLevelProxy[n].insert(p.qh[n], p.eh[n], p.qh[n+1], p.eh[n+1]);
1009 msmLevelProxy[n].insert(p.qh[n], p.eh[n]);
1010 msmLevelProxy.doneInserting();
1012 CkPrintf(
"Created %d MsmMsaLevel chares\n", p.nlevels);
1015 msmEnergyProxy = CProxy_MsmMsaEnergy::ckNew(p.qh[0], p.eh[0], p.num_energy_chares);
1017 CkPrintf(
"Created %d MsmMsaEnergy chares\n", p.num_energy_chares);
1022 void ComputeMsmMsaMgr::startWorkers(CkQdMsg *msg)
1026 if (CkMyPe() != 0)
return;
1030 msmLevelProxy.compute();
1031 msmEnergyProxy.compute();
1041 3, 5, 5, 7, 7, 9, 9, 3,
1045 void ComputeMsmMsaMgr::anterpolate(MsmMsaCoordMsg *msg)
1048 CkPrintf(
"Anterpolation compute %d starting, PE %d\n", thisIndex, CkMyPe());
1051 ComputeMsmMsaAtom *coord = coordMsg->coord;
1052 int numAtoms = coordMsg->numAtoms;
1053 MsmMsaData &p = msmData;
1054 if ( ! msa_setup_anterpolation) {
1055 p.qh[0].enroll(p.num_clients_qh[0]);
1056 qhacc = p.qh[0].getInitialAccum();
1057 qhread = qhacc.syncToRead();
1058 msa_setup_anterpolation = 1;
1060 int ni = p.grid_len[0].nx;
1061 int nj = p.grid_len[0].ny;
1062 int nk = p.grid_len[0].nz;
1063 int ia = p.grid_idstart[0].nx;
1064 int ja = p.grid_idstart[0].ny;
1065 int ka = p.grid_idstart[0].nz;
1066 int ib = ia + ni - 1;
1067 int jb = ja + nj - 1;
1068 int kb = ka + nk - 1;
1069 qhacc = qhread.syncToEAccum();
1071 CkPrintf(
"Anterpolation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1082 const int s_size = PolyDegree[p.approx] + 1;
1083 const int s_edge = (PolyDegree[p.approx] - 1) >> 1;
1085 for (n = 0; n < numAtoms; n++) {
1087 float q = coord[n].msmcharge;
1092 float rx_hx = (coord[n].position.x - p.origin_x) * p.hx_1;
1093 float ry_hy = (coord[n].position.y - p.origin_y) * p.hy_1;
1094 float rz_hz = (coord[n].position.z - p.origin_z) * p.hz_1;
1097 int ilo = (
int) floorf(rx_hx) - s_edge;
1098 int jlo = (int) floorf(ry_hy) - s_edge;
1099 int klo = (int) floorf(rz_hz) - s_edge;
1103 delta = rx_hx - (float) ilo;
1105 delta = ry_hy - (float) jlo;
1107 delta = rz_hz - (float) klo;
1111 int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1112 ja <= jlo && (jlo+(s_size-1)) <= jb &&
1113 ka <= klo && (klo+(s_size-1)) <= kb );
1118 for (k = 0; k < s_size; k++) {
1119 float ck = zphi[k] * q;
1120 for (j = 0; j < s_size; j++) {
1121 float cjk = yphi[j] * ck;
1122 for (i = 0; i < s_size; i++) {
1123 qhacc.accumulate(i+ilo,j+jlo,k+klo) += xphi[i] * cjk;
1133 if (ilo < ia)
do { ilo += ni; }
while (ilo < ia);
1134 else if (ilo > ib)
do { ilo -= ni; }
while (ilo > ib);
1136 else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1137 CkPrintf(
"Error anterpolation: ilo=%d out-of-range [%d,%d]\n",
1138 ilo, ia, (ib-s_size+1));
1143 if (jlo < ja)
do { jlo += nj; }
while (jlo < ja);
1144 else if (jlo > jb)
do { jlo -= nj; }
while (jlo > jb);
1146 else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1147 CkPrintf(
"Error anterpolation: jlo=%d out-of-range [%d,%d]\n",
1148 jlo, ja, (jb-s_size+1));
1153 if (klo < ka)
do { klo += nk; }
while (klo < ka);
1154 else if (klo > kb)
do { klo -= nk; }
while (klo > kb);
1156 else if (klo < ka || (klo+(s_size-1)) > kb) {
1157 CkPrintf(
"Error anterpolation: klo=%d out-of-range [%d,%d]\n",
1158 klo, ka, (kb-s_size+1));
1164 for (k = 0, kp = klo; k < s_size; k++, kp++) {
1165 if (kp > kb) kp = ka;
1166 float ck = zphi[k] * q;
1167 for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1168 if (jp > jb) jp = ja;
1169 float cjk = yphi[j] * ck;
1170 for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1171 if (ip > ib) ip = ia;
1172 qhacc.accumulate(ip,jp,kp) += xphi[i] * cjk;
1182 qhread = qhacc.syncToRead();
1184 CkPrintf(
"Anterpolation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1189 void ComputeMsmMsaMgr::interpolate(CkQdMsg *msg)
1193 CkPrintf(
"Interpolation compute %d starting, PE %d\n", thisIndex, CkMyPe());
1195 ComputeMsmMsaAtom *coord = coordMsg->coord;
1196 int numAtoms = coordMsg->numAtoms;
1197 if (numForces < numAtoms) {
1199 force =
new MsmMsaForce[numAtoms];
1200 numForces = numAtoms;
1202 MsmMsaData &p = msmData;
1203 if ( ! msa_setup_interpolation) {
1204 p.eh[0].enroll(p.num_clients_eh[0]);
1205 ehacc = p.eh[0].getInitialAccum();
1206 ehread = ehacc.syncToRead();
1207 msa_setup_interpolation = 1;
1209 int ni = p.grid_len[0].nx;
1210 int nj = p.grid_len[0].ny;
1211 int nk = p.grid_len[0].nz;
1212 int ia = p.grid_idstart[0].nx;
1213 int ja = p.grid_idstart[0].ny;
1214 int ka = p.grid_idstart[0].nz;
1215 int ib = ia + ni - 1;
1216 int jb = ja + nj - 1;
1217 int kb = ka + nk - 1;
1218 ehacc = ehread.syncToEAccum();
1219 ehread = ehacc.syncToRead();
1221 CkPrintf(
"Interpolation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1226 CkPrintf(
"+++ eh[0,0,0] = %g\n", ehread.get(0,0,0));
1229 for (k = ka; k <= kb; k++) {
1230 for (j = ja; j <= jb; j++) {
1231 for (i = ia; i <= ib; i++) {
1232 CkPrintf(
"+++ eh[%d,%d,%d] = %g\n", i, j, k, ehread.get(i,j,k));
1253 const int s_size = PolyDegree[p.approx] + 1;
1254 const int s_edge = (PolyDegree[p.approx] - 1) >> 1;
1256 for (n = 0; n < numAtoms; n++) {
1258 float q = coord[n].msmcharge;
1269 float rx_hx = (coord[n].position.x - p.origin_x) * p.hx_1;
1270 float ry_hy = (coord[n].position.y - p.origin_y) * p.hy_1;
1271 float rz_hz = (coord[n].position.z - p.origin_z) * p.hz_1;
1274 int ilo = (
int) floorf(rx_hx) - s_edge;
1275 int jlo = (int) floorf(ry_hy) - s_edge;
1276 int klo = (int) floorf(rz_hz) - s_edge;
1280 delta = rx_hx - (float) ilo;
1282 delta = ry_hy - (float) jlo;
1284 delta = rz_hz - (float) klo;
1288 int iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib &&
1289 ja <= jlo && (jlo+(s_size-1)) <= jb &&
1290 ka <= klo && (klo+(s_size-1)) <= kb );
1296 for (k = 0; k < s_size; k++) {
1297 for (j = 0; j < s_size; j++) {
1298 float cx = yphi[j] * zphi[k];
1299 float cy = dyphi[j] * zphi[k];
1300 float cz = yphi[j] * dzphi[k];
1301 for (i = 0; i < s_size; i++) {
1302 float e = ehread.get(i+ilo,j+jlo,k+klo);
1303 fx += e * dxphi[i] * cx;
1304 fy += e * xphi[i] * cy;
1305 fz += e * xphi[i] * cz;
1315 if (ilo < ia)
do { ilo += ni; }
while (ilo < ia);
1316 else if (ilo > ib)
do { ilo -= ni; }
while (ilo > ib);
1318 else if (ilo < ia || (ilo+(s_size-1)) > ib) {
1319 CkPrintf(
"Error interpolation: ilo=%d out-of-range [%d,%d]\n",
1320 ilo, ia, (ib-s_size+1));
1325 if (jlo < ja)
do { jlo += nj; }
while (jlo < ja);
1326 else if (jlo > jb)
do { jlo -= nj; }
while (jlo > jb);
1328 else if (jlo < ja || (jlo+(s_size-1)) > jb) {
1329 CkPrintf(
"Error interpolation: jlo=%d out-of-range [%d,%d]\n",
1330 jlo, ja, (jb-s_size+1));
1335 if (klo < ka)
do { klo += nk; }
while (klo < ka);
1336 else if (klo > kb)
do { klo -= nk; }
while (klo > kb);
1338 else if (klo < ka || (klo+(s_size-1)) > kb) {
1339 CkPrintf(
"Error interpolation: klo=%d out-of-range [%d,%d]\n",
1340 klo, ka, (kb-s_size+1));
1347 for (k = 0, kp = klo; k < s_size; k++, kp++) {
1348 if (kp > kb) kp = ka;
1349 for (j = 0, jp = jlo; j < s_size; j++, jp++) {
1350 if (jp > jb) jp = ja;
1351 float cx = yphi[j] * zphi[k];
1352 float cy = dyphi[j] * zphi[k];
1353 float cz = yphi[j] * dzphi[k];
1354 for (i = 0, ip = ilo; i < s_size; i++, ip++) {
1355 if (ip > ib) ip = ia;
1356 float e = ehread.get(ip,jp,kp);
1357 fx += e * dxphi[i] * cx;
1358 fy += e * xphi[i] * cy;
1359 fz += e * xphi[i] * cz;
1366 force[n].x = -q * fx;
1367 force[n].y = -q * fy;
1368 force[n].z = -q * fz;
1371 double self_energy = 0.5*p.self_energy_const*qself;
1377 CkPrintf(
"Interpolation compute %d read sync done, PE %d\n",
1378 thisIndex, CkMyPe());
1381 msmCompute->saveResults(numAtoms, force, self_energy);
1383 int startAtomID = thisIndex * MsmMsa::NUM_ATOMS_PER_CHARE;
1384 msmProxy.doneForces(startAtomID, nforces, forcebuffer);
1387 CkPrintf(
"Interpolation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1392 MsmMsaLevel::MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh, MsmMsaGrid &q2h, MsmMsaGrid &e2h)
1394 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1395 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1396 int level = thisIndex;
1398 const Int3 &dg = p.dim_gridcutoff_chares[level];
1399 gridcutoff = CProxy_MsmMsaGridCutoff::ckNew(level, qh, eh, dg.
nx, dg.
ny, dg.
nz);
1400 const Int3 &dt = p.dim_gridtransfer_chares[level];
1401 restriction = CProxy_MsmMsaRestriction::ckNew(level, qh, q2h, dt.
nx, dt.
ny, dt.
nz);
1404 CkPrintf(
"Created %d grid cutoff chares\n", p.num_gridcutoff_chares[level]);
1409 MsmMsaLevel::MsmMsaLevel(MsmMsaGrid &qh, MsmMsaGrid &eh)
1411 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1412 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1413 int level = thisIndex;
1415 const Int3 &dg = p.dim_gridcutoff_chares[level];
1416 gridcutoff = CProxy_MsmMsaGridCutoff::ckNew(level, qh, eh, dg.
nx, dg.
ny, dg.
nz);
1418 CkPrintf(
"Created %d grid cutoff chares\n", p.num_gridcutoff_chares[level]);
1423 void MsmMsaLevel::compute()
1425 if (thisIndex != lastlevel) {
1433 MsmMsaGridCutoff::MsmMsaGridCutoff(
int level, MsmMsaGrid &qh_, MsmMsaGrid &eh_)
1437 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1438 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1440 const Int3 &len = p.grid_len[mylevel];
1441 const Int3 &idstart = p.grid_idstart[mylevel];
1442 mia = idstart.
nx + thisIndex.x * p.num_points_per_chare.nx;
1443 mja = idstart.
ny + thisIndex.y * p.num_points_per_chare.ny;
1444 mka = idstart.
nz + thisIndex.z * p.num_points_per_chare.nz;
1445 mib = mia + p.num_points_per_chare.nx - 1;
1446 mjb = mja + p.num_points_per_chare.ny - 1;
1447 mkb = mka + p.num_points_per_chare.nz - 1;
1448 if (mib > idstart.
nx + len.
nx - 1) {
1449 mib = idstart.
nx + len.
nx - 1;
1451 if (mjb > idstart.
ny + len.
ny - 1) {
1452 mjb = idstart.
ny + len.
ny - 1;
1454 if (mkb > idstart.
nz + len.
nz - 1) {
1455 mkb = idstart.
nz + len.
nz - 1;
1461 void MsmMsaGridCutoff::compute()
1464 CkPrintf(
"Grid cutoff compute (%d,%d,%d) PE %d\n",
1465 thisIndex.x, thisIndex.y, thisIndex.z, CkMyPe());
1467 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1468 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1470 qh.enroll(p.num_clients_qh[mylevel]);
1471 eh.enroll(p.num_clients_eh[mylevel]);
1472 qhacc = qh.getInitialAccum();
1473 qhread = qhacc.syncToRead();
1474 ehacc = eh.getInitialAccum();
1475 ehread = ehacc.syncToRead();
1479 qhacc = qhread.syncToEAccum();
1480 qhread = qhacc.syncToRead();
1481 ehacc = ehread.syncToEAccum();
1486 int ni = p.grid_len[mylevel].nx;
1487 int nj = p.grid_len[mylevel].ny;
1488 int nk = p.grid_len[mylevel].nz;
1489 int ia = p.grid_idstart[mylevel].nx;
1490 int ja = p.grid_idstart[mylevel].ny;
1491 int ka = p.grid_idstart[mylevel].nz;
1492 int ib = ia + ni - 1;
1493 int jb = ja + nj - 1;
1494 int kb = ka + nk - 1;
1495 int ispnone = ! ( p.ispx || p.ispy || p.ispz );
1496 int gia, gib, gja, gjb, gka, gkb, gni, gnj;
1497 const float *gc = 0;
1498 if (mylevel != p.toplevel) {
1500 gia = p.gc_idstart.nx;
1501 gib = gia + p.gc_len.nx - 1;
1502 gja = p.gc_idstart.ny;
1503 gjb = gja + p.gc_len.ny - 1;
1504 gka = p.gc_idstart.nz;
1505 gkb = gka + p.gc_len.nz - 1;
1511 gia = p.gctop_idstart.nx;
1512 gib = gia + p.gctop_len.nx - 1;
1513 gja = p.gctop_idstart.ny;
1514 gjb = gja + p.gctop_len.ny - 1;
1515 gka = p.gctop_idstart.nz;
1516 gkb = gka + p.gctop_len.nz - 1;
1517 gni = p.gctop_len.nx;
1518 gnj = p.gctop_len.ny;
1521 int gia_clip, gib_clip;
1522 int gja_clip, gjb_clip;
1523 int gka_clip, gkb_clip;
1525 int kgoff, jkgoff, ngindex;
1527 float scaling = p.scaling[mylevel];
1531 if (mylevel==0 && thisIndex.x==0 && thisIndex.y==0 && thisIndex.z==0) {
1533 CkPrintf(
"+++ qh[0,0,0] = %g\n", qhread.get(0,0,0));
1534 CkPrintf(
"+++ p.toplevel = %d\n", p.toplevel);
1535 CkPrintf(
"+++ scaling = %g\n", scaling);
1537 for (k = gka; k <= gkb; k++) {
1538 for (j = gja; j <= gjb; j++) {
1539 for (i = gia; i <= gib; i++, index++) {
1540 printf(
"+++ gc[%d,%d,%d] = %g\n", i, j, k, gc[index]);
1551 for (k = mka; k <= mkb; k++) {
1554 gka_clip = (k + gka < ka ? ka - k : gka);
1555 gkb_clip = (k + gkb > kb ? kb - k : gkb);
1557 for (j = mja; j <= mjb; j++) {
1560 gja_clip = (j + gja < ja ? ja - j : gja);
1561 gjb_clip = (j + gjb > jb ? jb - j : gjb);
1563 for (i = mia; i <= mib; i++) {
1566 gia_clip = (i + gia < ia ? ia - i : gia);
1567 gib_clip = (i + gib > ib ? ib - i : gib);
1571 for (kd = gka_clip; kd <= gkb_clip; kd++) {
1572 kgoff = (kd-gka) * gnj;
1574 for (jd = gja_clip; jd <= gjb_clip; jd++) {
1575 jkgoff = (kgoff + (jd-gja)) * gni;
1577 for (
id = gia_clip;
id <= gib_clip;
id++) {
1578 ngindex = jkgoff + (
id-gia);
1580 eh_sum += qhread.get(i+
id,j+jd,k+kd) * gc[ngindex];
1586 ehacc.accumulate(i,j,k) += scaling * eh_sum;
1598 for (k = mka; k <= mkb; k++) {
1602 gka_clip = (k + gka < ka ? ka - k : gka);
1603 gkb_clip = (k + gkb > kb ? kb - k : gkb);
1604 if (klo < ka) klo = ka;
1609 if (klo < ka)
do { klo += nk; }
while (klo < ka);
1613 for (j = mja; j <= mjb; j++) {
1617 gja_clip = (j + gja < ja ? ja - j : gja);
1618 gjb_clip = (j + gjb > jb ? jb - j : gjb);
1619 if (jlo < ja) jlo = ja;
1624 if (jlo < ja)
do { jlo += nj; }
while (jlo < ja);
1628 for (i = mia; i <= mib; i++) {
1632 gia_clip = (i + gia < ia ? ia - i : gia);
1633 gib_clip = (i + gib > ib ? ib - i : gib);
1634 if (ilo < ia) ilo = ia;
1639 if (ilo < ia)
do { ilo += ni; }
while (ilo < ia);
1645 for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) {
1647 if (kp > kb) kp = ka;
1648 kgoff = (kd-gka) * gnj;
1650 for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) {
1652 if (jp > jb) jp = ja;
1653 jkgoff = (kgoff + (jd-gja)) * gni;
1655 for (
id = gia_clip, ip = ilo;
id <= gib_clip;
id++, ip++) {
1657 if (ip > ib) ip = ia;
1658 ngindex = jkgoff + (
id-gia);
1660 eh_sum += qhread.get(ip,jp,kp) * gc[ngindex];
1666 ehacc.accumulate(i,j,k) += scaling * eh_sum;
1676 ehread = ehacc.syncToRead();
1678 CkPrintf(
"Grid cutoff compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1690 5, 7, 7, 9, 9, 11, 11, 7
1700 {-5, -3, -1, 0, 1, 3, 5},
1703 {-5, -3, -1, 0, 1, 3, 5},
1706 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
1709 {-7, -5, -3, -1, 0, 1, 3, 5, 7},
1712 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
1715 {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9},
1718 {-3, -2, -1, 0, 1, 2, 3},
1725 {-1.f/16, 9.f/16, 1, 9.f/16, -1.f/16},
1728 {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
1731 {3.f/256, -25.f/256, 75.f/128, 1, 75.f/128, -25.f/256, 3.f/256},
1734 { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
1735 -245.f/2048, 49.f/2048, -5.f/2048 },
1738 { -5.f/2048, 49.f/2048, -245.f/2048, 1225.f/2048, 1, 1225.f/2048,
1739 -245.f/2048, 49.f/2048, -5.f/2048 },
1742 { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
1743 19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
1744 -405.f/65536, 35.f/65536 },
1747 { 35.f/65536, -405.f/65536, 567.f/16384, -2205.f/16384,
1748 19845.f/32768, 1, 19845.f/32768, -2205.f/16384, 567.f/16384,
1749 -405.f/65536, 35.f/65536 },
1752 { 1.f/48, 1.f/6, 23.f/48, 2.f/3, 23.f/48, 1.f/6, 1.f/48 },
1756 MsmMsaRestriction::MsmMsaRestriction(
int level, MsmMsaGrid &qh_, MsmMsaGrid &q2h_)
1757 : qh(qh_), q2h(q2h_)
1760 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1761 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1763 const Int3 &len = p.grid_len[mylevel+1];
1764 const Int3 &idstart = p.grid_idstart[mylevel+1];
1765 mia = idstart.
nx + thisIndex.x * p.num_points_per_chare.nx;
1766 mja = idstart.
ny + thisIndex.y * p.num_points_per_chare.ny;
1767 mka = idstart.
nz + thisIndex.z * p.num_points_per_chare.nz;
1768 mib = mia + p.num_points_per_chare.nx - 1;
1769 mjb = mja + p.num_points_per_chare.ny - 1;
1770 mkb = mka + p.num_points_per_chare.nz - 1;
1771 if (mib > idstart.
nx + len.
nx - 1) {
1772 mib = idstart.
nx + len.
nx - 1;
1774 if (mjb > idstart.
ny + len.
ny - 1) {
1775 mjb = idstart.
ny + len.
ny - 1;
1777 if (mkb > idstart.
nz + len.
nz - 1) {
1778 mkb = idstart.
nz + len.
nz - 1;
1784 void MsmMsaRestriction::compute()
1787 CkPrintf(
"Restriction compute %d, PE %d\n", thisIndex, CkMyPe());
1789 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1790 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1792 qh.enroll(p.num_clients_qh[mylevel]);
1793 q2h.enroll(p.num_clients_qh[mylevel+1]);
1794 qhacc = qh.getInitialAccum();
1795 qhread = qhacc.syncToRead();
1796 q2hacc = q2h.getInitialAccum();
1797 q2hread = q2hacc.syncToRead();
1801 qhacc = qhread.syncToEAccum();
1802 qhread = qhacc.syncToRead();
1803 q2hacc = q2hread.syncToEAccum();
1805 CkPrintf(
"Restriction compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1811 const int nstencil = Nstencil[p.approx];
1812 const int *offset = IndexOffset[p.approx];
1813 const float *phi = PhiStencil[p.approx];
1815 int ni1 = p.grid_len[mylevel].nx;
1816 int nj1 = p.grid_len[mylevel].ny;
1817 int nk1 = p.grid_len[mylevel].nz;
1818 int ia1 = p.grid_idstart[mylevel].nx;
1819 int ja1 = p.grid_idstart[mylevel].ny;
1820 int ka1 = p.grid_idstart[mylevel].nz;
1821 int ib1 = ia1 + ni1 - 1;
1822 int jb1 = ja1 + nj1 - 1;
1823 int kb1 = ka1 + nk1 - 1;
1824 int i, j, k, i1, j1, k1, i2, j2, k2;
1825 int i1off, j1off, k1off;
1829 for (k2 = mka; k2 <= mkb; k2++) {
1831 for (j2 = mja; j2 <= mjb; j2++) {
1833 for (i2 = mia; i2 <= mib; i2++) {
1837 for (k = 0; k < nstencil; k++) {
1838 k1off = k1 + offset[k];
1840 if (p.ispz)
do { k1off += nk1; }
while (k1off < ka1);
1843 else if (k1off > kb1) {
1844 if (p.ispz)
do { k1off -= nk1; }
while (k1off > kb1);
1847 for (j = 0; j < nstencil; j++) {
1848 j1off = j1 + offset[j];
1850 if (p.ispy)
do { j1off += nj1; }
while (j1off < ja1);
1853 else if (j1off > jb1) {
1854 if (p.ispy)
do { j1off -= nj1; }
while (j1off > jb1);
1857 cjk = phi[j] * phi[k];
1858 for (i = 0; i < nstencil; i++) {
1859 i1off = i1 + offset[i];
1861 if (p.ispx)
do { i1off += ni1; }
while (i1off < ia1);
1864 else if (i1off > ib1) {
1865 if (p.ispx)
do { i1off -= ni1; }
while (i1off > ib1);
1868 q2h_sum += qhread.get(i1off,j1off,k1off) * phi[i] * cjk;
1873 q2hacc.accumulate(i2,j2,k2) += q2h_sum;
1881 q2hread = q2hacc.syncToRead();
1883 CkPrintf(
"Restriction compute %d exiting, PE %d\n", thisIndex, CkMyPe());
1889 MsmMsaProlongation::MsmMsaProlongation(
int level, MsmMsaGrid &eh_, MsmMsaGrid &e2h_)
1890 : eh(eh_), e2h(e2h_)
1893 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1894 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1896 const Int3 &len = p.grid_len[mylevel+1];
1897 const Int3 &idstart = p.grid_idstart[mylevel+1];
1898 mia = idstart.
nx + thisIndex.x * p.num_points_per_chare.nx;
1899 mja = idstart.
ny + thisIndex.y * p.num_points_per_chare.ny;
1900 mka = idstart.
nz + thisIndex.z * p.num_points_per_chare.nz;
1901 mib = mia + p.num_points_per_chare.nx - 1;
1902 mjb = mja + p.num_points_per_chare.ny - 1;
1903 mkb = mka + p.num_points_per_chare.nz - 1;
1904 if (mib > idstart.
nx + len.
nx - 1) {
1905 mib = idstart.
nx + len.
nx - 1;
1907 if (mjb > idstart.
ny + len.
ny - 1) {
1908 mjb = idstart.
ny + len.
ny - 1;
1910 if (mkb > idstart.
nz + len.
nz - 1) {
1911 mkb = idstart.
nz + len.
nz - 1;
1917 void MsmMsaProlongation::compute()
1920 CkPrintf(
"Prolongation compute %d, PE %d\n", thisIndex, CkMyPe());
1922 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
1923 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
1925 eh.enroll(p.num_clients_eh[mylevel]);
1926 e2h.enroll(p.num_clients_eh[mylevel+1]);
1927 ehacc = eh.getInitialAccum();
1928 ehread = ehacc.syncToRead();
1929 e2hacc = e2h.getInitialAccum();
1930 e2hread = e2hacc.syncToRead();
1934 ehacc = ehread.syncToEAccum();
1935 e2hacc = e2hread.syncToEAccum();
1936 e2hread = e2hacc.syncToRead();
1938 CkPrintf(
"Prolongation compute %d doing work, PE %d\n", thisIndex, CkMyPe());
1944 const int nstencil = Nstencil[p.approx];
1945 const int *offset = IndexOffset[p.approx];
1946 const float *phi = PhiStencil[p.approx];
1948 int ni1 = p.grid_len[mylevel].nx;
1949 int nj1 = p.grid_len[mylevel].ny;
1950 int nk1 = p.grid_len[mylevel].nz;
1951 int ia1 = p.grid_idstart[mylevel].nx;
1952 int ja1 = p.grid_idstart[mylevel].ny;
1953 int ka1 = p.grid_idstart[mylevel].nz;
1954 int ib1 = ia1 + ni1 - 1;
1955 int jb1 = ja1 + nj1 - 1;
1956 int kb1 = ka1 + nk1 - 1;
1957 int i, j, k, i1, j1, k1, i2, j2, k2;
1958 int i1off, j1off, k1off;
1962 for (k2 = mka; k2 <= mkb; k2++) {
1964 for (j2 = mja; j2 <= mjb; j2++) {
1966 for (i2 = mia; i2 <= mib; i2++) {
1969 for (k = 0; k < nstencil; k++) {
1970 k1off = k1 + offset[k];
1972 if (p.ispz)
do { k1off += nk1; }
while (k1off < ka1);
1975 else if (k1off > kb1) {
1976 if (p.ispz)
do { k1off -= nk1; }
while (k1off > kb1);
1979 for (j = 0; j < nstencil; j++) {
1980 j1off = j1 + offset[j];
1982 if (p.ispy)
do { j1off += nj1; }
while (j1off < ja1);
1985 else if (j1off > jb1) {
1986 if (p.ispy)
do { j1off -= nj1; }
while (j1off > jb1);
1989 cjk = phi[j] * phi[k];
1990 for (i = 0; i < nstencil; i++) {
1991 i1off = i1 + offset[i];
1993 if (p.ispx)
do { i1off += ni1; }
while (i1off < ia1);
1996 else if (i1off > ib1) {
1997 if (p.ispx)
do { i1off -= ni1; }
while (i1off > ib1);
2000 ehacc.accumulate(i1off,j1off,k1off) +=
2001 e2hread(i2,j2,k2) * phi[i] * cjk;
2013 ehread = ehacc.syncToRead();
2015 CkPrintf(
"Prolongation compute %d exiting, PE %d\n", thisIndex, CkMyPe());
2021 MsmMsaEnergy::MsmMsaEnergy(MsmMsaGrid &qh_, MsmMsaGrid &eh_) : qh(qh_), eh(eh_)
2023 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
2024 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
2025 int npoints = p.num_points_per_chare.nx *
2026 p.num_points_per_chare.ny * p.num_points_per_chare.nz;
2027 qhbuffer =
new float[npoints];
2028 int ni = p.grid_len[0].nx;
2029 int nj = p.grid_len[0].ny;
2030 int nk = p.grid_len[0].nz;
2034 mlk = thisIndex / (ncj * nci);
2035 int krem = thisIndex % (ncj * nci);
2039 mia = p.grid_idstart[0].nx + mli * p.num_points_per_chare.nx;
2040 mja = p.grid_idstart[0].ny + mlj * p.num_points_per_chare.ny;
2041 mka = p.grid_idstart[0].nz + mlk * p.num_points_per_chare.nz;
2042 mib = mia + p.num_points_per_chare.nx - 1;
2043 mjb = mja + p.num_points_per_chare.ny - 1;
2044 mkb = mka + p.num_points_per_chare.nz - 1;
2045 if (mib > p.grid_idstart[0].nx + ni - 1) {
2046 mib = p.grid_idstart[0].nx + ni - 1;
2048 if (mjb > p.grid_idstart[0].ny + nj - 1) {
2049 mjb = p.grid_idstart[0].ny + nj - 1;
2051 if (mkb > p.grid_idstart[0].nz + nk - 1) {
2052 mkb = p.grid_idstart[0].nz + nk - 1;
2058 MsmMsaEnergy::~MsmMsaEnergy()
2064 void MsmMsaEnergy::compute()
2067 CkPrintf(
"Energy compute %d, PE %d\n", thisIndex, CkMyPe());
2069 const MsmMsaData &p = CProxy_ComputeMsmMsaMgr::ckLocalBranch(
2070 CkpvAccess(BOCclass_group).computeMsmMsaMgr)->getMsmMsaData();
2072 qh.enroll(p.num_clients_qh[0]);
2073 eh.enroll(p.num_clients_eh[0]);
2074 qhacc = qh.getInitialAccum();
2075 qhread = qhacc.syncToRead();
2076 ehacc = eh.getInitialAccum();
2077 ehread = ehacc.syncToRead();
2082 qhacc = qhread.syncToEAccum();
2083 qhread = qhacc.syncToRead();
2095 for (index = 0, k = mka; k <= mkb; k++) {
2096 for (j = mja; j <= mjb; j++) {
2097 for (i = mia; i <= mib; i++, index++) {
2098 qhbuffer[index] = qhread.get(i,j,k);
2105 ehacc = ehread.syncToEAccum();
2106 ehread = ehacc.syncToRead();
2108 CkPrintf(
"Energy compute %d doing work, PE %d\n", thisIndex, CkMyPe());
2110 for (index = 0, k = mka; k <= mkb; k++) {
2111 for (j = mja; j <= mjb; j++) {
2112 for (i = mia; i <= mib; i++, index++) {
2113 sum += qhbuffer[index] * ehread.get(i,j,k);
2119 reduction->submit();
2121 CkPrintf(
"Energy compute %d exiting, PE %d\n", thisIndex, CkMyPe());
2127 #include "ComputeMsmMsaMgr.def.h"
2129 #endif // CHARM_HAS_MSA
#define GRID_RESIZE(_p, TYPE, __i0, __ni, __j0, __nj, __k0, __nk)
static const int Nstencil[NL_MSM_APPROX_END]
static PatchMap * Object()
SimParameters * simParameters
SubmitReduction * willSubmit(int setID, int size=-1)
static ReductionMgr * Object(void)
static const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL]
static int restriction(NL_Msm *, int level)
#define ROUNDUP_QUOTIENT(n, k)
void NAMD_die(const char *err_msg)
#define STENCIL_1D(_phi, _delta, _approx)
std::vector< std::string > split(const std::string &text, std::string delimiter)
static const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL]
static void rescale_nonperiodic_cell(Vector &u, Vector &v, Vector &w, Vector &c, Vector &ru, Vector &rv, Vector &rw, int isperiodic, int numatoms, const ComputeMsmSerialAtom *coord, double padding, double gridspacing)
static int setup_hgrid_1d(NL_Msm *pm, double len, double *hh, int *nn, int *aindex, int *bindex, int isperiodic)
static int gridcutoff(NL_Msm *, int level)
#define SPOLY(pg, pdg, ra, split)
static const int PolyDegree[NL_MSM_APPROX_END]
void get_extremes(ScaledPosition &xmin, ScaledPosition &xmax) const
#define D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx)
static int prolongation(NL_Msm *, int level)