#include "msm_defn.h"
Go to the source code of this file.
Defines | |
#define | STENCIL_1D(_phi, _delta, _approx) |
#define | D_STENCIL_1D(_dphi, _phi, _h_1, _delta, _approx) |
Enumerations | |
enum | { MAX_POLY_DEGREE = 9 } |
enum | { MAX_NSTENCIL = 11 } |
Functions | |
static int | anterpolation (NL_Msm *) |
static int | interpolation (NL_Msm *) |
static int | restriction (NL_Msm *, int level) |
static int | prolongation (NL_Msm *, int level) |
static int | restriction_factored (NL_Msm *, int level) |
static int | prolongation_factored (NL_Msm *, int level) |
static int | gridcutoff (NL_Msm *, int level) |
int | NL_msm_compute_long_range (NL_Msm *msm) |
Variables | |
static const int | PolyDegree [NL_MSM_APPROX_END] |
static const double | PhiStencilFactored [NL_MSM_APPROX_END][2 *MAX_POLY_DEGREE+1] |
static const int | Nstencil [NL_MSM_APPROX_END] |
static const int | IndexOffset [NL_MSM_APPROX_END][MAX_NSTENCIL] |
static const double | PhiStencil [NL_MSM_APPROX_END][MAX_NSTENCIL] |
#define D_STENCIL_1D | ( | _dphi, | |||
_phi, | |||||
_h_1, | |||||
_delta, | |||||
_approx | ) |
Calculate the stencil of basis function and derivatives of (1/h)Phi. dphi - stencil array (up to size MAX_POLY_DEGREE+1) phi - stencil array (up to size MAX_POLY_DEGREE+1) h_1 - 1/h, h is the grid spacing delta - normalized distance of atom from lowest grid point in stencil approx - APPROX enum value from msm.h
Definition at line 468 of file msm_longrng.c.
Referenced by interpolation().
#define STENCIL_1D | ( | _phi, | |||
_delta, | |||||
_approx | ) |
Calculate the stencil of basis function values of Phi. phi - stencil array (up to size MAX_POLY_DEGREE+1) delta - normalized distance of atom from lowest grid point in stencil approx - APPROX enum value from msm.h
Definition at line 280 of file msm_longrng.c.
Referenced by anterpolation().
anonymous enum |
Approximation formulaes are up to degree 9 polynomials.
Definition at line 159 of file msm_longrng.c.
00159 { MAX_POLY_DEGREE = 9 };
anonymous enum |
Max stencil length is basically PolyDegree+2 for those approximations that interpolate. (We skip over zero in the complete stencils above.)
Definition at line 205 of file msm_longrng.c.
00205 { MAX_NSTENCIL = 11 };
int anterpolation | ( | NL_Msm * | msm | ) | [static] |
Compute the long-range part of MSM forces.
The entire procedure can be depicted as an inverse V-cycle with bars across each level to represent the short-range gridded calculations. The algorithm first performs anterpolation of the charge to the finest level grid. Each intermediate grid level, below the coarsest level at the top, calculates a more slowly varying long-range part of the interaction by restricting the charges to a grid with twice the spacing. Each grid level also has a short-range cutoff part between charges that contributes to the potentials. Also summed to the potentials at each intermediate grid level are the long-range contributions prolongated from a grid with twice the spacing. Finally, the forces are interpolated (or approximated if the basis functions do not interpolate) from the finest level grid of potentials. The steps of the algorithm are detailed in section 2.3 of the thesis.
The restriction and prolongation operations use the factored approach having a work constant which is linear in the degree of the polynomial, i.e. O(pM), where p is the degree and M is the number of grid points.
The algorithm below uses stencil widths expressed as a function of the degree of the basis function polynomial Phi. The constants for restriction and prolongation stencils are stored in an array indexed by the APPROX enum defined in msm.h header. Calculations of the grid stencils of the 1D Phi and its derivative, needed for interpolation and anterpolation, are expressed as macros that use the APPROX enum to select the polynomial pieces from a switch statement. Periodic boundaries are handled by wrapping around the edges of the grid, as discussed in section 6.2 of the thesis.
XXX The short-range gridded calculations contribute to the virial, but it is not calculated.
XXX The factored restriction and prolongation procedures are not suitable for parallel and GPU computation due to the sequential dependence along each dimension of the grid.
Definition at line 779 of file msm_longrng.c.
References NL_Msm_t::approx, ASSERT, NL_Msm_t::atom, GRID_INDEX, GRID_INDEX_CHECK, GRID_ZERO, NL_Msm_t::gx, NL_Msm_t::gy, NL_Msm_t::gz, NL_Msm_t::hx, NL_Msm_t::hy, NL_Msm_t::hz, j, MAX_POLY_DEGREE, NL_Msm_t::msmflags, NL_MSM_ERROR_RANGE, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, NL_Msm_t::numatoms, PolyDegree, NL_Msm_t::qh, and STENCIL_1D.
Referenced by NL_msm_compute_long_range().
00780 { 00781 const double *atom = msm->atom; 00782 const int natoms = msm->numatoms; 00783 00784 double xphi[MAX_POLY_DEGREE+1]; /* Phi stencil along x-dimension */ 00785 double yphi[MAX_POLY_DEGREE+1]; /* Phi stencil along y-dimension */ 00786 double zphi[MAX_POLY_DEGREE+1]; /* Phi stencil along z-dimension */ 00787 double rx_hx, ry_hy, rz_hz; /* normalized distance from atom to origin */ 00788 #ifndef TEST_INLINING 00789 double delta; /* normalized distance to stencil point */ 00790 #else 00791 double t; /* normalized distance to stencil point */ 00792 #endif 00793 double ck, cjk; 00794 const double hx_1 = 1/msm->hx; 00795 const double hy_1 = 1/msm->hy; 00796 const double hz_1 = 1/msm->hz; 00797 const double xm0 = msm->gx; /* grid origin x */ 00798 const double ym0 = msm->gy; /* grid origin y */ 00799 const double zm0 = msm->gz; /* grid origin z */ 00800 double q; 00801 00802 NL_Msmgrid_double *qhgrid = &(msm->qh[0]); 00803 double *qh = qhgrid->data; 00804 const int ni = qhgrid->ni; 00805 const int nj = qhgrid->nj; 00806 const int nk = qhgrid->nk; 00807 const int ia = qhgrid->i0; 00808 const int ib = ia + ni - 1; 00809 const int ja = qhgrid->j0; 00810 const int jb = ja + nj - 1; 00811 const int ka = qhgrid->k0; 00812 const int kb = ka + nk - 1; 00813 00814 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 00815 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 00816 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 00817 int iswithin; 00818 00819 int n, i, j, k, ilo, jlo, klo, koff; 00820 int jkoff, index; 00821 00822 const int approx = msm->approx; 00823 const int s_size = PolyDegree[approx] + 1; /* stencil size */ 00824 const int s_edge = (PolyDegree[approx] - 1) >> 1; /* stencil "edge" size */ 00825 00826 GRID_ZERO(qhgrid); 00827 00828 for (n = 0; n < natoms; n++) { 00829 00830 /* atomic charge */ 00831 q = atom[4*n + 3]; 00832 if (0==q) continue; 00833 00834 /* distance between atom and origin measured in grid points */ 00835 rx_hx = (atom[4*n ] - xm0) * hx_1; 00836 ry_hy = (atom[4*n + 1] - ym0) * hy_1; 00837 rz_hz = (atom[4*n + 2] - zm0) * hz_1; 00838 00839 /* find smallest numbered grid point in stencil */ 00840 ilo = (int) floor(rx_hx) - s_edge; 00841 jlo = (int) floor(ry_hy) - s_edge; 00842 klo = (int) floor(rz_hz) - s_edge; 00843 00844 /* calculate Phi stencils along each dimension */ 00845 #ifndef TEST_INLINING 00846 delta = rx_hx - (double) ilo; 00847 STENCIL_1D(xphi, delta, approx); 00848 delta = ry_hy - (double) jlo; 00849 STENCIL_1D(yphi, delta, approx); 00850 delta = rz_hz - (double) klo; 00851 STENCIL_1D(zphi, delta, approx); 00852 #else 00853 t = rx_hx - (double) ilo; 00854 xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \ 00855 t--; \ 00856 xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \ 00857 t--; \ 00858 xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \ 00859 t--; \ 00860 xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \ 00861 00862 t = ry_hy - (double) jlo; 00863 yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \ 00864 t--; \ 00865 yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \ 00866 t--; \ 00867 yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \ 00868 t--; \ 00869 yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \ 00870 00871 t = rz_hz - (double) klo; 00872 zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \ 00873 t--; \ 00874 zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \ 00875 t--; \ 00876 zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \ 00877 t--; \ 00878 zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \ 00879 00880 #endif 00881 00882 /* test to see if stencil is within edges of grid */ 00883 iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib && 00884 ja <= jlo && (jlo+(s_size-1)) <= jb && 00885 ka <= klo && (klo+(s_size-1)) <= kb ); 00886 00887 if ( iswithin ) { /* no wrapping needed */ 00888 00889 /* determine charge on cube of grid points around atom */ 00890 for (k = 0; k < s_size; k++) { 00891 koff = (k + klo) * nj; 00892 ck = zphi[k] * q; 00893 for (j = 0; j < s_size; j++) { 00894 jkoff = (koff + (j + jlo)) * ni; 00895 cjk = yphi[j] * ck; 00896 for (i = 0; i < s_size; i++) { 00897 index = jkoff + (i + ilo); 00898 GRID_INDEX_CHECK(qhgrid, i+ilo, j+jlo, k+klo); 00899 ASSERT(GRID_INDEX(qhgrid, i+ilo, j+jlo, k+klo) == index); 00900 qh[index] += xphi[i] * cjk; 00901 } 00902 } 00903 } 00904 } /* if */ 00905 00906 else { /* requires wrapping around grid */ 00907 int ip, jp, kp; 00908 00909 /* adjust ilo, jlo, klo so they are within grid indexing */ 00910 if (ispx) { 00911 if (ilo < ia) do { ilo += ni; } while (ilo < ia); 00912 else if (ilo > ib) do { ilo -= ni; } while (ilo > ib); 00913 } 00914 else if (ilo < ia || (ilo+(s_size-1)) > ib) { 00915 return NL_MSM_ERROR_RANGE; 00916 } 00917 00918 if (ispy) { 00919 if (jlo < ja) do { jlo += nj; } while (jlo < ja); 00920 else if (jlo > jb) do { jlo -= nj; } while (jlo > jb); 00921 } 00922 else if (jlo < ja || (jlo+(s_size-1)) > jb) { 00923 return NL_MSM_ERROR_RANGE; 00924 } 00925 00926 if (ispz) { 00927 if (klo < ka) do { klo += nk; } while (klo < ka); 00928 else if (klo > kb) do { klo -= nk; } while (klo > kb); 00929 } 00930 else if (klo < ka || (klo+(s_size-1)) > kb) { 00931 return NL_MSM_ERROR_RANGE; 00932 } 00933 00934 /* determine charge on cube of grid points around atom, with wrapping */ 00935 for (k = 0, kp = klo; k < s_size; k++, kp++) { 00936 if (kp > kb) kp = ka; /* wrap stencil around grid */ 00937 koff = kp * nj; 00938 ck = zphi[k] * q; 00939 for (j = 0, jp = jlo; j < s_size; j++, jp++) { 00940 if (jp > jb) jp = ja; /* wrap stencil around grid */ 00941 jkoff = (koff + jp) * ni; 00942 cjk = yphi[j] * ck; 00943 for (i = 0, ip = ilo; i < s_size; i++, ip++) { 00944 if (ip > ib) ip = ia; /* wrap stencil around grid */ 00945 index = jkoff + ip; 00946 GRID_INDEX_CHECK(qhgrid, ip, jp, kp); 00947 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == index); 00948 qh[index] += xphi[i] * cjk; 00949 } 00950 } 00951 } 00952 } /* else */ 00953 00954 } /* end loop over atoms */ 00955 00956 return NL_MSM_SUCCESS; 00957 } /* anterpolation */
int gridcutoff | ( | NL_Msm * | msm, | |
int | level | |||
) | [static] |
Definition at line 1677 of file msm_longrng.c.
References ASSERT, NL_Msm_t::eh, NL_Msm_t::gc, GRID_INDEX, GRID_INDEX_CHECK, j, NL_Msm_t::msmflags, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, and NL_Msm_t::qh.
Referenced by NL_msm_compute_long_range().
01678 { 01679 double eh_sum; 01680 01681 /* grids of charge and potential */ 01682 const NL_Msmgrid_double *qhgrid = &(msm->qh[level]); 01683 const double *qh = qhgrid->data; 01684 NL_Msmgrid_double *ehgrid = &(msm->eh[level]); 01685 double *eh = ehgrid->data; 01686 const int ia = qhgrid->i0; /* lowest x-index */ 01687 const int ib = ia + qhgrid->ni - 1; /* highest x-index */ 01688 const int ja = qhgrid->j0; /* lowest y-index */ 01689 const int jb = ja + qhgrid->nj - 1; /* highest y-index */ 01690 const int ka = qhgrid->k0; /* lowest z-index */ 01691 const int kb = ka + qhgrid->nk - 1; /* highest z-index */ 01692 const int ni = qhgrid->ni; /* length along x-dim */ 01693 const int nj = qhgrid->nj; /* length along y-dim */ 01694 const int nk = qhgrid->nk; /* length along z-dim */ 01695 01696 /* grid of weights for pairwise grid point interactions within cutoff */ 01697 const NL_Msmgrid_double *gcgrid = &(msm->gc[level]); 01698 const double *gc = gcgrid->data; 01699 const int gia = gcgrid->i0; /* lowest x-index */ 01700 const int gib = gia + gcgrid->ni - 1; /* highest x-index */ 01701 const int gja = gcgrid->j0; /* lowest y-index */ 01702 const int gjb = gja + gcgrid->nj - 1; /* highest y-index */ 01703 const int gka = gcgrid->k0; /* lowest z-index */ 01704 const int gkb = gka + gcgrid->nk - 1; /* highest z-index */ 01705 const int gni = gcgrid->ni; /* length along x-dim */ 01706 const int gnj = gcgrid->nj; /* length along y-dim */ 01707 01708 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 01709 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 01710 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 01711 01712 const int ispnone = !(ispx || ispy || ispz); 01713 01714 int i, j, k; 01715 int gia_clip, gib_clip; 01716 int gja_clip, gjb_clip; 01717 int gka_clip, gkb_clip; 01718 int koff; 01719 long jkoff, index; 01720 int id, jd, kd; 01721 int knoff; 01722 long jknoff, nindex; 01723 int kgoff, jkgoff, ngindex; 01724 01725 if ( ispnone ) { /* non-periodic boundaries */ 01726 01727 /* loop over all grid points */ 01728 for (k = ka; k <= kb; k++) { 01729 01730 /* clip gc ranges to keep offset for k index within grid */ 01731 gka_clip = (k + gka < ka ? ka - k : gka); 01732 gkb_clip = (k + gkb > kb ? kb - k : gkb); 01733 01734 koff = k * nj; /* find eh flat index */ 01735 01736 for (j = ja; j <= jb; j++) { 01737 01738 /* clip gc ranges to keep offset for j index within grid */ 01739 gja_clip = (j + gja < ja ? ja - j : gja); 01740 gjb_clip = (j + gjb > jb ? jb - j : gjb); 01741 01742 jkoff = (koff + j) * ni; /* find eh flat index */ 01743 01744 for (i = ia; i <= ib; i++) { 01745 01746 /* clip gc ranges to keep offset for i index within grid */ 01747 gia_clip = (i + gia < ia ? ia - i : gia); 01748 gib_clip = (i + gib > ib ? ib - i : gib); 01749 01750 index = jkoff + i; /* eh flat index */ 01751 01752 /* sum over "sphere" of weighted charge */ 01753 eh_sum = 0; 01754 for (kd = gka_clip; kd <= gkb_clip; kd++) { 01755 knoff = (k + kd) * nj; /* find qh flat index */ 01756 kgoff = kd * gnj; /* find gc flat index */ 01757 01758 for (jd = gja_clip; jd <= gjb_clip; jd++) { 01759 jknoff = (knoff + (j + jd)) * ni; /* find qh flat index */ 01760 jkgoff = (kgoff + jd) * gni; /* find gc flat index */ 01761 01762 for (id = gia_clip; id <= gib_clip; id++) { 01763 nindex = jknoff + (i + id); /* qh flat index */ 01764 ngindex = jkgoff + id; /* gc flat index */ 01765 01766 GRID_INDEX_CHECK(qhgrid, i+id, j+jd, k+kd); 01767 ASSERT(GRID_INDEX(qhgrid, i+id, j+jd, k+kd) == nindex); 01768 01769 GRID_INDEX_CHECK(gcgrid, id, jd, kd); 01770 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex); 01771 01772 eh_sum += qh[nindex] * gc[ngindex]; /* sum weighted charge */ 01773 } 01774 } 01775 } /* end loop over "sphere" of charge */ 01776 01777 GRID_INDEX_CHECK(ehgrid, i, j, k); 01778 ASSERT(GRID_INDEX(ehgrid, i, j, k) == index); 01779 eh[index] = eh_sum; /* store potential */ 01780 } 01781 } 01782 } /* end loop over all grid points */ 01783 01784 } /* if nonperiodic boundaries */ 01785 else { 01786 /* some boundary is periodic */ 01787 int ilo, jlo, klo; 01788 int ip, jp, kp; 01789 01790 /* loop over all grid points */ 01791 for (k = ka; k <= kb; k++) { 01792 klo = k + gka; 01793 if ( ! ispz ) { /* nonperiodic z */ 01794 /* clip gc ranges to keep offset for k index within grid */ 01795 gka_clip = (k + gka < ka ? ka - k : gka); 01796 gkb_clip = (k + gkb > kb ? kb - k : gkb); 01797 if (klo < ka) klo = ka; /* keep lowest qh index within grid */ 01798 } 01799 else { /* periodic z */ 01800 gka_clip = gka; 01801 gkb_clip = gkb; 01802 if (klo < ka) do { klo += nk; } while (klo < ka); 01803 } 01804 ASSERT(klo <= kb); 01805 01806 koff = k * nj; /* find eh flat index */ 01807 01808 for (j = ja; j <= jb; j++) { 01809 jlo = j + gja; 01810 if ( ! ispy ) { /* nonperiodic y */ 01811 /* clip gc ranges to keep offset for j index within grid */ 01812 gja_clip = (j + gja < ja ? ja - j : gja); 01813 gjb_clip = (j + gjb > jb ? jb - j : gjb); 01814 if (jlo < ja) jlo = ja; /* keep lowest qh index within grid */ 01815 } 01816 else { /* periodic y */ 01817 gja_clip = gja; 01818 gjb_clip = gjb; 01819 if (jlo < ja) do { jlo += nj; } while (jlo < ja); 01820 } 01821 ASSERT(jlo <= jb); 01822 01823 jkoff = (koff + j) * ni; /* find eh flat index */ 01824 01825 for (i = ia; i <= ib; i++) { 01826 ilo = i + gia; 01827 if ( ! ispx ) { /* nonperiodic x */ 01828 /* clip gc ranges to keep offset for i index within grid */ 01829 gia_clip = (i + gia < ia ? ia - i : gia); 01830 gib_clip = (i + gib > ib ? ib - i : gib); 01831 if (ilo < ia) ilo = ia; /* keep lowest qh index within grid */ 01832 } 01833 else { /* periodic x */ 01834 gia_clip = gia; 01835 gib_clip = gib; 01836 if (ilo < ia) do { ilo += ni; } while (ilo < ia); 01837 } 01838 ASSERT(ilo <= ib); 01839 01840 index = jkoff + i; /* eh flat index */ 01841 01842 /* sum over "sphere" of weighted charge */ 01843 eh_sum = 0; 01844 for (kd = gka_clip, kp = klo; kd <= gkb_clip; kd++, kp++) { 01845 /* clipping makes conditional always fail for nonperiodic */ 01846 if (kp > kb) kp = ka; /* wrap z direction */ 01847 knoff = kp * nj; /* find qh flat index */ 01848 kgoff = kd * gnj; /* find gc flat index */ 01849 01850 for (jd = gja_clip, jp = jlo; jd <= gjb_clip; jd++, jp++) { 01851 /* clipping makes conditional always fail for nonperiodic */ 01852 if (jp > jb) jp = ja; /* wrap y direction */ 01853 jknoff = (knoff + jp) * ni; /* find qh flat index */ 01854 jkgoff = (kgoff + jd) * gni; /* find gc flat index */ 01855 01856 for (id = gia_clip, ip = ilo; id <= gib_clip; id++, ip++) { 01857 /* clipping makes conditional always fail for nonperiodic */ 01858 if (ip > ib) ip = ia; /* wrap x direction */ 01859 nindex = jknoff + ip; /* qh flat index */ 01860 ngindex = jkgoff + id; /* gc flat index */ 01861 01862 GRID_INDEX_CHECK(qhgrid, ip, jp, kp); 01863 ASSERT(GRID_INDEX(qhgrid, ip, jp, kp) == nindex); 01864 01865 GRID_INDEX_CHECK(gcgrid, id, jd, kd); 01866 ASSERT(GRID_INDEX(gcgrid, id, jd, kd) == ngindex); 01867 01868 eh_sum += qh[nindex] * gc[ngindex]; /* sum weighted charge */ 01869 01870 } 01871 } 01872 } /* end loop over "sphere" of charge */ 01873 01874 GRID_INDEX_CHECK(ehgrid, i, j, k); 01875 ASSERT(GRID_INDEX(ehgrid, i, j, k) == index); 01876 eh[index] = eh_sum; /* store potential */ 01877 } 01878 } 01879 } /* end loop over all grid points */ 01880 01881 } /* else some boundary is periodic */ 01882 01883 return NL_MSM_SUCCESS; 01884 }
int interpolation | ( | NL_Msm * | msm | ) | [static] |
Definition at line 960 of file msm_longrng.c.
References NL_Msm_t::approx, ASSERT, NL_Msm_t::atom, D_STENCIL_1D, NL_Msm_t::eh, NL_Msm_t::felec, GRID_INDEX, GRID_INDEX_CHECK, NL_Msm_t::gx, NL_Msm_t::gy, NL_Msm_t::gz, NL_Msm_t::gzero, NL_Msm_t::hx, NL_Msm_t::hy, NL_Msm_t::hz, j, MAX_POLY_DEGREE, NL_Msm_t::msmflags, NL_MSM_ERROR_RANGE, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, NL_Msm_t::numatoms, PolyDegree, NL_Msm_t::qh, and NL_Msm_t::uelec.
Referenced by NL_msm_compute_long_range().
00961 { 00962 double *f = msm->felec; 00963 const double *atom = msm->atom; 00964 const int natoms = msm->numatoms; 00965 00966 double xphi[MAX_POLY_DEGREE+1]; /* Phi stencil along x-dimension */ 00967 double yphi[MAX_POLY_DEGREE+1]; /* Phi stencil along y-dimension */ 00968 double zphi[MAX_POLY_DEGREE+1]; /* Phi stencil along z-dimension */ 00969 double dxphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along x-dimension */ 00970 double dyphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along y-dimension */ 00971 double dzphi[MAX_POLY_DEGREE+1]; /* derivative of Phi along z-dimension */ 00972 double rx_hx, ry_hy, rz_hz; /* normalized distance from atom to origin */ 00973 #ifndef TEST_INLINING 00974 double delta; /* normalized distance to stencil point */ 00975 #else 00976 double t; /* normalized distance to stencil point */ 00977 #endif 00978 const double hx_1 = 1/msm->hx; 00979 const double hy_1 = 1/msm->hy; 00980 const double hz_1 = 1/msm->hz; 00981 const double xm0 = msm->gx; /* grid origin x */ 00982 const double ym0 = msm->gy; /* grid origin y */ 00983 const double zm0 = msm->gz; /* grid origin z */ 00984 double q; 00985 double u = 0; 00986 00987 const NL_Msmgrid_double *ehgrid = &(msm->eh[0]); 00988 const double *eh = ehgrid->data; 00989 const double *ebuf = ehgrid->buffer; 00990 const NL_Msmgrid_double *qhgrid = &(msm->qh[0]); 00991 const double *qbuf = qhgrid->buffer; 00992 const int ni = ehgrid->ni; 00993 const int nj = ehgrid->nj; 00994 const int nk = ehgrid->nk; 00995 const int ia = ehgrid->i0; 00996 const int ib = ia + ni - 1; 00997 const int ja = ehgrid->j0; 00998 const int jb = ja + nj - 1; 00999 const int ka = ehgrid->k0; 01000 const int kb = ka + nk - 1; 01001 01002 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 01003 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 01004 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 01005 int iswithin; 01006 01007 double fx, fy, fz, cx, cy, cz; 01008 01009 int n, i, j, k, ilo, jlo, klo, koff; 01010 long jkoff, index; 01011 const int nn = (ni*nj) * nk; 01012 01013 const int approx = msm->approx; 01014 const int s_size = PolyDegree[approx] + 1; /* stencil size */ 01015 const int s_edge = (PolyDegree[approx] - 1) >> 1; /* stencil "edge" size */ 01016 01017 for (n = 0; n < natoms; n++) { 01018 01019 /* atomic charge */ 01020 q = atom[4*n + 3]; 01021 if (0==q) continue; 01022 01023 /* distance between atom and origin measured in grid points */ 01024 rx_hx = (atom[4*n ] - xm0) * hx_1; 01025 ry_hy = (atom[4*n + 1] - ym0) * hy_1; 01026 rz_hz = (atom[4*n + 2] - zm0) * hz_1; 01027 01028 /* find smallest numbered grid point in stencil */ 01029 ilo = (int) floor(rx_hx) - s_edge; 01030 jlo = (int) floor(ry_hy) - s_edge; 01031 klo = (int) floor(rz_hz) - s_edge; 01032 01033 /* calculate Phi stencils and derivatives along each dimension */ 01034 #ifndef TEST_INLINING 01035 delta = rx_hx - (double) ilo; 01036 //STENCIL_1D(xphi, delta, approx); 01037 D_STENCIL_1D(dxphi, xphi, hx_1, delta, approx); 01038 delta = ry_hy - (double) jlo; 01039 //STENCIL_1D(yphi, delta, approx); 01040 D_STENCIL_1D(dyphi, yphi, hy_1, delta, approx); 01041 delta = rz_hz - (double) klo; 01042 //STENCIL_1D(zphi, delta, approx); 01043 D_STENCIL_1D(dzphi, zphi, hz_1, delta, approx); 01044 #else 01045 t = rx_hx - (double) ilo; 01046 xphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \ 01047 dxphi[0] = (1.5 * t - 2) * (2 - t) * hx_1; \ 01048 t--; \ 01049 xphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \ 01050 dxphi[1] = (-5 + 4.5 * t) * t * hx_1; \ 01051 t--; \ 01052 xphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \ 01053 dxphi[2] = (-5 - 4.5 * t) * t * hx_1; \ 01054 t--; \ 01055 xphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \ 01056 dxphi[3] = (1.5 * t + 2) * (2 + t) * hx_1; \ 01057 01058 t = ry_hy - (double) jlo; 01059 yphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \ 01060 dyphi[0] = (1.5 * t - 2) * (2 - t) * hy_1; \ 01061 t--; \ 01062 yphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \ 01063 dyphi[1] = (-5 + 4.5 * t) * t * hy_1; \ 01064 t--; \ 01065 yphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \ 01066 dyphi[2] = (-5 - 4.5 * t) * t * hy_1; \ 01067 t--; \ 01068 yphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \ 01069 dyphi[3] = (1.5 * t + 2) * (2 + t) * hy_1; \ 01070 01071 t = rz_hz - (double) klo; 01072 zphi[0] = 0.5 * (1 - t) * (2 - t) * (2 - t); \ 01073 dzphi[0] = (1.5 * t - 2) * (2 - t) * hz_1; \ 01074 t--; \ 01075 zphi[1] = (1 - t) * (1 + t - 1.5 * t * t); \ 01076 dzphi[1] = (-5 + 4.5 * t) * t * hz_1; \ 01077 t--; \ 01078 zphi[2] = (1 + t) * (1 - t - 1.5 * t * t); \ 01079 dzphi[2] = (-5 - 4.5 * t) * t * hz_1; \ 01080 t--; \ 01081 zphi[3] = 0.5 * (1 + t) * (2 + t) * (2 + t); \ 01082 dzphi[3] = (1.5 * t + 2) * (2 + t) * hz_1; \ 01083 01084 #endif 01085 01086 /* test to see if stencil is within edges of grid */ 01087 iswithin = ( ia <= ilo && (ilo+(s_size-1)) <= ib && 01088 ja <= jlo && (jlo+(s_size-1)) <= jb && 01089 ka <= klo && (klo+(s_size-1)) <= kb ); 01090 01091 if ( iswithin ) { /* no wrapping needed */ 01092 01093 /* determine force on atom from cube of grid point potentials */ 01094 fx = fy = fz = 0; 01095 for (k = 0; k < s_size; k++) { 01096 koff = (k + klo) * nj; 01097 for (j = 0; j < s_size; j++) { 01098 jkoff = (koff + (j + jlo)) * ni; 01099 cx = yphi[j] * zphi[k]; 01100 cy = dyphi[j] * zphi[k]; 01101 cz = yphi[j] * dzphi[k]; 01102 for (i = 0; i < s_size; i++) { 01103 index = jkoff + (i + ilo); 01104 GRID_INDEX_CHECK(ehgrid, i+ilo, j+jlo, k+klo); 01105 ASSERT(GRID_INDEX(ehgrid, i+ilo, j+jlo, k+klo) == index); 01106 fx += eh[index] * dxphi[i] * cx; 01107 fy += eh[index] * xphi[i] * cy; 01108 fz += eh[index] * xphi[i] * cz; 01109 } 01110 } 01111 } 01112 } /* if */ 01113 01114 else { /* requires wrapping around grid */ 01115 int ip, jp, kp; 01116 01117 /* adjust ilo, jlo, klo so they are within grid indexing */ 01118 if (ispx) { 01119 if (ilo < ia) do { ilo += ni; } while (ilo < ia); 01120 else if (ilo > ib) do { ilo -= ni; } while (ilo > ib); 01121 } 01122 else if (ilo < ia || (ilo+(s_size-1)) > ib) { 01123 return NL_MSM_ERROR_RANGE; 01124 } 01125 01126 if (ispy) { 01127 if (jlo < ja) do { jlo += nj; } while (jlo < ja); 01128 else if (jlo > jb) do { jlo -= nj; } while (jlo > jb); 01129 } 01130 else if (jlo < ja || (jlo+(s_size-1)) > jb) { 01131 return NL_MSM_ERROR_RANGE; 01132 } 01133 01134 if (ispz) { 01135 if (klo < ka) do { klo += nk; } while (klo < ka); 01136 else if (klo > kb) do { klo -= nk; } while (klo > kb); 01137 } 01138 else if (klo < ka || (klo+(s_size-1)) > kb) { 01139 return NL_MSM_ERROR_RANGE; 01140 } 01141 01142 /* determine force on atom from cube of grid point potentials, wrapping */ 01143 fx = fy = fz = 0; 01144 for (k = 0, kp = klo; k < s_size; k++, kp++) { 01145 if (kp > kb) kp = ka; /* wrap stencil around grid */ 01146 koff = kp * nj; 01147 for (j = 0, jp = jlo; j < s_size; j++, jp++) { 01148 if (jp > jb) jp = ja; /* wrap stencil around grid */ 01149 jkoff = (koff + jp) * ni; 01150 cx = yphi[j] * zphi[k]; 01151 cy = dyphi[j] * zphi[k]; 01152 cz = yphi[j] * dzphi[k]; 01153 for (i = 0, ip = ilo; i < s_size; i++, ip++) { 01154 if (ip > ib) ip = ia; /* wrap stencil around grid */ 01155 index = jkoff + ip; 01156 GRID_INDEX_CHECK(ehgrid, ip, jp, kp); 01157 ASSERT(GRID_INDEX(ehgrid, ip, jp, kp) == index); 01158 fx += eh[index] * dxphi[i] * cx; 01159 fy += eh[index] * xphi[i] * cy; 01160 fz += eh[index] * xphi[i] * cz; 01161 } 01162 } 01163 } 01164 } /* else */ 01165 01166 /* update force */ 01167 f[3*n ] -= q * fx; 01168 f[3*n+1] -= q * fy; 01169 f[3*n+2] -= q * fz; 01170 // printf("force[%d] = %g %g %g\n", n, -q*fx, -q*fy, -q*fz); 01171 01172 } /* end loop over atoms */ 01173 01174 /* compute potential, subtract self potential */ 01175 u = 0; 01176 if (1) { 01177 for (n = 0; n < natoms; n++) { 01178 double q = atom[4*n + 3]; 01179 u += q * q; 01180 } 01181 u *= -msm->gzero; 01182 } 01183 for (index = 0; index < nn; index++) { 01184 u += qbuf[index] * ebuf[index]; 01185 } 01186 msm->uelec += 0.5 * u; 01187 // printf("MSM long-range potential: %g\n", 0.5 * u); 01188 01189 return NL_MSM_SUCCESS; 01190 } /* interpolation() */
int NL_msm_compute_long_range | ( | NL_Msm * | msm | ) |
Definition at line 52 of file msm_longrng.c.
References anterpolation(), gridcutoff(), interpolation(), NL_Msm_t::msmflags, NL_MSM_COMPUTE_CUDA_FALL_BACK, NL_MSM_COMPUTE_CUDA_GRID_CUTOFF, NL_MSM_COMPUTE_NONFACTORED, NL_MSM_ERROR_SUPPORT, NL_MSM_SUCCESS, NL_Msm_t::nlevels, prolongation(), prolongation_factored(), NL_Msm_t::report_timings, restriction(), restriction_factored(), NL_Msm_t::timer_longrng, wkf_timer_start(), wkf_timer_stop(), and wkf_timer_time().
Referenced by NL_msm_compute_force().
00052 { 00053 double time_delta; 00054 int rc = 0; 00055 int level; 00056 int nlevels = msm->nlevels; 00057 int use_cuda_gridcutoff = (msm->msmflags & NL_MSM_COMPUTE_CUDA_GRID_CUTOFF); 00058 int fallback_cpu = (msm->msmflags & NL_MSM_COMPUTE_CUDA_FALL_BACK); 00059 int use_nonfactored = (msm->msmflags & NL_MSM_COMPUTE_NONFACTORED); 00060 00061 wkf_timer_start(msm->timer_longrng); 00062 rc = anterpolation(msm); 00063 if (rc) return rc; 00064 wkf_timer_stop(msm->timer_longrng); 00065 time_delta = wkf_timer_time(msm->timer_longrng); 00066 if (msm->report_timings) { 00067 printf("MSM anterpolation: %6.3f sec\n", time_delta); 00068 } 00069 00070 wkf_timer_start(msm->timer_longrng); 00071 if (use_nonfactored) { 00072 for (level = 0; level < nlevels - 1; level++) { 00073 rc = restriction(msm, level); 00074 if (rc) return rc; 00075 } 00076 } 00077 else { 00078 for (level = 0; level < nlevels - 1; level++) { 00079 rc = restriction_factored(msm, level); 00080 if (rc) return rc; 00081 } 00082 } 00083 wkf_timer_stop(msm->timer_longrng); 00084 time_delta = wkf_timer_time(msm->timer_longrng); 00085 if (msm->report_timings) { 00086 printf("MSM restriction: %6.3f sec\n", time_delta); 00087 } 00088 00089 wkf_timer_start(msm->timer_longrng); 00090 if (use_cuda_gridcutoff && nlevels > 1) { 00091 #ifdef NL_USE_CUDA 00092 if ((rc = NL_msm_cuda_condense_qgrids(msm)) != NL_MSM_SUCCESS || 00093 (rc = NL_msm_cuda_compute_gridcutoff(msm)) != NL_MSM_SUCCESS || 00094 (rc = NL_msm_cuda_expand_egrids(msm)) != NL_MSM_SUCCESS) { 00095 if (fallback_cpu) { 00096 printf("Falling back on CPU for grid cutoff computation\n"); 00097 use_cuda_gridcutoff = 0; 00098 } 00099 else return rc; 00100 } 00101 #else 00102 if (fallback_cpu) { 00103 printf("Falling back on CPU for grid cutoff computation\n"); 00104 use_cuda_gridcutoff = 0; 00105 } 00106 else return NL_MSM_ERROR_SUPPORT; 00107 #endif 00108 } 00109 00110 if ( ! use_cuda_gridcutoff ) { 00111 for (level = 0; level < nlevels - 1; level++) { 00112 rc = gridcutoff(msm, level); 00113 if (rc) return rc; 00114 } 00115 } 00116 00117 rc = gridcutoff(msm, level); /* top level */ 00118 if (rc) return rc; 00119 00120 wkf_timer_stop(msm->timer_longrng); 00121 time_delta = wkf_timer_time(msm->timer_longrng); 00122 if (msm->report_timings) { 00123 printf("MSM grid cutoff: %6.3f sec\n", time_delta); 00124 } 00125 00126 wkf_timer_start(msm->timer_longrng); 00127 if (use_nonfactored) { 00128 for (level--; level >= 0; level--) { 00129 rc = prolongation(msm, level); 00130 if (rc) return rc; 00131 } 00132 } 00133 else { 00134 for (level--; level >= 0; level--) { 00135 rc = prolongation_factored(msm, level); 00136 if (rc) return rc; 00137 } 00138 } 00139 wkf_timer_stop(msm->timer_longrng); 00140 time_delta = wkf_timer_time(msm->timer_longrng); 00141 if (msm->report_timings) { 00142 printf("MSM prolongation: %6.3f sec\n", time_delta); 00143 } 00144 00145 wkf_timer_start(msm->timer_longrng); 00146 rc = interpolation(msm); 00147 if (rc) return rc; 00148 wkf_timer_stop(msm->timer_longrng); 00149 time_delta = wkf_timer_time(msm->timer_longrng); 00150 if (msm->report_timings) { 00151 printf("MSM interpolation: %6.3f sec\n", time_delta); 00152 } 00153 00154 return 0; 00155 }
int prolongation | ( | NL_Msm * | msm, | |
int | level | |||
) | [static] |
Definition at line 1582 of file msm_longrng.c.
References NL_Msm_t::approx, NL_Msm_t::eh, IndexOffset, j, NL_Msm_t::msmflags, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, Nstencil, and PhiStencil.
Referenced by NL_msm_compute_long_range().
01582 { 01583 /* grids of charge, finer grid and coarser grid */ 01584 NL_Msmgrid_double *ehgrid = &(msm->eh[level]); 01585 double *eh = ehgrid->data; /* index the offset data buffer */ 01586 const NL_Msmgrid_double *e2hgrid = &(msm->eh[level+1]); 01587 const double *e2h_buffer = e2hgrid->buffer; /* index the raw buffer */ 01588 01589 /* finer grid index ranges and dimensions */ 01590 const int ia1 = ehgrid->i0; /* lowest x-index */ 01591 const int ib1 = ia1 + ehgrid->ni - 1; /* highest x-index */ 01592 const int ja1 = ehgrid->j0; /* lowest y-index */ 01593 const int jb1 = ja1 + ehgrid->nj - 1; /* highest y-index */ 01594 const int ka1 = ehgrid->k0; /* lowest z-index */ 01595 const int kb1 = ka1 + ehgrid->nk - 1; /* highest z-index */ 01596 const int ni1 = ehgrid->ni; /* length along x-dim */ 01597 const int nj1 = ehgrid->nj; /* length along y-dim */ 01598 const int nk1 = ehgrid->nk; /* length along z-dim */ 01599 01600 /* coarser grid index ranges and dimensions */ 01601 const int ia2 = e2hgrid->i0; /* lowest x-index */ 01602 const int ib2 = ia2 + e2hgrid->ni - 1; /* highest x-index */ 01603 const int ja2 = e2hgrid->j0; /* lowest y-index */ 01604 const int jb2 = ja2 + e2hgrid->nj - 1; /* highest y-index */ 01605 const int ka2 = e2hgrid->k0; /* lowest z-index */ 01606 const int kb2 = ka2 + e2hgrid->nk - 1; /* highest z-index */ 01607 01608 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 01609 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 01610 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 01611 01612 const int nstencil = Nstencil[msm->approx]; 01613 const int *offset = IndexOffset[msm->approx]; 01614 const double *phi = PhiStencil[msm->approx]; 01615 01616 double cjk; 01617 01618 int i1, j1, k1, index1, jk1off, k1off; 01619 int i2, j2, k2; 01620 int index2; 01621 int i, j, k; 01622 01623 for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) { 01624 k1 = k2 * 2; 01625 for (j2 = ja2; j2 <= jb2; j2++) { 01626 j1 = j2 * 2; 01627 for (i2 = ia2; i2 <= ib2; i2++, index2++) { 01628 i1 = i2 * 2; 01629 01630 for (k = 0; k < nstencil; k++) { 01631 k1off = k1 + offset[k]; 01632 if (k1off < ka1) { 01633 if (ispz) do { k1off += nk1; } while (k1off < ka1); 01634 else continue; 01635 } 01636 else if (k1off > kb1) { 01637 if (ispz) do { k1off -= nk1; } while (k1off > kb1); 01638 else break; 01639 } 01640 k1off *= nj1; 01641 for (j = 0; j < nstencil; j++) { 01642 jk1off = j1 + offset[j]; 01643 if (jk1off < ja1) { 01644 if (ispy) do { jk1off += nj1; } while (jk1off < ja1); 01645 else continue; 01646 } 01647 else if (jk1off > jb1) { 01648 if (ispy) do { jk1off -= nj1; } while (jk1off > jb1); 01649 else break; 01650 } 01651 jk1off = (k1off + jk1off) * ni1; 01652 cjk = phi[j] * phi[k]; 01653 for (i = 0; i < nstencil; i++) { 01654 index1 = i1 + offset[i]; 01655 if (index1 < ia1) { 01656 if (ispx) do { index1 += ni1; } while (index1 < ia1); 01657 else continue; 01658 } 01659 else if (index1 > ib1) { 01660 if (ispx) do { index1 -= ni1; } while (index1 > ib1); 01661 else break; 01662 } 01663 index1 += jk1off; 01664 eh[index1] += e2h_buffer[index2] * phi[i] * cjk; 01665 } 01666 } 01667 } /* end loop over finer grid stencil */ 01668 01669 } 01670 } 01671 } /* end loop over each coarser grid point */ 01672 01673 return NL_MSM_SUCCESS; 01674 }
int prolongation_factored | ( | NL_Msm * | msm, | |
int | level | |||
) | [static] |
Definition at line 1340 of file msm_longrng.c.
References NL_Msm_t::approx, NL_Msm_t::eh, j, NL_Msm_t::lyzd, NL_Msm_t::lzd, NL_Msm_t::msmflags, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, PhiStencilFactored, and PolyDegree.
Referenced by NL_msm_compute_long_range().
01340 { 01341 /* grids of potential, finer grid and coarser grid */ 01342 NL_Msmgrid_double *ehgrid = &(msm->eh[level]); 01343 double *eh = ehgrid->data; 01344 const NL_Msmgrid_double *e2hgrid = &(msm->eh[level+1]); 01345 const double *e2h = e2hgrid->data; 01346 01347 /* finer grid index ranges and dimensions */ 01348 const int ia1 = ehgrid->i0; /* lowest x-index */ 01349 const int ib1 = ia1 + ehgrid->ni - 1; /* highest x-index */ 01350 const int ja1 = ehgrid->j0; /* lowest y-index */ 01351 const int jb1 = ja1 + ehgrid->nj - 1; /* highest y-index */ 01352 const int ka1 = ehgrid->k0; /* lowest z-index */ 01353 const int kb1 = ka1 + ehgrid->nk - 1; /* highest z-index */ 01354 const int ni1 = ehgrid->ni; /* length along x-dim */ 01355 const int nj1 = ehgrid->nj; /* length along y-dim */ 01356 const int nk1 = ehgrid->nk; /* length along z-dim */ 01357 01358 /* coarser grid index ranges and dimensions */ 01359 const int ia2 = e2hgrid->i0; /* lowest x-index */ 01360 const int ib2 = ia2 + e2hgrid->ni - 1; /* highest x-index */ 01361 const int ja2 = e2hgrid->j0; /* lowest y-index */ 01362 const int jb2 = ja2 + e2hgrid->nj - 1; /* highest y-index */ 01363 const int ka2 = e2hgrid->k0; /* lowest z-index */ 01364 const int kb2 = ka2 + e2hgrid->nk - 1; /* highest z-index */ 01365 const int nrow_e2 = e2hgrid->ni; 01366 const int nstride_e2 = nrow_e2 * e2hgrid->nj; 01367 01368 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 01369 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 01370 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 01371 01372 /* set buffer using indexing offset, so that indexing matches eh grid */ 01373 double *ezd = msm->lzd + (-ka1); 01374 double *eyzd = msm->lyzd + (-ka1*nj1 + -ja1); 01375 01376 const size_t size_lzd = nk1 * sizeof(double); 01377 const size_t size_lyzd = nj1 * nk1 * sizeof(double); 01378 01379 const double *phi = NULL; 01380 01381 int i2, j2, k2; 01382 int im, jm, km; 01383 int i, j, k; 01384 int index_plane_e2, index_e2; 01385 int index_jk, offset_k; 01386 int offset; 01387 01388 const double *phi_factored = PhiStencilFactored[msm->approx]; 01389 const int r_stencil = PolyDegree[msm->approx]; /* "radius" of stencil */ 01390 const int diam_stencil = 2*r_stencil + 1; /* "diameter" of stencil */ 01391 01392 for (i2 = ia2; i2 <= ib2; i2++) { 01393 memset(msm->lyzd, 0, size_lyzd); 01394 01395 for (j2 = ja2; j2 <= jb2; j2++) { 01396 memset(msm->lzd, 0, size_lzd); 01397 index_plane_e2 = j2 * nrow_e2 + i2; 01398 01399 for (k2 = ka2; k2 <= kb2; k2++) { 01400 index_e2 = k2 * nstride_e2 + index_plane_e2; 01401 km = (k2 << 1); /* = 2*k2 */ 01402 if ( ! ispz ) { /* nonperiodic */ 01403 int lower = km - r_stencil; 01404 int upper = km + r_stencil; 01405 if (lower < ka1) lower = ka1; 01406 if (upper > kb1) upper = kb1; /* clip edges */ 01407 phi = phi_factored + r_stencil; /* center of stencil */ 01408 for (k = lower; k <= upper; k++) { 01409 ezd[k] += phi[k-km] * e2h[index_e2]; 01410 } 01411 } 01412 else { /* periodic */ 01413 int kp = km - r_stencil; /* index at left end of stencil */ 01414 if (kp < ka1) do { kp += nk1; } while (kp < ka1); /* start inside */ 01415 phi = phi_factored; /* left end of stencil */ 01416 for (k = 0; k < diam_stencil; k++, kp++) { 01417 if (kp > kb1) kp = ka1; /* wrap around edge of grid */ 01418 ezd[kp] += phi[k] * e2h[index_e2]; 01419 } 01420 } 01421 } /* for k2 */ 01422 01423 for (k = ka1; k <= kb1; k++) { 01424 offset = k * nj1; 01425 jm = (j2 << 1); /* = 2*j2 */ 01426 if ( ! ispy ) { /* nonperiodic */ 01427 int lower = jm - r_stencil; 01428 int upper = jm + r_stencil; 01429 if (lower < ja1) lower = ja1; 01430 if (upper > jb1) upper = jb1; /* clip edges */ 01431 phi = phi_factored + r_stencil; /* center of stencil */ 01432 for (j = lower; j <= upper; j++) { 01433 eyzd[offset + j] += phi[j-jm] * ezd[k]; 01434 } 01435 } 01436 else { /* periodic */ 01437 int jp = jm - r_stencil; /* index at left end of stencil */ 01438 if (jp < ja1) do { jp += nj1; } while (jp < ja1); /* start inside */ 01439 phi = phi_factored; /* left end of stencil */ 01440 for (j = 0; j < diam_stencil; j++, jp++) { 01441 if (jp > jb1) jp = ja1; /* wrap around edge of grid */ 01442 eyzd[offset + jp] += phi[j] * ezd[k]; 01443 } 01444 } 01445 } /* for k */ 01446 01447 } /* for j2 */ 01448 01449 for (k = ka1; k <= kb1; k++) { 01450 offset_k = k * nj1; 01451 01452 for (j = ja1; j <= jb1; j++) { 01453 index_jk = offset_k + j; 01454 offset = index_jk * ni1; 01455 im = (i2 << 1); /* = 2*i2 */ 01456 if ( ! ispx ) { /* nonperiodic */ 01457 int lower = im - r_stencil; 01458 int upper = im + r_stencil; 01459 if (lower < ia1) lower = ia1; 01460 if (upper > ib1) upper = ib1; /* clip edges */ 01461 phi = phi_factored + r_stencil; /* center of stencil */ 01462 for (i = lower; i <= upper; i++) { 01463 eh[offset + i] += phi[i-im] * eyzd[index_jk]; 01464 } 01465 } 01466 else { /* periodic */ 01467 int ip = im - r_stencil; /* index at left end of stencil */ 01468 if (ip < ia1) do { ip += ni1; } while (ip < ia1); /* start inside */ 01469 phi = phi_factored; /* left end of stencil */ 01470 for (i = 0; i < diam_stencil; i++, ip++) { 01471 if (ip > ib1) ip = ia1; /* wrap around edge of grid */ 01472 eh[offset + ip] += phi[i] * eyzd[index_jk]; 01473 } 01474 } 01475 } /* for j */ 01476 01477 } /* for k */ 01478 01479 } /* for i2 */ 01480 01481 return NL_MSM_SUCCESS; 01482 } /* prolongation_factored */
int restriction | ( | NL_Msm * | msm, | |
int | level | |||
) | [static] |
Definition at line 1485 of file msm_longrng.c.
References NL_Msm_t::approx, IndexOffset, j, NL_Msm_t::msmflags, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, Nstencil, PhiStencil, and NL_Msm_t::qh.
Referenced by NL_msm_compute_long_range().
01485 { 01486 /* grids of charge, finer grid and coarser grid */ 01487 const NL_Msmgrid_double *qhgrid = &(msm->qh[level]); 01488 const double *qh = qhgrid->data; /* index the offset data buffer */ 01489 NL_Msmgrid_double *q2hgrid = &(msm->qh[level+1]); 01490 double *q2h_buffer = q2hgrid->buffer; /* index the raw buffer */ 01491 01492 /* finer grid index ranges and dimensions */ 01493 const int ia1 = qhgrid->i0; /* lowest x-index */ 01494 const int ib1 = ia1 + qhgrid->ni - 1; /* highest x-index */ 01495 const int ja1 = qhgrid->j0; /* lowest y-index */ 01496 const int jb1 = ja1 + qhgrid->nj - 1; /* highest y-index */ 01497 const int ka1 = qhgrid->k0; /* lowest z-index */ 01498 const int kb1 = ka1 + qhgrid->nk - 1; /* highest z-index */ 01499 const int ni1 = qhgrid->ni; /* length along x-dim */ 01500 const int nj1 = qhgrid->nj; /* length along y-dim */ 01501 const int nk1 = qhgrid->nk; /* length along z-dim */ 01502 01503 /* coarser grid index ranges and dimensions */ 01504 const int ia2 = q2hgrid->i0; /* lowest x-index */ 01505 const int ib2 = ia2 + q2hgrid->ni - 1; /* highest x-index */ 01506 const int ja2 = q2hgrid->j0; /* lowest y-index */ 01507 const int jb2 = ja2 + q2hgrid->nj - 1; /* highest y-index */ 01508 const int ka2 = q2hgrid->k0; /* lowest z-index */ 01509 const int kb2 = ka2 + q2hgrid->nk - 1; /* highest z-index */ 01510 01511 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 01512 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 01513 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 01514 01515 const int nstencil = Nstencil[msm->approx]; 01516 const int *offset = IndexOffset[msm->approx]; 01517 const double *phi = PhiStencil[msm->approx]; 01518 01519 double q2h_sum, cjk; 01520 01521 int i1, j1, k1, index1, jk1off, k1off; 01522 int i2, j2, k2; 01523 int index2; 01524 int i, j, k; 01525 01526 for (index2 = 0, k2 = ka2; k2 <= kb2; k2++) { 01527 k1 = k2 * 2; 01528 for (j2 = ja2; j2 <= jb2; j2++) { 01529 j1 = j2 * 2; 01530 for (i2 = ia2; i2 <= ib2; i2++, index2++) { 01531 i1 = i2 * 2; 01532 01533 q2h_sum = 0; 01534 for (k = 0; k < nstencil; k++) { 01535 k1off = k1 + offset[k]; 01536 if (k1off < ka1) { 01537 if (ispz) do { k1off += nk1; } while (k1off < ka1); 01538 else continue; 01539 } 01540 else if (k1off > kb1) { 01541 if (ispz) do { k1off -= nk1; } while (k1off > kb1); 01542 else break; 01543 } 01544 k1off *= nj1; 01545 for (j = 0; j < nstencil; j++) { 01546 jk1off = j1 + offset[j]; 01547 if (jk1off < ja1) { 01548 if (ispy) do { jk1off += nj1; } while (jk1off < ja1); 01549 else continue; 01550 } 01551 else if (jk1off > jb1) { 01552 if (ispy) do { jk1off -= nj1; } while (jk1off > jb1); 01553 else break; 01554 } 01555 jk1off = (k1off + jk1off) * ni1; 01556 cjk = phi[j] * phi[k]; 01557 for (i = 0; i < nstencil; i++) { 01558 index1 = i1 + offset[i]; 01559 if (index1 < ia1) { 01560 if (ispx) do { index1 += ni1; } while (index1 < ia1); 01561 else continue; 01562 } 01563 else if (index1 > ib1) { 01564 if (ispx) do { index1 -= ni1; } while (index1 > ib1); 01565 else break; 01566 } 01567 index1 += jk1off; 01568 q2h_sum += qh[index1] * phi[i] * cjk; 01569 } 01570 } 01571 } /* end loop over finer grid stencil */ 01572 01573 q2h_buffer[index2] = q2h_sum; /* store charge to coarser grid */ 01574 } 01575 } 01576 } /* end loop over each coarser grid point */ 01577 01578 return NL_MSM_SUCCESS; 01579 }
int restriction_factored | ( | NL_Msm * | msm, | |
int | level | |||
) | [static] |
Definition at line 1193 of file msm_longrng.c.
References NL_Msm_t::approx, j, NL_Msm_t::lyzd, NL_Msm_t::lzd, NL_Msm_t::msmflags, NL_MSM_PERIODIC_VEC1, NL_MSM_PERIODIC_VEC2, NL_MSM_PERIODIC_VEC3, NL_MSM_SUCCESS, PhiStencilFactored, PolyDegree, and NL_Msm_t::qh.
Referenced by NL_msm_compute_long_range().
01193 { 01194 /* grids of charge, finer grid and coarser grid */ 01195 const NL_Msmgrid_double *qhgrid = &(msm->qh[level]); 01196 const double *qh = qhgrid->data; 01197 NL_Msmgrid_double *q2hgrid = &(msm->qh[level+1]); 01198 double *q2h = q2hgrid->data; 01199 01200 /* finer grid index ranges and dimensions */ 01201 const int ia1 = qhgrid->i0; /* lowest x-index */ 01202 const int ib1 = ia1 + qhgrid->ni - 1; /* highest x-index */ 01203 const int ja1 = qhgrid->j0; /* lowest y-index */ 01204 const int jb1 = ja1 + qhgrid->nj - 1; /* highest y-index */ 01205 const int ka1 = qhgrid->k0; /* lowest z-index */ 01206 const int kb1 = ka1 + qhgrid->nk - 1; /* highest z-index */ 01207 const int ni1 = qhgrid->ni; /* length along x-dim */ 01208 const int nj1 = qhgrid->nj; /* length along y-dim */ 01209 const int nk1 = qhgrid->nk; /* length along z-dim */ 01210 01211 /* coarser grid index ranges and dimensions */ 01212 const int ia2 = q2hgrid->i0; /* lowest x-index */ 01213 const int ib2 = ia2 + q2hgrid->ni - 1; /* highest x-index */ 01214 const int ja2 = q2hgrid->j0; /* lowest y-index */ 01215 const int jb2 = ja2 + q2hgrid->nj - 1; /* highest y-index */ 01216 const int ka2 = q2hgrid->k0; /* lowest z-index */ 01217 const int kb2 = ka2 + q2hgrid->nk - 1; /* highest z-index */ 01218 const int nrow_q2 = q2hgrid->ni; 01219 const int nstride_q2 = nrow_q2 * q2hgrid->nj; 01220 01221 const int ispx = (msm->msmflags & NL_MSM_PERIODIC_VEC1); 01222 const int ispy = (msm->msmflags & NL_MSM_PERIODIC_VEC2); 01223 const int ispz = (msm->msmflags & NL_MSM_PERIODIC_VEC3); 01224 01225 /* set buffer using indexing offset, so that indexing matches qh grid */ 01226 double *qzd = msm->lzd + (-ka1); 01227 double *qyzd = msm->lyzd + (-ka1*nj1 + -ja1); 01228 double qsum; 01229 01230 const double *phi = NULL; 01231 01232 int i2, j2, k2; 01233 int im, jm, km; 01234 int i, j, k; 01235 int index_plane_q2, index_q2; 01236 int index_jk, offset_k; 01237 int offset; 01238 01239 const double *phi_factored = PhiStencilFactored[msm->approx]; 01240 const int r_stencil = PolyDegree[msm->approx]; /* "radius" of stencil */ 01241 const int diam_stencil = 2*r_stencil + 1; /* "diameter" of stencil */ 01242 01243 for (i2 = ia2; i2 <= ib2; i2++) { 01244 01245 for (k = ka1; k <= kb1; k++) { 01246 offset_k = k * nj1; 01247 01248 for (j = ja1; j <= jb1; j++) { 01249 index_jk = offset_k + j; 01250 offset = index_jk * ni1; 01251 im = (i2 << 1); /* = 2*i2 */ 01252 qsum = 0; 01253 if ( ! ispx ) { /* nonperiodic */ 01254 int lower = im - r_stencil; 01255 int upper = im + r_stencil; 01256 if (lower < ia1) lower = ia1; 01257 if (upper > ib1) upper = ib1; /* clip edges */ 01258 phi = phi_factored + r_stencil; /* center of stencil */ 01259 for (i = lower; i <= upper; i++) { 01260 qsum += phi[i-im] * qh[offset + i]; 01261 } 01262 } 01263 else { /* periodic */ 01264 int ip = im - r_stencil; /* index at left end of stencil */ 01265 if (ip < ia1) do { ip += ni1; } while (ip < ia1); /* start inside */ 01266 phi = phi_factored; /* left end of stencil */ 01267 for (i = 0; i < diam_stencil; i++, ip++) { 01268 if (ip > ib1) ip = ia1; /* wrap around edge of grid */ 01269 qsum += phi[i] * qh[offset + ip]; 01270 } 01271 } 01272 qyzd[index_jk] = qsum; 01273 } /* for j */ 01274 01275 } /* for k */ 01276 01277 for (j2 = ja2; j2 <= jb2; j2++) { 01278 index_plane_q2 = j2 * nrow_q2 + i2; 01279 01280 for (k = ka1; k <= kb1; k++) { 01281 offset = k * nj1; 01282 jm = (j2 << 1); /* = 2*j2 */ 01283 qsum = 0; 01284 if ( ! ispy ) { /* nonperiodic */ 01285 int lower = jm - r_stencil; 01286 int upper = jm + r_stencil; 01287 if (lower < ja1) lower = ja1; 01288 if (upper > jb1) upper = jb1; /* clip edges */ 01289 phi = phi_factored + r_stencil; /* center of stencil */ 01290 for (j = lower; j <= upper; j++) { 01291 qsum += phi[j-jm] * qyzd[offset + j]; 01292 } 01293 } 01294 else { /* periodic */ 01295 int jp = jm - r_stencil; /* index at left end of stencil */ 01296 if (jp < ja1) do { jp += nj1; } while (jp < ja1); /* start inside */ 01297 phi = phi_factored; /* left end of stencil */ 01298 for (j = 0; j < diam_stencil; j++, jp++) { 01299 if (jp > jb1) jp = ja1; /* wrap around edge of grid */ 01300 qsum += phi[j] * qyzd[offset + jp]; 01301 } 01302 } 01303 qzd[k] = qsum; 01304 } /* for k */ 01305 01306 for (k2 = ka2; k2 <= kb2; k2++) { 01307 index_q2 = k2 * nstride_q2 + index_plane_q2; 01308 km = (k2 << 1); /* = 2*k2 */ 01309 qsum = 0; 01310 if ( ! ispz ) { /* nonperiodic */ 01311 int lower = km - r_stencil; 01312 int upper = km + r_stencil; 01313 if (lower < ka1) lower = ka1; 01314 if (upper > kb1) upper = kb1; /* clip edges */ 01315 phi = phi_factored + r_stencil; /* center of stencil */ 01316 for (k = lower; k <= upper; k++) { 01317 qsum += phi[k-km] * qzd[k]; 01318 } 01319 } 01320 else { /* periodic */ 01321 int kp = km - r_stencil; /* index at left end of stencil */ 01322 if (kp < ka1) do { kp += nk1; } while (kp < ka1); /* start inside */ 01323 phi = phi_factored; /* left end of stencil */ 01324 for (k = 0; k < diam_stencil; k++, kp++) { 01325 if (kp > kb1) kp = ka1; /* wrap around edge of grid */ 01326 qsum += phi[k] * qzd[kp]; 01327 } 01328 } 01329 q2h[index_q2] = qsum; 01330 } /* for k2 */ 01331 01332 } /* for j2 */ 01333 01334 } /* for i2 */ 01335 01336 return NL_MSM_SUCCESS; 01337 } /* restriction_factored */
const int IndexOffset[NL_MSM_APPROX_END][MAX_NSTENCIL] [static] |
{ {-3, -1, 0, 1, 3}, {-5, -3, -1, 0, 1, 3, 5}, {-5, -3, -1, 0, 1, 3, 5}, {-7, -5, -3, -1, 0, 1, 3, 5, 7}, {-7, -5, -3, -1, 0, 1, 3, 5, 7}, {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9}, {-9, -7, -5, -3, -1, 0, 1, 3, 5, 7, 9}, {-3, -2, -1, 0, 1, 2, 3}, }
Index offsets from the stencil-centered grid element, to get to the correct contributing grid element.
Definition at line 214 of file msm_longrng.c.
Referenced by prolongation(), and restriction().
const int Nstencil[NL_MSM_APPROX_END] [static] |
{ 5, 7, 7, 9, 9, 11, 11, 7 }
The stencil array lengths below.
Definition at line 208 of file msm_longrng.c.
Referenced by prolongation(), and restriction().
const double PhiStencil[NL_MSM_APPROX_END][MAX_NSTENCIL] [static] |
The grid transfer stencils for the non-factored restriction and prolongation procedures.
Definition at line 242 of file msm_longrng.c.
Referenced by prolongation(), and restriction().
const double PhiStencilFactored[NL_MSM_APPROX_END][2 *MAX_POLY_DEGREE+1] [static] |
The grid transfer stencils for factored restriction and prolongation. Must be listed in same order as APPROX enum from msm.h
Definition at line 170 of file msm_longrng.c.
Referenced by prolongation_factored(), and restriction_factored().
const int PolyDegree[NL_MSM_APPROX_END] [static] |
{ 3, 5, 5, 7, 7, 9, 9, 3, }
Degree of polynomial basis function Phi. Must be listed in same order as APPROX enum from msm.h
Definition at line 163 of file msm_longrng.c.
Referenced by anterpolation(), interpolation(), prolongation_factored(), and restriction_factored().