1
0
Fork 0
mirror of https://github.com/cosmo-sims/MUSIC.git synced 2024-09-10 06:43:45 +02:00
MUSIC/src/mg_operators.hh
2024-02-24 10:52:43 +01:00

1383 lines
50 KiB
C++

// This file is part of MUSIC
// A software package to generate ICs for cosmological simulations
// Copyright (C) 2010-2024 by Oliver Hahn
//
// monofonIC is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// monofonIC is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
#ifndef __MG_OPERATORS_HH
#define __MG_OPERATORS_HH
//! injection operator based on Catmull-Rom splines with arbitrary refinement factor
class mg_cubic_mult
{
protected:
template <typename real_t>
inline double CUBE(real_t x) const
{
return x * x * x;
}
template <typename T>
inline T SQR(T x) const
{
return x * x;
}
public:
template <typename M>
inline double interp_cubic(double dx, double dy, double dz, M &V, int ox, int oy, int oz) const
{
int i, j, k;
double u[4], v[4], w[4];
double r[4], q[4];
double vox = 0;
int sx = 0, sy = 0, sz = 0;
if (dx > 0.0)
sx = 1;
if (dy > 0.0)
sy = 1;
if (dz > 0.0)
sz = 1;
if (dx > 0)
{
w[0] = -0.5 * CUBE(dx) + SQR(dx) - 0.5 * dx;
w[1] = 1.5 * CUBE(dx) - 2.5 * SQR(dx) + 1.0;
w[2] = -1.5 * CUBE(dx) + 2.0 * SQR(dx) + 0.5 * dx;
w[3] = 0.5 * CUBE(dx) - 0.5 * SQR(dx);
}
else
{
w[0] = -0.5 * CUBE(dx) - 0.5 * SQR(dx);
w[1] = 1.5 * CUBE(dx) + 2.0 * SQR(dx) - 0.5 * dx;
w[2] = -1.5 * CUBE(dx) - 2.5 * SQR(dx) + 1.0;
w[3] = 0.5 * CUBE(dx) + SQR(dx) + 0.5 * dx;
}
if (dy > 0)
{
v[0] = -0.5 * CUBE(dy) + SQR(dy) - 0.5 * dy;
v[1] = 1.5 * CUBE(dy) - 2.5 * SQR(dy) + 1.0;
v[2] = -1.5 * CUBE(dy) + 2.0 * SQR(dy) + 0.5 * dy;
v[3] = 0.5 * CUBE(dy) - 0.5 * SQR(dy);
}
else
{
v[0] = -0.5 * CUBE(dy) - 0.5 * SQR(dy);
v[1] = 1.5 * CUBE(dy) + 2.0 * SQR(dy) - 0.5 * dy;
v[2] = -1.5 * CUBE(dy) - 2.5 * SQR(dy) + 1.0;
v[3] = 0.5 * CUBE(dy) + SQR(dy) + 0.5 * dy;
}
if (dz > 0)
{
u[0] = -0.5 * CUBE(dz) + SQR(dz) - 0.5 * dz;
u[1] = 1.5 * CUBE(dz) - 2.5 * SQR(dz) + 1.0;
u[2] = -1.5 * CUBE(dz) + 2.0 * SQR(dz) + 0.5 * dz;
u[3] = 0.5 * CUBE(dz) - 0.5 * SQR(dz);
}
else
{
u[0] = -0.5 * CUBE(dz) - 0.5 * SQR(dz);
u[1] = 1.5 * CUBE(dz) + 2.0 * SQR(dz) - 0.5 * dz;
u[2] = -1.5 * CUBE(dz) - 2.5 * SQR(dz) + 1.0;
u[3] = 0.5 * CUBE(dz) + SQR(dz) + 0.5 * dz;
}
for (k = 0; k < 4; k++)
{
q[k] = 0;
for (j = 0; j < 4; j++)
{
r[j] = 0;
for (i = 0; i < 4; i++)
{
r[j] += u[i] * V(ox + k - 2 + sx, oy + j - 2 + sy, oz + i - 2 + sz);
}
q[k] += v[j] * r[j];
}
vox += w[k] * q[k];
}
return vox;
}
template <typename m1, typename m2>
inline void prolong(m1 &V, m2 &v) const
{
// int N = 1<<R;
// int Nl = 0; //, Nr=N/2;
double dx = 0.5; // 1.0/(double)N;
int nx = v.size(0), ny = v.size(1), nz = v.size(2);
double finemean = 0.0, coarsemean = 0.0;
size_t finecount = 0, coarsecount = 0;
// int oxc = V.offset(0), oyc = V.offset(1), ozc = V.offset(2);
int oxf = v.offset(0), oyf = v.offset(1), ozf = v.offset(2);
#pragma omp parallel for reduction(+ \
: finemean, finecount, coarsemean, coarsecount)
for (int i = 0; i < nx; i += 2)
for (int j = 0; j < ny; j += 2)
for (int k = 0; k < nz; k += 2)
{
// determine the coarse cell index
int iic, jjc, kkc;
iic = oxf + (int)(dx * (double)i);
jjc = oyf + (int)(dx * (double)j);
kkc = ozf + (int)(dx * (double)k);
double xc, yc, zc, xf, yf, zf;
// double coarseval = V(iic,jjc,kkc);
// double finesum = 0.0;
for (int ii = 0; ii < 2; ++ii)
for (int jj = 0; jj < 2; ++jj)
for (int kk = 0; kk < 2; ++kk)
{
xf = oxf + (int)(dx * (double)(i + ii)) - 0.5 * dx;
yf = oyf + (int)(dx * (double)(j + jj)) - 0.5 * dx;
zf = ozf + (int)(dx * (double)(k + kk)) - 0.5 * dx;
xc = (double)(iic);
yc = (double)(jjc);
zc = (double)(kkc);
double val = interp_cubic(xf - xc, yf - yc, zf - zc, V, iic, jjc, kkc);
// finesum += val;
v(i + ii, j + jj, k + kk) = val;
}
// finesum /= 8.0;
/*double dv = coarseval-finesum;
for( int ii=0; ii<2; ++ii )
for( int jj=0; jj<2; ++jj )
for( int kk=0; kk<2; ++kk )
v(i+ii,j+jj,k+kk) += dv;
*/
}
}
template <typename m1, typename m2>
inline void prolong(m1 &V, m2 &v, int oxc, int oyc, int ozc, int oxf, int oyf, int ozf, int R) const
{
int N = 1 << R;
int Nl = -N / 2 + 1; //, Nr=N/2;
double dx = 1.0 / (double)N;
int nx = v.size(0), ny = v.size(1), nz = v.size(2);
double finemean = 0.0, coarsemean = 0.0;
size_t finecount = 0, coarsecount = 0;
#pragma omp parallel for reduction(+ \
: finemean, finecount, coarsemean, coarsecount)
for (int i = 0; i < nx; ++i)
for (int j = 0; j < ny; ++j)
for (int k = 0; k < nz; ++k)
{
// determine the coarse cell index
int iic, jjc, kkc;
iic = (int)(dx * (double)(oxf + i) - oxc);
jjc = (int)(dx * (double)(oyf + j) - oyc);
kkc = (int)(dx * (double)(ozf + k) - ozc);
double xc, yc, zc, xf, yf, zf;
xc = iic + 0.5;
yc = jjc + 0.5;
zc = kkc + 0.5;
xf = (dx * (double)(oxf + i + Nl) - oxc) - 0.5 * dx + 0.5;
yf = (dx * (double)(oyf + j + Nl) - oyc) - 0.5 * dx + 0.5;
zf = (dx * (double)(ozf + k + Nl) - ozc) - 0.5 * dx + 0.5;
double val = interp_cubic(xf - xc, yf - yc, zf - zc, V, iic, jjc, kkc);
v(i, j, k) = val;
finemean += val;
finecount++;
coarsemean += V(iic, jjc, kkc);
coarsecount++;
}
coarsemean /= coarsecount;
finemean /= finecount;
//... subtract the mean difference caused by interpolation
/*double dmean = coarsemean-finemean;
#pragma omp parallel for
for( int i=0; i<nx; ++i )
for( int j=0; j<ny; ++j )
for( int k=0; k<nz; ++k )
v(i,j,k) += dmean;*/
}
template <typename m1, typename m2>
inline void prolong_add(m1 &V, m2 &v, int oxc, int oyc, int ozc, int oxf, int oyf, int ozf, int R) const
{
int N = 1 << R;
int Nl = -N / 2 + 1; //, Nr=N/2;
double dx = 1.0 / (double)N;
int nx = v.size(0), ny = v.size(1), nz = v.size(2);
double finemean = 0.0, coarsemean = 0.0;
size_t finecount = 0, coarsecount = 0;
#pragma omp parallel for reduction(+ \
: finemean, finecount, coarsemean, coarsecount)
for (int i = 0; i < nx; ++i)
for (int j = 0; j < ny; ++j)
for (int k = 0; k < nz; ++k)
{
// determine the coarse cell index
int iic, jjc, kkc;
iic = (int)(dx * (double)(oxf + i) - oxc);
jjc = (int)(dx * (double)(oyf + j) - oyc);
kkc = (int)(dx * (double)(ozf + k) - ozc);
double xc, yc, zc, xf, yf, zf;
xc = iic + 0.5;
yc = jjc + 0.5;
zc = kkc + 0.5;
xf = (dx * (double)(oxf + i + Nl) - oxc) - 0.5 * dx + 0.5;
yf = (dx * (double)(oyf + j + Nl) - oyc) - 0.5 * dx + 0.5;
zf = (dx * (double)(ozf + k + Nl) - ozc) - 0.5 * dx + 0.5;
double val = interp_cubic(xf - xc, yf - yc, zf - zc, V, iic, jjc, kkc);
v(i, j, k) += val;
finemean += val;
finecount++;
coarsemean += V(iic, jjc, kkc);
coarsecount++;
}
coarsemean /= coarsecount;
finemean /= finecount;
/*double dmean = coarsemean-finemean;
//... subtract the mean difference caused by interpolation
#pragma omp parallel for
for( int i=0; i<nx; ++i )
for( int j=0; j<ny; ++j )
for( int k=0; k<nz; ++k )
v(i,j,k) += dmean;
*/
}
};
//! injection operator based on Catmull-Rom splines
class mg_cubic
{
protected:
template <typename real_t>
inline double CUBE(real_t x) const
{
return x * x * x;
}
template <typename T>
inline T SQR(T x) const
{
return x * x;
}
public:
template <int sx, int sy, int sz, typename M>
inline double interp_cubic(int x, int y, int z, M &V, double s = 1.0) const
{
int i, j, k;
// double dx, dy, dz;
double u[4], v[4], w[4];
double r[4], q[4];
double vox = 0;
// dx = 0.5*((double)sz - 0.5)*s;
// dy = 0.5*((double)sy - 0.5)*s;
// dz = 0.5*((double)sx - 0.5)*s;
if (sz == 1)
{
u[0] = -4.5 / 64.0;
u[1] = 55.5 / 64.0;
u[2] = 14.5 / 64.0;
u[3] = -1.5 / 64.0;
}
else
{
u[0] = -1.5 / 64.0;
u[1] = 14.5 / 64.0;
u[2] = 55.5 / 64.0;
u[3] = -4.5 / 64.0;
}
if (sy == 1)
{
v[0] = -4.5 / 64.0;
v[1] = 55.5 / 64.0;
v[2] = 14.5 / 64.0;
v[3] = -1.5 / 64.0;
}
else
{
v[0] = -1.5 / 64.0;
v[1] = 14.5 / 64.0;
v[2] = 55.5 / 64.0;
v[3] = -4.5 / 64.0;
}
if (sx == 1)
{
w[0] = -4.5 / 64.0;
w[1] = 55.5 / 64.0;
w[2] = 14.5 / 64.0;
w[3] = -1.5 / 64.0;
}
else
{
w[0] = -1.5 / 64.0;
w[1] = 14.5 / 64.0;
w[2] = 55.5 / 64.0;
w[3] = -4.5 / 64.0;
}
for (k = 0; k < 4; k++)
{
q[k] = 0;
for (j = 0; j < 4; j++)
{
r[j] = 0;
for (i = 0; i < 4; i++)
{
r[j] += u[i] * V(x + k - 2 + sx, y + j - 2 + sy, z + i - 2 + sz);
}
q[k] += v[j] * r[j];
}
vox += w[k] * q[k];
}
return vox;
}
public:
//... restricts v to V
template <typename m1, typename m2>
inline void restrict(m1 &v, m2 &V)
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
V(ox + i, oy + j, oz + k) = 0.125 * (interp_cubic<1, 0, 0>(i2, j2, k2, v) + interp_cubic<0, 1, 0>(i2, j2, k2, v) + interp_cubic<0, 0, 1>(i2, j2, k2, v) + interp_cubic<1, 1, 0>(i2, j2, k2, v) + interp_cubic<0, 1, 1>(i2, j2, k2, v) + interp_cubic<1, 0, 1>(i2, j2, k2, v) + interp_cubic<1, 1, 1>(i2, j2, k2, v) + interp_cubic<0, 0, 0>(i2, j2, k2, v));
}
}
//... restricts v to V
template <typename m1, typename m2>
inline void restrict_add(m1 &v, m2 &V)
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
V(ox + i, oy + j, oz + k) += 0.125 * (interp_cubic<1, 0, 0>(i2 + 1, j2, k2, v) + interp_cubic<0, 1, 0>(i2, j2 + 1, k2, v) + interp_cubic<0, 0, 1>(i2, j2, k2 + 1, v) + interp_cubic<1, 1, 0>(i2 + 1, j2 + 1, k2, v) + interp_cubic<0, 1, 1>(i2, j2 + 1, k2 + 1, v) + interp_cubic<1, 0, 1>(i2 + 1, j2, k2 + 1, v) + interp_cubic<1, 1, 1>(i2 + 1, j2 + 1, k2 + 1, v) + interp_cubic<0, 0, 0>(i2, j2, k2, v));
}
}
//.. straight restriction on boundary
template <typename m1, typename m2>
inline void restrict_bnd(const m1 &v, m2 &V) const
{
unsigned
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(ox - 1, j + oy, k + oz) = 0.125 * (v(-1, j2, k2) + v(-1, j2 + 1, k2) + v(-1, j2, k2 + 1) + v(-1, j2 + 1, k2 + 1) +
v(-2, j2, k2) + v(-2, j2 + 1, k2) + v(-2, j2, k2 + 1) + v(-2, j2 + 1, k2 + 1));
V(ox + nx, j + oy, k + oz) = 0.125 * (v(2 * nx, j2, k2) + v(2 * nx, j2 + 1, k2) + v(2 * nx, j2, k2 + 1) + v(2 * nx, j2 + 1, k2 + 1) +
v(2 * nx + 1, j2, k2) + v(2 * nx + 1, j2 + 1, k2) + v(2 * nx + 1, j2, k2 + 1) + v(2 * nx + 1, j2 + 1, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(i + ox, oy - 1, k + oz) = 0.125 * (v(i2, -1, k2) + v(i2 + 1, -1, k2) + v(i2, -1, k2 + 1) + v(i2 + 1, -1, k2 + 1) +
v(i2, -2, k2) + v(i2 + 1, -2, k2) + v(i2, -2, k2 + 1) + v(i2 + 1, -2, k2 + 1));
V(i + ox, oy + ny, k + oz) = 0.125 * (v(i2, 2 * ny, k2) + v(i2 + 1, 2 * ny, k2) + v(i2, 2 * ny, k2 + 1) + v(i2 + 1, 2 * ny, k2 + 1) +
v(i2, 2 * ny + 1, k2) + v(i2 + 1, 2 * ny + 1, k2) + v(i2, 2 * ny + 1, k2 + 1) + v(i2 + 1, 2 * ny + 1, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
V(i + ox, j + oy, oz - 1) = 0.125 * (v(i2, j2, -1) + v(i2 + 1, j2, -1) + v(i2, j2 + 1, -1) + v(i2 + 1, j2 + 1, -1) +
v(i2, j2, -2) + v(i2 + 1, j2, -2) + v(i2, j2 + 1, -2) + v(i2 + 1, j2 + 1, -2));
V(i + ox, j + oy, oz + nz) = 0.125 * (v(i2, j2, 2 * nz) + v(i2 + 1, j2, 2 * nz) + v(i2, j2 + 1, 2 * nz) + v(i2 + 1, j2 + 1, 2 * nz) +
v(i2, j2, 2 * nz + 1) + v(i2 + 1, j2, 2 * nz + 1) + v(i2, j2 + 1, 2 * nz + 1) + v(i2 + 1, j2 + 1, 2 * nz + 1));
}
}
template <typename m1, typename m2>
inline void restrict_bnd_add(const m1 &v, m2 &V) const
{
unsigned
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(ox - 1, j + oy, k + oz) += 0.125 * (v(-1, j2, k2) + v(-1, j2 + 1, k2) + v(-1, j2, k2 + 1) + v(-1, j2 + 1, k2 + 1) +
v(-2, j2, k2) + v(-2, j2 + 1, k2) + v(-2, j2, k2 + 1) + v(-2, j2 + 1, k2 + 1));
V(ox + nx, j + oy, k + oz) += 0.125 * (v(2 * nx, j2, k2) + v(2 * nx, j2 + 1, k2) + v(2 * nx, j2, k2 + 1) + v(2 * nx, j2 + 1, k2 + 1) +
v(2 * nx + 1, j2, k2) + v(2 * nx + 1, j2 + 1, k2) + v(2 * nx + 1, j2, k2 + 1) + v(2 * nx + 1, j2 + 1, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(i + ox, oy - 1, k + oz) += 0.125 * (v(i2, -1, k2) + v(i2 + 1, -1, k2) + v(i2, -1, k2 + 1) + v(i2 + 1, -1, k2 + 1) +
v(i2, -2, k2) + v(i2 + 1, -2, k2) + v(i2, -2, k2 + 1) + v(i2 + 1, -2, k2 + 1));
V(i + ox, oy + ny, k + oz) += 0.125 * (v(i2, 2 * ny, k2) + v(i2 + 1, 2 * ny, k2) + v(i2, 2 * ny, k2 + 1) + v(i2 + 1, 2 * ny, k2 + 1) +
v(i2, 2 * ny + 1, k2) + v(i2 + 1, 2 * ny + 1, k2) + v(i2, 2 * ny + 1, k2 + 1) + v(i2 + 1, 2 * ny + 1, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
V(i + ox, j + oy, oz - 1) += 0.125 * (v(i2, j2, -1) + v(i2 + 1, j2, -1) + v(i2, j2 + 1, -1) + v(i2 + 1, j2 + 1, -1) +
v(i2, j2, -2) + v(i2 + 1, j2, -2) + v(i2, j2 + 1, -2) + v(i2 + 1, j2 + 1, -2));
V(i + ox, j + oy, oz + nz) += 0.125 * (v(i2, j2, 2 * nz) + v(i2 + 1, j2, 2 * nz) + v(i2, j2 + 1, 2 * nz) + v(i2 + 1, j2 + 1, 2 * nz) +
v(i2, j2, 2 * nz + 1) + v(i2 + 1, j2, 2 * nz + 1) + v(i2, j2 + 1, 2 * nz + 1) + v(i2 + 1, j2 + 1, 2 * nz + 1));
}
}
template <typename m1, typename m2>
inline void prolong(m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2(2 * i);
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
double sum = 0.0;
sum += (v(i2 + 1, j2, k2) = interp_cubic<1, 0, 0>(i + ox, j + oy, k + oz, V));
sum += (v(i2, j2 + 1, k2) = interp_cubic<0, 1, 0>(i + ox, j + oy, k + oz, V));
sum += (v(i2, j2, k2 + 1) = interp_cubic<0, 0, 1>(i + ox, j + oy, k + oz, V));
sum += (v(i2 + 1, j2 + 1, k2) = interp_cubic<1, 1, 0>(i + ox, j + oy, k + oz, V));
sum += (v(i2 + 1, j2, k2 + 1) = interp_cubic<1, 0, 1>(i + ox, j + oy, k + oz, V));
sum += (v(i2 + 1, j2 + 1, k2 + 1) = interp_cubic<1, 1, 1>(i + ox, j + oy, k + oz, V));
sum += (v(i2, j2 + 1, k2 + 1) = interp_cubic<0, 1, 1>(i + ox, j + oy, k + oz, V));
sum += (v(i2, j2, k2) = interp_cubic<0, 0, 0>(i + ox, j + oy, k + oz, V));
sum *= 0.125;
double dd = V(i + ox, j + oy, k + oz) - sum;
v(i2 + 1, j2, k2) += dd;
v(i2, j2 + 1, k2) += dd;
v(i2, j2, k2 + 1) += dd;
v(i2 + 1, j2 + 1, k2) += dd;
v(i2 + 1, j2, k2 + 1) += dd;
v(i2 + 1, j2 + 1, k2 + 1) += dd;
v(i2, j2 + 1, k2 + 1) += dd;
v(i2, j2, k2) += dd;
}
}
}
template <typename m1, typename m2>
inline void prolong_add(m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2(2 * i);
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
double val, sum = 0.0;
v(i2 + 1, j2, k2) += (val = interp_cubic<1, 0, 0>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2, j2 + 1, k2) += (val = interp_cubic<0, 1, 0>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2, j2, k2 + 1) += (val = interp_cubic<0, 0, 1>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2 + 1, j2 + 1, k2) += (val = interp_cubic<1, 1, 0>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2 + 1, j2, k2 + 1) += (val = interp_cubic<1, 0, 1>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2 + 1, j2 + 1, k2 + 1) += (val = interp_cubic<1, 1, 1>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2, j2 + 1, k2 + 1) += (val = interp_cubic<0, 1, 1>(i + ox, j + oy, k + oz, V));
sum += val;
v(i2, j2, k2) += (val = interp_cubic<0, 0, 0>(i + ox, j + oy, k + oz, V));
sum += val;
sum *= 0.125;
double dd = V(i + ox, j + oy, k + oz) - sum;
v(i2 + 1, j2, k2) += dd;
v(i2, j2 + 1, k2) += dd;
v(i2, j2, k2 + 1) += dd;
v(i2 + 1, j2 + 1, k2) += dd;
v(i2 + 1, j2, k2 + 1) += dd;
v(i2 + 1, j2 + 1, k2 + 1) += dd;
v(i2, j2 + 1, k2 + 1) += dd;
v(i2, j2, k2) += dd;
}
}
}
template <typename m1, typename m2>
inline void prolong_bnd(m1 &V, m2 &v)
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
int nbnd = V.m_nbnd;
int nbndtop = nbnd / 2;
#pragma omp parallel for
for (int i = -nbndtop; i < nx + nbndtop; ++i)
{
int i2(2 * i);
for (int j = -nbndtop, j2 = -2 * nbndtop; j < ny + nbndtop; ++j, j2 += 2)
for (int k = -nbndtop, k2 = -2 * nbndtop; k < nz + nbndtop; ++k, k2 += 2)
{
if (i >= 0 && i < nx && j >= 0 && j < ny && k >= 0 && k < nz)
continue;
v(i2 + 1, j2, k2) = interp_cubic<1, 0, 0>(i + ox, j + oy, k + oz, V);
v(i2, j2 + 1, k2) = interp_cubic<0, 1, 0>(i + ox, j + oy, k + oz, V);
v(i2, j2, k2 + 1) = interp_cubic<0, 0, 1>(i + ox, j + oy, k + oz, V);
v(i2 + 1, j2 + 1, k2) = interp_cubic<1, 1, 0>(i + ox, j + oy, k + oz, V);
v(i2 + 1, j2, k2 + 1) = interp_cubic<1, 0, 1>(i + ox, j + oy, k + oz, V);
v(i2 + 1, j2 + 1, k2 + 1) = interp_cubic<1, 1, 1>(i + ox, j + oy, k + oz, V);
v(i2, j2 + 1, k2 + 1) = interp_cubic<0, 1, 1>(i + ox, j + oy, k + oz, V);
v(i2, j2, k2) = interp_cubic<0, 0, 0>(i + ox, j + oy, k + oz, V);
}
}
}
template <typename m1, typename m2>
inline void prolong_add_bnd(m1 &V, m2 &v)
{
unsigned
nx = V.size(0),
ny = V.size(1),
nz = V.size(2);
#pragma omp parallel for
for (int i = -1; i <= nx; ++i)
{
int i2(2 * i);
for (int j = -1, j2 = -2; j <= ny; ++j, j2 += 2)
for (int k = -1, k2 = -2; k <= nz; ++k, k2 += 2)
{
if (i >= 0 && i < nx && j >= 0 && j < ny && k >= 0 && k < nz)
continue;
v(i2 + 1, j2, k2) += interp_cubic<1, 0, 0>(i, j, k, V);
v(i2, j2 + 1, k2) += interp_cubic<0, 1, 0>(i, j, k, V);
v(i2, j2, k2 + 1) += interp_cubic<0, 0, 1>(i, j, k, V);
v(i2 + 1, j2 + 1, k2) += interp_cubic<1, 1, 0>(i, j, k, V);
v(i2 + 1, j2, k2 + 1) += interp_cubic<1, 0, 1>(i, j, k, V);
v(i2 + 1, j2 + 1, k2 + 1) += interp_cubic<1, 1, 1>(i, j, k, V);
v(i2, j2 + 1, k2 + 1) += interp_cubic<0, 1, 1>(i, j, k, V);
v(i2, j2, k2) += interp_cubic<0, 0, 0>(i, j, k, V);
}
}
}
};
/*
class mg_lin
{
public:
template< typename M >
inline double restr_lin( int x, int y, int z, M& V ) const
{
double w[4] = { 1.0/4.0, 3.0/4.0, 3.0/4.0, 1.0/4.0 };
double vox = 0.0;
int i,j,k;
for( i=0; i<4; ++i )
for( j=0; j<4; ++j )
for( k=0; k<4; ++k )
vox += w[i]*w[j]*w[k] * V(x+i-1,y+j-1,z+k-1);
return vox;
}
template< int sx, int sy, int sz, typename M >
inline double interp_lin( int x, int y, int z, M& V, double s=1.0 ) const
{
double u[2], v[2], w[2];
double vox = 0;
int i,j,k;
if( sx==0 ){
u[0] = 1.0/4.0;
u[1] = 3.0/4.0;
}else{
u[0] = 3.0/4.0;
u[1] = 1.0/4.0;
}
if( sy==0 ){
v[0] = 1.0/4.0;
v[1] = 3.0/4.0;
}else{
v[0] = 3.0/4.0;
v[1] = 1.0/4.0;
}
if( sz==0 ){
w[0] = 1.0/4.0;
w[1] = 3.0/4.0;
}else{
w[0] = 3.0/4.0;
w[1] = 1.0/4.0;
}
for( i=0; i<2; ++i )
for( j=0; j<2; ++j )
for( k=0; k<2; ++k )
vox += u[i]*v[j]*w[k] * V(x+i+sx-1,y+j+sy-1,z+k+sz-1);
return vox;
}
template< typename m1, typename m2 >
inline void prolong( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2(2*i);
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) = interp_lin<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) = interp_lin<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) = interp_lin<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) = interp_lin<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) = interp_lin<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) = interp_lin<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) = interp_lin<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) = interp_lin<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void prolong_add( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2(2*i);
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) += interp_lin<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) += interp_lin<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) += interp_lin<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) += interp_lin<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) += interp_lin<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) += interp_lin<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) += interp_lin<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) += interp_lin<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void restrict( const m1& v, m2& V ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) = 0.125 * restr_lin(i2, j2, k2, v);
}
}
};
*/
//! linear grid injection/restriction operator
// template< typename T >
class mg_linear
{
public:
// typedef T real_t;
// inline void prolong_bnd( const MeshvarBnd<real_t>& V, MeshvarBnd<real_t>& v ) const
template <typename m1, typename m2>
inline void prolong_bnd(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
{
for (int q = 0; q < 2; ++q)
{
int i = nx;
if (q == 0)
i = -1;
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
{
for (int q = 0; q < 2; ++q)
{
int j = ny;
if (q == 0)
j = -1;
int j2 = 2 * j;
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
{
for (int q = 0; q < 2; ++q)
{
int k = nz;
if (q == 0)
k = -1;
int k2 = 2 * k;
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
#pragma omp barrier
}
}
template <typename m1, typename m2>
inline void prolong_add_bnd(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
{
for (int q = 0; q < 2; ++q)
{
int i = nx;
if (q == 0)
i = -1;
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
{
for (int q = 0; q < 2; ++q)
{
int j = ny;
if (q == 0)
j = -1;
int j2 = 2 * j;
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
{
for (int q = 0; q < 2; ++q)
{
int k = nz;
if (q == 0)
k = -1;
int k2 = 2 * k;
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
#pragma omp barrier
}
}
template <typename m1, typename m2>
inline void prolong(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) = (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
template <typename m1, typename m2>
inline void prolong_add(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
int ii = i + ox, jj = j + oy, kk = k + oz;
v(i2, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii - 1, jj - 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj - 1, kk - 1)) + V(ii + 1, jj - 1, kk - 1)) / 64.;
v(i2, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii - 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii - 1, jj + 1, kk - 1)) / 64.;
v(i2, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj - 1, kk) + V(ii - 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii - 1, jj - 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk - 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk - 1) + V(ii, jj + 1, kk - 1)) + V(ii + 1, jj + 1, kk - 1)) / 64.;
v(i2 + 1, j2, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj - 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj - 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj - 1, kk + 1)) + V(ii + 1, jj - 1, kk + 1)) / 64.;
v(i2, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii - 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii - 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii - 1, jj + 1, kk + 1)) / 64.;
v(i2 + 1, j2 + 1, k2 + 1) += (27. * V(ii, jj, kk) + 9. * (V(ii + 1, jj, kk) + V(ii, jj + 1, kk) + V(ii, jj, kk + 1)) + 3. * (V(ii + 1, jj + 1, kk) + V(ii + 1, jj, kk + 1) + V(ii, jj + 1, kk + 1)) + V(ii + 1, jj + 1, kk + 1)) / 64.;
}
}
}
};
//! zero order grid injection/restriction operator
class mg_straight
{
public:
template <typename m1, typename m2>
inline void restrict_bnd(const m1 &v, m2 &V) const
{
unsigned
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(ox - 1, j + oy, k + oz) = 0.25 * (v(-1, j2, k2) + v(-1, j2 + 1, k2) + v(-1, j2, k2 + 1) + v(-1, j2 + 1, k2 + 1));
V(ox + nx, j + oy, k + oz) = 0.25 * (v(2 * nx, j2, k2) + v(2 * nx, j2 + 1, k2) + v(2 * nx, j2, k2 + 1) + v(2 * nx, j2 + 1, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(i + ox, oy - 1, k + oz) = 0.25 * (v(i2, -1, k2) + v(i2 + 1, -1, k2) + v(i2, -1, k2 + 1) + v(i2 + 1, -1, k2 + 1));
V(i + ox, oy + ny, k + oz) = 0.25 * (v(i2, 2 * ny, k2) + v(i2 + 1, 2 * ny, k2) + v(i2, 2 * ny, k2 + 1) + v(i2 + 1, 2 * ny, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
V(i + ox, j + oy, oz - 1) = 0.25 * (v(i2, j2, -1) + v(i2 + 1, j2, -1) + v(i2, j2 + 1, -1) + v(i2 + 1, j2 + 1, -1));
V(i + ox, j + oy, oz + nz) = 0.25 * (v(i2, j2, 2 * nz) + v(i2 + 1, j2, 2 * nz) + v(i2, j2 + 1, 2 * nz) + v(i2 + 1, j2 + 1, 2 * nz));
}
}
//... restricts v to V
template <typename m1, typename m2>
inline void restrict(const m1 &v, m2 &V) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
V(i + ox, j + oy, k + oz) = 0.125 * (v(i2 + 1, j2, k2) + v(i2, j2 + 1, k2) + v(i2, j2, k2 + 1) + v(i2 + 1, j2 + 1, k2) +
v(i2 + 1, j2, k2 + 1) + v(i2 + 1, j2 + 1, k2 + 1) + v(i2, j2 + 1, k2 + 1) + v(i2, j2, k2));
}
}
template <typename m1, typename m2>
inline void restrict_add(const m1 &v, m2 &V) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
V(i + ox, j + oy, k + oz) += 0.125 * (v(i2 + 1, j2, k2) + v(i2, j2 + 1, k2) + v(i2, j2, k2 + 1) + v(i2 + 1, j2 + 1, k2) +
v(i2 + 1, j2, k2 + 1) + v(i2 + 1, j2 + 1, k2 + 1) + v(i2, j2 + 1, k2 + 1) + v(i2, j2, k2));
}
}
template <typename m1, typename m2>
inline void restrict_bnd_add(const m1 &v, m2 &V) const
{
unsigned
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(ox - 1, j + oy, k + oz) += 0.25 * (v(-1, j2, k2) + v(-1, j2 + 1, k2) + v(-1, j2, k2 + 1) + v(-1, j2 + 1, k2 + 1));
V(ox + nx, j + oy, k + oz) += 0.25 * (v(2 * nx, j2, k2) + v(2 * nx, j2 + 1, k2) + v(2 * nx, j2, k2 + 1) + v(2 * nx, j2 + 1, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
V(i + ox, oy - 1, k + oz) += 0.25 * (v(i2, -1, k2) + v(i2 + 1, -1, k2) + v(i2, -1, k2 + 1) + v(i2 + 1, -1, k2 + 1));
V(i + ox, oy + ny, k + oz) += 0.25 * (v(i2, 2 * ny, k2) + v(i2 + 1, 2 * ny, k2) + v(i2, 2 * ny, k2 + 1) + v(i2 + 1, 2 * ny, k2 + 1));
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
V(i + ox, j + oy, oz - 1) += 0.25 * (v(i2, j2, -1) + v(i2 + 1, j2, -1) + v(i2, j2 + 1, -1) + v(i2 + 1, j2 + 1, -1));
V(i + ox, j + oy, oz + nz) += 0.25 * (v(i2, j2, 2 * nz) + v(i2 + 1, j2, 2 * nz) + v(i2, j2 + 1, 2 * nz) + v(i2 + 1, j2 + 1, 2 * nz));
}
}
//... prolongs V to v
template <typename m1, typename m2>
inline void prolong(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
#pragma omp for nowait
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
v(i2 + 1, j2, k2) =
v(i2, j2 + 1, k2) =
v(i2, j2, k2 + 1) =
v(i2 + 1, j2 + 1, k2) =
v(i2 + 1, j2, k2 + 1) =
v(i2 + 1, j2 + 1, k2 + 1) =
v(i2, j2 + 1, k2 + 1) =
v(i2, j2, k2) = V(i + ox, j + oy, k + oz);
}
}
#pragma omp barrier
}
}
//... prolongs V to v on grid boundary cells
template <typename m1, typename m2>
inline void prolong_bnd(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
v(-1, j2, k2) =
v(-1, j2 + 1, k2) =
v(-1, j2, k2 + 1) =
v(-1, j2 + 1, k2 + 1) = V(ox - 1, j + oy, k + oz);
v(2 * nx, j2, k2) =
v(2 * nx, j2 + 1, k2) =
v(2 * nx, j2, k2 + 1) =
v(2 * nx, j2 + 1, k2 + 1) = V(ox + nx, j + oy, k + oz);
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
v(i2, -1, k2) =
v(i2 + 1, -1, k2) =
v(i2, -1, k2 + 1) =
v(i2 + 1, -1, k2 + 1) = V(i + ox, oy - 1, k + oz);
v(i2, 2 * ny, k2) =
v(i2 + 1, 2 * ny, k2) =
v(i2, 2 * ny, k2 + 1) =
v(i2 + 1, 2 * ny, k2 + 1) = V(i + ox, oy + ny, k + oz);
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
v(i2, j2, -1) =
v(i2 + 1, j2, -1) =
v(i2, j2 + 1, -1) =
v(i2 + 1, j2 + 1, -1) = V(i + ox, j + oy, oz - 1);
v(i2, j2, 2 * nz) =
v(i2 + 1, j2, 2 * nz) =
v(i2, j2 + 1, 2 * nz) =
v(i2 + 1, j2 + 1, 2 * nz) = V(i + ox, j + oy, oz + nz);
}
#pragma omp barrier
}
}
//... prolongs V to v
template <typename m1, typename m2>
inline void prolong_add_bnd(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
v(-1, j2, k2) += V(ox - 1, j + oy, k + oz);
v(-1, j2 + 1, k2) += V(ox - 1, j + oy, k + oz);
v(-1, j2, k2 + 1) += V(ox - 1, j + oy, k + oz);
v(-1, j2 + 1, k2 + 1) += V(ox - 1, j + oy, k + oz);
v(2 * nx, j2, k2) += V(ox + nx, j + oy, k + oz);
v(2 * nx, j2 + 1, k2) += V(ox + nx, j + oy, k + oz);
v(2 * nx, j2, k2 + 1) += V(ox + nx, j + oy, k + oz);
v(2 * nx, j2 + 1, k2 + 1) += V(ox + nx, j + oy, k + oz);
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
v(i2, -1, k2) += V(i + ox, oy - 1, k + oz);
v(i2 + 1, -1, k2) += V(i + ox, oy - 1, k + oz);
v(i2, -1, k2 + 1) += V(i + ox, oy - 1, k + oz);
v(i2 + 1, -1, k2 + 1) += V(i + ox, oy - 1, k + oz);
v(i2, 2 * ny, k2) += V(i + ox, oy + ny, k + oz);
v(i2 + 1, 2 * ny, k2) += V(i + ox, oy + ny, k + oz);
v(i2, 2 * ny, k2 + 1) += V(i + ox, oy + ny, k + oz);
v(i2 + 1, 2 * ny, k2 + 1) += V(i + ox, oy + ny, k + oz);
}
for (int i = 0, i2 = 0; i < nx; ++i, i2 += 2)
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
{
v(i2, j2, -1) += V(i + ox, j + oy, oz - 1);
v(i2 + 1, j2, -1) += V(i + ox, j + oy, oz - 1);
v(i2, j2 + 1, -1) += V(i + ox, j + oy, oz - 1);
v(i2 + 1, j2 + 1, -1) += V(i + ox, j + oy, oz - 1);
v(i2, j2, 2 * nz) += V(i + ox, j + oy, oz + nz);
v(i2 + 1, j2, 2 * nz) += V(i + ox, j + oy, oz + nz);
v(i2, j2 + 1, 2 * nz) += V(i + ox, j + oy, oz + nz);
v(i2 + 1, j2 + 1, 2 * nz) += V(i + ox, j + oy, oz + nz);
}
#pragma omp barrier
}
}
//! prolongs V and adds it to v
template <typename m1, typename m2>
inline void prolong_add(const m1 &V, m2 &v) const
{
int
nx = v.size(0) / 2,
ny = v.size(1) / 2,
nz = v.size(2) / 2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for (int i = 0; i < nx; ++i)
{
int i2 = 2 * i;
for (int j = 0, j2 = 0; j < ny; ++j, j2 += 2)
for (int k = 0, k2 = 0; k < nz; ++k, k2 += 2)
{
v(i2 + 1, j2, k2) += V(i + ox, j + oy, k + oz);
v(i2, j2 + 1, k2) += V(i + ox, j + oy, k + oz);
v(i2, j2, k2 + 1) += V(i + ox, j + oy, k + oz);
v(i2 + 1, j2 + 1, k2) += V(i + ox, j + oy, k + oz);
v(i2 + 1, j2, k2 + 1) += V(i + ox, j + oy, k + oz);
v(i2 + 1, j2 + 1, k2 + 1) += V(i + ox, j + oy, k + oz);
v(i2, j2 + 1, k2 + 1) += V(i + ox, j + oy, k + oz);
v(i2, j2, k2) += V(i + ox, j + oy, k + oz);
}
}
}
};
#endif //__MG_OPERATORS_HH