1
0
Fork 0
mirror of https://github.com/cosmo-sims/MUSIC.git synced 2024-09-19 17:03:46 +02:00
MUSIC/mg_operators.hh
Oliver Hahn 002763346f major mod: added subgrid deconvolution for DM
* The DM transfer function is now deconvolved with the cell assignment
  function in order to restore power near the Nyquist wavenumber.
* Added an option to change the random number sample cube size
  [random]/cubesize. This should be set >128 to generate un-
  correlated numbers. True fix pending.
* Removed some unused code
* Increased Buffer size for Gadget-2 output plug-in
2010-07-21 01:10:29 -07:00

1119 lines
35 KiB
C++

/*
mg_operators.hh - This file is part of MUSIC -
a code to generate multi-scale initial conditions
for cosmological simulations
Copyright (C) 2010 Oliver Hahn
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef __MG_OPERATORS_HH
#define __MG_OPERATORS_HH
class mg_cubic
{
protected:
template< typename real_t >
inline double CUBE( real_t x ) const
{ return x*x*x; }
template< typename T >
inline T SQR( T x ) const
{ return x*x; }
public:
template< int sx, int sy, int sz, typename M >
inline double interp_cubic( int x, int y, int z, M& V, double s=1.0 ) const
{
int i, j, k;
double dx, dy, dz;
double u[4], v[4], w[4];
double r[4], q[4];
double vox = 0;
dx = 0.5*((double)sz - 0.5)*s;
dy = 0.5*((double)sy - 0.5)*s;
dz = 0.5*((double)sx - 0.5)*s;
if( sz == 1 )
{
u[0] = -4.5/64.0;
u[1] = 55.5/64.0;
u[2] = 14.5/64.0;
u[3] = -1.5/64.0;
} else {
u[0] = -1.5/64.0;
u[1] = 14.5/64.0;
u[2] = 55.5/64.0;
u[3] = -4.5/64.0;
}
if( sy == 1 )
{
v[0] = -4.5/64.0;
v[1] = 55.5/64.0;
v[2] = 14.5/64.0;
v[3] = -1.5/64.0;
} else{
v[0] = -1.5/64.0;
v[1] = 14.5/64.0;
v[2] = 55.5/64.0;
v[3] = -4.5/64.0;
}
if( sx == 1 )
{
w[0] = -4.5/64.0;
w[1] = 55.5/64.0;
w[2] = 14.5/64.0;
w[3] = -1.5/64.0;
} else {
w[0] = -1.5/64.0;
w[1] = 14.5/64.0;
w[2] = 55.5/64.0;
w[3] = -4.5/64.0;
}
for (k = 0; k < 4; k++)
{
q[k] = 0;
for (j = 0; j < 4; j++)
{
r[j] = 0;
for (i = 0; i < 4; i++)
{
r[j] += u[i] * V(x+k-2+sx,y+j-2+sy,z+i-2+sz);
}
q[k] += v[j] * r[j];
}
vox += w[k] * q[k];
}
return vox;
}
public:
//... restricts v to V
template< typename m1, typename m2 >
inline void restrict( m1& v, m2& V )
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(ox+i,oy+j,oz+k) = 0.125 * ( interp_cubic<1,0,0>(i2,j2,k2,v)
+interp_cubic<0,1,0>(i2,j2,k2,v)
+interp_cubic<0,0,1>(i2,j2,k2,v)
+interp_cubic<1,1,0>(i2,j2,k2,v)
+interp_cubic<0,1,1>(i2,j2,k2,v)
+interp_cubic<1,0,1>(i2,j2,k2,v)
+interp_cubic<1,1,1>(i2,j2,k2,v)
+interp_cubic<0,0,0>(i2,j2,k2,v));
}
}
//... restricts v to V
template< typename m1, typename m2 >
inline void restrict_add( m1& v, m2& V )
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(ox+i,oy+j,oz+k) += 0.125 * ( interp_cubic<1,0,0>(i2+1,j2,k2,v)
+interp_cubic<0,1,0>(i2,j2+1,k2,v)
+interp_cubic<0,0,1>(i2,j2,k2+1,v)
+interp_cubic<1,1,0>(i2+1,j2+1,k2,v)
+interp_cubic<0,1,1>(i2,j2+1,k2+1,v)
+interp_cubic<1,0,1>(i2+1,j2,k2+1,v)
+interp_cubic<1,1,1>(i2+1,j2+1,k2+1,v)
+interp_cubic<0,0,0>(i2,j2,k2,v));
}
}
//.. straight restriction on boundary
template< typename m1, typename m2 >
inline void restrict_bnd( const m1& v, m2& V ) const
{
unsigned
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(ox-1,j+oy,k+oz) = 0.125*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1)+
v(-2,j2,k2)+v(-2,j2+1,k2)+v(-2,j2,k2+1)+v(-2,j2+1,k2+1));
V(ox+nx,j+oy,k+oz) = 0.125*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1)+
v(2*nx+1,j2,k2)+v(2*nx+1,j2+1,k2)+v(2*nx+1,j2,k2+1)+v(2*nx+1,j2+1,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(i+ox,oy-1,k+oz) = 0.125*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1)+
v(i2,-2,k2)+v(i2+1,-2,k2)+v(i2,-2,k2+1)+v(i2+1,-2,k2+1) );
V(i+ox,oy+ny,k+oz) = 0.125*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1)+
v(i2,2*ny+1,k2)+v(i2+1,2*ny+1,k2)+v(i2,2*ny+1,k2+1)+v(i2+1,2*ny+1,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
V(i+ox,j+oy,oz-1) = 0.125*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1)+
v(i2,j2,-2)+v(i2+1,j2,-2)+v(i2,j2+1,-2)+v(i2+1,j2+1,-2));
V(i+ox,j+oy,oz+nz) = 0.125*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz)+
v(i2,j2,2*nz+1)+v(i2+1,j2,2*nz+1)+v(i2,j2+1,2*nz+1)+v(i2+1,j2+1,2*nz+1));
}
}
template< typename m1, typename m2 >
inline void restrict_bnd_add( const m1& v, m2& V ) const
{
unsigned
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(ox-1,j+oy,k+oz) += 0.125*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1)+
v(-2,j2,k2)+v(-2,j2+1,k2)+v(-2,j2,k2+1)+v(-2,j2+1,k2+1));
V(ox+nx,j+oy,k+oz) += 0.125*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1)+
v(2*nx+1,j2,k2)+v(2*nx+1,j2+1,k2)+v(2*nx+1,j2,k2+1)+v(2*nx+1,j2+1,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(i+ox,oy-1,k+oz) += 0.125*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1)+
v(i2,-2,k2)+v(i2+1,-2,k2)+v(i2,-2,k2+1)+v(i2+1,-2,k2+1) );
V(i+ox,oy+ny,k+oz) += 0.125*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1)+
v(i2,2*ny+1,k2)+v(i2+1,2*ny+1,k2)+v(i2,2*ny+1,k2+1)+v(i2+1,2*ny+1,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
V(i+ox,j+oy,oz-1) += 0.125*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1)+
v(i2,j2,-2)+v(i2+1,j2,-2)+v(i2,j2+1,-2)+v(i2+1,j2+1,-2));
V(i+ox,j+oy,oz+nz) += 0.125*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz)+
v(i2,j2,2*nz+1)+v(i2+1,j2,2*nz+1)+v(i2,j2+1,2*nz+1)+v(i2+1,j2+1,2*nz+1));
}
}
template< typename m1, typename m2 >
inline void prolong( m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2(2*i);
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) = interp_cubic<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) = interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) = interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) = interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) = interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) = interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) = interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) = interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void prolong_add( m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2(2*i);
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) += interp_cubic<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) += interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) += interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) += interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) += interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) += interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) += interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) += interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void prolong_bnd( m1& V, m2& v )
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
int nbnd = V.m_nbnd;
int nbndtop = nbnd/2;
#pragma omp parallel for
for( int i=-nbndtop; i<nx+nbndtop; ++i )
{
int i2(2*i);
for( int j=-nbndtop,j2=-2*nbndtop; j<ny+nbndtop; ++j,j2+=2 )
for( int k=-nbndtop,k2=-2*nbndtop; k<nz+nbndtop; ++k,k2+=2 )
{
if( i>=0&&i<nx&&j>=0&&j<ny&&k>=0&&k<nz )
continue;
v(i2+1,j2,k2) = interp_cubic<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) = interp_cubic<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) = interp_cubic<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) = interp_cubic<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) = interp_cubic<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) = interp_cubic<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) = interp_cubic<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) = interp_cubic<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void prolong_add_bnd( m1& V, m2& v )
{
unsigned
nx = V.size(0),
ny = V.size(1),
nz = V.size(2);
#pragma omp parallel for
for( int i=-1; i<=nx; ++i )
{
int i2(2*i);
for( int j=-1,j2=-2; j<=ny; ++j,j2+=2 )
for( int k=-1,k2=-2; k<=nz; ++k,k2+=2 )
{
if( i>=0&&i<nx&&j>=0&&j<ny&&k>=0&&k<nz )
continue;
v(i2+1,j2,k2) += interp_cubic<1,0,0>(i,j,k,V);
v(i2,j2+1,k2) += interp_cubic<0,1,0>(i,j,k,V);
v(i2,j2,k2+1) += interp_cubic<0,0,1>(i,j,k,V);
v(i2+1,j2+1,k2) += interp_cubic<1,1,0>(i,j,k,V);
v(i2+1,j2,k2+1) += interp_cubic<1,0,1>(i,j,k,V);
v(i2+1,j2+1,k2+1) += interp_cubic<1,1,1>(i,j,k,V);
v(i2,j2+1,k2+1) += interp_cubic<0,1,1>(i,j,k,V);
v(i2,j2,k2) += interp_cubic<0,0,0>(i,j,k,V);
}
}
}
};
class mg_lin
{
public:
template< typename M >
inline double restr_lin( int x, int y, int z, M& V ) const
{
double w[4] = { 1.0/4.0, 3.0/4.0, 3.0/4.0, 1.0/4.0 };
double vox = 0.0;
int i,j,k;
for( i=0; i<4; ++i )
for( j=0; j<4; ++j )
for( k=0; k<4; ++k )
vox += w[i]*w[j]*w[k] * V(x+i-1,y+j-1,z+k-1);
return vox;
}
template< int sx, int sy, int sz, typename M >
inline double interp_lin( int x, int y, int z, M& V, double s=1.0 ) const
{
double u[2], v[2], w[2];
double vox = 0;
int i,j,k;
if( sx==0 ){
u[0] = 1.0/4.0;
u[1] = 3.0/4.0;
}else{
u[0] = 3.0/4.0;
u[1] = 1.0/4.0;
}
if( sy==0 ){
v[0] = 1.0/4.0;
v[1] = 3.0/4.0;
}else{
v[0] = 3.0/4.0;
v[1] = 1.0/4.0;
}
if( sz==0 ){
w[0] = 1.0/4.0;
w[1] = 3.0/4.0;
}else{
w[0] = 3.0/4.0;
w[1] = 1.0/4.0;
}
for( i=0; i<2; ++i )
for( j=0; j<2; ++j )
for( k=0; k<2; ++k )
vox += u[i]*v[j]*w[k] * V(x+i+sx-1,y+j+sy-1,z+k+sz-1);
return vox;
}
template< typename m1, typename m2 >
inline void prolong( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2(2*i);
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) = interp_lin<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) = interp_lin<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) = interp_lin<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) = interp_lin<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) = interp_lin<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) = interp_lin<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) = interp_lin<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) = interp_lin<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void prolong_add( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2(2*i);
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) += interp_lin<1,0,0>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2) += interp_lin<0,1,0>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2+1) += interp_lin<0,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2) += interp_lin<1,1,0>(i+ox,j+oy,k+oz,V);
v(i2+1,j2,k2+1) += interp_lin<1,0,1>(i+ox,j+oy,k+oz,V);
v(i2+1,j2+1,k2+1) += interp_lin<1,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2+1,k2+1) += interp_lin<0,1,1>(i+ox,j+oy,k+oz,V);
v(i2,j2,k2) += interp_lin<0,0,0>(i+ox,j+oy,k+oz,V);
}
}
}
template< typename m1, typename m2 >
inline void restrict( const m1& v, m2& V ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) = 0.125 * restr_lin(i2, j2, k2, v);
}
}
};
//template< typename T >
class mg_linear
{
public:
//typedef T real_t;
//inline void prolong_bnd( const MeshvarBnd<real_t>& V, MeshvarBnd<real_t>& v ) const
template< typename m1, typename m2 >
inline void prolong_bnd( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
{
for(int q=0;q<2; ++q)
{
int i=nx;
if(q==0) i=-1;
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
{
for(int q=0;q<2; ++q)
{
int j=ny;
if(q==0) j=-1;
int j2 = 2*j;
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
{
for(int q=0;q<2; ++q)
{
int k = nz;
if(q==0) k=-1;
int k2 = 2*k;
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<nx; ++i,i2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
#pragma omp barrier
}
}
template< typename m1, typename m2 >
inline void prolong_add_bnd( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
{
for(int q=0;q<2; ++q)
{
int i=nx;
if(q==0) i=-1;
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
{
for(int q=0;q<2; ++q)
{
int j=ny;
if(q==0) j=-1;
int j2 = 2*j;
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
{
for(int q=0;q<2; ++q)
{
int k = nz;
if(q==0) k=-1;
int k2 = 2*k;
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<nx; ++i,i2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
#pragma omp barrier
}
}
template< typename m1, typename m2 >
inline void prolong( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i ){
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) = (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
template< typename m1, typename m2 >
inline void prolong_add( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i ){
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
int ii=i+ox,jj=j+oy,kk=k+oz;
v(i2,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii-1,jj-1,kk-1))/64.;
v(i2+1,j2,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk-1)+V(ii,jj-1,kk-1))+V(ii+1,jj-1,kk-1))/64.;
v(i2,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii-1,jj+1,kk)+V(ii-1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii-1,jj+1,kk-1))/64.;
v(i2,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj-1,kk)+V(ii-1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii-1,jj-1,kk+1))/64.;
v(i2+1,j2+1,k2) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk-1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk-1)+V(ii,jj+1,kk-1))+V(ii+1,jj+1,kk-1))/64.;
v(i2+1,j2,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj-1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj-1,kk)+V(ii+1,jj,kk+1)+V(ii,jj-1,kk+1))+V(ii+1,jj-1,kk+1))/64.;
v(i2,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii-1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii-1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii-1,jj+1,kk+1))/64.;
v(i2+1,j2+1,k2+1) += (27.*V(ii,jj,kk)+9.*(V(ii+1,jj,kk)+V(ii,jj+1,kk)+V(ii,jj,kk+1))+3.*(V(ii+1,jj+1,kk)+V(ii+1,jj,kk+1)+V(ii,jj+1,kk+1))+V(ii+1,jj+1,kk+1))/64.;
}
}
}
};
//template< typename T >
class mg_straight
{
public:
//typedef T real_t;
template< typename m1, typename m2 >
inline void restrict_bnd( const m1& v, m2& V ) const
{
unsigned
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(ox-1,j+oy,k+oz) = 0.25*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1));
V(ox+nx,j+oy,k+oz) = 0.25*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(i+ox,oy-1,k+oz) = 0.25*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1));
V(i+ox,oy+ny,k+oz) = 0.25*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
V(i+ox,j+oy,oz-1) = 0.25*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1));
V(i+ox,j+oy,oz+nz) = 0.25*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz));
}
}
//... restricts v to V
template< typename m1, typename m2 >
inline void restrict( const m1& v, m2& V ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) = 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
}
}
template< typename m1, typename m2 >
inline void restrict_add( const m1& v, m2& V ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i )
{
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
V(i+ox,j+oy,k+oz) += 0.125 * ( v(i2+1,j2,k2) + v(i2,j2+1,k2) + v(i2,j2,k2+1) + v(i2+1,j2+1,k2) +
v(i2+1,j2,k2+1) + v(i2+1,j2+1,k2+1) + v(i2,j2+1,k2+1) + v(i2,j2,k2) );
}
}
template< typename m1, typename m2 >
inline void restrict_bnd_add( const m1& v, m2& V ) const
{
unsigned
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
//... boundary points
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(ox-1,j+oy,k+oz) += 0.25*(v(-1,j2,k2)+v(-1,j2+1,k2)+v(-1,j2,k2+1)+v(-1,j2+1,k2+1));
V(ox+nx,j+oy,k+oz) += 0.25*(v(2*nx,j2,k2)+v(2*nx,j2+1,k2)+v(2*nx,j2,k2+1)+v(2*nx,j2+1,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
V(i+ox,oy-1,k+oz) += 0.25*(v(i2,-1,k2)+v(i2+1,-1,k2)+v(i2,-1,k2+1)+v(i2+1,-1,k2+1));
V(i+ox,oy+ny,k+oz) += 0.25*(v(i2,2*ny,k2)+v(i2+1,2*ny,k2)+v(i2,2*ny,k2+1)+v(i2+1,2*ny,k2+1));
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
V(i+ox,j+oy,oz-1) += 0.25*(v(i2,j2,-1)+v(i2+1,j2,-1)+v(i2,j2+1,-1)+v(i2+1,j2+1,-1));
V(i+ox,j+oy,oz+nz) += 0.25*(v(i2,j2,2*nz)+v(i2+1,j2,2*nz)+v(i2,j2+1,2*nz)+v(i2+1,j2+1,2*nz));
}
}
//... prolongs V to v
template< typename m1, typename m2 >
inline void prolong( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
#pragma omp for nowait
for( int i=0; i<nx; ++i ){
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) =
v(i2,j2+1,k2) =
v(i2,j2,k2+1) =
v(i2+1,j2+1,k2) =
v(i2+1,j2,k2+1) =
v(i2+1,j2+1,k2+1) =
v(i2,j2+1,k2+1) =
v(i2,j2,k2) = V(i+ox,j+oy,k+oz);
}
}
#pragma omp barrier
}
}
//... prolongs V to v
template< typename m1, typename m2 >
inline void prolong_bnd( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
v(-1,j2,k2) =
v(-1,j2+1,k2)=
v(-1,j2,k2+1)=
v(-1,j2+1,k2+1) = V(ox-1,j+oy,k+oz);
v(2*nx,j2,k2)=
v(2*nx,j2+1,k2)=
v(2*nx,j2,k2+1)=
v(2*nx,j2+1,k2+1)=V(ox+nx,j+oy,k+oz);
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
v(i2,-1,k2)=
v(i2+1,-1,k2)=
v(i2,-1,k2+1)=
v(i2+1,-1,k2+1)=V(i+ox,oy-1,k+oz);
v(i2,2*ny,k2)=
v(i2+1,2*ny,k2)=
v(i2,2*ny,k2+1)=
v(i2+1,2*ny,k2+1)=V(i+ox,oy+ny,k+oz);
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
v(i2,j2,-1)=
v(i2+1,j2,-1)=
v(i2,j2+1,-1)=
v(i2+1,j2+1,-1)=V(i+ox,j+oy,oz-1);
v(i2,j2,2*nz)=
v(i2+1,j2,2*nz)=
v(i2,j2+1,2*nz)=
v(i2+1,j2+1,2*nz)=V(i+ox,j+oy,oz+nz);
}
#pragma omp barrier
}
}
//... prolongs V to v
template< typename m1, typename m2 >
inline void prolong_add_bnd( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel
{
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
v(-1,j2,k2) +=V(ox-1,j+oy,k+oz);
v(-1,j2+1,k2)+=V(ox-1,j+oy,k+oz);
v(-1,j2,k2+1)+=V(ox-1,j+oy,k+oz);
v(-1,j2+1,k2+1)+=V(ox-1,j+oy,k+oz);
v(2*nx,j2,k2)+=V(ox+nx,j+oy,k+oz);
v(2*nx,j2+1,k2)+=V(ox+nx,j+oy,k+oz);
v(2*nx,j2,k2+1)+=V(ox+nx,j+oy,k+oz);
v(2*nx,j2+1,k2+1)+=V(ox+nx,j+oy,k+oz);
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 ){
v(i2,-1,k2)+=V(i+ox,oy-1,k+oz);
v(i2+1,-1,k2)+=V(i+ox,oy-1,k+oz);
v(i2,-1,k2+1)+=V(i+ox,oy-1,k+oz);
v(i2+1,-1,k2+1)+=V(i+ox,oy-1,k+oz);
v(i2,2*ny,k2)+=V(i+ox,oy+ny,k+oz);
v(i2+1,2*ny,k2)+=V(i+ox,oy+ny,k+oz);
v(i2,2*ny,k2+1)+=V(i+ox,oy+ny,k+oz);
v(i2+1,2*ny,k2+1)+=V(i+ox,oy+ny,k+oz);
}
for( int i=0,i2=0; i<nx; ++i,i2+=2 )
for( int j=0,j2=0; j<ny; ++j,j2+=2 ){
v(i2,j2,-1)+=V(i+ox,j+oy,oz-1);
v(i2+1,j2,-1)+=V(i+ox,j+oy,oz-1);
v(i2,j2+1,-1)+=V(i+ox,j+oy,oz-1);
v(i2+1,j2+1,-1)+=V(i+ox,j+oy,oz-1);
v(i2,j2,2*nz)+=V(i+ox,j+oy,oz+nz);
v(i2+1,j2,2*nz)+=V(i+ox,j+oy,oz+nz);
v(i2,j2+1,2*nz)+=V(i+ox,j+oy,oz+nz);
v(i2+1,j2+1,2*nz)+=V(i+ox,j+oy,oz+nz);
}
#pragma omp barrier
}
}
template< typename m1, typename m2 >
inline void prolong_add( const m1& V, m2& v ) const
{
int
nx = v.size(0)/2,
ny = v.size(1)/2,
nz = v.size(2)/2,
ox = v.offset(0),
oy = v.offset(1),
oz = v.offset(2);
#pragma omp parallel for
for( int i=0; i<nx; ++i ){
int i2 = 2*i;
for( int j=0,j2=0; j<ny; ++j,j2+=2 )
for( int k=0,k2=0; k<nz; ++k,k2+=2 )
{
v(i2+1,j2,k2) += V(i+ox,j+oy,k+oz);
v(i2,j2+1,k2) += V(i+ox,j+oy,k+oz);
v(i2,j2,k2+1) += V(i+ox,j+oy,k+oz);
v(i2+1,j2+1,k2) += V(i+ox,j+oy,k+oz);
v(i2+1,j2,k2+1) += V(i+ox,j+oy,k+oz);
v(i2+1,j2+1,k2+1) += V(i+ox,j+oy,k+oz);
v(i2,j2+1,k2+1) += V(i+ox,j+oy,k+oz);
v(i2,j2,k2) += V(i+ox,j+oy,k+oz);
}
}
}
};
#endif //__MG_OPERATORS_HH