diff --git a/Src/FFT/AMReX_FFT_Poisson.H b/Src/FFT/AMReX_FFT_Poisson.H index 8ab467cc54..fa6489dea0 100644 --- a/Src/FFT/AMReX_FFT_Poisson.H +++ b/Src/FFT/AMReX_FFT_Poisson.H @@ -91,6 +91,7 @@ template class PoissonHybrid { public: + using T = typename MF::value_type; template ,int> = 0> explicit PoissonHybrid (Geometry const& geom) @@ -104,6 +105,11 @@ public: } void solve (MF& soln, MF const& rhs); + void solve (MF& soln, MF const& rhs, Vector const& dz); + void solve (MF& soln, MF const& rhs, Gpu::DeviceVector const& dz); + + template + void solve_doit (MF& soln, MF const& rhs, DZ const& dz); // has to be public for cuda private: Geometry m_geom; @@ -225,14 +231,44 @@ void PoissonOpenBC::solve (MF& soln, MF const& rhs) template void PoissonHybrid::solve (MF& soln, MF const& rhs) +{ + auto delz = T(m_geom.CellSize(AMREX_SPACEDIM-1)); + struct DZ { + [[nodiscard]] constexpr T operator[] (int) const { return m_delz; } + T m_delz; + }; + solve_doit(soln, rhs, DZ{delz}); +} + +template +void PoissonHybrid::solve (MF& soln, MF const& rhs, Gpu::DeviceVector const& dz) +{ + auto const* pdz = dz.dataPtr(); + solve_doit(soln, rhs, pdz); +} + +template +void PoissonHybrid::solve (MF& soln, MF const& rhs, Vector const& dz) +{ +#ifdef AMREX_USE_GPU + Gpu::DeviceVector d_dz(dz.size()); + Gpu::htod_memcpy_async(d_dz.data(), dz.data(), dz.size()*sizeof(T)); + auto const* pdz = d_dz.data(); +#else + auto const* pdz = dz.data(); +#endif + solve_doit(soln, rhs, pdz); +} + +template +template +void PoissonHybrid::solve_doit (MF& soln, MF const& rhs, DZ const& dz) { BL_PROFILE("FFT::PoissonHybrid::solve"); #if (AMREX_SPACEDIM < 3) amrex::ignore_unused(soln, rhs); #else - using T = typename MF::value_type; - auto facx = T(2)*Math::pi()/T(m_geom.ProbLength(0)); auto facy = T(2)*Math::pi()/T(m_geom.ProbLength(1)); auto dx = T(m_geom.CellSize(0)); @@ -242,9 +278,6 @@ void PoissonHybrid::solve (MF& soln, MF const& rhs) auto ny = m_geom.Domain().length(1); auto nz = m_geom.Domain().length(2); - Gpu::DeviceVector delzv(nz, T(m_geom.CellSize(2))); - auto const* delz = delzv.data(); - Box cdomain = m_geom.Domain(); cdomain.setBig(0,cdomain.length(0)/2); auto cba = amrex::decompose(cdomain, ParallelContext::NProcsSub(), @@ -283,18 +316,18 @@ void PoissonHybrid::solve (MF& soln, MF const& rhs) for(int k=0; k < nz; k++) { if(k==0) { ald(i,j,k) = 0.; - cud(i,j,k) = 2.0 /(delz[k]*(delz[k]+delz[k+1])); + cud(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k+1])); bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k); } else if (k == nz-1) { - ald(i,j,k) = 2.0 /(delz[k]*(delz[k]+delz[k-1])); + ald(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k-1])); cud(i,j,k) = 0.; bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k); if (i == 0 && j == 0) { bd(i,j,k) *= 2.0; } } else { - ald(i,j,k) = 2.0 /(delz[k]*(delz[k]+delz[k-1])); - cud(i,j,k) = 2.0 /(delz[k]*(delz[k]+delz[k+1])); + ald(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k-1])); + cud(i,j,k) = 2.0 /(dz[k]*(dz[k]+dz[k+1])); bd(i,j,k) = k2 -ald(i,j,k)-cud(i,j,k); } } @@ -339,18 +372,18 @@ void PoissonHybrid::solve (MF& soln, MF const& rhs) for(int k=0; k < nz; k++) { if(k==0) { ald[k] = 0.; - cud[k] = 2.0 /(delz[k]*(delz[k]+delz[k+1])); + cud[k] = 2.0 /(dz[k]*(dz[k]+dz[k+1])); bd[k] = k2 -ald[k]-cud[k]; } else if (k == nz-1) { - ald[k] = 2.0 /(delz[k]*(delz[k]+delz[k-1])); + ald[k] = 2.0 /(dz[k]*(dz[k]+dz[k-1])); cud[k] = 0.; bd[k] = k2 -ald[k]-cud[k]; if (i == 0 && j == 0) { bd[k] *= 2.0; } } else { - ald[k] = 2.0 /(delz[k]*(delz[k]+delz[k-1])); - cud[k] = 2.0 /(delz[k]*(delz[k]+delz[k+1])); + ald[k] = 2.0 /(dz[k]*(dz[k]+dz[k-1])); + cud[k] = 2.0 /(dz[k]*(dz[k]+dz[k+1])); bd[k] = k2 -ald[k]-cud[k]; } } diff --git a/Tests/FFT/Poisson/main.cpp b/Tests/FFT/Poisson/main.cpp index 392a81124f..634a03154a 100644 --- a/Tests/FFT/Poisson/main.cpp +++ b/Tests/FFT/Poisson/main.cpp @@ -6,6 +6,135 @@ using namespace amrex; +void make_rhs (MultiFab& rhs, Geometry const& geom, + Array,AMREX_SPACEDIM> const& fft_bc) +{ + auto const& dx = geom.CellSizeArray(); + auto const& problo = geom.ProbLoArray(); + auto const& probhi = geom.ProbHiArray(); + GpuArray center + {AMREX_D_DECL(0.5_rt*(problo[0]+probhi[0]), + 0.5_rt*(problo[1]+probhi[1]), + 0.5_rt*(problo[2]+probhi[2]))}; + GpuArray problen + {AMREX_D_DECL((probhi[0]-problo[0]), + (probhi[1]-problo[1]), + (probhi[2]-problo[2]))}; + + GpuArray fac + {AMREX_D_DECL(2._rt*Math::pi()/problen[0], + 2._rt*Math::pi()/problen[1], + 2._rt*Math::pi()/problen[2])}; + + auto const& rhsma = rhs.arrays(); + ParallelFor(rhs, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + IntVect iv(AMREX_D_DECL(i,j,k)); + Real r = 1.0_rt; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + Real x = (iv[idim]+0.5_rt) * dx[idim]; + if (fft_bc[idim].first == FFT::Boundary::periodic) { + r *= (0.11_rt + std::sin((x+0.1_rt)*fac[idim])); + } else if (fft_bc[idim].first == FFT::Boundary::even && + fft_bc[idim].second == FFT::Boundary::even) { + r *= (0.12_rt + std::cos(x*2._rt*fac[idim])); + } else if (fft_bc[idim].first == FFT::Boundary::odd && + fft_bc[idim].second == FFT::Boundary::odd) { + r *= std::sin(x*1.5_rt*fac[idim]); + } else if (fft_bc[idim].first == FFT::Boundary::odd && + fft_bc[idim].second == FFT::Boundary::even) { + r *= std::sin(x*0.75_rt*fac[idim]); + } else if (fft_bc[idim].first == FFT::Boundary::even && + fft_bc[idim].second == FFT::Boundary::odd) { + r *= std::cos(x*0.75_rt*fac[idim]); + } + x -= center[idim]; + x /= problen[idim]; + r *= 1.0_rt + 0.1_rt*Math::abs(std::tanh(x)); + } + rhsma[b](i,j,k) = r; + }); + + bool has_dirichlet = false; + for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { + has_dirichlet = has_dirichlet || + fft_bc[idim].first == FFT::Boundary::odd || + fft_bc[idim].second == FFT::Boundary::odd; + } + if (! has_dirichlet) { + // Shift rhs so that its sum is zero. + auto rhosum = rhs.sum(0); + rhs.plus(-rhosum/geom.Domain().d_numPts(), 0, 1); + } +} + +std::pair check_convergence + (MultiFab const& soln, MultiFab const& rhs, Geometry const& geom, + Array,AMREX_SPACEDIM> const& fft_bc) +{ + MultiFab phi(soln.boxArray(), soln.DistributionMap(), 1, 1); + MultiFab res(soln.boxArray(), soln.DistributionMap(), 1, 0); + MultiFab::Copy(phi, soln, 0, 0, 1, 0); + phi.FillBoundary(geom.periodicity()); + auto const& res_ma = res.arrays(); + auto const& phi_ma = phi.const_arrays(); + auto const& rhs_ma = rhs.const_arrays(); + auto const& dx = geom.CellSizeArray(); + GpuArray lapfac + {AMREX_D_DECL(1._rt/(dx[0]*dx[0]), + 1._rt/(dx[1]*dx[1]), + 1._rt/(dx[2]*dx[2]))}; + AMREX_D_TERM(int n_cell_x = geom.Domain().length(0);, + int n_cell_y = geom.Domain().length(1);, + int n_cell_z = geom.Domain().length(2)); + ParallelFor(res, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) + { + auto const& phia = phi_ma[b]; + Real lap = 0; + if (i == 0 && fft_bc[0].first == FFT::Boundary::odd) { + lap += (-3._rt*phia(i,j,k)+phia(i+1,j,k)) * lapfac[0]; + } else if (i == 0 && fft_bc[0].first == FFT::Boundary::even) { + lap += (-phia(i,j,k)+phia(i+1,j,k)) * lapfac[0]; + } else if (i == n_cell_x-1 && fft_bc[0].second == FFT::Boundary::odd) { + lap += (phia(i-1,j,k)-3._rt*phia(i,j,k)) * lapfac[0]; + } else if (i == n_cell_x-1 && fft_bc[0].second == FFT::Boundary::even) { + lap += (phia(i-1,j,k)-phia(i,j,k)) * lapfac[0]; + } else { + lap += (phia(i-1,j,k)-2._rt*phia(i,j,k)+phia(i+1,j,k)) * lapfac[0]; + } +#if (AMREX_SPACEDIM >= 2) + if (j == 0 && fft_bc[1].first == FFT::Boundary::odd) { + lap += (-3._rt*phia(i,j,k)+phia(i,j+1,k)) * lapfac[1]; + } else if (j == 0 && fft_bc[1].first == FFT::Boundary::even) { + lap += (-phia(i,j,k)+phia(i,j+1,k)) * lapfac[1]; + } else if (j == n_cell_y-1 && fft_bc[1].second == FFT::Boundary::odd) { + lap += (phia(i,j-1,k)-3._rt*phia(i,j,k)) * lapfac[1]; + } else if (j == n_cell_y-1 && fft_bc[1].second == FFT::Boundary::even) { + lap += (phia(i,j-1,k)-phia(i,j,k)) * lapfac[1]; + } else { + lap += (phia(i,j-1,k)-2._rt*phia(i,j,k)+phia(i,j+1,k)) * lapfac[1]; + } +#endif +#if (AMREX_SPACEDIM == 3) + if (k == 0 && fft_bc[2].first == FFT::Boundary::odd) { + lap += (-3._rt*phia(i,j,k)+phia(i,j,k+1)) * lapfac[2]; + } else if (k == 0 && fft_bc[2].first == FFT::Boundary::even) { + lap += (-phia(i,j,k)+phia(i,j,k+1)) * lapfac[2]; + } else if (k == n_cell_z-1 && fft_bc[2].second == FFT::Boundary::odd) { + lap += (phia(i,j,k-1)-3._rt*phia(i,j,k)) * lapfac[2]; + } else if (k == n_cell_z-1 && fft_bc[2].second == FFT::Boundary::even) { + lap += (phia(i,j,k-1)-phia(i,j,k)) * lapfac[2]; + } else { + lap += (phia(i,j,k-1)-2._rt*phia(i,j,k)+phia(i,j,k+1)) * lapfac[2]; + } +#endif + res_ma[b](i,j,k) = rhs_ma[b](i,j,k) - lap; + }); + auto bnorm = rhs.norminf(); + auto rnorm = res.norminf(); + return {bnorm, rnorm}; +} + int main (int argc, char* argv[]) { amrex::Initialize(argc, argv); @@ -51,15 +180,6 @@ int main (int argc, char* argv[]) AMREX_D_DECL(prob_hi_x,prob_hi_y,prob_hi_z)), CoordSys::cartesian, {AMREX_D_DECL(1,1,1)}); } - auto const& dx = geom.CellSizeArray(); - GpuArray center - {AMREX_D_DECL(0.5_rt*(prob_lo_x+prob_hi_x), - 0.5_rt*(prob_lo_y+prob_hi_y), - 0.5_rt*(prob_lo_z+prob_hi_z))}; - GpuArray problen - {AMREX_D_DECL((prob_hi_x-prob_lo_x), - (prob_hi_y-prob_lo_y), - (prob_hi_z-prob_lo_z))}; // For each dimension, there are 5 possibilities constexpr int ncases = 5; @@ -93,118 +213,46 @@ int main (int argc, char* argv[]) } amrex::Print() << ")\n"; - GpuArray fac - {AMREX_D_DECL(2._rt*Math::pi()/problen[0], - 2._rt*Math::pi()/problen[1], - 2._rt*Math::pi()/problen[2])}; - MultiFab rhs(ba,dm,1,0); MultiFab soln(ba,dm,1,0); soln.setVal(std::numeric_limits::max()); - - auto const& rhsma = rhs.arrays(); - ParallelFor(rhs, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) - { - IntVect iv(AMREX_D_DECL(i,j,k)); - Real r = 1.0_rt; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - Real x = (iv[idim]+0.5_rt) * dx[idim]; - if (fft_bc[idim].first == FFT::Boundary::periodic) { - r *= (0.11_rt + std::sin((x+0.1_rt)*fac[idim])); - } else if (fft_bc[idim].first == FFT::Boundary::even && - fft_bc[idim].second == FFT::Boundary::even) { - r *= (0.12_rt + std::cos(x*2._rt*fac[idim])); - } else if (fft_bc[idim].first == FFT::Boundary::odd && - fft_bc[idim].second == FFT::Boundary::odd) { - r *= std::sin(x*1.5_rt*fac[idim]); - } else if (fft_bc[idim].first == FFT::Boundary::odd && - fft_bc[idim].second == FFT::Boundary::even) { - r *= std::sin(x*0.75_rt*fac[idim]); - } else if (fft_bc[idim].first == FFT::Boundary::even && - fft_bc[idim].second == FFT::Boundary::odd) { - r *= std::cos(x*0.75_rt*fac[idim]); - } - x -= center[idim]; - x /= problen[idim]; - r *= 1.0_rt + 0.1_rt*Math::abs(std::tanh(x)); - } - rhsma[b](i,j,k) = r; - }); - - bool has_dirichlet = false; - for (int idim = 0; idim < AMREX_SPACEDIM; ++idim) { - has_dirichlet = has_dirichlet || - fft_bc[idim].first == FFT::Boundary::odd || - fft_bc[idim].second == FFT::Boundary::odd; - } - if (! has_dirichlet) { - // Shift rhs so that its sum is zero. - auto rhosum = rhs.sum(0); - rhs.plus(-rhosum/geom.Domain().d_numPts(), 0, 1); - } - - // We know that the sum of our rhs is zero for non-Dirichlet - // cases. Otherwise, we should shift rhs so that its sum is zero. + make_rhs(rhs, geom, fft_bc); FFT::Poisson fft_poisson(geom, fft_bc); fft_poisson.solve(soln, rhs); - MultiFab phi(soln.boxArray(), soln.DistributionMap(), 1, 1); - MultiFab res(soln.boxArray(), soln.DistributionMap(), 1, 0); - MultiFab::Copy(phi, soln, 0, 0, 1, 0); - phi.FillBoundary(geom.periodicity()); - auto const& res_ma = res.arrays(); - auto const& phi_ma = phi.const_arrays(); - auto const& rhs_ma = rhs.const_arrays(); - GpuArray lapfac - {AMREX_D_DECL(1._rt/(dx[0]*dx[0]), - 1._rt/(dx[1]*dx[1]), - 1._rt/(dx[2]*dx[2]))}; - ParallelFor(res, [=] AMREX_GPU_DEVICE (int b, int i, int j, int k) - { - auto const& phia = phi_ma[b]; - Real lap = 0; - if (i == 0 && fft_bc[0].first == FFT::Boundary::odd) { - lap += (-3._rt*phia(i,j,k)+phia(i+1,j,k)) * lapfac[0]; - } else if (i == 0 && fft_bc[0].first == FFT::Boundary::even) { - lap += (-phia(i,j,k)+phia(i+1,j,k)) * lapfac[0]; - } else if (i == n_cell_x-1 && fft_bc[0].second == FFT::Boundary::odd) { - lap += (phia(i-1,j,k)-3._rt*phia(i,j,k)) * lapfac[0]; - } else if (i == n_cell_x-1 && fft_bc[0].second == FFT::Boundary::even) { - lap += (phia(i-1,j,k)-phia(i,j,k)) * lapfac[0]; - } else { - lap += (phia(i-1,j,k)-2._rt*phia(i,j,k)+phia(i+1,j,k)) * lapfac[0]; - } -#if (AMREX_SPACEDIM >= 2) - if (j == 0 && fft_bc[1].first == FFT::Boundary::odd) { - lap += (-3._rt*phia(i,j,k)+phia(i,j+1,k)) * lapfac[1]; - } else if (j == 0 && fft_bc[1].first == FFT::Boundary::even) { - lap += (-phia(i,j,k)+phia(i,j+1,k)) * lapfac[1]; - } else if (j == n_cell_y-1 && fft_bc[1].second == FFT::Boundary::odd) { - lap += (phia(i,j-1,k)-3._rt*phia(i,j,k)) * lapfac[1]; - } else if (j == n_cell_y-1 && fft_bc[1].second == FFT::Boundary::even) { - lap += (phia(i,j-1,k)-phia(i,j,k)) * lapfac[1]; - } else { - lap += (phia(i,j-1,k)-2._rt*phia(i,j,k)+phia(i,j+1,k)) * lapfac[1]; - } + auto [bnorm, rnorm] = check_convergence(soln, rhs, geom, fft_bc); + amrex::Print() << " rhs inf norm " << bnorm << "\n" + << " res inf norm " << rnorm << "\n"; +#ifdef AMREX_USE_FLOAT + auto eps = 2.e-3f; +#else + auto eps = 1.e-11; #endif + AMREX_ALWAYS_ASSERT(rnorm < eps*bnorm); + }}} + #if (AMREX_SPACEDIM == 3) - if (k == 0 && fft_bc[2].first == FFT::Boundary::odd) { - lap += (-3._rt*phia(i,j,k)+phia(i,j,k+1)) * lapfac[2]; - } else if (k == 0 && fft_bc[2].first == FFT::Boundary::even) { - lap += (-phia(i,j,k)+phia(i,j,k+1)) * lapfac[2]; - } else if (k == n_cell_z-1 && fft_bc[2].second == FFT::Boundary::odd) { - lap += (phia(i,j,k-1)-3._rt*phia(i,j,k)) * lapfac[2]; - } else if (k == n_cell_z-1 && fft_bc[2].second == FFT::Boundary::even) { - lap += (phia(i,j,k-1)-phia(i,j,k)) * lapfac[2]; - } else { - lap += (phia(i,j,k-1)-2._rt*phia(i,j,k)+phia(i,j,k+1)) * lapfac[2]; - } -#endif - res_ma[b](i,j,k) = rhs_ma[b](i,j,k) - lap; - }); - auto bnorm = rhs.norminf(); - auto rnorm = res.norminf(); + { + amrex::Print() << " Testing PoissonHybrid\n"; + + Array,AMREX_SPACEDIM> + fft_bc{std::make_pair(FFT::Boundary::periodic,FFT::Boundary::periodic), + std::make_pair(FFT::Boundary::periodic,FFT::Boundary::periodic), + std::make_pair(FFT::Boundary::even,FFT::Boundary::even)}; + + MultiFab rhs(ba,dm,1,0); + MultiFab soln(ba,dm,1,0); + soln.setVal(std::numeric_limits::max()); + make_rhs(rhs, geom, fft_bc); + + Gpu::DeviceVector dz(n_cell_z, geom.CellSize(2)); + // or Vector dz(n_cell_z, geom.CellSize(2)); + + FFT::PoissonHybrid fft_poisson(geom); + fft_poisson.solve(soln, rhs, dz); + + auto [bnorm, rnorm] = check_convergence(soln, rhs, geom, fft_bc); amrex::Print() << " rhs inf norm " << bnorm << "\n" << " res inf norm " << rnorm << "\n"; #ifdef AMREX_USE_FLOAT @@ -213,7 +261,9 @@ int main (int argc, char* argv[]) auto eps = 1.e-11; #endif AMREX_ALWAYS_ASSERT(rnorm < eps*bnorm); - }}} + } +#endif } + amrex::Finalize(); }