From d5caaf968e940b160829213825bdb79f9871b129 Mon Sep 17 00:00:00 2001
From: Jacques Xing <jacques.xing@kcl.ac.uk>
Date: Thu, 18 Jan 2024 08:06:53 +0000
Subject: [PATCH] Implement NeuBndCond operator for CUDA

---
 CMakeLists.txt                             |   2 +-
 Operators/NeuBndCond/NeuBndCondCUDA.cu     |  13 ++
 Operators/NeuBndCond/NeuBndCondCUDA.hpp    | 151 ++++++++++++++++++
 .../NeuBndCond/NeuBndCondCUDAKernels.cuh   |  62 +++++++
 Operators/NeuBndCond/NeuBndCondStdMat.hpp  |  10 +-
 tests/CMakeLists.txt                       |  19 +++
 tests/init_neumannfields.hpp               |  72 +++++++++
 tests/test_neumanncuda.cpp                 |  92 +++++++++++
 8 files changed, 415 insertions(+), 6 deletions(-)
 create mode 100644 Operators/NeuBndCond/NeuBndCondCUDA.cu
 create mode 100644 Operators/NeuBndCond/NeuBndCondCUDA.hpp
 create mode 100644 Operators/NeuBndCond/NeuBndCondCUDAKernels.cuh
 create mode 100644 tests/init_neumannfields.hpp
 create mode 100644 tests/test_neumanncuda.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index ecfd7de..1fed5c9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,7 +68,7 @@ if (NEKTAR_USE_CUDA)
         Operators/NullPrecon/NullPreconCUDA.cu
         #Operators/DiagPrecon/DiagPreconCUDA.cu
         Operators/DirBndCond/DirBndCondCUDA.cu
-        #Operators/NeuBndCond/NeuBndCondCUDA.cu
+        Operators/NeuBndCond/NeuBndCondCUDA.cu
         #Operators/RobBndCond/RobBndCondCUDA.cu
         #Operators/ConjGrad/ConjGradCUDA.cu
         #Operators/FwdTrans/FwdTransCUDA.cu
diff --git a/Operators/NeuBndCond/NeuBndCondCUDA.cu b/Operators/NeuBndCond/NeuBndCondCUDA.cu
new file mode 100644
index 0000000..7968af2
--- /dev/null
+++ b/Operators/NeuBndCond/NeuBndCondCUDA.cu
@@ -0,0 +1,13 @@
+#include "NeuBndCondCUDA.hpp"
+
+namespace Nektar::Operators::detail
+{
+
+// Register implementation with Operator Factory
+template <>
+std::string OperatorNeuBndCondImpl<double, ImplCUDA>::className =
+    GetOperatorFactory<default_fp_type>().RegisterCreatorFunction(
+        "NeuBndCondCUDA", OperatorNeuBndCondImpl<double, ImplCUDA>::instantiate,
+        "...");
+
+} // namespace Nektar::Operators::detail
diff --git a/Operators/NeuBndCond/NeuBndCondCUDA.hpp b/Operators/NeuBndCond/NeuBndCondCUDA.hpp
new file mode 100644
index 0000000..86543eb
--- /dev/null
+++ b/Operators/NeuBndCond/NeuBndCondCUDA.hpp
@@ -0,0 +1,151 @@
+#pragma once
+
+#include "MemoryRegionCUDA.hpp"
+#include "Operators/NeuBndCond/NeuBndCondCUDAKernels.cuh"
+#include "Operators/OperatorNeuBndCond.hpp"
+
+#include <MultiRegions/AssemblyMap/AssemblyMapCG.h>
+#include <MultiRegions/ContField.h>
+#include <SpatialDomains/Conditions.h>
+
+using namespace Nektar;
+using namespace Nektar::MultiRegions;
+
+namespace Nektar::Operators::detail
+{
+template <typename TData>
+class OperatorNeuBndCondImpl<TData, ImplCUDA> : public OperatorNeuBndCond<TData>
+{
+public:
+    OperatorNeuBndCondImpl(const MultiRegions::ExpListSharedPtr &expansionList)
+        : OperatorNeuBndCond<TData>(expansionList)
+    {
+        auto contfield =
+            std::dynamic_pointer_cast<ContField>(this->m_expansionList);
+        auto assmbMap = contfield->GetLocalToGlobalMap();
+        m_bndExpSize = contfield->GetBndCondExpansions().size();
+        m_signChange = assmbMap->GetSignChange();
+
+        // Memory allocation
+        if (m_signChange)
+        {
+            auto &sign = assmbMap->GetBndCondCoeffsToLocalCoeffsSign();
+            cudaMalloc((void **)&m_sign, sizeof(TData) * sign.size());
+            cudaMemcpy(m_sign, sign.get(), sizeof(TData) * sign.size(),
+                       cudaMemcpyHostToDevice);
+        }
+
+        auto &map = assmbMap->GetBndCondCoeffsToLocalCoeffsMap();
+        cudaMalloc((void **)&m_map, sizeof(int) * map.size());
+        cudaMemcpy(m_map, map.get(), sizeof(int) * map.size(),
+                   cudaMemcpyHostToDevice);
+
+        Array<OneD, int> offset(m_bndExpSize, 0);
+        for (size_t i = 1; i < offset.size(); ++i)
+        {
+            offset[i] = offset[i - 1] +
+                        contfield->GetBndCondExpansions()[i - 1]->GetNcoeffs();
+        }
+        cudaMalloc((void **)&m_offset, sizeof(int) * offset.size());
+        cudaMemcpy(m_offset, offset.get(), sizeof(int) * offset.size(),
+                   cudaMemcpyHostToDevice);
+
+        Array<OneD, int> ncoeff(m_bndExpSize);
+        size_t ntotcoeff = 0;
+        for (size_t i = 0; i < ncoeff.size(); ++i)
+        {
+            ncoeff[i] = contfield->GetBndCondExpansions()[i]->GetNcoeffs();
+            ntotcoeff += contfield->GetBndCondExpansions()[i]->GetNcoeffs();
+        }
+        cudaMalloc((void **)&m_ncoeff, sizeof(int) * ncoeff.size());
+        cudaMemcpy(m_ncoeff, ncoeff.get(), sizeof(int) * ncoeff.size(),
+                   cudaMemcpyHostToDevice);
+
+        Array<OneD, BoundaryConditionType> bctype(m_bndExpSize);
+        for (size_t i = 0; i < bctype.size(); ++i)
+        {
+            bctype[i] =
+                contfield->GetBndConditions()[i]->GetBoundaryConditionType();
+        }
+        cudaMalloc((void **)&m_bctype,
+                   sizeof(BoundaryConditionType) * bctype.size());
+        cudaMemcpy(m_bctype, bctype.get(),
+                   sizeof(BoundaryConditionType) * bctype.size(),
+                   cudaMemcpyHostToDevice);
+
+        Array<OneD, TData> coeff(ntotcoeff);
+        for (size_t i = 0; i < offset.size(); ++i)
+        {
+            for (size_t j = 0; j < ncoeff[i]; ++j)
+            {
+                coeff[offset[i] + j] =
+                    contfield->GetBndCondExpansions()[i]->GetCoeffs()[j];
+            }
+        }
+        cudaMalloc((void **)&m_coeff, sizeof(TData) * coeff.size());
+        cudaMemcpy(m_coeff, coeff.get(), sizeof(TData) * coeff.size(),
+                   cudaMemcpyHostToDevice);
+    }
+
+    ~OperatorNeuBndCondImpl(void)
+    {
+        if (m_signChange)
+        {
+            cudaFree(m_sign);
+        }
+        cudaFree(m_map);
+        cudaFree(m_offset);
+        cudaFree(m_ncoeff);
+        cudaFree(m_bctype);
+        cudaFree(m_coeff);
+    }
+
+    void apply(Field<TData, FieldState::Coeff> &inout) override
+    {
+        // Copy memory to GPU, if necessary, and get raw pointers.
+        auto *inoutptr =
+            inout.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
+
+        // Determine CUDA grid parameters.
+        m_gridSize = m_bndExpSize / m_blockSize;
+        m_gridSize += (m_bndExpSize % m_blockSize == 0) ? 0 : 1;
+
+        if (m_signChange)
+        {
+            NeuBndCondKernel<TData><<<m_gridSize, m_blockSize>>>(
+                m_bndExpSize, m_offset, m_bctype, m_ncoeff, m_sign, m_map,
+                m_coeff, inoutptr);
+        }
+        else
+        {
+            NeuBndCondKernel<TData><<<m_gridSize, m_blockSize>>>(
+                m_bndExpSize, m_offset, m_bctype, m_ncoeff, m_map, m_coeff,
+                inoutptr);
+        }
+    }
+
+    // instantiation function for CreatorFunction in OperatorFactory
+    static std::unique_ptr<Operator<TData>> instantiate(
+        const MultiRegions::ExpListSharedPtr &expansionList)
+    {
+        return std::make_unique<OperatorNeuBndCondImpl<TData, ImplCUDA>>(
+            expansionList);
+    }
+
+    // className - for OperatorFactory
+    static std::string className;
+
+protected:
+    bool m_signChange;
+    size_t m_bndExpSize;
+    BoundaryConditionType *m_bctype;
+    TData *m_coeff;
+    TData *m_sign;
+    int *m_map;
+    int *m_ncoeff;
+    int *m_offset;
+    size_t m_gridSize;
+    size_t m_blockSize = 32;
+};
+
+} // namespace Nektar::Operators::detail
diff --git a/Operators/NeuBndCond/NeuBndCondCUDAKernels.cuh b/Operators/NeuBndCond/NeuBndCondCUDAKernels.cuh
new file mode 100644
index 0000000..1ad7f1f
--- /dev/null
+++ b/Operators/NeuBndCond/NeuBndCondCUDAKernels.cuh
@@ -0,0 +1,62 @@
+#include <SpatialDomains/Conditions.h>
+
+using namespace Nektar;
+using namespace Nektar::SpatialDomains;
+
+namespace Nektar::Operators::detail
+{
+
+template <typename TData>
+__global__ void NeuBndCondKernel(const size_t nsize, const int *offsetptr,
+                                 const BoundaryConditionType *bctypeptr,
+                                 const int *ncoeffptr, const int *mapptr,
+                                 const TData *inptr, TData *outptr)
+{
+    size_t i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i >= nsize)
+    {
+        return;
+    }
+
+    if (bctypeptr[i] == eNeumann || bctypeptr[i] == eRobin)
+    {
+        size_t offset = offsetptr[i];
+        size_t ncoeff = ncoeffptr[i];
+        for (size_t j = 0; j < ncoeff; j++)
+        {
+            // atomicAdd(outptr + mapptr[offset + j], inptr[offset + j]);
+            outptr[mapptr[offset + j]] += inptr[offset + j];
+        }
+    }
+}
+
+template <typename TData>
+__global__ void NeuBndCondKernel(const size_t nsize, const int *offsetptr,
+                                 const BoundaryConditionType *bctypeptr,
+                                 const int *ncoeffptr, const TData *signptr,
+                                 const int *mapptr, const TData *inptr,
+                                 TData *outptr)
+{
+    size_t i = blockDim.x * blockIdx.x + threadIdx.x;
+
+    if (i >= nsize)
+    {
+        return;
+    }
+
+    if (bctypeptr[i] == eNeumann || bctypeptr[i] == eRobin)
+    {
+        size_t offset = offsetptr[i];
+        size_t ncoeff = ncoeffptr[i];
+        for (size_t j = 0; j < ncoeff; j++)
+        {
+            // atomicAdd(outptr + mapptr[offset + j], signptr[offset + j] *
+            // inptr[offset + j]);
+            outptr[mapptr[offset + j]] +=
+                signptr[offset + j] * inptr[offset + j];
+        }
+    }
+}
+
+} // namespace Nektar::Operators::detail
diff --git a/Operators/NeuBndCond/NeuBndCondStdMat.hpp b/Operators/NeuBndCond/NeuBndCondStdMat.hpp
index 91ac430..792c0af 100644
--- a/Operators/NeuBndCond/NeuBndCondStdMat.hpp
+++ b/Operators/NeuBndCond/NeuBndCondStdMat.hpp
@@ -34,8 +34,8 @@ public:
         auto ncoeffs = m_assmbMap->GetNumLocalCoeffs();
 
         // Copy data from input field
-        Array<OneD, NekDouble> outarr(ncoeffs, 0.0);
-        auto *inarrptr = outarr.data();
+        Array<OneD, NekDouble> inoutarr(ncoeffs);
+        auto *inarrptr = inoutarr.data();
         auto *inptr = inout.GetStorage().GetCPUPtr();
 
         for (size_t block_idx = 0; block_idx < inout.GetBlocks().size();
@@ -66,7 +66,7 @@ public:
                     for (size_t j = 0; j < bndCondExpansions[i]->GetNcoeffs();
                          j++)
                     {
-                        outarr[map[bndcnt + j]] +=
+                        inoutarr[map[bndcnt + j]] +=
                             sign[bndcnt + j] * bndcoeff[j];
                     }
                 }
@@ -75,7 +75,7 @@ public:
                     for (size_t j = 0; j < bndCondExpansions[i]->GetNcoeffs();
                          j++)
                     {
-                        outarr[map[bndcnt + j]] += bndcoeff[j];
+                        inoutarr[map[bndcnt + j]] += bndcoeff[j];
                     }
                 }
 
@@ -83,7 +83,7 @@ public:
 
 
         // Copy data to output field
-        auto *outarrptr = outarr.data();
+        auto *outarrptr = inoutarr.data();
         auto *outptr = inout.GetStorage().GetCPUPtr();
 
         for (size_t block_idx = 0; block_idx < inout.GetBlocks().size();
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 6981fc0..0dbf180 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -216,6 +216,17 @@ IF (NEKTAR_USE_CUDA)
     -DTEST_PATH="${CMAKE_SOURCE_DIR}")
 ENDIF()
 
+IF (NEKTAR_USE_CUDA)
+  add_executable(test_neumanncuda test_neumanncuda.cpp)
+  target_link_libraries(test_neumanncuda PRIVATE Operators)
+  target_link_libraries(test_neumanncuda PRIVATE ${NEKTAR++_LIBRARIES})
+  target_link_libraries(test_neumanncuda PRIVATE Boost::unit_test_framework)
+  target_include_directories(test_neumanncuda PRIVATE "${CMAKE_SOURCE_DIR}")
+  target_include_directories(test_neumanncuda PRIVATE ${NEKTAR++_INCLUDE_DIRS})
+  target_compile_definitions(test_neumanncuda PRIVATE
+    -DTEST_PATH="${CMAKE_SOURCE_DIR}")
+ENDIF()
+
 add_executable(test_nullprecon test_nullprecon.cpp)
 target_link_libraries(test_nullprecon PRIVATE Operators)
 target_link_libraries(test_nullprecon PRIVATE ${NEKTAR++_LIBRARIES})
@@ -431,6 +442,14 @@ IF (NEKTAR_USE_CUDA)
   )
 ENDIF()
 
+IF (NEKTAR_USE_CUDA)
+  add_test(
+    NAME NeumannCUDA
+    COMMAND test_neumanncuda
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  )
+ENDIF()
+
 add_test(
   NAME NullPrecon
   COMMAND test_nullprecon
diff --git a/tests/init_neumannfields.hpp b/tests/init_neumannfields.hpp
new file mode 100644
index 0000000..ac18052
--- /dev/null
+++ b/tests/init_neumannfields.hpp
@@ -0,0 +1,72 @@
+#include "init_fields.hpp"
+
+using namespace std;
+using namespace Nektar::Operators;
+using namespace Nektar::LibUtilities;
+using namespace Nektar;
+
+class NeumannField
+    : public InitFields<double, FieldState::Coeff, FieldState::Coeff,
+                        MultiRegions::ContField>
+{
+public:
+    NeumannField()
+        : InitFields<double, FieldState::Coeff, FieldState::Coeff,
+                     MultiRegions::ContField>()
+    {
+    }
+};
+
+class Helmholtz1D_Seg : public NeumannField
+{
+public:
+    Helmholtz1D_Seg()
+    {
+        meshName = "run/Helmholtz1D_P8.xml";
+    }
+};
+
+class Helmholtz2D_Tri_Quad : public NeumannField
+{
+public:
+    Helmholtz2D_Tri_Quad()
+    {
+        meshName = "run/Helmholtz2D_P7_AllBCs.xml";
+    }
+};
+
+class Helmholtz3D_Hex : public NeumannField
+{
+public:
+    Helmholtz3D_Hex()
+    {
+        meshName = "run/Helmholtz3D_Hex_Heterogeneous.xml";
+    }
+};
+
+class Helmholtz3D_Prism : public NeumannField
+{
+public:
+    Helmholtz3D_Prism()
+    {
+        meshName = "run/Helmholtz3D_Prism_VarP.xml";
+    }
+};
+
+class Helmholtz3D_Pyr : public NeumannField
+{
+public:
+    Helmholtz3D_Pyr()
+    {
+        meshName = "run/Helmholtz3D_Pyr_VarP.xml";
+    }
+};
+
+class Helmholtz3D_Tet : public NeumannField
+{
+public:
+    Helmholtz3D_Tet()
+    {
+        meshName = "run/Helmholtz3D_Tet_VarP.xml";
+    }
+};
diff --git a/tests/test_neumanncuda.cpp b/tests/test_neumanncuda.cpp
new file mode 100644
index 0000000..3305146
--- /dev/null
+++ b/tests/test_neumanncuda.cpp
@@ -0,0 +1,92 @@
+#define BOOST_TEST_DYN_LINK
+#define BOOST_TEST_MODULE TestNeumannCUDA
+#include <boost/test/tools/output_test_stream.hpp>
+#include <boost/test/unit_test.hpp>
+
+#include <iostream>
+#include <memory>
+
+#include "Operators/OperatorNeuBndCond.hpp"
+#include "init_neumannfields.hpp"
+
+BOOST_AUTO_TEST_SUITE(TestNeumann)
+
+BOOST_FIXTURE_TEST_CASE(neumann1dcuda_seg, Helmholtz1D_Seg)
+{
+    Configure();
+    NeuBndCond<>::create(fixt_explist)->apply(*fixt_out);
+    NeuBndCond<>::create(fixt_explist, "CUDA")->apply(*fixtcuda_out);
+    BOOST_TEST(fixtcuda_out->compare(*fixt_out, 1.0E-12));
+    boost::test_tools::output_test_stream output;
+    {
+        OutputIfNotMatch(fixtcuda_out->GetStorage().GetCPUPtr(),
+                         fixt_out->GetStorage().GetCPUPtr(), 1.0E-12);
+    }
+}
+
+BOOST_FIXTURE_TEST_CASE(neumann2dcuda_tri_quad, Helmholtz2D_Tri_Quad)
+{
+    Configure();
+    NeuBndCond<>::create(fixt_explist)->apply(*fixt_out);
+    NeuBndCond<>::create(fixt_explist, "CUDA")->apply(*fixtcuda_out);
+    BOOST_TEST(fixtcuda_out->compare(*fixt_out, 1.0E-12));
+    boost::test_tools::output_test_stream output;
+    {
+        OutputIfNotMatch(fixtcuda_out->GetStorage().GetCPUPtr(),
+                         fixt_out->GetStorage().GetCPUPtr(), 1.0E-12);
+    }
+}
+
+BOOST_FIXTURE_TEST_CASE(neumann3dcuda_hex, Helmholtz3D_Hex)
+{
+    Configure();
+    NeuBndCond<>::create(fixt_explist)->apply(*fixt_out);
+    NeuBndCond<>::create(fixt_explist, "CUDA")->apply(*fixtcuda_out);
+    BOOST_TEST(fixtcuda_out->compare(*fixt_out, 1.0E-12));
+    boost::test_tools::output_test_stream output;
+    {
+        OutputIfNotMatch(fixtcuda_out->GetStorage().GetCPUPtr(),
+                         fixt_out->GetStorage().GetCPUPtr(), 1.0E-12);
+    }
+}
+
+BOOST_FIXTURE_TEST_CASE(neumann3dcuda_prism, Helmholtz3D_Prism)
+{
+    Configure();
+    NeuBndCond<>::create(fixt_explist)->apply(*fixt_out);
+    NeuBndCond<>::create(fixt_explist, "CUDA")->apply(*fixtcuda_out);
+    BOOST_TEST(fixtcuda_out->compare(*fixt_out, 1.0E-12));
+    boost::test_tools::output_test_stream output;
+    {
+        OutputIfNotMatch(fixtcuda_out->GetStorage().GetCPUPtr(),
+                         fixt_out->GetStorage().GetCPUPtr(), 1.0E-12);
+    }
+}
+
+BOOST_FIXTURE_TEST_CASE(neumann3dcuda_pyr, Helmholtz3D_Pyr)
+{
+    Configure();
+    NeuBndCond<>::create(fixt_explist)->apply(*fixt_out);
+    NeuBndCond<>::create(fixt_explist, "CUDA")->apply(*fixtcuda_out);
+    BOOST_TEST(fixtcuda_out->compare(*fixt_out, 1.0E-12));
+    boost::test_tools::output_test_stream output;
+    {
+        OutputIfNotMatch(fixtcuda_out->GetStorage().GetCPUPtr(),
+                         fixt_out->GetStorage().GetCPUPtr(), 1.0E-12);
+    }
+}
+
+BOOST_FIXTURE_TEST_CASE(neumann3dcuda_tet, Helmholtz3D_Tet)
+{
+    Configure();
+    NeuBndCond<>::create(fixt_explist)->apply(*fixt_out);
+    NeuBndCond<>::create(fixt_explist, "CUDA")->apply(*fixtcuda_out);
+    BOOST_TEST(fixtcuda_out->compare(*fixt_out, 1.0E-12));
+    boost::test_tools::output_test_stream output;
+    {
+        OutputIfNotMatch(fixtcuda_out->GetStorage().GetCPUPtr(),
+                         fixt_out->GetStorage().GetCPUPtr(), 1.0E-12);
+    }
+}
+
+BOOST_AUTO_TEST_SUITE_END()
--
GitLab
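
Note on the kernel launch pattern used by the patch: apply() sizes the grid as ceil(m_bndExpSize / m_blockSize) and launches one thread per boundary expansion; each thread accumulates that expansion's boundary coefficients into the local coefficient array through an index map, optionally flipping signs. The standalone sketch below is illustrative only: the kernel name, host data, and all sizes are invented for the example and are not part of the patch or of Nektar++. It compiles directly with nvcc and reproduces the same grid sizing and mapped accumulation. As in the patch, the accumulation is a plain +=, which is safe only if no two boundary entries map to the same local coefficient; otherwise the atomicAdd left commented out in NeuBndCondCUDAKernels.cuh would be needed.

#include <cstdio>
#include <vector>

#include <cuda_runtime.h>

// One thread per boundary region: accumulate that region's coefficients into
// the local-coefficient array through an index map, applying a sign factor.
__global__ void GatherBndCoeffs(size_t nregions, const int *offset,
                                const int *ncoeff, const int *map,
                                const double *sign, const double *bndcoeff,
                                double *localcoeff)
{
    size_t i = blockDim.x * blockIdx.x + threadIdx.x;
    if (i >= nregions)
    {
        return;
    }
    for (int j = 0; j < ncoeff[i]; ++j)
    {
        int k = offset[i] + j;
        localcoeff[map[k]] += sign[k] * bndcoeff[k];
    }
}

int main()
{
    // Two boundary regions holding 3 and 2 coefficients (made-up data).
    std::vector<int> offset = {0, 3}, ncoeff = {3, 2}, map = {0, 2, 4, 5, 7};
    std::vector<double> sign = {1, -1, 1, 1, -1}, bnd = {1, 2, 3, 4, 5};
    std::vector<double> local(8, 0.0);

    int *dOffset, *dNcoeff, *dMap;
    double *dSign, *dBnd, *dLocal;
    cudaMalloc(&dOffset, offset.size() * sizeof(int));
    cudaMalloc(&dNcoeff, ncoeff.size() * sizeof(int));
    cudaMalloc(&dMap, map.size() * sizeof(int));
    cudaMalloc(&dSign, sign.size() * sizeof(double));
    cudaMalloc(&dBnd, bnd.size() * sizeof(double));
    cudaMalloc(&dLocal, local.size() * sizeof(double));
    cudaMemcpy(dOffset, offset.data(), offset.size() * sizeof(int),
               cudaMemcpyHostToDevice);
    cudaMemcpy(dNcoeff, ncoeff.data(), ncoeff.size() * sizeof(int),
               cudaMemcpyHostToDevice);
    cudaMemcpy(dMap, map.data(), map.size() * sizeof(int),
               cudaMemcpyHostToDevice);
    cudaMemcpy(dSign, sign.data(), sign.size() * sizeof(double),
               cudaMemcpyHostToDevice);
    cudaMemcpy(dBnd, bnd.data(), bnd.size() * sizeof(double),
               cudaMemcpyHostToDevice);
    cudaMemcpy(dLocal, local.data(), local.size() * sizeof(double),
               cudaMemcpyHostToDevice);

    // Same grid sizing as apply(): ceil(nregions / blockSize).
    size_t nregions  = offset.size();
    size_t blockSize = 32;
    size_t gridSize  = nregions / blockSize + (nregions % blockSize == 0 ? 0 : 1);
    GatherBndCoeffs<<<gridSize, blockSize>>>(nregions, dOffset, dNcoeff, dMap,
                                             dSign, dBnd, dLocal);

    cudaMemcpy(local.data(), dLocal, local.size() * sizeof(double),
               cudaMemcpyDeviceToHost);
    for (double v : local)
    {
        std::printf("%g ", v); // expected: 1 0 -2 0 3 4 0 -5
    }
    std::printf("\n");

    // Device memory cleanup omitted for brevity.
    return 0;
}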