From 1a28303af19cc1725eb56a90d745782b356ddb10 Mon Sep 17 00:00:00 2001 From: Jacques Xing <jacques.xing@kcl.ac.uk> Date: Tue, 30 Jan 2024 14:08:02 +0000 Subject: [PATCH] Update ConjGrad StdMat operator implementation for MPI --- Field.hpp | 10 ++++--- MemoryRegionCPU.hpp | 4 +-- MemoryRegionCUDA.hpp | 8 ++--- Operators/ConjGrad/ConjGradStdMat.hpp | 38 +++++++++++++++-------- Operators/FwdTrans/FwdTransStdMat.hpp | 12 ++++---- Operators/HelmSolve/HelmSolveStdMat.hpp | 12 ++++---- Operators/Matrix/MatrixCUDA.hpp | 5 ++-- Operators/Matrix/MatrixStdMat.hpp | 7 +++-- Operators/NullPrecon/NullPreconCUDA.hpp | 3 +- Operators/Operator.hpp | 2 +- Operators/OperatorConjGrad.hpp | 1 - tests/CMakeLists.txt | 40 ++++++++++++++++++------- tests/init_diagpreconfields.hpp | 2 +- tests/init_fields.hpp | 1 - tests/init_fwdtransfields.hpp | 2 +- tests/init_helmsolvefields.hpp | 3 +- tests/init_nullpreconfields.hpp | 1 - tests/test_nullprecon.cpp | 1 - 18 files changed, 91 insertions(+), 61 deletions(-) diff --git a/Field.hpp b/Field.hpp index 613694aa..ea51fc98 100644 --- a/Field.hpp +++ b/Field.hpp @@ -222,8 +222,9 @@ public: size_t storage_size = std::accumulate( field.block_attributes.begin(), field.block_attributes.end(), 0, - [](size_t acc, const BlockAttributes &block) - { return acc + block.block_size; }); + [](size_t acc, const BlockAttributes &block) { + return acc + block.block_size; + }); // Create new TMemoryRegion and polymorphically store as MemoryRegionCPU field.m_storage = std::make_unique<TMemoryRegion<TData>>( @@ -252,8 +253,9 @@ public: size_t storage_size = std::accumulate( field.block_attributes.begin(), field.block_attributes.end(), 0, - [](size_t acc, const BlockAttributes &block) - { return acc + block.block_size; }); + [](size_t acc, const BlockAttributes &block) { + return acc + block.block_size; + }); // Create new TMemoryRegion and polymorphically store as MemoryRegionCPU field.m_storage = std::make_unique<TMemoryRegion<TData>>( diff --git a/MemoryRegionCPU.hpp b/MemoryRegionCPU.hpp index b7d762db..7dcd1c2b 100644 --- a/MemoryRegionCPU.hpp +++ b/MemoryRegionCPU.hpp @@ -13,8 +13,8 @@ template <typename TData> class MemoryRegionCPU { public: - MemoryRegionCPU() = delete; - MemoryRegionCPU(const MemoryRegionCPU &rhs) = delete; + MemoryRegionCPU() = delete; + MemoryRegionCPU(const MemoryRegionCPU &rhs) = delete; MemoryRegionCPU &operator=(const MemoryRegionCPU &rhs) = delete; MemoryRegionCPU(MemoryRegionCPU &&rhs) diff --git a/MemoryRegionCUDA.hpp b/MemoryRegionCUDA.hpp index d77cfce5..ce5469dd 100644 --- a/MemoryRegionCUDA.hpp +++ b/MemoryRegionCUDA.hpp @@ -42,10 +42,10 @@ public: void operator=(MemoryRegionCUDA &&rhs) { MemoryRegionCPU<TData>::operator=(std::move(rhs)); - m_device = rhs.m_device; - m_size = rhs.m_size; - rhs.m_device = nullptr; - rhs.m_size = 0; + m_device = rhs.m_device; + m_size = rhs.m_size; + rhs.m_device = nullptr; + rhs.m_size = 0; } virtual TData *GetCPUPtr() override diff --git a/Operators/ConjGrad/ConjGradStdMat.hpp b/Operators/ConjGrad/ConjGradStdMat.hpp index d99870f1..d3f9ac7a 100644 --- a/Operators/ConjGrad/ConjGradStdMat.hpp +++ b/Operators/ConjGrad/ConjGradStdMat.hpp @@ -41,6 +41,7 @@ public: m_maxIter, 5000); contfield->GetSession()->LoadParameter("IterativeSolverTolerance", m_tol, 1.0E-09); + m_rowComm = contfield->GetSession()->GetComm()->GetRowComm(); m_nloc = contfield->GetLocalToGlobalMap()->GetNumLocalCoeffs(); m_assmbScatr = AssmbScatr<TData>::create(this->m_expansionList); m_robBndCond = RobBndCond<TData>::create(this->m_expansionList); @@ -51,6 +52,12 @@ public: void apply(Field<TData, FieldState::Coeff> &in, Field<TData, FieldState::Coeff> &out) override { + if (m_rowComm->GetSize() > 1 && m_rowComm->GetRank() == 0) + { + std::cout << "Solve ConjGrad with " << m_rowComm->GetSize() + << " processors" << std::endl; + } + // Store pointers to temporary fields Array<OneD, TData> inArr(m_nloc, 0.0); Array<OneD, TData> outArr(m_nloc, 0.0); @@ -79,7 +86,7 @@ public: TData rho_new; TData mu; TData eps; - std::array<TData, 3> vExchange{0.0, 0.0, 0.0}; + Array<OneD, TData> vExchange(3, 0.0); // Copy data from input field auto *inarrptr = inArr.data(); @@ -106,14 +113,14 @@ public: vExchange[2] = std::inner_product(p_wk, p_wk + m_nloc, p_r_A, 0.0); // Perform inner-product exchanges - // m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum); + m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum); eps = vExchange[2]; // Calculate rhs magnitude m_assmbScatr->apply(m_r_A, m_wk); rhsMagnitude = std::inner_product(p_in, p_in + m_nloc, p_wk, 0.0); - // m_rowComm->AllReduce(rhsMagnitude, Nektar::LibUtilities::ReduceSum); + m_rowComm->AllReduce(rhsMagnitude, Nektar::LibUtilities::ReduceSum); rhsMagnitude = (rhsMagnitude > 1.0e-6) ? rhsMagnitude : 1.0; // If input residual is less than tolerance skip solve. @@ -134,7 +141,7 @@ public: vExchange[0] = std::inner_product(p_r_A, p_r_A + m_nloc, p_w_A, 0.0); vExchange[1] = std::inner_product(p_s_A, p_s_A + m_nloc, p_w_A, 0.0); - // m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum); + m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum); rho = vExchange[0]; mu = vExchange[1]; @@ -151,23 +158,27 @@ public: // Compute new search direction p_k std::transform(p_p_A, p_p_A + m_nloc, p_w_A, p_p_A, - [&beta](const TData &pElem, const TData &wElem) - { return beta * pElem + wElem; }); + [&beta](const TData &pElem, const TData &wElem) { + return beta * pElem + wElem; + }); // Compute new search direction q_k std::transform(p_q_A, p_q_A + m_nloc, p_s_A, p_q_A, - [&beta](const TData &qElem, const TData &sElem) - { return beta * qElem + sElem; }); + [&beta](const TData &qElem, const TData &sElem) { + return beta * qElem + sElem; + }); // Update solution x_{k+1} std::transform(p_p_A, p_p_A + m_nloc, p_out, p_out, - [&alpha](const TData &pElem, const TData &xElem) - { return alpha * pElem + xElem; }); + [&alpha](const TData &pElem, const TData &xElem) { + return alpha * pElem + xElem; + }); // Update residual vector r_{k+1} std::transform(p_q_A, p_q_A + m_nloc, p_r_A, p_r_A, - [&alpha](const TData &qElem, const TData &rElem) - { return -alpha * qElem + rElem; }); + [&alpha](const TData &qElem, const TData &rElem) { + return -alpha * qElem + rElem; + }); // Apply preconditioner this->m_precon->apply(m_r_A, m_w_A); @@ -191,7 +202,7 @@ public: vExchange[2] = std::inner_product(p_wk, p_wk + m_nloc, p_r_A, 0.0); // Perform inner-product exchanges - // m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum); + m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum); rho_new = vExchange[0]; mu = vExchange[1]; @@ -244,6 +255,7 @@ public: static std::string className; protected: + LibUtilities::CommSharedPtr m_rowComm; std::shared_ptr<OperatorAssmbScatr<TData>> m_assmbScatr; std::shared_ptr<OperatorRobBndCond<TData>> m_robBndCond; Field<TData, FieldState::Coeff> m_w_A; diff --git a/Operators/FwdTrans/FwdTransStdMat.hpp b/Operators/FwdTrans/FwdTransStdMat.hpp index 37a8a2bd..d15ccd6e 100644 --- a/Operators/FwdTrans/FwdTransStdMat.hpp +++ b/Operators/FwdTrans/FwdTransStdMat.hpp @@ -50,9 +50,9 @@ public: // Handle Dirichlet BCs m_DirBCOp->apply(out); m_MassOp->apply(out, m_tmp); - std::transform(rhsptr, rhsptr + nloc, tmpptr, rhsptr, - [](const TData &rhs, const TData &dir) - { return rhs - dir; }); + std::transform( + rhsptr, rhsptr + nloc, tmpptr, rhsptr, + [](const TData &rhs, const TData &dir) { return rhs - dir; }); // Handle Robin BCs m_RobBCOp->apply(out, m_rhs, true); @@ -61,9 +61,9 @@ public: m_CGOp->apply(m_rhs, m_tmp); // Add Dirichlet BCs - std::transform(outptr, outptr + nloc, tmpptr, outptr, - [](const TData &x, const TData &diff) - { return x + diff; }); + std::transform( + outptr, outptr + nloc, tmpptr, outptr, + [](const TData &x, const TData &diff) { return x + diff; }); } void setPrecon( diff --git a/Operators/HelmSolve/HelmSolveStdMat.hpp b/Operators/HelmSolve/HelmSolveStdMat.hpp index 30e8fe5f..1d15b369 100644 --- a/Operators/HelmSolve/HelmSolveStdMat.hpp +++ b/Operators/HelmSolve/HelmSolveStdMat.hpp @@ -57,9 +57,9 @@ public: // Handle Dirichlet BCs m_DirBCOp->apply(out); m_HelmOp->apply(out, m_tmp); - std::transform(rhsptr, rhsptr + nloc, tmpptr, rhsptr, - [](const TData &rhs, const TData &dir) - { return rhs - dir; }); + std::transform( + rhsptr, rhsptr + nloc, tmpptr, rhsptr, + [](const TData &rhs, const TData &dir) { return rhs - dir; }); // Handle Robin BCs m_RobBCOp->apply(out, m_rhs, true); @@ -68,9 +68,9 @@ public: m_CGOp->apply(m_rhs, m_tmp); // Add Dirichlet BCs - std::transform(outptr, outptr + nloc, tmpptr, outptr, - [](const TData &x, const TData &diff) - { return x + diff; }); + std::transform( + outptr, outptr + nloc, tmpptr, outptr, + [](const TData &x, const TData &diff) { return x + diff; }); } void setLambda(const TData &lambda) diff --git a/Operators/Matrix/MatrixCUDA.hpp b/Operators/Matrix/MatrixCUDA.hpp index 0ab87111..eb4575d3 100644 --- a/Operators/Matrix/MatrixCUDA.hpp +++ b/Operators/Matrix/MatrixCUDA.hpp @@ -21,8 +21,9 @@ public: // expansionlist auto blocks = GetBlockAttributes(TFieldState, expansionList); m_size = std::accumulate(blocks.begin(), blocks.end(), 0, - [](size_t acc, const BlockAttributes &block) - { return acc + block.block_size; }); + [](size_t acc, const BlockAttributes &block) { + return acc + block.block_size; + }); // create memory for square matrix of given size // allocate memory device cudaMalloc((void **)&m_matrix, sizeof(TData) * m_size * m_size); diff --git a/Operators/Matrix/MatrixStdMat.hpp b/Operators/Matrix/MatrixStdMat.hpp index fd045a13..bf823137 100644 --- a/Operators/Matrix/MatrixStdMat.hpp +++ b/Operators/Matrix/MatrixStdMat.hpp @@ -1,7 +1,7 @@ #pragma once -#include <numeric> #include "Operators/OperatorMatrix.hpp" +#include <numeric> namespace Nektar::Operators::detail { @@ -18,8 +18,9 @@ public: // expansionlist auto blocks = GetBlockAttributes(TFieldState, expansionList); m_size = std::accumulate(blocks.begin(), blocks.end(), 0, - [](size_t acc, const BlockAttributes &block) - { return acc + block.block_size; }); + [](size_t acc, const BlockAttributes &block) { + return acc + block.block_size; + }); // create memory for square matrix of given size m_matrix = std::vector<TData>(m_size * m_size); diff --git a/Operators/NullPrecon/NullPreconCUDA.hpp b/Operators/NullPrecon/NullPreconCUDA.hpp index a9561267..bb93c12d 100644 --- a/Operators/NullPrecon/NullPreconCUDA.hpp +++ b/Operators/NullPrecon/NullPreconCUDA.hpp @@ -9,8 +9,7 @@ namespace Nektar::Operators::detail { template <typename TData> -class OperatorNullPreconImpl<TData, ImplCUDA> - : public OperatorNullPrecon<TData> +class OperatorNullPreconImpl<TData, ImplCUDA> : public OperatorNullPrecon<TData> { public: OperatorNullPreconImpl(const MultiRegions::ExpListSharedPtr &expansionList) diff --git a/Operators/Operator.hpp b/Operators/Operator.hpp index 2af377b0..d03e3ca1 100644 --- a/Operators/Operator.hpp +++ b/Operators/Operator.hpp @@ -134,7 +134,7 @@ protected: { auto expPtr = this->m_expansionList->GetExp(e); auto &df = expPtr->GetMetricInfo()->GetDerivFactors( - expPtr->GetPointsKeys()); + expPtr->GetPointsKeys()); size_t nqTot = expPtr->GetTotPoints(); if (expPtr->GetMetricInfo()->GetGtype() == SpatialDomains::eDeformed) diff --git a/Operators/OperatorConjGrad.hpp b/Operators/OperatorConjGrad.hpp index cd52fb51..ad1bf8ee 100644 --- a/Operators/OperatorConjGrad.hpp +++ b/Operators/OperatorConjGrad.hpp @@ -27,7 +27,6 @@ public: apply(in, out); } - void setLHS(const std::shared_ptr<OperatorLinear<TData, FieldState::Coeff, FieldState::Coeff>> &ptr) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a37c18c3..eeeba79f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,3 +1,7 @@ +find_package( + MPI +) + find_package( Boost REQUIRED @@ -541,11 +545,19 @@ IF (NEKTAR_USE_CUDA) ) ENDIF() -add_test( - NAME FwdTrans - COMMAND test_fwdtrans - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" -) +IF (${MPI_FOUND}) + add_test( + NAME FwdTrans + COMMAND mpirun -np 2 "${CMAKE_BINARY_DIR}/tests/test_fwdtrans" + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + ) +ELSE() + add_test( + NAME FwdTrans + COMMAND test_fwdtrans + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + ) +ENDIF() IF (NEKTAR_USE_CUDA) add_test( @@ -569,11 +581,19 @@ IF (NEKTAR_USE_CUDA) ) ENDIF() -add_test( - NAME HelmSolve - COMMAND test_helmsolve - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" -) +IF (${MPI_FOUND}) + add_test( + NAME HelmSolve + COMMAND mpirun -np 2 "${CMAKE_BINARY_DIR}/tests/test_helmsolve" + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + ) +ELSE() + add_test( + NAME HelmSolve + COMMAND test_helmsolve + WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" + ) +ENDIF() IF (NEKTAR_USE_CUDA) add_test( diff --git a/tests/init_diagpreconfields.hpp b/tests/init_diagpreconfields.hpp index 913a6d55..2be53bb5 100644 --- a/tests/init_diagpreconfields.hpp +++ b/tests/init_diagpreconfields.hpp @@ -104,7 +104,7 @@ class Helmholtz2D_Tri_Quad : public DiagPreconField public: Helmholtz2D_Tri_Quad() { - //meshName = "run/Helmholtz2D_P7_AllBCs.xml"; + // meshName = "run/Helmholtz2D_P7_AllBCs.xml"; meshName = "run/Helmholtz2D_varP.xml"; } }; diff --git a/tests/init_fields.hpp b/tests/init_fields.hpp index 97a752a9..ea357ff9 100644 --- a/tests/init_fields.hpp +++ b/tests/init_fields.hpp @@ -96,7 +96,6 @@ public: session, graph, "u", true, false, Collections::eNoCollection); } - else if constexpr (std::is_same_v<TExpList, MultiRegions::DisContField>) { fixt_explist = diff --git a/tests/init_fwdtransfields.hpp b/tests/init_fwdtransfields.hpp index f723eea7..ed8f7180 100644 --- a/tests/init_fwdtransfields.hpp +++ b/tests/init_fwdtransfields.hpp @@ -144,7 +144,7 @@ class Helmholtz2D_Tri_Quad : public FwdTransField public: Helmholtz2D_Tri_Quad() { - //meshName = "run/Helmholtz2D_P7_AllBCs.xml"; + // meshName = "run/Helmholtz2D_P7_AllBCs.xml"; meshName = "run/Helmholtz2D_varP.xml"; } }; diff --git a/tests/init_helmsolvefields.hpp b/tests/init_helmsolvefields.hpp index 53f5aec0..b6584565 100644 --- a/tests/init_helmsolvefields.hpp +++ b/tests/init_helmsolvefields.hpp @@ -148,7 +148,7 @@ class Helmholtz2D_Tri_Quad : public HelmSolveField public: Helmholtz2D_Tri_Quad() { - //meshName = "run/Helmholtz2D_P7_AllBCs.xml"; + // meshName = "run/Helmholtz2D_P7_AllBCs.xml"; meshName = "run/Helmholtz2D_varP.xml"; } }; @@ -188,4 +188,3 @@ public: meshName = "run/Helmholtz3D_Tet_VarP.xml"; } }; - diff --git a/tests/init_nullpreconfields.hpp b/tests/init_nullpreconfields.hpp index 80a6633d..44ab2154 100644 --- a/tests/init_nullpreconfields.hpp +++ b/tests/init_nullpreconfields.hpp @@ -137,4 +137,3 @@ public: meshName = "run/Helmholtz3D_Tet_VarP.xml"; } }; - diff --git a/tests/test_nullprecon.cpp b/tests/test_nullprecon.cpp index 05b567d0..b814f775 100644 --- a/tests/test_nullprecon.cpp +++ b/tests/test_nullprecon.cpp @@ -11,7 +11,6 @@ BOOST_AUTO_TEST_SUITE(TestNullPrecon) - BOOST_FIXTURE_TEST_CASE(nullprecon_seg, Helmholtz1D_Seg) { Configure(); -- GitLab