From 4aee847346f8dd3f52d8ddeece469913c80e9a67 Mon Sep 17 00:00:00 2001 From: Alexandra Liosi <a.liosi22@imperial.ac.uk> Date: Fri, 13 Sep 2024 21:03:28 +0000 Subject: [PATCH] Matrix-Free PhysInterp1DScaled --- CHANGELOG.md | 1 + library/Collections/BwdTrans.cpp | 2 + library/Collections/Helmholtz.cpp | 5 + library/Collections/IProductWRTBase.cpp | 3 + library/Collections/IProductWRTDerivBase.cpp | 5 + .../LinearAdvectionDiffusionReaction.cpp | 6 + library/Collections/PhysDeriv.cpp | 3 + library/Collections/PhysInterp1DScaled.cpp | 353 ++++++------ library/Demos/MultiRegions/Tests/test.opt | 28 + library/MatrixFreeOps/BwdTrans.h | 35 +- library/MatrixFreeOps/CMakeLists.txt | 2 +- library/MatrixFreeOps/Helmholtz.h | 45 +- library/MatrixFreeOps/IProduct.h | 35 +- library/MatrixFreeOps/IProductWRTDerivBase.h | 35 +- .../LinearAdvectionDiffusionReaction.h | 57 +- library/MatrixFreeOps/Operator.hpp | 124 ++++- library/MatrixFreeOps/PhysDeriv.h | 37 +- library/MatrixFreeOps/PhysInterp1DScaled.h | 501 ++++++++++++++++++ .../PhysInterp1DScaledKernels.hpp | 225 ++++++++ library/MatrixFreeOps/SwitchNodesPoints.h | 16 - library/MatrixFreeOps/SwitchPoints.h | 10 - library/MultiRegions/ExpList.cpp | 1 + .../Collections/TestHexCollection.cpp | 189 ++++++- .../Collections/TestPrismCollection.cpp | 202 +++++++ .../Collections/TestPyrCollection.cpp | 185 +++++++ .../Collections/TestQuadCollection.cpp | 153 ++++++ .../Collections/TestSegCollection.cpp | 140 +++++ .../Collections/TestTetCollection.cpp | 198 ++++++- .../Collections/TestTriCollection.cpp | 171 +++++- 29 files changed, 2414 insertions(+), 353 deletions(-) create mode 100644 library/MatrixFreeOps/PhysInterp1DScaled.h create mode 100644 library/MatrixFreeOps/PhysInterp1DScaledKernels.hpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 3ba97fbb72..874f99f113 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ v5.7.0 - Modified MatrixFreeOp library switch initialisation to use BOOST_PP (!1794) - Fix memory-leak with LowEnergyBlock preconditioner for time-updated matrices (!1627) - Fix Fourier expansion integration weights are related test (!1803) +- Introduced the MatrixFree implementation for the PhysInterp1DScaled operator and tidying up MatrixFreeOps (!1812) - Separate MeshGraph input/output functions into a new class (!1778) - Added checkpoint file writing start time in the fieldconvert filter (!1789) - Fix fieldconvert filter incorrect boundary values (!1789) diff --git a/library/Collections/BwdTrans.cpp b/library/Collections/BwdTrans.cpp index 2ee0f0a314..7581727dd7 100644 --- a/library/Collections/BwdTrans.cpp +++ b/library/Collections/BwdTrans.cpp @@ -209,6 +209,8 @@ private: auto oper = MatrixFree::GetOperatorFactory().CreateInstance( op_string, basis, pCollExp.size()); + oper->SetUpBdata(basis); + m_oper = std::dynamic_pointer_cast<MatrixFree::BwdTrans>(oper); ASSERTL0(m_oper, "Failed to cast pointer."); } diff --git a/library/Collections/Helmholtz.cpp b/library/Collections/Helmholtz.cpp index 001b3bbdf9..3a9a430ca1 100644 --- a/library/Collections/Helmholtz.cpp +++ b/library/Collections/Helmholtz.cpp @@ -745,6 +745,11 @@ private: // Store derivative factor oper->SetDF(pGeomData->GetDerivFactorsInterLeave(pCollExp, m_nElmtPad)); + oper->SetUpBdata(basis); + oper->SetUpDBdata(basis); + oper->SetUpZW(basis); + oper->SetUpD(basis); + m_oper = std::dynamic_pointer_cast<MatrixFree::Helmholtz>(oper); ASSERTL0(m_oper, "Failed to cast pointer."); diff --git a/library/Collections/IProductWRTBase.cpp b/library/Collections/IProductWRTBase.cpp index d50bc39725..847e97f7b4 100644 --- a/library/Collections/IProductWRTBase.cpp +++ b/library/Collections/IProductWRTBase.cpp @@ -228,6 +228,9 @@ private: auto oper = MatrixFree::GetOperatorFactory().CreateInstance( op_string, basis, pCollExp.size()); + oper->SetUpBdata(basis); + oper->SetUpZW(basis); + // Set Jacobian oper->SetJac(pGeomData->GetJacInterLeave(pCollExp, m_nElmtPad)); diff --git a/library/Collections/IProductWRTDerivBase.cpp b/library/Collections/IProductWRTDerivBase.cpp index 534b85eb26..f708a7c40c 100644 --- a/library/Collections/IProductWRTDerivBase.cpp +++ b/library/Collections/IProductWRTDerivBase.cpp @@ -354,6 +354,11 @@ private: auto oper = MatrixFree::GetOperatorFactory().CreateInstance( op_string, basis, pCollExp.size()); + // set up required copies for operations + oper->SetUpBdata(basis); + oper->SetUpDBdata(basis); + oper->SetUpZW(basis); + // Set Jacobian oper->SetJac(pGeomData->GetJacInterLeave(pCollExp, m_nElmtPad)); diff --git a/library/Collections/LinearAdvectionDiffusionReaction.cpp b/library/Collections/LinearAdvectionDiffusionReaction.cpp index cc917c236a..2b2e671001 100644 --- a/library/Collections/LinearAdvectionDiffusionReaction.cpp +++ b/library/Collections/LinearAdvectionDiffusionReaction.cpp @@ -765,6 +765,12 @@ private: // Get N quadpoints with padding m_nqtot = m_numElmt * pCollExp[0]->GetStdExp()->GetTotPoints(); + // set up required copies for operations + oper->SetUpBdata(basis); + oper->SetUpDBdata(basis); + oper->SetUpD(basis); + oper->SetUpZW(basis); + // Set Jacobian oper->SetJac(pGeomData->GetJacInterLeave(pCollExp, m_nElmtPad)); diff --git a/library/Collections/PhysDeriv.cpp b/library/Collections/PhysDeriv.cpp index 7c06af8051..958a16a8fd 100644 --- a/library/Collections/PhysDeriv.cpp +++ b/library/Collections/PhysDeriv.cpp @@ -362,6 +362,9 @@ private: auto oper = MatrixFree::GetOperatorFactory().CreateInstance( op_string, basis, pCollExp.size()); + oper->SetUpZW(basis); + oper->SetUpD(basis); + // Set derivative factors oper->SetDF(pGeomData->GetDerivFactorsInterLeave(pCollExp, m_nElmtPad)); diff --git a/library/Collections/PhysInterp1DScaled.cpp b/library/Collections/PhysInterp1DScaled.cpp index 1f7c4bdb18..c550866de3 100644 --- a/library/Collections/PhysInterp1DScaled.cpp +++ b/library/Collections/PhysInterp1DScaled.cpp @@ -63,14 +63,12 @@ class PhysInterp1DScaled_Helper : virtual public Operator { public: void UpdateFactors(StdRegions::FactorMap factors) override - { if (factors == m_factors) { return; } m_factors = factors; - // Set scaling factor for the PhysInterp1DScaled function [[maybe_unused]] auto x = factors.find(StdRegions::eFactorConst); ASSERTL1( @@ -78,6 +76,15 @@ public: "Constant factor not defined: " + std::string( StdRegions::ConstFactorTypeMap[StdRegions::eFactorConst])); + + NekDouble scale = m_factors[StdRegions::eFactorConst]; + + int shape_dimension = m_stdExp->GetShapeDimension(); + m_outputSize = m_numElmt; // initializing m_outputSize + for (int i = 0; i < shape_dimension; ++i) + { + m_outputSize *= (int)(m_stdExp->GetNumPoints(i) * scale); + } } protected: @@ -93,7 +100,6 @@ protected: { scale = 1.5; } - // expect input to be number of elements by the number of quad points m_inputSize = m_numElmt * m_stdExp->GetTotPoints(); // expect input to be number of elements by the number of quad points @@ -104,83 +110,14 @@ protected: m_outputSize *= (int)(m_stdExp->GetNumPoints(i) * scale); } } + StdRegions::FactorMap m_factors; // map for storing the scaling factor of the operator }; /** - * @brief PhysInterp1DScaled operator using standard matrix approach. Currently, - * it is an identical copy of the BwdTrans operator. + * @brief PhysInterp1DScaled operator using matrix free implementation. */ - -class PhysInterp1DScaled_StdMat final : virtual public Operator, - PhysInterp1DScaled_Helper -{ -public: - OPERATOR_CREATE(PhysInterp1DScaled_StdMat) - ~PhysInterp1DScaled_StdMat() final - { - } - - void operator()(const Array<OneD, const NekDouble> &input, - Array<OneD, NekDouble> &output, - [[maybe_unused]] Array<OneD, NekDouble> &output1, - [[maybe_unused]] Array<OneD, NekDouble> &output2, - [[maybe_unused]] Array<OneD, NekDouble> &wsp) override - { - Blas::Dgemm('N', 'N', m_mat->GetRows(), m_numElmt, m_mat->GetColumns(), - 1.0, m_mat->GetRawPtr(), m_mat->GetRows(), input.get(), - m_stdExp->GetNcoeffs(), 0.0, output.get(), - m_stdExp->GetTotPoints()); - } - - void operator()([[maybe_unused]] int dir, - [[maybe_unused]] const Array<OneD, const NekDouble> &input, - [[maybe_unused]] Array<OneD, NekDouble> &output, - [[maybe_unused]] Array<OneD, NekDouble> &wsp) final - { - ASSERTL0(false, "Not valid for this operator."); - } - - void UpdateFactors(StdRegions::FactorMap factors) override - { - if (factors == m_factors) - { - return; - } - - m_factors = factors; - - // Set scaling factor for the PhysInterp1DScaled function - [[maybe_unused]] auto x = factors.find(StdRegions::eFactorConst); - ASSERTL1( - x != factors.end(), - "Constant factor not defined: " + - std::string( - StdRegions::ConstFactorTypeMap[StdRegions::eFactorConst])); - } - -protected: - DNekMatSharedPtr m_mat; - StdRegions::FactorMap m_factors; // Initially, PhysInterp1DScaled was taking - // as input a single scaling factor - // This single scaling factor will be expressed into a matrix where each - // element has the same value so that matrix - vector multiplication - // optimization by BLAS is implemented - -private: - PhysInterp1DScaled_StdMat( - vector<StdRegions::StdExpansionSharedPtr> pCollExp, - CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors) - : Operator(pCollExp, pGeomData, factors), PhysInterp1DScaled_Helper() - { - } -}; - -/** - * @brief PhysInterp1DScaled operator using matrix free operators. - */ - class PhysInterp1DScaled_MatrixFree final : virtual public Operator, MatrixFreeBase, PhysInterp1DScaled_Helper @@ -188,9 +125,7 @@ class PhysInterp1DScaled_MatrixFree final : virtual public Operator, public: OPERATOR_CREATE(PhysInterp1DScaled_MatrixFree) - ~PhysInterp1DScaled_MatrixFree() final - { - } + ~PhysInterp1DScaled_MatrixFree() final = default; void operator()(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output0, @@ -207,30 +142,56 @@ public: [[maybe_unused]] Array<OneD, NekDouble> &wsp) final { NEKERROR(ErrorUtil::efatal, - "BwdTrans_MatrixFree: Not valid for this operator."); + "PhysInterp1DScaled_MatrixFree: Not valid for this operator."); } void UpdateFactors([[maybe_unused]] StdRegions::FactorMap factors) override { - ASSERTL0(false, "Not valid for this operator."); + + // Set lambda for this call + auto x = factors.find(StdRegions::eFactorConst); + ASSERTL1( + x != factors.end(), + "Constant factor not defined: " + + std::string( + StdRegions::ConstFactorTypeMap[StdRegions::eFactorConst])); + // Update the factors member inside PhysInterp1DScaled_MatrixFree + // class + m_factors = factors; + + const auto dim = m_stdExp->GetShapeDimension(); + + // Definition of basis vector + std::vector<LibUtilities::BasisSharedPtr> basis(dim); + for (unsigned int i = 0; i < dim; ++i) + { + basis[i] = m_stdExp->GetBasis(i); + } + + // Update the scaling factor inside MatrixFreeOps using the SetLamda + // routine + m_oper->SetScalingFactor(x->second); + m_oper->SetUpInterp1D(basis, m_factors[StdRegions::eFactorConst]); + m_nOut = m_nElmtPad * m_outputSize / m_numElmt; } private: - std::shared_ptr<MatrixFree::BwdTrans> m_oper; + std::shared_ptr<MatrixFree::PhysInterp1DScaled> m_oper; PhysInterp1DScaled_MatrixFree( vector<StdRegions::StdExpansionSharedPtr> pCollExp, CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors) : Operator(pCollExp, pGeomData, factors), - MatrixFreeBase(pCollExp[0]->GetStdExp()->GetNcoeffs(), + MatrixFreeBase(pCollExp[0]->GetStdExp()->GetTotPoints(), pCollExp[0]->GetStdExp()->GetTotPoints(), pCollExp.size()), PhysInterp1DScaled_Helper() { - // Basis vector. const auto dim = pCollExp[0]->GetStdExp()->GetShapeDimension(); + + // Definition of basis vector std::vector<LibUtilities::BasisSharedPtr> basis(dim); - for (auto i = 0; i < dim; ++i) + for (unsigned int i = 0; i < dim; ++i) { basis[i] = pCollExp[0]->GetBasis(i); } @@ -239,73 +200,77 @@ private: auto shapeType = pCollExp[0]->GetStdExp()->DetShapeType(); // Generate operator string and create operator. - std::string op_string = "BwdTrans"; + std::string op_string = "PhysInterp1DScaled"; op_string += MatrixFree::GetOpstring(shapeType, false); auto oper = MatrixFree::GetOperatorFactory().CreateInstance( op_string, basis, pCollExp.size()); - m_oper = std::dynamic_pointer_cast<MatrixFree::BwdTrans>(oper); + m_oper = + std::dynamic_pointer_cast<MatrixFree::PhysInterp1DScaled>(oper); ASSERTL0(m_oper, "Failed to cast pointer."); - } -}; - -/** - * @brief PhysInterp1DScaled operator using default StdRegions operator - */ - -class PhysInterp1DScaled_IterPerExp final : virtual public Operator, - PhysInterp1DScaled_Helper -{ -public: - OPERATOR_CREATE(PhysInterp1DScaled_IterPerExp) - - ~PhysInterp1DScaled_IterPerExp() final - { - } - void operator()(const Array<OneD, const NekDouble> &input, - Array<OneD, NekDouble> &output, - [[maybe_unused]] Array<OneD, NekDouble> &output1, - [[maybe_unused]] Array<OneD, NekDouble> &output2, - [[maybe_unused]] Array<OneD, NekDouble> &wsp) override - { - const int nCoeffs = m_stdExp->GetNcoeffs(); - const int nPhys = m_stdExp->GetTotPoints(); - Array<OneD, NekDouble> tmp; - - for (int i = 0; i < m_numElmt; ++i) + NekDouble scale; // declaration of the scaling factor to be used for the + // output size + if (factors[StdRegions::eFactorConst] != 0) { - m_stdExp->BwdTrans(input + i * nCoeffs, tmp = output + i * nPhys); + m_factors = factors; + scale = m_factors[StdRegions::eFactorConst]; } + else + { + scale = 1.5; + m_factors[StdRegions::eFactorConst] = 1.5; + } + // Update the scaling factor inside MatrixFreeOps using the SetLamda + // routine + m_oper->SetScalingFactor(scale); + m_oper->SetUpInterp1D(basis, scale); + m_nOut = m_nElmtPad * m_outputSize / m_numElmt; } +}; - void operator()([[maybe_unused]] int dir, - [[maybe_unused]] const Array<OneD, const NekDouble> &input, - [[maybe_unused]] Array<OneD, NekDouble> &output, - [[maybe_unused]] Array<OneD, NekDouble> &wsp) final - { - ASSERTL0(false, "Not valid for this operator."); - } - - void UpdateFactors([[maybe_unused]] StdRegions::FactorMap factors) override - { - m_factors = factors; - } - -protected: - StdRegions::FactorMap m_factors; // Initially, PhysInterp1DScaled was taking - // as input a single scaling factor - // This single scaling factor will be expressed into a matrix where each - // element has the same value so that matrix - vector multiplication - // optimization by BLAS is implemented - -private: - PhysInterp1DScaled_IterPerExp( - vector<StdRegions::StdExpansionSharedPtr> pCollExp, - CoalescedGeomDataSharedPtr pGeomData, StdRegions::FactorMap factors) - : Operator(pCollExp, pGeomData, factors), PhysInterp1DScaled_Helper() - { - } +/// Factory initialisation for the PhysInterp1DScaled_MatrixFree operators +OperatorKey PhysInterp1DScaled_MatrixFree::m_typeArr[] = { + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eSegment, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_Seg"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eTriangle, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_Tri"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eTriangle, ePhysInterp1DScaled, eMatrixFree, true), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_NodalTri"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eQuadrilateral, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_Quad"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eTetrahedron, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_Tet"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eTetrahedron, ePhysInterp1DScaled, eMatrixFree, true), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_NodalTet"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(ePyramid, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_Pyr"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(ePrism, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_Prism"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(ePrism, ePhysInterp1DScaled, eMatrixFree, true), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_MatrixFree_NodalPrism"), + GetOperatorFactory().RegisterCreatorFunction( + OperatorKey(eHexahedron, ePhysInterp1DScaled, eMatrixFree, false), + PhysInterp1DScaled_MatrixFree::create, + "PhysInterp1DScaled_NoCollection_Hex"), }; /** @@ -340,17 +305,18 @@ public: // same for each element inside a single collection int pt0 = m_expList[0]->GetNumPoints(0); int npt0 = (int)pt0 * scale; + // current points key - use first entry + LibUtilities::PointsKey PointsKey0( + pt0, m_expList[0]->GetPointsType(0)); + // get new points key + LibUtilities::PointsKey newPointsKey0( + npt0, m_expList[0]->GetPointsType(0)); + // Interpolate points; + I0 = LibUtilities::PointsManager()[PointsKey0]->GetI( + newPointsKey0); + for (int i = 0; i < m_numElmt; ++i) { - // current points key - LibUtilities::PointsKey PointsKey0( - pt0, m_expList[i]->GetPointsType(0)); - // get new points key - LibUtilities::PointsKey newPointsKey0( - npt0, m_expList[i]->GetPointsType(0)); - // Interpolate points; - I0 = LibUtilities::PointsManager()[PointsKey0]->GetI( - newPointsKey0); Blas::Dgemv('N', npt0, pt0, 1.0, I0->GetPtr().get(), npt0, &input[cnt], 1, 0.0, &output[cnt1], 1); @@ -371,25 +337,25 @@ public: // workspace declaration Array<OneD, NekDouble> wsp(npt1 * pt0); // fnp0*tnp1 + // current points key - using first entry + LibUtilities::PointsKey PointsKey0( + pt0, m_expList[0]->GetPointsType(0)); + LibUtilities::PointsKey PointsKey1( + pt1, m_expList[0]->GetPointsType(1)); + // get new points key + LibUtilities::PointsKey newPointsKey0( + npt0, m_expList[0]->GetPointsType(0)); + LibUtilities::PointsKey newPointsKey1( + npt1, m_expList[0]->GetPointsType(1)); + + // Interpolate points; + I0 = LibUtilities::PointsManager()[PointsKey0]->GetI( + newPointsKey0); + I1 = LibUtilities::PointsManager()[PointsKey1]->GetI( + newPointsKey1); + for (int i = 0; i < m_numElmt; ++i) { - // current points key - LibUtilities::PointsKey PointsKey0( - pt0, m_expList[i]->GetPointsType(0)); - LibUtilities::PointsKey PointsKey1( - pt1, m_expList[i]->GetPointsType(1)); - // get new points key - LibUtilities::PointsKey newPointsKey0( - npt0, m_expList[i]->GetPointsType(0)); - LibUtilities::PointsKey newPointsKey1( - npt1, m_expList[i]->GetPointsType(1)); - - // Interpolate points; - I0 = LibUtilities::PointsManager()[PointsKey0]->GetI( - newPointsKey0); - I1 = LibUtilities::PointsManager()[PointsKey1]->GetI( - newPointsKey1); - Blas::Dgemm('N', 'T', pt0, npt1, pt1, 1.0, &input[cnt], pt0, I1->GetPtr().get(), npt1, 0.0, wsp.get(), pt0); @@ -415,30 +381,30 @@ public: Array<OneD, NekDouble> wsp1(npt0 * npt1 * pt2); Array<OneD, NekDouble> wsp2(npt0 * pt1 * pt2); + // current points key + LibUtilities::PointsKey PointsKey0( + pt0, m_expList[0]->GetPointsType(0)); + LibUtilities::PointsKey PointsKey1( + pt1, m_expList[0]->GetPointsType(1)); + LibUtilities::PointsKey PointsKey2( + pt2, m_expList[0]->GetPointsType(2)); + // get new points key + LibUtilities::PointsKey newPointsKey0( + npt0, m_expList[0]->GetPointsType(0)); + LibUtilities::PointsKey newPointsKey1( + npt1, m_expList[0]->GetPointsType(1)); + LibUtilities::PointsKey newPointsKey2( + npt2, m_expList[0]->GetPointsType(2)); + + I0 = LibUtilities::PointsManager()[PointsKey0]->GetI( + newPointsKey0); + I1 = LibUtilities::PointsManager()[PointsKey1]->GetI( + newPointsKey1); + I2 = LibUtilities::PointsManager()[PointsKey2]->GetI( + newPointsKey2); + for (int i = 0; i < m_numElmt; ++i) { - // current points key - LibUtilities::PointsKey PointsKey0( - pt0, m_expList[i]->GetPointsType(0)); - LibUtilities::PointsKey PointsKey1( - pt1, m_expList[i]->GetPointsType(1)); - LibUtilities::PointsKey PointsKey2( - pt2, m_expList[i]->GetPointsType(2)); - // get new points key - LibUtilities::PointsKey newPointsKey0( - npt0, m_expList[i]->GetPointsType(0)); - LibUtilities::PointsKey newPointsKey1( - npt1, m_expList[i]->GetPointsType(1)); - LibUtilities::PointsKey newPointsKey2( - npt2, m_expList[i]->GetPointsType(2)); - - I0 = LibUtilities::PointsManager()[PointsKey0]->GetI( - newPointsKey0); - I1 = LibUtilities::PointsManager()[PointsKey1]->GetI( - newPointsKey1); - I2 = LibUtilities::PointsManager()[PointsKey2]->GetI( - newPointsKey2); - // Interpolate points Blas::Dgemm('N', 'N', npt0, pt1 * pt2, pt0, 1.0, I0->GetPtr().get(), npt0, &input[cnt], pt0, 0.0, @@ -483,14 +449,6 @@ public: m_factors = factors; } -protected: - vector<StdRegions::StdExpansionSharedPtr> m_expList; - StdRegions::FactorMap m_factors; // Initially, PhysInterp1DScaled was taking - // as input a single scaling factor - // This single scaling factor will be expressed into a matrix where each - // element has the same value so that matrix - vector multiplication - // optimization by BLAS is implemented - private: PhysInterp1DScaled_NoCollection( vector<StdRegions::StdExpansionSharedPtr> pCollExp, @@ -500,6 +458,9 @@ private: m_expList = pCollExp; m_factors = factors; } + + StdRegions::FactorMap m_factors; + vector<StdRegions::StdExpansionSharedPtr> m_expList; }; /// Factory initialisation for the PhysInterp1DScaled_NoCollection operators diff --git a/library/Demos/MultiRegions/Tests/test.opt b/library/Demos/MultiRegions/Tests/test.opt index 6d85804037..723fd47d9d 100644 --- a/library/Demos/MultiRegions/Tests/test.opt +++ b/library/Demos/MultiRegions/Tests/test.opt @@ -4,22 +4,50 @@ <OPERATOR TYPE="BwdTrans"> <ELEMENT TYPE="T" ORDER="*" IMPTYPE="NoCollection" /> <ELEMENT TYPE="Q" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="H" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="R" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="P" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="A" ORDER="*" IMPTYPE="NoCollection" /> </OPERATOR> <OPERATOR TYPE="Helmholtz"> <ELEMENT TYPE="T" ORDER="*" IMPTYPE="NoCollection" /> <ELEMENT TYPE="Q" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="H" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="R" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="P" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="A" ORDER="*" IMPTYPE="NoCollection" /> </OPERATOR> <OPERATOR TYPE="IProductWRTBase"> <ELEMENT TYPE="T" ORDER="*" IMPTYPE="NoCollection" /> <ELEMENT TYPE="Q" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="H" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="R" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="P" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="A" ORDER="*" IMPTYPE="NoCollection" /> </OPERATOR> <OPERATOR TYPE="IProductWRTDerivBase"> <ELEMENT TYPE="T" ORDER="*" IMPTYPE="NoCollection" /> <ELEMENT TYPE="Q" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="H" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="R" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="P" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="A" ORDER="*" IMPTYPE="NoCollection" /> </OPERATOR> <OPERATOR TYPE="PhysDeriv"> <ELEMENT TYPE="T" ORDER="*" IMPTYPE="NoCollection" /> <ELEMENT TYPE="Q" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="H" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="R" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="P" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="A" ORDER="*" IMPTYPE="NoCollection" /> + </OPERATOR> + <OPERATOR TYPE="PhysInterp1DScaled"> + <ELEMENT TYPE="T" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="Q" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="H" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="R" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="P" ORDER="*" IMPTYPE="NoCollection" /> + <ELEMENT TYPE="A" ORDER="*" IMPTYPE="NoCollection" /> </OPERATOR> </COLLECTIONS> </NEKTAR> diff --git a/library/MatrixFreeOps/BwdTrans.h b/library/MatrixFreeOps/BwdTrans.h index e81cb44ae5..92c0d6e104 100644 --- a/library/MatrixFreeOps/BwdTrans.h +++ b/library/MatrixFreeOps/BwdTrans.h @@ -92,6 +92,17 @@ struct BwdTransTemplate void operator()(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) final { + const int nm0 = this->m_nm[0]; + const int nq0 = this->m_nq[0]; +#if defined(SHAPE_DIMENSION_2D) + const int nm1 = this->m_nm[1]; + const int nq1 = this->m_nq[1]; +#elif defined(SHAPE_DIMENSION_3D) + const int nm1 = this->m_nm[1]; + const int nm2 = this->m_nm[2]; + const int nq1 = this->m_nq[1]; + const int nq2 = this->m_nq[2]; +#endif #include "SwitchNodesPoints.h" } @@ -111,8 +122,8 @@ struct BwdTransTemplate void operator1D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nq0 = m_basis[0]->GetNumPoints(); + const auto nm0 = this->m_nm[0]; + const auto nq0 = this->m_nq[0]; const auto nqTot = nq0; const auto nqBlocks = nqTot * vec_t::width; @@ -226,11 +237,11 @@ struct BwdTransTemplate void operator2D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; const auto nqTot = nq0 * nq1; const auto nqBlocks = nqTot * vec_t::width; @@ -356,13 +367,13 @@ struct BwdTransTemplate void operator3D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); - const auto nm2 = m_basis[2]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; + const auto nm2 = this->m_nm[2]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; const auto nqTot = nq0 * nq1 * nq2; const auto nqBlocks = nqTot * vec_t::width; diff --git a/library/MatrixFreeOps/CMakeLists.txt b/library/MatrixFreeOps/CMakeLists.txt index 7dc170f643..267ece36c0 100644 --- a/library/MatrixFreeOps/CMakeLists.txt +++ b/library/MatrixFreeOps/CMakeLists.txt @@ -16,7 +16,7 @@ CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/SwitchLimits.h.in # so to reduce compile time timeout failures due to the large number # of template impementations. -SET(OPERATORS BwdTrans PhysDeriv Helmholtz LinearAdvectionDiffusionReaction IProduct IProductWRTDerivBase) +SET(OPERATORS BwdTrans PhysDeriv Helmholtz LinearAdvectionDiffusionReaction IProduct IProductWRTDerivBase PhysInterp1DScaled) SET(SHAPES Seg Tri Quad Hex Tet Prism Pyr ) SET(DIMENSIONS 1 2 2 3 3 3 3 ) diff --git a/library/MatrixFreeOps/Helmholtz.h b/library/MatrixFreeOps/Helmholtz.h index 6d1ec4bfa0..2926479e6b 100644 --- a/library/MatrixFreeOps/Helmholtz.h +++ b/library/MatrixFreeOps/Helmholtz.h @@ -83,8 +83,8 @@ struct HelmholtzTemplate SHAPE_TYPE, this->m_nm[0], this->m_nm[1]); #if defined(SHAPE_TYPE_TRI) - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; const Array<OneD, const NekDouble> &z0 = m_basis[0]->GetZ(); const Array<OneD, const NekDouble> &z1 = m_basis[1]->GetZ(); @@ -99,9 +99,9 @@ struct HelmholtzTemplate #if defined(SHAPE_TYPE_TET) || defined(SHAPE_TYPE_PRISM) || \ defined(SHAPE_TYPE_PYR) - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; const Array<OneD, const NekDouble> &z0 = m_basis[0]->GetZ(); const Array<OneD, const NekDouble> &z1 = m_basis[1]->GetZ(); @@ -123,6 +123,17 @@ struct HelmholtzTemplate void operator()(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) final { + const int nm0 = this->m_nm[0]; + const int nq0 = this->m_nq[0]; +#if defined(SHAPE_DIMENSION_2D) + const int nm1 = this->m_nm[1]; + const int nq1 = this->m_nq[1]; +#elif defined(SHAPE_DIMENSION_3D) + const int nm1 = this->m_nm[1]; + const int nm2 = this->m_nm[2]; + const int nq1 = this->m_nq[1]; + const int nq2 = this->m_nq[2]; +#endif #include "SwitchNodesPoints.h" } @@ -159,11 +170,11 @@ struct HelmholtzTemplate void operator2D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; constexpr auto ndf = 4; @@ -194,7 +205,7 @@ struct HelmholtzTemplate const vec_t *jac_ptr = &((*this->m_jac)[0]); const vec_t *df_ptr = &((*this->m_df)[0]); - const auto dfSize = ndf * nqTot; + [[maybe_unused]] const auto dfSize = ndf * nqTot; for (int e = 0; e < this->m_nBlocks - 1; e++) { @@ -367,13 +378,13 @@ struct HelmholtzTemplate void operator3D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); - const auto nm2 = m_basis[2]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; + const auto nm2 = this->m_nm[2]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; constexpr auto ndf = 9; const auto nqTot = nq0 * nq1 * nq2; @@ -407,7 +418,7 @@ struct HelmholtzTemplate const vec_t *jac_ptr = &((*this->m_jac)[0]); const vec_t *df_ptr = &((*this->m_df)[0]); - const auto dfSize = ndf * nqTot; + [[maybe_unused]] const auto dfSize = ndf * nqTot; for (int e = 0; e < this->m_nBlocks - 1; e++) { diff --git a/library/MatrixFreeOps/IProduct.h b/library/MatrixFreeOps/IProduct.h index b22f8dba37..88d14dd56a 100644 --- a/library/MatrixFreeOps/IProduct.h +++ b/library/MatrixFreeOps/IProduct.h @@ -95,6 +95,17 @@ struct IProductTemplate void operator()(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) final { + const int nm0 = this->m_nm[0]; + const int nq0 = this->m_nq[0]; +#if defined(SHAPE_DIMENSION_2D) + const int nm1 = this->m_nm[1]; + const int nq1 = this->m_nq[1]; +#elif defined(SHAPE_DIMENSION_3D) + const int nm1 = this->m_nm[1]; + const int nm2 = this->m_nm[2]; + const int nq1 = this->m_nq[1]; + const int nq2 = this->m_nq[2]; +#endif #include "SwitchNodesPoints.h" } @@ -114,8 +125,8 @@ struct IProductTemplate void operator1D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nq0 = m_basis[0]->GetNumPoints(); + const auto nm0 = this->m_nm[0]; + const auto nq0 = this->m_nq[0]; const auto nqTot = nq0; const auto nqBlocks = nqTot * vec_t::width; @@ -247,11 +258,11 @@ struct IProductTemplate void operator2D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; const auto nqTot = nq0 * nq1; const auto nqBlocks = nqTot * vec_t::width; @@ -403,13 +414,13 @@ struct IProductTemplate void operator3D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); - const auto nm2 = m_basis[2]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; + const auto nm2 = this->m_nm[2]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; const auto nqTot = nq0 * nq1 * nq2; const auto nqBlocks = nqTot * vec_t::width; diff --git a/library/MatrixFreeOps/IProductWRTDerivBase.h b/library/MatrixFreeOps/IProductWRTDerivBase.h index 9d9aebad5c..93331c16c6 100644 --- a/library/MatrixFreeOps/IProductWRTDerivBase.h +++ b/library/MatrixFreeOps/IProductWRTDerivBase.h @@ -97,6 +97,17 @@ struct IProductWRTDerivBaseTemplate void operator()(const Array<OneD, Array<OneD, NekDouble>> &input, Array<OneD, NekDouble> &output) final { + const int nm0 = this->m_nm[0]; + const int nq0 = this->m_nq[0]; +#if defined(SHAPE_DIMENSION_2D) + const int nm1 = this->m_nm[1]; + const int nq1 = this->m_nq[1]; +#elif defined(SHAPE_DIMENSION_3D) + const int nm1 = this->m_nm[1]; + const int nm2 = this->m_nm[2]; + const int nq1 = this->m_nq[1]; + const int nq2 = this->m_nq[2]; +#endif #include "SwitchNodesPoints.h" } @@ -121,8 +132,8 @@ struct IProductWRTDerivBaseTemplate ASSERTL1(input.size() <= 3, "IProductWRTDerivBaseTemplate::Operator1D: " "Operator not set up for other dimensions.") - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nq0 = m_basis[0]->GetNumPoints(); + const auto nm0 = this->m_nm[0]; + const auto nq0 = this->m_nq[0]; const auto nqTot = nq0; const auto nqBlocks = nqTot * vec_t::width; @@ -340,11 +351,11 @@ struct IProductWRTDerivBaseTemplate ASSERTL1(input.size() <= 3, "IProductWRTDerivBaseTemplate::Operator2D: " "Operator not set up for 3D coordinates."); - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; const auto nqTot = nq0 * nq1; const auto nqBlocks = nqTot * vec_t::width; @@ -587,13 +598,13 @@ struct IProductWRTDerivBaseTemplate "IProductWRTDerivBaseTemplate::Operator3D: Cannot call 3D " "routine with 1 or 2 outputs."); - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); - const auto nm2 = m_basis[2]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; + const auto nm2 = this->m_nm[2]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; constexpr auto ndf = 9u; const auto nqTot = nq0 * nq1 * nq2; diff --git a/library/MatrixFreeOps/LinearAdvectionDiffusionReaction.h b/library/MatrixFreeOps/LinearAdvectionDiffusionReaction.h index f8cbbc019c..6833f5bf5b 100644 --- a/library/MatrixFreeOps/LinearAdvectionDiffusionReaction.h +++ b/library/MatrixFreeOps/LinearAdvectionDiffusionReaction.h @@ -84,8 +84,8 @@ struct LinearAdvectionDiffusionReactionTemplate SHAPE_TYPE, this->m_nm[0], this->m_nm[1]); #if defined(SHAPE_TYPE_TRI) - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; const Array<OneD, const NekDouble> &z0 = m_basis[0]->GetZ(); const Array<OneD, const NekDouble> &z1 = m_basis[1]->GetZ(); @@ -101,9 +101,9 @@ struct LinearAdvectionDiffusionReactionTemplate #if defined(SHAPE_TYPE_TET) || defined(SHAPE_TYPE_PRISM) || \ defined(SHAPE_TYPE_PYR) - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; const Array<OneD, const NekDouble> &z0 = m_basis[0]->GetZ(); const Array<OneD, const NekDouble> &z1 = m_basis[1]->GetZ(); @@ -126,6 +126,17 @@ struct LinearAdvectionDiffusionReactionTemplate void operator()(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) final { + const int nm0 = this->m_nm[0]; + const int nq0 = this->m_nq[0]; +#if defined(SHAPE_DIMENSION_2D) + const int nm1 = this->m_nm[1]; + const int nq1 = this->m_nq[1]; +#elif defined(SHAPE_DIMENSION_3D) + const int nm1 = this->m_nm[1]; + const int nm2 = this->m_nm[2]; + const int nq1 = this->m_nq[1]; + const int nq2 = this->m_nq[2]; +#endif #include "SwitchNodesPoints.h" } @@ -164,11 +175,11 @@ struct LinearAdvectionDiffusionReactionTemplate void operator2D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; constexpr auto ndf = 4; constexpr auto nvel = 2; @@ -202,8 +213,8 @@ struct LinearAdvectionDiffusionReactionTemplate const vec_t *advVel_ptr = &((*this->m_advVel)[0]); // Get size of derivative factor block - const auto dfSize = ndf * nqTot; - const auto advVelSize = nvel * nqTot; + [[maybe_unused]] const auto dfSize = ndf * nqTot; + const auto advVelSize = nvel * nqTot; for (int e = 0; e < this->m_nBlocks - 1; e++) { @@ -368,10 +379,10 @@ struct LinearAdvectionDiffusionReactionTemplate deriv1, bwd); // Step 4: Inner product for mass + advection matrix operation - // Input: bwd = 1/lambda * V \cdot \nabla_x uPhys + uPhys (const) - // Output: wsp0 = temporary - // tmpOut = lambda \int_\Omega (1/lambda*V \cdot \nabla_x - // uPhys + uPhys) phi + // Input: bwd = 1/lambda * V \cdot \nabla_x uPhys + uPhys + // (const) Output: wsp0 = temporary + // tmpOut = lambda \int_\Omega (1/lambda*V \cdot + // \nabla_x uPhys + uPhys) phi IProduct2DKernel<SHAPE_TYPE, true, false, DEFORMED>( nm0, nm1, nq0, nq1, correct, bwd, this->m_bdata[0], this->m_bdata[1], this->m_w[0], this->m_w[1], jac_ptr, wsp0, tmpOut, @@ -413,13 +424,13 @@ struct LinearAdvectionDiffusionReactionTemplate void operator3D(const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) { - const auto nm0 = m_basis[0]->GetNumModes(); - const auto nm1 = m_basis[1]->GetNumModes(); - const auto nm2 = m_basis[2]->GetNumModes(); + const auto nm0 = this->m_nm[0]; + const auto nm1 = this->m_nm[1]; + const auto nm2 = this->m_nm[2]; - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; constexpr auto ndf = 9; constexpr auto nvel = 3; @@ -456,8 +467,8 @@ struct LinearAdvectionDiffusionReactionTemplate const vec_t *advVel_ptr = &((*this->m_advVel)[0]); // Get size of derivative factor block - const auto dfSize = ndf * nqTot; - const auto advVelSize = nvel * nqTot; + [[maybe_unused]] const auto dfSize = ndf * nqTot; + const auto advVelSize = nvel * nqTot; for (int e = 0; e < this->m_nBlocks - 1; e++) { diff --git a/library/MatrixFreeOps/Operator.hpp b/library/MatrixFreeOps/Operator.hpp index 22577425a0..83b552fcfc 100644 --- a/library/MatrixFreeOps/Operator.hpp +++ b/library/MatrixFreeOps/Operator.hpp @@ -43,6 +43,7 @@ #include <LibUtilities/BasicUtils/NekFactory.hpp> #include <LibUtilities/BasicUtils/NekInline.hpp> #include <LibUtilities/Foundations/Basis.h> +#include <LibUtilities/Foundations/ManagerAccess.h> #include <LibUtilities/SimdLib/tinysimd.hpp> namespace Nektar::MatrixFree @@ -82,6 +83,18 @@ public: return false; } + MATRIXFREE_EXPORT virtual void SetUpBdata( + [[maybe_unused]] std::vector<LibUtilities::BasisSharedPtr> &basis) = 0; + MATRIXFREE_EXPORT virtual void SetUpDBdata( + [[maybe_unused]] std::vector<LibUtilities::BasisSharedPtr> &basis) = 0; + MATRIXFREE_EXPORT virtual void SetUpInterp1D( + [[maybe_unused]] std::vector<LibUtilities::BasisSharedPtr> &basis, + [[maybe_unused]] NekDouble factor) = 0; + MATRIXFREE_EXPORT virtual void SetUpZW( + [[maybe_unused]] std::vector<LibUtilities::BasisSharedPtr> &basis) = 0; + MATRIXFREE_EXPORT virtual void SetUpD( + [[maybe_unused]] std::vector<LibUtilities::BasisSharedPtr> &basis) = 0; + MATRIXFREE_EXPORT virtual void SetDF( const std::shared_ptr<std::vector<vec_t, tinysimd::allocator<vec_t>>> &df) = 0; @@ -119,6 +132,12 @@ public: int nElmt) : m_basis(basis), m_nElmt(nElmt) { + // Since the class is constructed before the + // PhysInterp1DScaled_MatrixFree class inside collections, we are + // initializing the m_scalingFactor member with the default value of 1.5 + // and upon construction of the aforemmentioned class, it will be + // updated to the correct value. + m_scalingFactor = 1.5; } ~PhysInterp1DScaled() override = default; @@ -127,9 +146,19 @@ public: const Array<OneD, const NekDouble> &input, Array<OneD, NekDouble> &output) = 0; // Abstract Method + // Function to initialize the scaling factor that is used to increase the + // number of quadrature points in each direction of the flow + NEK_FORCE_INLINE void SetScalingFactor(NekDouble scalingFactor) + { + m_scalingFactor = scalingFactor; + } + protected: std::vector<LibUtilities::BasisSharedPtr> m_basis; int m_nElmt; + + // Scaling factor to enhance the polynomial space + NekDouble m_scalingFactor; }; // Base class for product operator. @@ -484,31 +513,82 @@ protected: m_nBlocks = nElmt / vec_t::width + 1; m_nPads = vec_t::width - (nElmt % vec_t::width); } - - // Depending on element dimension, set up basis information, quadrature, - // etc, inside vectorised environment. for (int i = 0; i < DIM; ++i) { - const Array<OneD, const NekDouble> bdata = basis[i]->GetBdata(); - const Array<OneD, const NekDouble> dbdata = basis[i]->GetDbdata(); - const Array<OneD, const NekDouble> w = basis[i]->GetW(); - m_nm[i] = basis[i]->GetNumModes(); m_nq[i] = basis[i]->GetNumPoints(); + } + } + + // Depending on element dimension, set up basis information, + // inside vectorised environment. + void SetUpBdata(std::vector<LibUtilities::BasisSharedPtr> &basis) final + { + for (int i = 0; i < DIM; ++i) + { + const Array<OneD, const NekDouble> bdata = basis[i]->GetBdata(); m_bdata[i].resize(bdata.size()); for (auto j = 0; j < bdata.size(); ++j) { m_bdata[i][j] = bdata[j]; } + } + } + + // Depending on element dimension, set up derivative of basis + // information, inside vectorised environment. + void SetUpDBdata(std::vector<LibUtilities::BasisSharedPtr> &basis) final + { + for (int i = 0; i < DIM; ++i) + { + const Array<OneD, const NekDouble> dbdata = basis[i]->GetDbdata(); m_dbdata[i].resize(dbdata.size()); for (auto j = 0; j < dbdata.size(); ++j) { m_dbdata[i][j] = dbdata[j]; } + } + } - NekDouble fac = 1.0; + // Depending on element dimension, set up 1D interpolation matrix in + // basis data inside vectorised environment. + void SetUpInterp1D(std::vector<LibUtilities::BasisSharedPtr> &basis, + NekDouble factor) final + { + // Is this updated at each time-step? + // Depending on element dimension, set up interpolation matrices, + // inside vectorised environment. + for (int i = 0; i < DIM; ++i) + { + m_enhancednq[i] = (int)basis[i]->GetNumPoints() * factor; + + LibUtilities::PointsKey PointsKeyIn(m_nq[i], + basis[i]->GetPointsType()); + LibUtilities::PointsKey PointsKeyOut((int)m_nq[i] * factor, + basis[i]->GetPointsType()); + auto I = LibUtilities::PointsManager()[PointsKeyIn] + ->GetI(PointsKeyOut) + ->GetPtr(); + + m_I[i].resize(I.size()); + for (int j = 0; j < I.size(); ++j) + { + m_I[i][j] = I[j]; + } + } + } + + void SetUpZW(std::vector<LibUtilities::BasisSharedPtr> &basis) final + { + + // Depending on element dimension, set up quadrature, + // inside vectorised environment. + for (int i = 0; i < DIM; ++i) + { + const Array<OneD, const NekDouble> w = basis[i]->GetW(); + NekDouble fac = 1.0; if (basis[i]->GetPointsType() == LibUtilities::eGaussRadauMAlpha1Beta0) { @@ -526,13 +606,6 @@ protected: m_w[i][j] = fac * w[j]; } - auto D = basis[i]->GetD()->GetPtr(); - m_D[i].resize(D.size()); - for (int j = 0; j < D.size(); ++j) - { - m_D[i][j] = D[j]; - } - auto Z = basis[i]->GetZ(); m_Z[i].resize(Z.size()); for (int j = 0; j < Z.size(); ++j) @@ -542,6 +615,22 @@ protected: } } + void SetUpD(std::vector<LibUtilities::BasisSharedPtr> &basis) final + { + + // Depending on element dimension, set up quadrature, + // inside vectorised environment. + for (int i = 0; i < DIM; ++i) + { + auto D = basis[i]->GetD()->GetPtr(); + m_D[i].resize(D.size()); + for (int j = 0; j < D.size(); ++j) + { + m_D[i][j] = D[j]; + } + } + } + void SetDF( const std::shared_ptr<std::vector<vec_t, tinysimd::allocator<vec_t>>> &df) final @@ -558,7 +647,7 @@ protected: int m_nBlocks; int m_nPads; - std::array<int, DIM> m_nm, m_nq; + std::array<int, DIM> m_nm, m_nq, m_enhancednq; std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, DIM> m_bdata; std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, DIM> m_dbdata; std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, DIM> @@ -567,12 +656,13 @@ protected: m_Z; // Zeroes std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, DIM> m_w; // Weights + std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, DIM> + m_I; // Interpolation Matrices std::shared_ptr<std::vector<vec_t, tinysimd::allocator<vec_t>>> m_df; // Chain rule function deriviatives for each element (00, 10, // 20, 30...) std::shared_ptr<std::vector<vec_t, tinysimd::allocator<vec_t>>> m_jac; }; - } // namespace Nektar::MatrixFree #endif diff --git a/library/MatrixFreeOps/PhysDeriv.h b/library/MatrixFreeOps/PhysDeriv.h index 49b0d16570..b2bd73f278 100644 --- a/library/MatrixFreeOps/PhysDeriv.h +++ b/library/MatrixFreeOps/PhysDeriv.h @@ -66,23 +66,6 @@ struct PhysDerivTemplate Helper<LibUtilities::ShapeTypeDimMap[SHAPE_TYPE], DEFORMED>(basis, nElmt) { - constexpr auto DIM = LibUtilities::ShapeTypeDimMap[SHAPE_TYPE]; - - if (DIM == 1) - { - m_nmTot = LibUtilities::GetNumberOfCoefficients(SHAPE_TYPE, - this->m_nm[0]); - } - else if (DIM == 2) - { - m_nmTot = LibUtilities::GetNumberOfCoefficients( - SHAPE_TYPE, this->m_nm[0], this->m_nm[1]); - } - else if (DIM == 3) - { - m_nmTot = LibUtilities::GetNumberOfCoefficients( - SHAPE_TYPE, this->m_nm[0], this->m_nm[1], this->m_nm[2]); - } } static std::shared_ptr<Operator> Create( @@ -95,6 +78,13 @@ struct PhysDerivTemplate void operator()(const Array<OneD, const NekDouble> &input, Array<OneD, Array<OneD, NekDouble>> &output) final { + const auto nq0 = this->m_nq[0]; +#if defined(SHAPE_DIMENSION_2D) + const auto nq1 = this->m_nq[1]; +#elif defined(SHAPE_DIMENSION_3D) + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; +#endif #include "SwitchPoints.h" } @@ -119,7 +109,7 @@ struct PhysDerivTemplate ASSERTL1(output.size() <= 3, "PhysDerivTemplate::Operator1D: Operator " "not set up for other dimensions.") - const auto nq0 = m_basis[0]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; const auto nqTot = nq0; const auto nqBlocks = nqTot * vec_t::width; @@ -306,8 +296,8 @@ struct PhysDerivTemplate ASSERTL1(output.size() <= 3, "PhysDerivTemplate::Operator2D: Operator " "not set up for 3D coordinates."); - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; const auto nqTot = nq0 * nq1; const auto nqBlocks = nqTot * vec_t::width; @@ -495,9 +485,9 @@ struct PhysDerivTemplate ASSERTL1(output.size() == 3, "PhysDerivTemplate::Operator3D: Cannot " "call 3D routine with 1 or 2 outputs."); - const auto nq0 = m_basis[0]->GetNumPoints(); - const auto nq1 = m_basis[1]->GetNumPoints(); - const auto nq2 = m_basis[2]->GetNumPoints(); + const auto nq0 = this->m_nq[0]; + const auto nq1 = this->m_nq[1]; + const auto nq2 = this->m_nq[2]; const auto nqTot = nq0 * nq1 * nq2; const auto nqBlocks = nqTot * vec_t::width; @@ -677,7 +667,6 @@ struct PhysDerivTemplate #endif private: - int m_nmTot; }; } // namespace Nektar::MatrixFree diff --git a/library/MatrixFreeOps/PhysInterp1DScaled.h b/library/MatrixFreeOps/PhysInterp1DScaled.h new file mode 100644 index 0000000000..952e98a3b8 --- /dev/null +++ b/library/MatrixFreeOps/PhysInterp1DScaled.h @@ -0,0 +1,501 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File: PhysInterp1DScaled.h +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NEKTAR_LIBRARY_MF_PHYSINTERP1DSCALED_H +#define NEKTAR_LIBRARY_MF_PHYSINTERP1DSCALED_H + +#include <LibUtilities/BasicUtils/ShapeType.hpp> +#include <LibUtilities/BasicUtils/SharedArray.hpp> +#include <LibUtilities/Foundations/Basis.h> + +#include "Operator.hpp" +#include "PhysInterp1DScaledKernels.hpp" + +// As each opertor has seven shapes over three dimension to get to the +// "work" each operator uses a series of preprocessor directives based +// on the dimension and shape type so to limit the code +// inclusion. This keeps the library size as minimal as possible while +// removing runtime conditionals. + +// The preprocessor directives, SHAPE_DIMENSION_?D and SHAPE_TYPE_* +// are constructed by CMake in the CMakeLists.txt file which uses an +// implementation file. See the CMakeLists.txt files for more +// details. +namespace Nektar::MatrixFree +{ + +template <LibUtilities::ShapeType SHAPE_TYPE, bool DEFORMED = false> +struct PhysInterp1DScaledTemplate + : public PhysInterp1DScaled, + public Helper<LibUtilities::ShapeTypeDimMap[SHAPE_TYPE]> +{ + PhysInterp1DScaledTemplate(std::vector<LibUtilities::BasisSharedPtr> basis, + int nElmt) + : PhysInterp1DScaled(basis, nElmt), + Helper<LibUtilities::ShapeTypeDimMap[SHAPE_TYPE]>(basis, nElmt) + { + } + + static std::shared_ptr<Operator> Create( + std::vector<LibUtilities::BasisSharedPtr> basis, int nElmt) + { + return std::make_shared<PhysInterp1DScaledTemplate<SHAPE_TYPE>>(basis, + nElmt); + } + + void operator()(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) final + { + // Following the same variable naming as in SwitchNodesPoints.h + // Both input and output number of points of the kernels are in the + // physical space + // nm0 is replaced by nq_in0 + // nq0 is replaced by nq_out0 + const auto nm0 = this->m_nq[0]; + const auto nq0 = this->m_enhancednq[0]; +#if defined(SHAPE_DIMENSION_2D) + // nm1 is replaced by nq_in1 + // nq1 is replaced by nq_out1 + // Following the same variable naming as in SwitchNodesPoints.h + const auto nm1 = this->m_nq[1]; + const auto nq1 = this->m_enhancednq[1]; +#elif defined(SHAPE_DIMENSION_3D) + // nm2 is replaced by nq_in2 + // nq2 is replaced by nq_out2 + // Following the same variable naming as in SwitchNodesPoints.h + const auto nm1 = this->m_nq[1]; + const auto nm2 = this->m_nq[2]; + const auto nq1 = this->m_enhancednq[1]; + const auto nq2 = this->m_enhancednq[2]; +#endif +#include "SwitchNodesPoints.h" + } + + // There must be separate 1D, 2D, and 3D operators because the + // helper base class is based on the shape type dim. Thus indices + // for the basis must be respected. + + // Further there are duplicate 1D, 2D, and 3D operators so to + // allow for compile time array sizes to be part of the template + // and thus gain loop unrolling. The only difference is the + // location of the size value, in the template or the function: + // foo<int bar>() {} vs foo() { int bar = ... } + +#if defined(SHAPE_DIMENSION_1D) + + // Non-size based operator. + void operator1D(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) + { + const auto nq_in0 = this->m_nq[0]; + const auto nq_out0 = this->m_enhancednq[0]; + + const auto nb_out = nq_out0 * vec_t::width; + const auto nb_in = nq_in0 * vec_t::width; + + auto *inptr = &input[0]; + auto *outptr = &output[0]; + + // Workspace for kernels - also checks preconditions + PhysInterp1DScaled1DWorkspace<SHAPE_TYPE>(nq_in0, nq_out0); + + std::vector<vec_t, allocator<vec_t>> tmpIn(nq_in0), tmpOut(nq_out0); + + // temporary aligned storage for local fields + NekDouble *locField = static_cast<NekDouble *>(::operator new[]( + nb_out * sizeof(NekDouble), std::align_val_t(vec_t::alignment))); + + for (int e = 0; e < this->m_nBlocks - 1; ++e) + { + // Load data to aligned storage and interleave it + // std::copy(inptr, inptr + nb_in, locField); + // load_interleave(locField, nq_in0, tmpIn); + load_unalign_interleave(inptr, nq_in0, tmpIn); + + PhysInterp1DScaled1DKernel<SHAPE_TYPE>(nq_in0, nq_out0, tmpIn, + this->m_I[0], tmpOut); + + // de-interleave and store data + // deinterleave_store(tmpOut, nq_out0, locField); + // std::copy(locField, locField + nb_out, outptr); + deinterleave_unalign_store(tmpOut, nq_out0, outptr); + + inptr += nb_in; + outptr += nb_out; + } + // last block + { + int acturalSize = nb_in - this->m_nPads * nq_in0; + std::copy(inptr, inptr + acturalSize, locField); + load_interleave(locField, nq_in0, tmpIn); + + PhysInterp1DScaled1DKernel<SHAPE_TYPE>(nq_in0, nq_out0, tmpIn, + this->m_I[0], tmpOut); + + // de-interleave and store data + deinterleave_store(tmpOut, nq_out0, locField); + acturalSize = nb_out - this->m_nPads * nq_out0; + std::copy(locField, locField + acturalSize, outptr); + } + // free aligned memory + ::operator delete[](locField, std::align_val_t(vec_t::alignment)); + } + + // Size based template version. + template <int nq_in0, int nq_out0> + void operator1D(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) + { + constexpr auto nOutTot = nq_out0; + constexpr auto nInTot = nq_in0; + constexpr auto nb_out = nOutTot * vec_t::width; + const auto nb_in = nInTot * vec_t::width; + + auto *inptr = &input[0]; + auto *outptr = &output[0]; + + // Workspace for kernels - also checks preconditions + PhysInterp1DScaled1DWorkspace<SHAPE_TYPE>(nq_in0, nq_out0); + + std::vector<vec_t, allocator<vec_t>> tmpIn(nInTot), tmpOut(nOutTot); + + // temporary aligned storage for local fields + alignas(vec_t::alignment) NekDouble locField[nb_out]; + + for (int e = 0; e < this->m_nBlocks - 1; ++e) + { + // Load data to aligned storage and interleave it + // std::copy(inptr, inptr + nb_in, locField); + // load_interleave(locField, m_nInTot, tmpIn); + load_unalign_interleave(inptr, nInTot, tmpIn); + + PhysInterp1DScaled1DKernel<SHAPE_TYPE>(nq_in0, nq_out0, tmpIn, + this->m_I[0], tmpOut); + + // de-interleave and store data + // deinterleave_store(tmpOut, nOutTot, locField); + // std::copy(locField, locField + nb_out, outptr); + deinterleave_unalign_store(tmpOut, nOutTot, outptr); + + inptr += nb_in; + outptr += nb_out; + } + // last block + { + int acturalSize = nb_in - this->m_nPads * nInTot; + std::copy(inptr, inptr + acturalSize, locField); + load_interleave(locField, nInTot, tmpIn); + + PhysInterp1DScaled1DKernel<SHAPE_TYPE>(nq_in0, nq_out0, tmpIn, + this->m_I[0], tmpOut); + + // de-interleave and store data + deinterleave_store(tmpOut, nOutTot, locField); + acturalSize = nb_out - this->m_nPads * nOutTot; + std::copy(locField, locField + acturalSize, outptr); + } + } + +#elif defined(SHAPE_DIMENSION_2D) + + // Non-size based operator. + void operator2D(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) + { + const auto nq_in0 = this->m_nq[0]; + const auto nq_in1 = this->m_nq[1]; + + const auto nq_out0 = this->m_enhancednq[0]; + const auto nq_out1 = this->m_enhancednq[1]; + + const auto nInTot = nq_in0 * nq_in1; + const auto nOutTot = nq_out0 * nq_out1; + const auto nb_out = nOutTot * vec_t::width; + const auto nb_in = nInTot * vec_t::width; + + auto *inptr = &input[0]; + auto *outptr = &output[0]; + + // Workspace for kernels - also checks preconditions + size_t wsp0Size = 0; + PhysInterp1DScaled2DWorkspace<SHAPE_TYPE>(nq_in0, nq_in1, nq_out0, + nq_out1, wsp0Size); + + std::vector<vec_t, allocator<vec_t>> wsp0(wsp0Size), tmpIn(nInTot), + tmpOut(nOutTot); + + // temporary aligned storage for local fields + NekDouble *locField = static_cast<NekDouble *>(::operator new[]( + nb_out * sizeof(NekDouble), std::align_val_t(vec_t::alignment))); + + for (int e = 0; e < this->m_nBlocks - 1; ++e) + { + // Load data to aligned storage and interleave it + // std::copy(inptr, inptr + nb_in, locField); + // load_interleave(locField, m_nInTot, tmpIn); + load_unalign_interleave(inptr, nInTot, tmpIn); + + PhysInterp1DScaled2DKernel<SHAPE_TYPE>(nq_in0, nq_in1, nq_out0, + nq_out1, tmpIn, this->m_I[0], + this->m_I[1], wsp0, tmpOut); + + // de-interleave and store data + // deinterleave_store(tmpOut, nOutTot, locField); + // std::copy(locField, locField + nb_out, outptr); + deinterleave_unalign_store(tmpOut, nOutTot, outptr); + + inptr += nb_in; + outptr += nb_out; + } + // last block + { + int acturalSize = nb_in - this->m_nPads * nInTot; + std::copy(inptr, inptr + acturalSize, locField); + load_interleave(locField, nInTot, tmpIn); + + PhysInterp1DScaled2DKernel<SHAPE_TYPE>(nq_in0, nq_in1, nq_out0, + nq_out1, tmpIn, this->m_I[0], + this->m_I[1], wsp0, tmpOut); + + // de-interleave and store data + deinterleave_store(tmpOut, nOutTot, locField); + acturalSize = nb_out - this->m_nPads * nOutTot; + std::copy(locField, locField + acturalSize, outptr); + } + // free aligned memory + ::operator delete[](locField, std::align_val_t(vec_t::alignment)); + } + + // Size based template version. + template <int nq_in0, int nq_in1, int nq_out0, int nq_out1> + void operator2D(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) + { + constexpr auto nInTot = nq_in0 * nq_in1; + constexpr auto nOutTot = nq_out0 * nq_out1; + constexpr auto nb_out = nOutTot * vec_t::width; + const auto nb_in = nInTot * vec_t::width; + + auto *inptr = &input[0]; + auto *outptr = &output[0]; + + // Workspace for kernels - also checks preconditions + size_t wsp0Size = 0; + PhysInterp1DScaled2DWorkspace<SHAPE_TYPE>(nq_in0, nq_in1, nq_out0, + nq_out1, wsp0Size); + + std::vector<vec_t, allocator<vec_t>> wsp0(wsp0Size), tmpIn(nInTot), + tmpOut(nOutTot); + + // temporary aligned storage for local fields + alignas(vec_t::alignment) NekDouble locField[nb_out]; + + for (int e = 0; e < this->m_nBlocks - 1; ++e) + { + // Load data to aligned storage and interleave it + // std::copy(inptr, inptr + nb_in, locField); + // load_interleave(locField, m_nInTot, tmpIn); + load_unalign_interleave(inptr, nInTot, tmpIn); + + PhysInterp1DScaled2DKernel<SHAPE_TYPE>(nq_in0, nq_in1, nq_out0, + nq_out1, tmpIn, this->m_I[0], + this->m_I[1], wsp0, tmpOut); + // de-interleave and store data + // deinterleave_store(tmpOut, nOutTot, locField); + // std::copy(locField, locField + nb_out, outptr); + deinterleave_unalign_store(tmpOut, nOutTot, outptr); + + inptr += nb_in; + outptr += nb_out; + } + // last block + { + int acturalSize = nb_in - this->m_nPads * nInTot; + std::copy(inptr, inptr + acturalSize, locField); + load_interleave(locField, nInTot, tmpIn); + + PhysInterp1DScaled2DKernel<SHAPE_TYPE>(nq_in0, nq_in1, nq_out0, + nq_out1, tmpIn, this->m_I[0], + this->m_I[1], wsp0, tmpOut); + + // de-interleave and store data + deinterleave_store(tmpOut, nOutTot, locField); + acturalSize = nb_out - this->m_nPads * nOutTot; + std::copy(locField, locField + acturalSize, outptr); + } + } + +#elif defined(SHAPE_DIMENSION_3D) + + // Non-size based operator. + void operator3D(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) + { + const auto nq_in0 = this->m_nq[0]; + const auto nq_in1 = this->m_nq[1]; + const auto nq_in2 = this->m_nq[2]; + + const auto nq_out0 = this->m_enhancednq[0]; + const auto nq_out1 = this->m_enhancednq[1]; + const auto nq_out2 = this->m_enhancednq[2]; + + const auto nInTot = nq_in0 * nq_in1 * nq_in2; + const auto nOutTot = nq_out0 * nq_out1 * nq_out2; + const auto nb_out = nOutTot * vec_t::width; + const auto nb_in = nInTot * vec_t::width; + + auto *inptr = &input[0]; + auto *outptr = &output[0]; + + // Workspace for kernels - also checks preconditions + size_t wsp0Size = 0, wsp1Size = 0; + PhysInterp1DScaled3DWorkspace<SHAPE_TYPE>(nq_in0, nq_in1, nq_in2, + nq_out0, nq_out1, nq_out2, + wsp0Size, wsp1Size); + + std::vector<vec_t, allocator<vec_t>> wsp0(wsp0Size), wsp1(wsp1Size), + tmpIn(nInTot), tmpOut(nOutTot); + + // temporary aligned storage for local fields + NekDouble *locField = static_cast<NekDouble *>(::operator new[]( + nb_out * sizeof(NekDouble), std::align_val_t(vec_t::alignment))); + + for (int e = 0; e < this->m_nBlocks - 1; ++e) + { + // Load data to aligned storage and interleave it + // std::copy(inptr, inptr + nb_in, locField); + // load_interleave(locField, m_nInTot, tmpIn); + load_unalign_interleave(inptr, nInTot, tmpIn); + + PhysInterp1DScaled3DKernel<SHAPE_TYPE>( + nq_in0, nq_in1, nq_in2, nq_out0, nq_out1, nq_out2, tmpIn, + this->m_I[0], this->m_I[1], this->m_I[2], wsp0, wsp1, tmpOut); + + // de-interleave and store data + // deinterleave_store(tmpOut, nOutTot, locField); + // std::copy(locField, locField + nb_out, outptr); + deinterleave_unalign_store(tmpOut, nOutTot, outptr); + + inptr += nb_in; + outptr += nb_out; + } + // last block + { + int acturalSize = nb_in - this->m_nPads * nInTot; + std::copy(inptr, inptr + acturalSize, locField); + load_interleave(locField, nInTot, tmpIn); + + PhysInterp1DScaled3DKernel<SHAPE_TYPE>( + nq_in0, nq_in1, nq_in2, nq_out0, nq_out1, nq_out2, tmpIn, + this->m_I[0], this->m_I[1], this->m_I[2], wsp0, wsp1, tmpOut); + + // de-interleave and store data + deinterleave_store(tmpOut, nOutTot, locField); + acturalSize = nb_out - this->m_nPads * nOutTot; + std::copy(locField, locField + acturalSize, outptr); + } + // free aligned memory + ::operator delete[](locField, std::align_val_t(vec_t::alignment)); + } + + // Size based template version. + template <int nq_in0, int nq_in1, int nq_in2, int nq_out0, int nq_out1, + int nq_out2> + void operator3D(const Array<OneD, const NekDouble> &input, + Array<OneD, NekDouble> &output) + { + constexpr auto nInTot = nq_in0 * nq_in1 * nq_in2; + constexpr auto nOutTot = nq_out0 * nq_out1 * nq_out2; + constexpr auto nb_out = nOutTot * vec_t::width; + const auto nb_in = nInTot * vec_t::width; + + auto *inptr = &input[0]; + auto *outptr = &output[0]; + + // Workspace for kernels - also checks preconditions + size_t wsp0Size = 0, wsp1Size = 0; + PhysInterp1DScaled3DWorkspace<SHAPE_TYPE>(nq_in0, nq_in1, nq_in2, + nq_out0, nq_out1, nq_out2, + wsp0Size, wsp1Size); + + std::vector<vec_t, allocator<vec_t>> wsp0(wsp0Size), wsp1(wsp1Size), + tmpIn(nInTot), tmpOut(nOutTot); + + // temporary aligned storage for local fields + alignas(vec_t::alignment) NekDouble locField[nb_out]; + + for (int e = 0; e < this->m_nBlocks - 1; ++e) + { + // Load data to aligned storage and interleave it + // std::copy(inptr, inptr + nb_in, locField); + // load_interleave(locField, m_nInTot, tmpIn); + load_unalign_interleave(inptr, nInTot, tmpIn); + + PhysInterp1DScaled3DKernel<SHAPE_TYPE>( + nq_in0, nq_in1, nq_in2, nq_out0, nq_out1, nq_out2, tmpIn, + this->m_I[0], this->m_I[1], this->m_I[2], wsp0, wsp1, tmpOut); + + // de-interleave and store data + // deinterleave_store(tmpOut, nOutTot, locField); + // std::copy(locField, locField + nb_out, outptr); + deinterleave_unalign_store(tmpOut, nOutTot, outptr); + + inptr += nb_in; + outptr += nb_out; + } + // last block + { + int acturalSize = nb_in - this->m_nPads * nInTot; + std::copy(inptr, inptr + acturalSize, locField); + load_interleave(locField, nInTot, tmpIn); + + PhysInterp1DScaled3DKernel<SHAPE_TYPE>( + nq_in0, nq_in1, nq_in2, nq_out0, nq_out1, nq_out2, tmpIn, + this->m_I[0], this->m_I[1], this->m_I[2], wsp0, wsp1, tmpOut); + + // de-interleave and store data + deinterleave_store(tmpOut, nOutTot, locField); + acturalSize = nb_out - this->m_nPads * nOutTot; + std::copy(locField, locField + acturalSize, outptr); + } + } + +#endif // SHAPE_DIMENSION + +private: +}; + +} // namespace Nektar::MatrixFree + +#endif diff --git a/library/MatrixFreeOps/PhysInterp1DScaledKernels.hpp b/library/MatrixFreeOps/PhysInterp1DScaledKernels.hpp new file mode 100644 index 0000000000..8d728be480 --- /dev/null +++ b/library/MatrixFreeOps/PhysInterp1DScaledKernels.hpp @@ -0,0 +1,225 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File: PhysInterp1DScaledKernels.hpp +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NEKTAR_LIBRARY_MF_PHYSINTERP1DSCALED_KERNELS_HPP +#define NEKTAR_LIBRARY_MF_PHYSINTERP1DSCALED_KERNELS_HPP + +#include <LibUtilities/BasicUtils/NekInline.hpp> + +namespace Nektar::MatrixFree +{ + +using namespace tinysimd; +using vec_t = simd<NekDouble>; + +// Workspace - used to dynamically get the workspace size needed for +// temporary memory. +#if defined(SHAPE_DIMENSION_1D) + +template <LibUtilities::ShapeType SHAPE_TYPE> +NEK_FORCE_INLINE static void PhysInterp1DScaled1DWorkspace( + [[maybe_unused]] const size_t nq_in0, [[maybe_unused]] const size_t nq_out0) + +{ +} + +#elif defined(SHAPE_DIMENSION_2D) + +template <LibUtilities::ShapeType SHAPE_TYPE> +NEK_FORCE_INLINE static void PhysInterp1DScaled2DWorkspace( + [[maybe_unused]] const size_t nq_in0, [[maybe_unused]] const size_t nq_in1, + [[maybe_unused]] const size_t nq_out0, + [[maybe_unused]] const size_t nq_out1, size_t &wsp0Size) +{ + wsp0Size = std::max(wsp0Size, nq_in0 * nq_out0); +} + +#elif defined(SHAPE_DIMENSION_3D) + +template <LibUtilities::ShapeType SHAPE_TYPE> +NEK_FORCE_INLINE static void PhysInterp1DScaled3DWorkspace( + [[maybe_unused]] const size_t nq_in0, [[maybe_unused]] const size_t nq_in1, + [[maybe_unused]] const size_t nq_in2, [[maybe_unused]] const size_t nq_out0, + [[maybe_unused]] const size_t nq_out1, + [[maybe_unused]] const size_t nq_out2, size_t &wsp0Size, size_t &wsp1Size) +{ + + wsp0Size = + std::max(wsp0Size, nq_out0 * nq_in1 * nq_in2); // nq_in1 == nq_in2 + wsp1Size = + std::max(wsp1Size, nq_out0 * nq_out1 * nq_in2); // nq_out0 == nq_out1 +} + +#endif // SHAPE_DIMENSION + +// The dimension kernels which select the shape kernel. +#if defined(SHAPE_DIMENSION_1D) + +template <LibUtilities::ShapeType SHAPE_TYPE> +NEK_FORCE_INLINE static void PhysInterp1DScaled1DKernel( + const size_t nq_in0, const size_t nq_out0, + const std::vector<vec_t, allocator<vec_t>> &in, + const std::vector<vec_t, allocator<vec_t>> &basis0, + std::vector<vec_t, allocator<vec_t>> &out) +{ + for (int i = 0; i < nq_out0; ++i) + { + vec_t tmp = in[0] * basis0[i]; // Load 2x + + for (int p = 1; p < nq_in0; ++p) + { + tmp.fma(in[p], basis0[p * nq_out0 + i]); // Load 2x + } + + out[i] = tmp; // Store 1x + } +} + +#elif defined(SHAPE_DIMENSION_2D) + +template <LibUtilities::ShapeType SHAPE_TYPE> +NEK_FORCE_INLINE static void PhysInterp1DScaled2DKernel( + const size_t nq_in0, const size_t nq_in1, const size_t nq_out0, + const size_t nq_out1, const std::vector<vec_t, allocator<vec_t>> &in, + const std::vector<vec_t, allocator<vec_t>> &basis0, + const std::vector<vec_t, allocator<vec_t>> &basis1, + std::vector<vec_t, allocator<vec_t>> &wsp, + std::vector<vec_t, allocator<vec_t>> &out) +{ + for (int i = 0, cnt_iq = 0; i < nq_out0; ++i) + { + for (int q = 0, cnt_pq = 0; q < nq_in1; ++q, ++cnt_iq) + { + vec_t tmp = in[cnt_pq] * basis0[i]; // Load 2x + ++cnt_pq; + for (int p = 1; p < nq_in0; ++p, ++cnt_pq) + { + tmp.fma(in[cnt_pq], basis0[p * nq_out0 + i]); // Load 2x + } + wsp[cnt_iq] = tmp; // Store 1x + } + } + + for (int j = 0, cnt_ij = 0; j < nq_out1; ++j) + { + for (int i = 0, cnt_iq = 0; i < nq_out0; ++i, ++cnt_ij) + { + vec_t tmp = wsp[cnt_iq] * basis1[j]; // Load 2x + ++cnt_iq; + for (int q = 1; q < nq_in1; ++q, ++cnt_iq) + { + tmp.fma(wsp[cnt_iq], basis1[q * nq_out1 + j]); // Load 2x + } + out[cnt_ij] = tmp; // Store 1x + } + } +} + +#elif defined(SHAPE_DIMENSION_3D) + +template <LibUtilities::ShapeType SHAPE_TYPE> +NEK_FORCE_INLINE static void PhysInterp1DScaled3DKernel( + const size_t nq_in0, const size_t nq_in1, const size_t nq_in2, + const size_t nq_out0, const size_t nq_out1, const size_t nq_out2, + const std::vector<vec_t, allocator<vec_t>> &in, + const std::vector<vec_t, allocator<vec_t>> &basis0, + const std::vector<vec_t, allocator<vec_t>> &basis1, + const std::vector<vec_t, allocator<vec_t>> &basis2, + std::vector<vec_t, allocator<vec_t>> &sum_irq, // nq_out0 * nq_in2 * nq_in1 + std::vector<vec_t, allocator<vec_t>> &sum_jir, // nq_out1 * nq_out0 * nq_in2 + std::vector<vec_t, allocator<vec_t>> &out) +{ + for (int i = 0, cnt_irq = 0; i < nq_out0; ++i) + { + for (int r = 0, cnt_rqp = 0; r < nq_in2; ++r) + { + for (int q = 0; q < nq_in1; ++q, ++cnt_irq) + { + vec_t tmp = in[cnt_rqp] * basis0[i]; + ++cnt_rqp; + + for (int p = 1; p < nq_in0; ++p, ++cnt_rqp) + { + tmp.fma(in[cnt_rqp], basis0[p * nq_out0 + i]); + } + + sum_irq[cnt_irq] = tmp; + } + } + } + + for (int j = 0, cnt_jir = 0; j < nq_out1; ++j) + { + for (int i = 0, cnt_irq = 0; i < nq_out0; ++i) + { + for (int r = 0; r < nq_in2; ++r, ++cnt_jir) + { + vec_t tmp = sum_irq[cnt_irq] * basis1[j]; + ++cnt_irq; + + for (int q = 1; q < nq_in1; ++q) + { + tmp.fma(sum_irq[cnt_irq++], basis1[q * nq_out1 + j]); + } + + sum_jir[cnt_jir] = tmp; + } + } + } + + for (int k = 0, cnt_kji = 0; k < nq_out2; ++k) + { + for (int j = 0, cnt_jir = 0; j < nq_out1; ++j) + { + for (int i = 0; i < nq_out0; ++i, ++cnt_kji) + { + vec_t tmp = sum_jir[cnt_jir] * basis2[k]; + ++cnt_jir; + + for (int r = 1; r < nq_in2; ++r) + { + tmp.fma(sum_jir[cnt_jir++], basis2[r * nq_out2 + k]); + } + + out[cnt_kji] = tmp; + } + } + } +} + +#endif // SHAPE_DIMENSION + +} // namespace Nektar::MatrixFree + +#endif diff --git a/library/MatrixFreeOps/SwitchNodesPoints.h b/library/MatrixFreeOps/SwitchNodesPoints.h index c369eb115a..c8642cd14e 100644 --- a/library/MatrixFreeOps/SwitchNodesPoints.h +++ b/library/MatrixFreeOps/SwitchNodesPoints.h @@ -102,9 +102,6 @@ /* start switch definition for operator 1D*/ { - const int nm0 = m_basis[0]->GetNumModes(); - const int nq0 = m_basis[0]->GetNumPoints(); - #if defined(SHAPE_TYPE_SEG) switch (nm0) @@ -184,11 +181,6 @@ /* start switch definition for operator 2D*/ { - const int nm0 = m_basis[0]->GetNumModes(); - const int nm1 = m_basis[1]->GetNumModes(); - - const int nq0 = m_basis[0]->GetNumPoints(); - const int nq1 = m_basis[1]->GetNumPoints(); #if defined(SHAPE_TYPE_TRI) @@ -305,14 +297,6 @@ /* start switch definition for operator 3D*/ { - const int nm0 = m_basis[0]->GetNumModes(); - const int nm1 = m_basis[1]->GetNumModes(); - const int nm2 = m_basis[2]->GetNumModes(); - - const int nq0 = m_basis[0]->GetNumPoints(); - const int nq1 = m_basis[1]->GetNumPoints(); - const int nq2 = m_basis[2]->GetNumPoints(); - #if defined(SHAPE_TYPE_HEX) if (nm0 == nm1 && nm0 == nm2 && nq0 == nq1 && nq0 == nq2) diff --git a/library/MatrixFreeOps/SwitchPoints.h b/library/MatrixFreeOps/SwitchPoints.h index 01b5809f86..af07d77628 100644 --- a/library/MatrixFreeOps/SwitchPoints.h +++ b/library/MatrixFreeOps/SwitchPoints.h @@ -74,8 +74,6 @@ /* start of switch include for operator1D */ { - const int nq0 = m_basis[0]->GetNumPoints(); - #if defined(SHAPE_TYPE_SEG) switch (nq0) { @@ -113,9 +111,6 @@ /* start of switch include for operator2D */ { - const int nq0 = m_basis[0]->GetNumPoints(); - const int nq1 = m_basis[1]->GetNumPoints(); - #if defined(SHAPE_TYPE_TRI) if (nq0 == nq1 + 1) @@ -160,7 +155,6 @@ } #endif // SHAPE_TYPE - else { operator2D(input, output); @@ -195,10 +189,6 @@ /* start of include switch details for operator3D */ { - const int nq0 = m_basis[0]->GetNumPoints(); - const int nq1 = m_basis[1]->GetNumPoints(); - const int nq2 = m_basis[2]->GetNumPoints(); - #if defined(SHAPE_TYPE_HEX) if (nq0 == nq1 && nq0 == nq2) diff --git a/library/MultiRegions/ExpList.cpp b/library/MultiRegions/ExpList.cpp index 49938f9d15..c1e03af18c 100644 --- a/library/MultiRegions/ExpList.cpp +++ b/library/MultiRegions/ExpList.cpp @@ -6008,6 +6008,7 @@ void ExpList::v_PhysInterp1DScaled([[maybe_unused]] const NekDouble scale, m_collections[i].UpdateFactors(Collections::ePhysInterp1DScaled, factors); } + LIKWID_MARKER_START("v_PhysInterp1DScaled"); timer.Start(); Array<OneD, NekDouble> tmp; diff --git a/library/UnitTests/Collections/TestHexCollection.cpp b/library/UnitTests/Collections/TestHexCollection.cpp index f5890ff117..d51d3e4b74 100644 --- a/library/UnitTests/Collections/TestHexCollection.cpp +++ b/library/UnitTests/Collections/TestHexCollection.cpp @@ -497,7 +497,7 @@ BOOST_AUTO_TEST_CASE(TestHexBwdTrans_NoCollection_VariableP) CollExp.push_back(Exp); LibUtilities::SessionReaderSharedPtr dummySession; - Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::CollectionOptimisation colOpt(dummySession, 3, Collections::eNoCollection); Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(stdExp); Collections::Collection c(CollExp, impTypes); @@ -869,7 +869,7 @@ BOOST_AUTO_TEST_CASE(TestHexBwdTrans_MatrixFree_UniformP) CollExp.push_back(Exp); LibUtilities::SessionReaderSharedPtr dummySession; - Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::CollectionOptimisation colOpt(dummySession, 3, Collections::eMatrixFree); Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(stdExp); Collections::Collection c(CollExp, impTypes); @@ -1008,7 +1008,7 @@ BOOST_AUTO_TEST_CASE(TestHexIProductWRTBase_MatrixFree_UniformP_Undeformed) CollExp.push_back(Exp); LibUtilities::SessionReaderSharedPtr dummySession; - Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::CollectionOptimisation colOpt(dummySession, 3, Collections::eMatrixFree); Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(stdExp); Collections::Collection c(CollExp, impTypes); @@ -1085,7 +1085,7 @@ BOOST_AUTO_TEST_CASE(TestHexIProductWRTBase_MatrixFree_UniformP_Deformed) CollExp.push_back(Exp); LibUtilities::SessionReaderSharedPtr dummySession; - Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::CollectionOptimisation colOpt(dummySession, 3, Collections::eMatrixFree); Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(stdExp); Collections::Collection c(CollExp, impTypes); @@ -1163,7 +1163,7 @@ BOOST_AUTO_TEST_CASE( CollExp.push_back(Exp); LibUtilities::SessionReaderSharedPtr dummySession; - Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::CollectionOptimisation colOpt(dummySession, 3, Collections::eMatrixFree); Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(stdExp); Collections::Collection c(CollExp, impTypes); @@ -4644,4 +4644,183 @@ BOOST_AUTO_TEST_CASE(TestHexHelmholtz_MatrixFree_UniformP_ConstVarDiff) BOOST_CHECK_CLOSE(coeffsRef[i], coeffs[i], epsilon); } } + +BOOST_AUTO_TEST_CASE(TestHexPhysInterp1D_NoCollection_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, 1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v4( + new SpatialDomains::PointGeom(3u, 4u, -1.0, -1.0, 1.0)); + SpatialDomains::PointGeomSharedPtr v5( + new SpatialDomains::PointGeom(3u, 5u, 1.0, -1.0, 1.0)); + SpatialDomains::PointGeomSharedPtr v6( + new SpatialDomains::PointGeom(3u, 6u, 2.0, 3.0, 4.0)); + SpatialDomains::PointGeomSharedPtr v7( + new SpatialDomains::PointGeom(3u, 7u, -1.0, 1.0, 1.0)); + + SpatialDomains::HexGeomSharedPtr hexGeom = + CreateHex(v0, v1, v2, v3, v4, v5, v6, v7); + + Nektar::LibUtilities::PointsType quadPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + unsigned int numQuadPoints = 5; + unsigned int numModes = 4; + + const Nektar::LibUtilities::PointsKey quadPointsKeyDir1(numQuadPoints, + quadPointsTypeDir1); + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + quadPointsKeyDir1); + + Nektar::LocalRegions::HexExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::HexExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, basisKeyDir1, hexGeom); + + Nektar::StdRegions::StdHexExpSharedPtr stdExp = + MemoryManager<Nektar::StdRegions::StdHexExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, basisKeyDir1); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestHexPhysInterp1D_MatrixFree_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, 1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v4( + new SpatialDomains::PointGeom(3u, 4u, -1.0, -1.0, 1.0)); + SpatialDomains::PointGeomSharedPtr v5( + new SpatialDomains::PointGeom(3u, 5u, 1.0, -1.0, 1.0)); + SpatialDomains::PointGeomSharedPtr v6( + new SpatialDomains::PointGeom(3u, 6u, 2.0, 3.0, 4.0)); + SpatialDomains::PointGeomSharedPtr v7( + new SpatialDomains::PointGeom(3u, 7u, -1.0, 1.0, 1.0)); + + SpatialDomains::HexGeomSharedPtr hexGeom = + CreateHex(v0, v1, v2, v3, v4, v5, v6, v7); + + Nektar::LibUtilities::PointsType quadPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + unsigned int numQuadPoints = 5; + unsigned int numModes = 4; + + const Nektar::LibUtilities::PointsKey quadPointsKeyDir1(numQuadPoints, + quadPointsTypeDir1); + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + quadPointsKeyDir1); + + Nektar::LocalRegions::HexExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::HexExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, basisKeyDir1, hexGeom); + + Nektar::StdRegions::StdHexExpSharedPtr stdExp = + MemoryManager<Nektar::StdRegions::StdHexExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, basisKeyDir1); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + } // namespace Nektar::HexCollectionTests diff --git a/library/UnitTests/Collections/TestPrismCollection.cpp b/library/UnitTests/Collections/TestPrismCollection.cpp index d427ef12eb..9b98afe2e9 100644 --- a/library/UnitTests/Collections/TestPrismCollection.cpp +++ b/library/UnitTests/Collections/TestPrismCollection.cpp @@ -3693,4 +3693,206 @@ BOOST_AUTO_TEST_CASE(TestPrismHelmholtz_MatrixFree_UniformP_ConstVarDiff) } } +BOOST_AUTO_TEST_CASE(TestPrismPhsyInterp1DScaled_NoCollection_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, 1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v4( + new SpatialDomains::PointGeom(3u, 4u, -1.0, -1.0, 1.0)); + SpatialDomains::PointGeomSharedPtr v5( + new SpatialDomains::PointGeom(3u, 5u, -1.0, 1.0, 1.0)); + + SpatialDomains::PrismGeomSharedPtr prismGeom = + CreatePrism(v0, v1, v2, v3, v4, v5); + + unsigned int numQuadPoints = 7; + unsigned int numModes = 6; + + Nektar::LibUtilities::PointsType PointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir1(numQuadPoints, + PointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + PointsKeyDir1); + + Nektar::LibUtilities::PointsType PointsTypeDir2 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir2(numQuadPoints, + PointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, numModes, + PointsKeyDir2); + + Nektar::LibUtilities::PointsType PointsTypeDir3 = + Nektar::LibUtilities::eGaussRadauMAlpha1Beta0; + const Nektar::LibUtilities::PointsKey PointsKeyDir3(numQuadPoints - 1, + PointsTypeDir3); + Nektar::LibUtilities::BasisType basisTypeDir3 = + Nektar::LibUtilities::eModified_B; + const Nektar::LibUtilities::BasisKey basisKeyDir3(basisTypeDir3, numModes, + PointsKeyDir3); + + Nektar::LocalRegions::PrismExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::PrismExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, basisKeyDir3, prismGeom); + + Nektar::StdRegions::StdPrismExpSharedPtr stdExp = + MemoryManager<Nektar::StdRegions::StdPrismExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, basisKeyDir1); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq); + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 2.0e-8; + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestPrismPhsyInterp1DScaled_MatrixFree_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, 1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v4( + new SpatialDomains::PointGeom(3u, 4u, -1.0, -1.0, 1.0)); + SpatialDomains::PointGeomSharedPtr v5( + new SpatialDomains::PointGeom(3u, 5u, -1.0, 1.0, 1.0)); + + SpatialDomains::PrismGeomSharedPtr prismGeom = + CreatePrism(v0, v1, v2, v3, v4, v5); + + unsigned int numQuadPoints = 7; + unsigned int numModes = 6; + + Nektar::LibUtilities::PointsType PointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir1(numQuadPoints, + PointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + PointsKeyDir1); + + Nektar::LibUtilities::PointsType PointsTypeDir2 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir2(numQuadPoints, + PointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, numModes, + PointsKeyDir2); + + Nektar::LibUtilities::PointsType PointsTypeDir3 = + Nektar::LibUtilities::eGaussRadauMAlpha1Beta0; + const Nektar::LibUtilities::PointsKey PointsKeyDir3(numQuadPoints - 1, + PointsTypeDir3); + Nektar::LibUtilities::BasisType basisTypeDir3 = + Nektar::LibUtilities::eModified_B; + const Nektar::LibUtilities::BasisKey basisKeyDir3(basisTypeDir3, numModes, + PointsKeyDir3); + + Nektar::LocalRegions::PrismExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::PrismExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, basisKeyDir3, prismGeom); + + Nektar::StdRegions::StdPrismExpSharedPtr stdExp = + MemoryManager<Nektar::StdRegions::StdPrismExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, basisKeyDir1); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq); + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + } // namespace Nektar::PrismCollectionTests diff --git a/library/UnitTests/Collections/TestPyrCollection.cpp b/library/UnitTests/Collections/TestPyrCollection.cpp index d5be2f67dc..ec464f5d53 100644 --- a/library/UnitTests/Collections/TestPyrCollection.cpp +++ b/library/UnitTests/Collections/TestPyrCollection.cpp @@ -3395,4 +3395,189 @@ BOOST_AUTO_TEST_CASE(TestPyrHelmholtz_MatrixFree_UniformP_ConstVarDiff) BOOST_CHECK_CLOSE(coeffsRef[i], coeffs[i], epsilon); } } + +BOOST_AUTO_TEST_CASE(TestPyrPhysInterp1DScaled_NoCollection_UniformP_MultiElmt) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, 1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v4( + new SpatialDomains::PointGeom(3u, 4u, -1.0, -1.0, 1.0)); + + SpatialDomains::PyrGeomSharedPtr pyrGeom = CreatePyr(v0, v1, v2, v3, v4); + + Nektar::LibUtilities::PointsType PointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir1(5, PointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, 4, + PointsKeyDir1); + + Nektar::LibUtilities::PointsType PointsTypeDir2 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir2(5, PointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, 4, + PointsKeyDir2); + + Nektar::LibUtilities::PointsType PointsTypeDir3 = + Nektar::LibUtilities::eGaussRadauMAlpha2Beta0; + const Nektar::LibUtilities::PointsKey PointsKeyDir3(4, PointsTypeDir3); + Nektar::LibUtilities::BasisType basisTypeDir3 = + Nektar::LibUtilities::eModifiedPyr_C; + const Nektar::LibUtilities::BasisKey basisKeyDir3(basisTypeDir3, 4, + PointsKeyDir3); + + Nektar::LocalRegions::PyrExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::PyrExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, basisKeyDir3, pyrGeom); + + int nelmts = 1; + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + for (int i = 0; i < nelmts; ++i) + { + CollExp.push_back(Exp); + } + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq); + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 2.0e-8; + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestPyrPhysInterp1DScaled_MatrixFree_UniformP_MultiElmt) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, 1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v4( + new SpatialDomains::PointGeom(3u, 4u, -1.0, -1.0, 1.0)); + + SpatialDomains::PyrGeomSharedPtr pyrGeom = CreatePyr(v0, v1, v2, v3, v4); + + Nektar::LibUtilities::PointsType PointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir1(5, PointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, 4, + PointsKeyDir1); + + Nektar::LibUtilities::PointsType PointsTypeDir2 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey PointsKeyDir2(5, PointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, 4, + PointsKeyDir2); + + Nektar::LibUtilities::PointsType PointsTypeDir3 = + Nektar::LibUtilities::eGaussRadauMAlpha2Beta0; + const Nektar::LibUtilities::PointsKey PointsKeyDir3(4, PointsTypeDir3); + Nektar::LibUtilities::BasisType basisTypeDir3 = + Nektar::LibUtilities::eModifiedPyr_C; + const Nektar::LibUtilities::BasisKey basisKeyDir3(basisTypeDir3, 4, + PointsKeyDir3); + + Nektar::LocalRegions::PyrExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::PyrExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, basisKeyDir3, pyrGeom); + + int nelmts = 1; + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + for (int i = 0; i < nelmts; ++i) + { + CollExp.push_back(Exp); + } + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq); + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 2.0e-8; + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + } // namespace Nektar::PyrCollectionTests diff --git a/library/UnitTests/Collections/TestQuadCollection.cpp b/library/UnitTests/Collections/TestQuadCollection.cpp index 7c61a52f1d..7cd6423568 100644 --- a/library/UnitTests/Collections/TestQuadCollection.cpp +++ b/library/UnitTests/Collections/TestQuadCollection.cpp @@ -3286,4 +3286,157 @@ BOOST_AUTO_TEST_CASE(TestQuadHelmholtz_MatrixFree_UniformP_ConstVarDiff) BOOST_CHECK_CLOSE(coeffsRef[i], coeffs[i], epsilon); } } + +BOOST_AUTO_TEST_CASE(TestQuadPhysInterp1D_NoCollection_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(2u, 0u, -1.0, -1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(2u, 1u, 1.0, -1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(2u, 2u, 1.0, 1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(2u, 3u, -1.0, 1.0, 0.0)); + + SpatialDomains::QuadGeomSharedPtr quadGeom = CreateQuad(v0, v1, v2, v3); + + Nektar::LibUtilities::PointsType quadPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + unsigned int numQuadPoints = 6; + const Nektar::LibUtilities::PointsKey quadPointsKeyDir1(numQuadPoints, + quadPointsTypeDir1); + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, 4, + quadPointsKeyDir1); + + Nektar::LocalRegions::QuadExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::QuadExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, quadGeom); + + Nektar::StdRegions::StdQuadExpSharedPtr stdExp = + MemoryManager<Nektar::StdRegions::StdQuadExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestQuadPhysInterp1D_MatrixFree_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(2u, 0u, -1.0, -1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(2u, 1u, 1.0, -1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(2u, 2u, 1.0, 1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(2u, 3u, -1.0, 1.0, 0.0)); + + SpatialDomains::QuadGeomSharedPtr quadGeom = CreateQuad(v0, v1, v2, v3); + + Nektar::LibUtilities::PointsType quadPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + unsigned int numQuadPoints = 6; + const Nektar::LibUtilities::PointsKey quadPointsKeyDir1(numQuadPoints, + quadPointsTypeDir1); + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, 4, + quadPointsKeyDir1); + + Nektar::LocalRegions::QuadExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::QuadExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1, quadGeom); + + Nektar::StdRegions::StdQuadExpSharedPtr stdExp = + MemoryManager<Nektar::StdRegions::StdQuadExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir1); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + } // namespace Nektar::QuadCollectionTests diff --git a/library/UnitTests/Collections/TestSegCollection.cpp b/library/UnitTests/Collections/TestSegCollection.cpp index 8bb95e8dfc..0630801cd5 100644 --- a/library/UnitTests/Collections/TestSegCollection.cpp +++ b/library/UnitTests/Collections/TestSegCollection.cpp @@ -1795,4 +1795,144 @@ BOOST_AUTO_TEST_CASE( BOOST_CHECK_CLOSE(coeffs1[i], coeffs2[i], epsilon); } } + +BOOST_AUTO_TEST_CASE(TestSegPhysInterp1D_NoCollection_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(2u, 0u, -1.0, 0.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(2u, 1u, 1.0, 0.0, 0.0)); + + SpatialDomains::SegGeomSharedPtr segGeom = CreateSegGeom(0, v0, v1, 2); + + Nektar::LibUtilities::PointsType segPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + unsigned int numSegPoints = 6; + const Nektar::LibUtilities::PointsKey segPointsKeyDir1(numSegPoints, + segPointsTypeDir1); + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, 4, + segPointsKeyDir1); + + Nektar::LocalRegions::SegExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::SegExp>::AllocateSharedPtr( + basisKeyDir1, segGeom); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 1, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestSegPhysInterp1D_MatrixFree_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(2u, 0u, -1.0, 1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(2u, 1u, 1.0, 1.0, 0.0)); + + SpatialDomains::SegGeomSharedPtr segGeom = CreateSegGeom(0, v0, v1, 2); + + Nektar::LibUtilities::PointsType segPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + unsigned int numSegPoints = 6; + const Nektar::LibUtilities::PointsKey segPointsKeyDir1(numSegPoints, + segPointsTypeDir1); + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, 4, + segPointsKeyDir1); + + Nektar::LocalRegions::SegExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::SegExp>::AllocateSharedPtr( + basisKeyDir1, segGeom); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 1, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc); + + for (int i = 0; i < nq; ++i) + { + yc[i] = (fabs(yc[i]) < 1e-14) ? 0.0 : yc[i]; + phys[i] = pow(xc[i], 3) + pow(yc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + xc1[i] = (fabs(xc1[i]) < 1e-14) ? 0.0 : xc1[i]; + yc1[i] = (fabs(yc1[i]) < 1e-14) ? 0.0 : yc1[i]; + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + } // namespace Nektar::SegCollectionTests diff --git a/library/UnitTests/Collections/TestTetCollection.cpp b/library/UnitTests/Collections/TestTetCollection.cpp index 7a70f4d2ef..4de6373b39 100644 --- a/library/UnitTests/Collections/TestTetCollection.cpp +++ b/library/UnitTests/Collections/TestTetCollection.cpp @@ -2763,7 +2763,7 @@ BOOST_AUTO_TEST_CASE(TestTetmHelmholtz_IterPerExp_UniformP_ConstVarDiff) Nektar::StdRegions::StdTetExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTetExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2, basisKeyDir3); int nelmts = 10; @@ -2873,7 +2873,7 @@ BOOST_AUTO_TEST_CASE(TestTetmHelmholtz_MatrixFree_UniformP) Nektar::StdRegions::StdTetExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTetExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2, basisKeyDir3); int nelmts = 10; @@ -2977,7 +2977,7 @@ BOOST_AUTO_TEST_CASE(TestTetmHelmholtz_MatrixFree_Deformed_OverInt) Nektar::StdRegions::StdTetExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTetExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2, basisKeyDir3); int nelmts = 10; @@ -3178,7 +3178,7 @@ BOOST_AUTO_TEST_CASE(TestTetmHelmholtz_MatrixFree_UniformP_ConstVarDiff) Nektar::StdRegions::StdTetExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTetExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2, basisKeyDir3); int nelmts = 10; @@ -3238,4 +3238,194 @@ BOOST_AUTO_TEST_CASE(TestTetmHelmholtz_MatrixFree_UniformP_ConstVarDiff) BOOST_CHECK_CLOSE(coeffsRef[i], coeffs[i], epsilon); } } + +BOOST_AUTO_TEST_CASE(TestTetPhysInterp1D_NoCollections_UniformP) +{ + + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, -1.0, 1.0)); + + SpatialDomains::TetGeomSharedPtr tetGeom = CreateTet(v0, v1, v2, v3); + + unsigned int numQuadPoints = 5; + unsigned int numModes = 4; + + Nektar::LibUtilities::PointsType triPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey triPointsKeyDir1(numQuadPoints, + triPointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + triPointsKeyDir1); + + Nektar::LibUtilities::PointsType triPointsTypeDir2 = + Nektar::LibUtilities::eGaussRadauMAlpha1Beta0; + const Nektar::LibUtilities::PointsKey triPointsKeyDir2(numQuadPoints - 1, + triPointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_B; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, numModes, + triPointsKeyDir2); + + Nektar::LibUtilities::PointsType triPointsTypeDir3 = + Nektar::LibUtilities::eGaussRadauMAlpha2Beta0; + const Nektar::LibUtilities::PointsKey triPointsKeyDir3(numQuadPoints - 1, + triPointsTypeDir3); + Nektar::LibUtilities::BasisType basisTypeDir3 = + Nektar::LibUtilities::eModified_C; + const Nektar::LibUtilities::BasisKey basisKeyDir3(basisTypeDir3, numModes, + triPointsKeyDir3); + + Nektar::LocalRegions::TetExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::TetExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, basisKeyDir3, tetGeom); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq); + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestTetPhysInterp1D_MatrixFree_UniformP) +{ + + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(3u, 0u, -1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(3u, 1u, 1.0, -1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(3u, 2u, -1.0, 1.0, -1.0)); + SpatialDomains::PointGeomSharedPtr v3( + new SpatialDomains::PointGeom(3u, 3u, -1.0, -1.0, 1.0)); + + SpatialDomains::TetGeomSharedPtr tetGeom = CreateTet(v0, v1, v2, v3); + + unsigned int numQuadPoints = 5; + unsigned int numModes = 4; + + Nektar::LibUtilities::PointsType triPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey triPointsKeyDir1(numQuadPoints, + triPointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + triPointsKeyDir1); + + Nektar::LibUtilities::PointsType triPointsTypeDir2 = + Nektar::LibUtilities::eGaussRadauMAlpha1Beta0; + const Nektar::LibUtilities::PointsKey triPointsKeyDir2(numQuadPoints - 1, + triPointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_B; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, numModes, + triPointsKeyDir2); + + Nektar::LibUtilities::PointsType triPointsTypeDir3 = + Nektar::LibUtilities::eGaussRadauMAlpha2Beta0; + const Nektar::LibUtilities::PointsKey triPointsKeyDir3(numQuadPoints - 1, + triPointsTypeDir3); + Nektar::LibUtilities::BasisType basisTypeDir3 = + Nektar::LibUtilities::eModified_C; + const Nektar::LibUtilities::BasisKey basisKeyDir3(basisTypeDir3, numModes, + triPointsKeyDir3); + + Nektar::LocalRegions::TetExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::TetExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, basisKeyDir3, tetGeom); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 3, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq), zc(nq); + Array<OneD, NekDouble> phys(nq); + + Exp->GetCoords(xc, yc, zc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3) + pow(zc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> zc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, zc, zc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3) + pow(zc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} } // namespace Nektar::TetCollectionTests diff --git a/library/UnitTests/Collections/TestTriCollection.cpp b/library/UnitTests/Collections/TestTriCollection.cpp index 7d68e2c02b..060c491804 100644 --- a/library/UnitTests/Collections/TestTriCollection.cpp +++ b/library/UnitTests/Collections/TestTriCollection.cpp @@ -3190,7 +3190,7 @@ BOOST_AUTO_TEST_CASE(TestTriHelmholtz_IterPerExp_UniformP_ConstVarDiff) Nektar::StdRegions::StdTriExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTriExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2); int nelmts = 10; @@ -3287,7 +3287,7 @@ BOOST_AUTO_TEST_CASE(TestTriHelmholtz_MatrixFree_UniformP) Nektar::StdRegions::StdTriExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTriExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2); int nelmts = 10; @@ -3381,7 +3381,7 @@ BOOST_AUTO_TEST_CASE(TestTriHelmholtz_MatrixFree_UniformP_OverInt) Nektar::StdRegions::StdTriExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTriExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2); int nelmts = 10; @@ -3475,7 +3475,7 @@ BOOST_AUTO_TEST_CASE(TestTriHelmholtz_MatrixFree_UniformP_ConstVarDiff) Nektar::StdRegions::StdTriExpSharedPtr stdExp = MemoryManager<Nektar::StdRegions::StdTriExp>::AllocateSharedPtr( - basisKeyDir1, basisKeyDir1); + basisKeyDir1, basisKeyDir2); int nelmts = 10; @@ -3533,4 +3533,167 @@ BOOST_AUTO_TEST_CASE(TestTriHelmholtz_MatrixFree_UniformP_ConstVarDiff) BOOST_CHECK_CLOSE(coeffsRef[i], coeffs[i], epsilon); } } + +BOOST_AUTO_TEST_CASE(TestTriPhysInterp1D_NoCollection_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(2u, 0u, -1.5, -1.5, 0.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(2u, 1u, 1.0, -1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(2u, 2u, -1.0, 1.0, 0.0)); + + SpatialDomains::TriGeomSharedPtr triGeom = CreateTri(v0, v1, v2); + + unsigned int numQuadPoints = 5; + unsigned int numModes = 4; + + Nektar::LibUtilities::PointsType triPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey triPointsKeyDir1(numQuadPoints, + triPointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + triPointsKeyDir1); + + Nektar::LibUtilities::PointsType triPointsTypeDir2 = + Nektar::LibUtilities::eGaussRadauMAlpha1Beta0; + const Nektar::LibUtilities::PointsKey triPointsKeyDir2(numQuadPoints - 1, + triPointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_B; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, numModes, + triPointsKeyDir2); + + Nektar::LocalRegions::TriExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::TriExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, triGeom); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::eNoCollection); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + +BOOST_AUTO_TEST_CASE(TestTriPhysInterp1D_MatrixFree_UniformP) +{ + SpatialDomains::PointGeomSharedPtr v0( + new SpatialDomains::PointGeom(2u, 0u, -1.5, -1.5, 0.0)); + SpatialDomains::PointGeomSharedPtr v1( + new SpatialDomains::PointGeom(2u, 1u, 1.0, -1.0, 0.0)); + SpatialDomains::PointGeomSharedPtr v2( + new SpatialDomains::PointGeom(2u, 2u, -1.0, 1.0, 0.0)); + + SpatialDomains::TriGeomSharedPtr triGeom = CreateTri(v0, v1, v2); + + unsigned int numQuadPoints = 5; + unsigned int numModes = 4; + + Nektar::LibUtilities::PointsType triPointsTypeDir1 = + Nektar::LibUtilities::eGaussLobattoLegendre; + const Nektar::LibUtilities::PointsKey triPointsKeyDir1(numQuadPoints, + triPointsTypeDir1); + Nektar::LibUtilities::BasisType basisTypeDir1 = + Nektar::LibUtilities::eModified_A; + const Nektar::LibUtilities::BasisKey basisKeyDir1(basisTypeDir1, numModes, + triPointsKeyDir1); + + Nektar::LibUtilities::PointsType triPointsTypeDir2 = + Nektar::LibUtilities::eGaussRadauMAlpha1Beta0; + const Nektar::LibUtilities::PointsKey triPointsKeyDir2(numQuadPoints - 1, + triPointsTypeDir2); + Nektar::LibUtilities::BasisType basisTypeDir2 = + Nektar::LibUtilities::eModified_B; + const Nektar::LibUtilities::BasisKey basisKeyDir2(basisTypeDir2, numModes, + triPointsKeyDir2); + + Nektar::LocalRegions::TriExpSharedPtr Exp = + MemoryManager<Nektar::LocalRegions::TriExp>::AllocateSharedPtr( + basisKeyDir1, basisKeyDir2, triGeom); + + std::vector<StdRegions::StdExpansionSharedPtr> CollExp; + CollExp.push_back(Exp); + + LibUtilities::SessionReaderSharedPtr dummySession; + Collections::CollectionOptimisation colOpt(dummySession, 2, + Collections::eMatrixFree); + Collections::OperatorImpMap impTypes = colOpt.GetOperatorImpMap(Exp); + Collections::Collection c(CollExp, impTypes); + + StdRegions::ConstFactorMap factors; + factors[StdRegions::eFactorConst] = 1.5; + c.Initialise(Collections::ePhysInterp1DScaled, factors); + + const int nq = Exp->GetTotPoints(); + + Array<OneD, NekDouble> xc(nq), yc(nq); + Array<OneD, NekDouble> phys(nq), tmp; + + Exp->GetCoords(xc, yc); + + for (int i = 0; i < nq; ++i) + { + phys[i] = pow(xc[i], 3) + pow(yc[i], 3); + } + + const int nq1 = c.GetOutputSize(Collections::ePhysInterp1DScaled); + Array<OneD, NekDouble> xc1(nq1); + Array<OneD, NekDouble> yc1(nq1); + Array<OneD, NekDouble> phys1(nq1); + + c.ApplyOperator(Collections::ePhysInterp1DScaled, xc, xc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, yc, yc1); + c.ApplyOperator(Collections::ePhysInterp1DScaled, phys, phys1); + + double epsilon = 1.0e-8; + // since solution is a polynomial should be able to compare soln directly + for (int i = 0; i < nq1; ++i) + { + NekDouble exact = pow(xc1[i], 3) + pow(yc1[i], 3); + phys1[i] = (fabs(phys1[i]) < 1e-14) ? 0.0 : phys1[i]; + exact = (fabs(exact) < 1e-14) ? 0.0 : exact; + BOOST_CHECK_CLOSE(phys1[i], exact, epsilon); + } +} + } // namespace Nektar::TriCollectionTests -- GitLab