From a2ec64949926ad188c2bfb061c04209b80beabc0 Mon Sep 17 00:00:00 2001 From: David Moxey Date: Wed, 27 Apr 2022 12:39:41 +0100 Subject: [PATCH 01/13] Add saena capability --- CMakeLists.txt | 1 + cmake/ThirdPartySaena.cmake | 51 ++++ .../AssemblyMap/AssemblyMapCG.cpp | 3 + library/MultiRegions/CMakeLists.txt | 17 ++ library/MultiRegions/GlobalLinSys.cpp | 11 +- library/MultiRegions/GlobalLinSysSaena.cpp | 264 ++++++++++++++++++ library/MultiRegions/GlobalLinSysSaena.h | 105 +++++++ .../MultiRegions/GlobalLinSysSaenaFull.cpp | 260 +++++++++++++++++ library/MultiRegions/GlobalLinSysSaenaFull.h | 91 ++++++ .../GlobalLinSysSaenaStaticCond.cpp | 231 +++++++++++++++ .../GlobalLinSysSaenaStaticCond.h | 114 ++++++++ library/MultiRegions/MultiRegions.hpp | 8 +- library/MultiRegions/PreconditionerLinear.cpp | 28 ++ library/MultiRegions/PreconditionerLinear.h | 3 +- 14 files changed, 1184 insertions(+), 3 deletions(-) create mode 100644 cmake/ThirdPartySaena.cmake create mode 100644 library/MultiRegions/GlobalLinSysSaena.cpp create mode 100644 library/MultiRegions/GlobalLinSysSaena.h create mode 100644 library/MultiRegions/GlobalLinSysSaenaFull.cpp create mode 100644 library/MultiRegions/GlobalLinSysSaenaFull.h create mode 100644 library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp create mode 100644 library/MultiRegions/GlobalLinSysSaenaStaticCond.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6da1e5512..c45a62a64 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -260,6 +260,7 @@ INCLUDE (ThirdPartyTetGen) INCLUDE (ThirdPartyCCM) INCLUDE (ThirdPartyBlasLapack) INCLUDE (ThirdPartyCwipi) +INCLUDE (ThirdPartySaena) INCLUDE (FindCFI) INCLUDE (FindLikwid) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake new file mode 100644 index 000000000..1ddf1b3de --- /dev/null +++ b/cmake/ThirdPartySaena.cmake @@ -0,0 +1,51 @@ +######################################################################## +# +# ThirdParty configuration for Nektar++ +# +# Saena +#
+######################################################################## + +CMAKE_DEPENDENT_OPTION(NEKTAR_USE_SAENA + "Enable Saena parallel matrix solver support." OFF + "NEKTAR_USE_MKL;NEKTAR_USE_MPI" ON) + +IF (NEKTAR_USE_SAENA) + SET(BUILD_SAENA ON) + + CMAKE_DEPENDENT_OPTION(THIRDPARTY_BUILD_SAENA + "Build Saena if needed" ${BUILD_SAENA} + "NEKTAR_USE_SAENA" OFF) + + IF (THIRDPARTY_BUILD_SAENA) + INCLUDE(ExternalProject) + + EXTERNALPROJECT_ADD( + saena + PREFIX ${TPSRC} + STAMP_DIR ${TPBUILD}/stamp + GIT_REPOSITORY https://github.com/mdave/Saena_Public.git + GIT_TAG 298fc0249aa05fd03c27108615c8f2afe8c111bf + DOWNLOAD_DIR ${TPSRC} + SOURCE_DIR ${TPBUILD}/saena + TMP_DIR ${TPBUILD}/saena-tmp + INSTALL_DIR ${TPDIST} + BINARY_DIR ${TPBUILD}/saena + CONFIGURE_COMMAND "" + BUILD_COMMAND ./install.sh + ) + + THIRDPARTY_LIBRARY(SAENA_LIBRARY SHARED saena + DESCRIPTION "Saena library") + SET(SAENA_INCLUDE_DIR ${TPDIST}/include CACHE FILEPATH + "Saena includes" FORCE) + MESSAGE(STATUS "Build Saena: ${SAENA_LIBRARY}") + SET(SAENA_CONFIG_INCLUDE_DIR ${TPINC}) + ELSE() + MESSAGE(FATAL_ERROR "Saena only available through third-party install") + ENDIF() + + ADD_DEFINITIONS(-DNEKTAR_USING_SAENA) + INCLUDE_DIRECTORIES(${SAENA_INCLUDE_DIR}) + MARK_AS_ADVANCED(SAENA_LIBRARY SAENA_INCLUDE_DIR) +ENDIF() diff --git a/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp b/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp index dc907484d..e56d38b73 100644 --- a/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp +++ b/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp @@ -1258,6 +1258,8 @@ namespace Nektar case eIterativeStaticCond: case ePETScStaticCond: case ePETScFullMatrix: + case eSaenaStaticCond: + case eSaenaFullMatrix: case eXxtFullMatrix: case eXxtStaticCond: { @@ -1275,6 +1277,7 @@ namespace Nektar case eDirectMultiLevelStaticCond: case eIterativeMultiLevelStaticCond: case eXxtMultiLevelStaticCond: + case eSaenaMultiLevelStaticCond: { MultiLevelBisectionReordering(
boostGraphObj, perm, iperm, bottomUpGraph, diff --git a/library/MultiRegions/CMakeLists.txt b/library/MultiRegions/CMakeLists.txt index 67daccb0e..9cd109a78 100644 --- a/library/MultiRegions/CMakeLists.txt +++ b/library/MultiRegions/CMakeLists.txt @@ -113,6 +113,18 @@ IF(NEKTAR_USE_PETSC) ) ENDIF(NEKTAR_USE_PETSC) +IF(NEKTAR_USE_SAENA) + SET(MULTI_REGIONS_HEADERS ${MULTI_REGIONS_HEADERS} + GlobalLinSysSaena.h + GlobalLinSysSaenaFull.h + GlobalLinSysSaenaStaticCond.h + ) + SET(MULTI_REGIONS_SOURCES ${MULTI_REGIONS_SOURCES} + GlobalLinSysSaena.cpp + GlobalLinSysSaenaFull.cpp + GlobalLinSysSaenaStaticCond.cpp + ) +ENDIF(NEKTAR_USE_SAENA) ADD_DEFINITIONS(-DMULTI_REGIONS_EXPORTS) @@ -133,6 +145,11 @@ IF( NEKTAR_USE_PETSC ) ADD_DEPENDENCIES(MultiRegions petsc-3.11.4) ENDIF( NEKTAR_USE_PETSC ) +IF (NEKTAR_USE_SAENA) + TARGET_LINK_LIBRARIES(MultiRegions LINK_PRIVATE ${SAENA_LIBRARY}) + ADD_DEPENDENCIES(MultiRegions saena) +ENDIF() + IF (NEKTAR_BUILD_PYTHON) SUBDIRS(Python) ENDIF() diff --git a/library/MultiRegions/GlobalLinSys.cpp b/library/MultiRegions/GlobalLinSys.cpp index 9a449d2f3..c656cf880 100644 --- a/library/MultiRegions/GlobalLinSys.cpp +++ b/library/MultiRegions/GlobalLinSys.cpp @@ -83,7 +83,16 @@ namespace Nektar MultiRegions::ePETScStaticCond), LibUtilities::SessionReader::RegisterEnumValue( "GlobalSysSoln", "PETScMultiLevelStaticCond", - MultiRegions::ePETScMultiLevelStaticCond) + MultiRegions::ePETScMultiLevelStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "SaenaFull", + MultiRegions::eSaenaFullMatrix), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "SaenaStaticCond", + MultiRegions::eSaenaStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "SaenaMultiLevelStaticCond", + MultiRegions::eSaenaMultiLevelStaticCond) }; #ifdef NEKTAR_USE_SCOTCH diff --git a/library/MultiRegions/GlobalLinSysSaena.cpp b/library/MultiRegions/GlobalLinSysSaena.cpp new file mode 100644 index 
000000000..f1c09d8b6 --- /dev/null +++ b/library/MultiRegions/GlobalLinSysSaena.cpp @@ -0,0 +1,264 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File GlobalLinSysSaena.cpp +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE.
+// +// Description: GlobalLinSysSaena definition +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include +#include + +using namespace std; + +namespace Nektar +{ + namespace MultiRegions + { + /** + * @class GlobalLinSysSaena + * + * Solves a linear system using Saena. + */ + GlobalLinSysSaena::GlobalLinSysSaena( + const GlobalLinSysKey &pKey, + const std::weak_ptr &pExp, + const std::shared_ptr &pLocToGloMap) + : GlobalLinSys(pKey, pExp, pLocToGloMap), m_comm(MPI_COMM_NULL), + m_nLocal(0), m_bdydof(0), m_scale(false) + { } + + /** + * @brief Destructor. + * + * The body is empty: the Saena matrix, vector, options and solver + * objects are held by value as data members of this class, so they + * are released automatically when this object is destroyed and + * nothing needs to be deallocated by hand here. + */ + GlobalLinSysSaena::~GlobalLinSysSaena() + { + } + + /** + * @brief Solve linear system using Saena. + * + * The general strategy behind a Saena solve is to: + * + * - Copy pInput (from offset pNumDir) into a Saena vector via #m_reorderedMap. + * - Assemble the vector and set it as the right-hand side of #m_amg. + * - Solve with solve_pCG using the options held in #m_opts. + * - Copy the solution into pOutput (from offset pNumDir) and free it. + */ + void GlobalLinSysSaena::v_SolveLinearSystem( + const int pNumRows, + const Array &pInput, + Array &pOutput, + const AssemblyMapSharedPtr &locToGloMap, + const int pNumDir) + { + BOOST_IGNORE_UNUSED(locToGloMap); + + // @TODO: a fresh local RHS vector is built on every call instead of + // reusing the member m_rhs; this should not be necessary. + saena::vector rhs; + rhs.set_comm(m_comm); + + const int nHomDofs = pNumRows - pNumDir; + + rhs.set(m_reorderedMap.data(), &pInput[pNumDir], nHomDofs); + rhs.assemble(); + m_amg.set_rhs(rhs); + + // Temporary solution storage?
+ NekDouble *sol = nullptr; + + // Solve with pCG method + m_amg.solve_pCG(sol, &m_opts); + + Vmath::Vcopy(nHomDofs, sol, 1, &pOutput[pNumDir], 1); + + if(sol != nullptr) + { + free(sol); + sol = nullptr; + } + } + + /** + * @brief Calculate a reordering of universal IDs for Saena. + * + * Saena requires a unique, contiguous index of all global and universal + * degrees of freedom which represents its position inside the + * matrix. Presently Gs does not guarantee this, so this routine + * constructs a new universal mapping. + * + * @param glo2uniMap Global to universal map + * @param glo2unique Global to unique map + * @param pLocToGloMap Assembly map for this system + */ + void GlobalLinSysSaena::CalculateReordering( + const Array &glo2uniMap, + const Array &glo2unique, + const AssemblyMapSharedPtr &pLocToGloMap) + { + LibUtilities::CommSharedPtr vComm + = m_expList.lock()->GetSession()->GetComm(); + + const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + const int nHomDofs = glo2uniMap.size() - nDirDofs; + const int nProc = vComm->GetSize(); + const int rank = vComm->GetRank(); + + int n, cnt; + + // Count number of unique degrees of freedom on each process. + m_nLocal = Vmath::Vsum(nHomDofs, glo2unique + nDirDofs, 1); + m_reorderedMap.resize(nHomDofs); + + // Reduce coefficient counts across all processors. + Array localCounts(nProc, 0), localOffset(nProc, 0); + localCounts[rank] = nHomDofs; + vComm->AllReduce(localCounts, LibUtilities::ReduceSum); + + for (n = 1; n < nProc; ++n) + { + localOffset[n] = localOffset[n-1] + localCounts[n-1]; + } + + int totHomDofs = Vmath::Vsum(nProc, localCounts, 1); + vector allUniIds(totHomDofs, 0); + + // Assemble list of universal IDs + for (n = 0; n < nHomDofs; ++n) + { + int gid = n + nDirDofs; + allUniIds[n + localOffset[rank]] = glo2uniMap[gid]; + } + + // Reduce this across processors so that each process has a list of + // all universal IDs. 
+ vComm->AllReduce(allUniIds, LibUtilities::ReduceSum); + std::sort(allUniIds.begin(), allUniIds.end()); + map uniIdReorder; + + // Renumber starting from 0. + for (cnt = n = 0; n < allUniIds.size(); ++n) + { + if (uniIdReorder.count(allUniIds[n]) > 0) + { + continue; + } + + uniIdReorder[allUniIds[n]] = cnt++; + } + + // Populate reordering map. + for (n = 0; n < nHomDofs; ++n) + { + int gid = n + nDirDofs; + int uniId = glo2uniMap[gid]; + ASSERTL0(uniIdReorder.count(uniId) > 0, "Error in ordering"); + m_reorderedMap[n] = uniIdReorder[uniId]; + } + + m_bdydof = nDirDofs; + } + + /** + * @brief Construct Saena matrix and vector handles. + * + * @todo Preallocation should be done at this point, since presently + * matrix allocation takes a significant amount of time. + * + * Takes the MPI communicator from the session, applies it to #m_matrix + * and #m_rhs, reads the polynomial order and dimension from the + * expansion list, and registers the matrix, options and mesh data + * with the solver object #m_amg. + */ + void GlobalLinSysSaena::SetUpMatVec() + { + LibUtilities::CommSharedPtr comm = + m_expList.lock()->GetSession()->GetComm(); + auto mpiComm = std::dynamic_pointer_cast< + LibUtilities::CommMpi>(comm); + ASSERTL0(mpiComm != nullptr, "Saena requires an MPI communicator."); + m_comm = mpiComm->GetComm(); + m_matrix.set_comm(m_comm); + m_matrix.add_duplicates(true); + m_rhs.set_comm(m_comm); + + int nummodes = m_expList.lock()->GetFieldDefinitions()[0]->m_numModes[0]; + int p_order = nummodes - 1; + int prodim = m_expList.lock()->GetCoordim(0); + + m_matrix.set_p_order(p_order); + m_matrix.set_prodim(prodim); + + // set p_coarsen levels computation. subtract by a constant. + vector order_dif; + for(int i = 0; i < p_order - 1; ++i) + { + order_dif.emplace_back(1); + } + + // set number of multigrid levels + int max_h_level = 1; // h-multigrid levels + m_amg.set_multigrid_max_level( + static_cast(order_dif.size()) + max_h_level); + + m_amg.set_scale(m_scale); + m_amg.set_matrix( + &m_matrix, &m_opts, m_l2g, m_reorderedMap, m_bdydof, order_dif); + } + + /** + * @brief Set up Saena solver options.
+ * + * Sets the relative tolerance on #m_opts and switches matrix scaling + * off; the remaining solver options are not set here. + * + * @param tolerance Relative tolerance passed to the Saena options. + */ + void GlobalLinSysSaena::SetUpSolver(NekDouble tolerance) + { + m_step = 0; + m_scale = false; + m_opts.set_relative_tolerance(tolerance); + // m_opts.set_dynamic_levels(false); + // m_opts.set_max_lev(5); + // m_opts.set_vcycle_num(400); + // m_opts.set_smoother("chebyshev"); // chebyshev, jacobi + // m_opts.set_preSmooth(3); + // m_opts.set_postSmooth(3); + } + } +} diff --git a/library/MultiRegions/GlobalLinSysSaena.h b/library/MultiRegions/GlobalLinSysSaena.h new file mode 100644 index 000000000..80721a578 --- /dev/null +++ b/library/MultiRegions/GlobalLinSysSaena.h @@ -0,0 +1,105 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File GlobalLinSysSaena.h +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software.
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: GlobalLinSysSaena header +// +/////////////////////////////////////////////////////////////////////////////// +#ifndef NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENA_H +#define NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENA_H + +#include +#include + +#include +#include + +namespace Nektar +{ +namespace MultiRegions +{ +// Forward declarations +class ExpList; + +/// A Saena global linear system. +class GlobalLinSysSaena : virtual public GlobalLinSys +{ +public: + /// Constructor for full direct matrix solve. + MULTI_REGIONS_EXPORT GlobalLinSysSaena( + const GlobalLinSysKey &pKey, + const std::weak_ptr &pExp, + const std::shared_ptr &pLocToGloMap); + + MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaena(); + + virtual void v_SolveLinearSystem( + const int pNumRows, + const Array &pInput, + Array &pOutput, + const AssemblyMapSharedPtr &locToGloMap, + const int pNumDir); + +protected: + /// Saena matrix object. + saena::matrix m_matrix; + /// Saena vector to store rhs + saena::vector m_rhs; + /// Saena object for options + saena::options m_opts; + /// Saena object that represents solver system. + saena::amg m_amg; + /// Reordering that takes universal IDs to a unique row in the Saena + /// matrix. @see GlobalLinSysSaena::CalculateReordering + std::vector m_reorderedMap; + /// MPI communicator + MPI_Comm m_comm; + /// Number of unique degrees of freedom on this process. 
+ int m_nLocal; + /// Number of boundary degrees of freedom + int m_bdydof; + /// Mesh information + std::vector> m_l2g; + /// flag to set the linear system to be scaled + bool m_scale; + + PreconditionerSharedPtr m_precon; + + void SetUpMatVec(); + void SetUpSolver(NekDouble tolerance); + void CalculateReordering( + const Array &glo2uniMap, + const Array &glo2unique, + const AssemblyMapSharedPtr &pLocToGloMap); +}; +} +} + +#endif diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.cpp b/library/MultiRegions/GlobalLinSysSaenaFull.cpp new file mode 100644 index 000000000..7570ac0c9 --- /dev/null +++ b/library/MultiRegions/GlobalLinSysSaenaFull.cpp @@ -0,0 +1,260 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File GlobalLinSysSaenaFull.cpp +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. 
+// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: GlobalLinSysSaenaFull definition +// +/////////////////////////////////////////////////////////////////////////////// + +#include +#include + +using namespace std; + +namespace Nektar +{ + namespace MultiRegions + { + /** + * @class GlobalLinSysSaenaFull + */ + + /** + * Registers the class with the Factory. + */ + string GlobalLinSysSaenaFull::className + = GetGlobalLinSysFactory().RegisterCreatorFunction( + "SaenaFull", + GlobalLinSysSaenaFull::create, + "Saena Full Matrix."); + + + /// Constructor for full direct matrix solve. 
+ GlobalLinSysSaenaFull::GlobalLinSysSaenaFull( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExp, + const std::shared_ptr &pLocToGloMap) + : GlobalLinSys (pLinSysKey, pExp, pLocToGloMap), + GlobalLinSysSaena(pLinSysKey, pExp, pLocToGloMap) + { + // NOTE(review): SetUpMatVec() passes m_scale, m_l2g, m_reorderedMap and m_bdydof to m_amg before the code below assigns them -- confirm intended. + SetUpMatVec(); + + int rank = 0, nprocs = 0; + MPI_Comm_size(m_comm, &nprocs); + MPI_Comm_rank(m_comm, &rank); + + auto tbegin = clock(); + + const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + + int i, j, n, cnt, gid1, gid2, loc_lda; + NekDouble sign1, sign2, value; + DNekScalMatSharedPtr loc_mat; + + // CALCULATE REORDERING MAPPING + CalculateReordering(pLocToGloMap->GetGlobalToUniversalMap(), + pLocToGloMap->GetGlobalToUniversalMapUnique(), + pLocToGloMap); + + // STORE MESH INFO TO BE PASSED TO SAENA +// int total_elm = this->GetExp()->size(); + auto ExpTmp = m_expList.lock()->GetExp(); + int total_elm = ExpTmp->size(); +// std::cout << total_elm << "\n"; + + int counter = 0; + vector dof_elems; + for (i = 0; i < total_elm; ++i){ +// std::cout << ExpTmp->at(i)->GetNcoeffs() << std::endl; + for (j = 0; j < ExpTmp->at(i)->GetNcoeffs(); ++j){ +// printf("%i\t", pLocToGloMap->GetLocalToGlobalMap()[counter]); + dof_elems.emplace_back(pLocToGloMap->GetLocalToGlobalMap()[counter] + 1); + ++counter; + } + m_l2g.emplace_back(dof_elems); + dof_elems.clear(); + } + + auto tend = clock(); + auto t = double(tend - tbegin) / CLOCKS_PER_SEC; + double t_ave = 0.0; + MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); + if(!rank) printf("Saena mesh info generation time: %f\n", t_ave / nprocs); + + // CONFIGURE SAENA SOLVER OPTIONS + SetUpSolver(pLocToGloMap->GetIterativeTolerance()); + + tbegin = clock(); + + m_matrix.erase_no_shrink_to_fit(); + + // POPULATE MATRIX + for(n = cnt = 0; n < m_expList.lock()->GetNumElmts(); ++n) + { + loc_mat = GetBlock(n); + loc_lda = loc_mat->GetRows(); + + for(i = 0; i < loc_lda; ++i) + { + gid1 = pLocToGloMap->GetLocalToGlobalMap(cnt+i) -
nDirDofs; + sign1 = pLocToGloMap->GetLocalToGlobalSign(cnt + i); + if(gid1 >= 0) + { + int gid1ro = m_reorderedMap[gid1]; + for(j = 0; j < loc_lda; ++j) + { + gid2 = pLocToGloMap->GetLocalToGlobalMap(cnt + j) + - nDirDofs; + sign2 = pLocToGloMap->GetLocalToGlobalSign(cnt + j); + if(gid2 >= 0) + { + int gid2ro = m_reorderedMap[gid2]; + value = sign1*sign2*(*loc_mat)(i,j); + m_matrix.set(gid1ro, gid2ro, value); + } + } + } + } + cnt += loc_lda; + } + + // timing + tend = clock(); + t = double(tend - tbegin) / CLOCKS_PER_SEC; + MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); + if(!rank) printf("nektar assembly time: %f\n", t_ave / nprocs); + + tbegin = clock(); + + // ASSEMBLE MATRIX +// m_matrix.set_num_threads(1); + m_matrix.assemble(m_scale); +// m_matrix.assemble_writeToFile("matrix_folder"); + + // timing + tend = clock(); + t = double(tend - tbegin) / CLOCKS_PER_SEC; + MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); + if(!rank) printf("Saena matrix assembly time: %f\n", t_ave / nprocs); + } + + + GlobalLinSysSaenaFull::~GlobalLinSysSaenaFull() + { + + } + + + /** + * Solve the linear system using a full global matrix system. 
+ */ + void GlobalLinSysSaenaFull::v_Solve( + const Array &pLocInput, + Array &pLocOutput, + const AssemblyMapSharedPtr &pLocToGloMap, + const Array &pDirForcing) + { + std::shared_ptr expList = m_expList.lock(); + bool dirForcCalculated = (bool) pDirForcing.size(); + int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + int nGlobDofs = pLocToGloMap->GetNumGlobalCoeffs(); + int nLocDofs = pLocToGloMap->GetNumLocalCoeffs(); + +// m_locToGloMap = pLocToGloMap; // required for DoMatrixMultiply + + Array tmp(nLocDofs); + Array tmp1(nLocDofs); + Array global(nGlobDofs,0.0); + + int nDirTotal = nDirDofs; + expList->GetComm()->GetRowComm() + ->AllReduce(nDirTotal, LibUtilities::ReduceSum); + + if(nDirTotal) + { + // calculate the dirichlet forcing + if(dirForcCalculated) + { + // assume pDirForcing is in local space + ASSERTL0(pDirForcing.size() >= nLocDofs, + "DirForcing is not of sufficient size. Is it in local space?"); + Vmath::Vsub(nLocDofs, pLocInput, 1, + pDirForcing, 1,tmp1, 1); + } + else + { + // Calculate the dirichlet forcing and substract it + // from the rhs + expList->GeneralMatrixOp( + m_linSysKey, pLocOutput, tmp); + + // Apply robin boundary conditions to the solution. 
+ for(auto &r : m_robinBCInfo) // add robin mass matrix + { + RobinBCInfoSharedPtr rBC; + Array tmploc; + + int n = r.first; + + int offset = expList->GetCoeff_Offset(n); + LocalRegions::ExpansionSharedPtr vExp = expList->GetExp(n); + + // add local matrix contribution + for(rBC = r.second;rBC; rBC = rBC->next) + { + vExp->AddRobinEdgeContribution(rBC->m_robinID, + rBC->m_robinPrimitiveCoeffs, + pLocOutput + offset, + tmploc = tmp + offset); + } + } + + Vmath::Vsub(nLocDofs, pLocInput, 1, tmp, 1, tmp1, 1); + } + + pLocToGloMap->Assemble(tmp1,tmp); + + SolveLinearSystem(nGlobDofs,tmp, global, pLocToGloMap, nDirDofs); + + pLocToGloMap->GlobalToLocal(global,tmp); + + // Add back initial and boundary condition + Vmath::Vadd(nLocDofs, tmp, 1, pLocOutput, 1, pLocOutput, 1); + } + else + { + pLocToGloMap->Assemble(pLocInput,tmp); + SolveLinearSystem(nGlobDofs, tmp,global, pLocToGloMap); + pLocToGloMap->GlobalToLocal(global,pLocOutput); + } + } + } +} diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.h b/library/MultiRegions/GlobalLinSysSaenaFull.h new file mode 100644 index 000000000..e54fcf8e6 --- /dev/null +++ b/library/MultiRegions/GlobalLinSysSaenaFull.h @@ -0,0 +1,91 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File GlobalLinSysDirectXxt.h +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). 
+// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: GlobalLinSysSaenaFull header +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENAFULL_H +#define NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENAFULL_H + +#include +#include +#include + +namespace Nektar +{ + namespace MultiRegions + { + // Forward declarations + + //class AssemblyMapDG; + class ExpList; + + /// A global linear system solved with Saena using the full matrix.
+ class GlobalLinSysSaenaFull : public GlobalLinSysSaena + { + public: + + /// Creates an instance of this class + static GlobalLinSysSharedPtr create( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + { + return MemoryManager + ::AllocateSharedPtr(pLinSysKey, pExpList, pLocToGloMap); + } + + /// Name of class + MULTI_REGIONS_EXPORT static std::string className; + + /// Constructor for full direct matrix solve. + MULTI_REGIONS_EXPORT GlobalLinSysSaenaFull( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap); + + MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaFull(); + + private: + /// Solve the linear system for given input and output vectors + /// using a specified local to global map. + virtual void v_Solve( + const Array &in, + Array &out, + const AssemblyMapSharedPtr &locToGloMap, + const Array &dirForcing + = NullNekDouble1DArray); + }; + } +} + +#endif diff --git a/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp b/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp new file mode 100644 index 000000000..d530a313d --- /dev/null +++ b/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp @@ -0,0 +1,231 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File GlobalLinSysSaenaStaticCond.cpp +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). 
+// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: GlobalLinSysSaenaStaticCond definition +// +/////////////////////////////////////////////////////////////////////////////// + +#include + +//#include +//#include +//#include + +using namespace std; + +namespace Nektar +{ + namespace MultiRegions + { + /** + * @class GlobalLinSysSaena + * + * Solves a linear system using single- or multi-level static + * condensation. + */ + + /** + * Registers the class with the Factory. 
+ */ + string GlobalLinSysSaenaStaticCond::className + = GetGlobalLinSysFactory().RegisterCreatorFunction( + "SaenaStaticCond", + GlobalLinSysSaenaStaticCond::create, + "Saena static condensation."); + + string GlobalLinSysSaenaStaticCond::className2 + = GetGlobalLinSysFactory().RegisterCreatorFunction( + "SaenaMultiLevelStaticCond", + GlobalLinSysSaenaStaticCond::create, + "Saena multi-level static condensation."); + + /** + * For a matrix system of the form @f[ + * \left[ \begin{array}{cc} + * \boldsymbol{A} & \boldsymbol{B}\\ + * \boldsymbol{C} & \boldsymbol{D} + * \end{array} \right] + * \left[ \begin{array}{c} \boldsymbol{x_1}\\ \boldsymbol{x_2} + * \end{array}\right] + * = \left[ \begin{array}{c} \boldsymbol{y_1}\\ \boldsymbol{y_2} + * \end{array}\right], + * @f] + * where @f$\boldsymbol{D}@f$ and + * @f$(\boldsymbol{A-BD^{-1}C})@f$ are invertible, store and assemble + * a static condensation system, according to a given local to global + * mapping. #m_linSys is constructed by AssembleSchurComplement(). + * @param mKey Associated matrix key. + * @param pLocMatSys LocalMatrixSystem + * @param locToGloMap Local to global mapping. 
+ */ + GlobalLinSysSaenaStaticCond::GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &pKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + : GlobalLinSys (pKey, pExpList, pLocToGloMap), + GlobalLinSysSaena (pKey, pExpList, pLocToGloMap), + GlobalLinSysStaticCond(pKey, pExpList, pLocToGloMap) + { + std::cout << __func__ << std::endl; + + ASSERTL1((pKey.GetGlobalSysSolnType()==eSaenaStaticCond)|| + (pKey.GetGlobalSysSolnType()==eSaenaMultiLevelStaticCond), + "This constructor is only valid when using static " + "condensation"); + ASSERTL1(pKey.GetGlobalSysSolnType() + == pLocToGloMap->GetGlobalSysSolnType(), + "The local to global map is not set up for the requested " + "solution type"); + } + + /** + * + */ + GlobalLinSysSaenaStaticCond::GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &pKey, + const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, + const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &pLocToGloMap, + const PreconditionerSharedPtr pPrecon) + : GlobalLinSys (pKey, pExpList, pLocToGloMap), + GlobalLinSysSaena (pKey, pExpList, pLocToGloMap), + GlobalLinSysStaticCond(pKey, pExpList, pLocToGloMap) + { + std::cout << __func__ << std::endl; + + m_schurCompl = pSchurCompl; + m_BinvD = pBinvD; + m_C = pC; + m_invD = pInvD; + m_precon = pPrecon; + } + + /** + * + */ + GlobalLinSysSaenaStaticCond::~GlobalLinSysSaenaStaticCond() + { + + } + + /** + * Assemble the schur complement matrix from the block matrices stored + * in #m_blkMatrices and the given local to global mapping information. + * @param locToGloMap Local to global mapping information. 
+ */ + void GlobalLinSysSaenaStaticCond::v_AssembleSchurComplement( + AssemblyMapSharedPtr pLocToGloMap) + { + std::cout << __func__ << std::endl; + + int i, j, n, cnt, gid1, gid2, loc_lda; + NekDouble sign1, sign2, value; + + const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + + DNekScalBlkMatSharedPtr SchurCompl = m_schurCompl; + DNekScalBlkMatSharedPtr BinvD = m_BinvD; + DNekScalBlkMatSharedPtr C = m_C; + DNekScalBlkMatSharedPtr invD = m_invD; + DNekScalMatSharedPtr loc_mat; + + // CALCULATE REORDERING MAPPING + CalculateReordering(pLocToGloMap->GetGlobalToUniversalBndMap(), + pLocToGloMap->GetGlobalToUniversalBndMapUnique(), + pLocToGloMap); + + // SET UP VECTORS AND MATRIX +// SetUpMatVec(pLocToGloMap->GetNumGlobalBndCoeffs(), nDirDofs); + SetUpMatVec(); + + // CONFIGURE SAENA SOLVER OPTIONS + SetUpSolver(pLocToGloMap->GetIterativeTolerance()); + + // POPULATE MATRIX + for(n = cnt = 0; n < m_schurCompl->GetNumberOfBlockRows(); ++n) + { + loc_mat = m_schurCompl->GetBlock(n,n); + loc_lda = loc_mat->GetRows(); + + for(i = 0; i < loc_lda; ++i) + { + gid1 = pLocToGloMap->GetLocalToGlobalBndMap(cnt + i)-nDirDofs; + sign1 = pLocToGloMap->GetLocalToGlobalBndSign(cnt + i); + if(gid1 >= 0) + { + int gid1ro = m_reorderedMap[gid1]; + for(j = 0; j < loc_lda; ++j) + { + gid2 = pLocToGloMap->GetLocalToGlobalBndMap(cnt + j) + - nDirDofs; + sign2 = pLocToGloMap->GetLocalToGlobalBndSign(cnt + j); + if(gid2 >= 0) + { + int gid2ro = m_reorderedMap[gid2]; + value = sign1*sign2*(*loc_mat)(i,j); + m_matrix.set(gid1ro, gid2ro, value); + } + } + } + } + cnt += loc_lda; + } + + m_matrix.assemble(); + } + + GlobalLinSysStaticCondSharedPtr GlobalLinSysSaenaStaticCond::v_Recurse( + const GlobalLinSysKey &mkey, + const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, + const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &l2gMap) + { +// GlobalLinSysSaenaStaticCondSharedPtr sys = 
MemoryManager< +// GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( +// mkey, pExpList, pSchurCompl, pBinvD, pC, pInvD, l2gMap, +// m_precon); + GlobalLinSysSaenaStaticCondSharedPtr sys = MemoryManager< + GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( + mkey, pExpList, pSchurCompl, pBinvD, pC, pInvD, l2gMap); + + std::cout << __func__ << std::endl; + + sys->Initialise(l2gMap); + return sys; + } + } +} diff --git a/library/MultiRegions/GlobalLinSysSaenaStaticCond.h b/library/MultiRegions/GlobalLinSysSaenaStaticCond.h new file mode 100644 index 000000000..a8c1d4124 --- /dev/null +++ b/library/MultiRegions/GlobalLinSysSaenaStaticCond.h @@ -0,0 +1,114 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File GlobalLinSysSaenaStaticCond.h +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: GlobalLinSysSaenaStaticCond header +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENASTATICCOND_H +#define NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENASTATICCOND_H + +#include +#include +#include + +namespace Nektar +{ + namespace MultiRegions + { + // Forward declarations + class ExpList; + class GlobalLinSysSaenaStaticCond; + + typedef std::shared_ptr + GlobalLinSysSaenaStaticCondSharedPtr; + + /// A global linear system solved by static condensation using Saena. + class GlobalLinSysSaenaStaticCond : virtual public GlobalLinSysSaena, + virtual public GlobalLinSysStaticCond + { + public: + /// Creates an instance of this class + static GlobalLinSysSharedPtr create( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + { + GlobalLinSysSharedPtr p = MemoryManager< + GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( + pLinSysKey, pExpList, pLocToGloMap); + p->InitObject(); + return p; + } + + /// Name of class + MULTI_REGIONS_EXPORT static std::string className; + static std::string className2; + + /// Constructor for a Saena static condensation solve. + MULTI_REGIONS_EXPORT GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &mkey, + const std::weak_ptr &pExpList, + const std::shared_ptr &locToGloMap); + + /// Constructor taking precomputed static condensation blocks. 
+ MULTI_REGIONS_EXPORT GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &mkey, + const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, + const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &locToGloMap, + const PreconditionerSharedPtr pPrecon = + PreconditionerSharedPtr()); + + MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaStaticCond(); + + protected: + /// Assemble the Schur complement matrix. + virtual void v_AssembleSchurComplement( + std::shared_ptr locToGloMap); + + virtual GlobalLinSysStaticCondSharedPtr v_Recurse( + const GlobalLinSysKey &mkey, + const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, + const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &locToGloMap); + + }; + } +} + +#endif diff --git a/library/MultiRegions/MultiRegions.hpp b/library/MultiRegions/MultiRegions.hpp index dd60706f9..8d958dcf6 100644 --- a/library/MultiRegions/MultiRegions.hpp +++ b/library/MultiRegions/MultiRegions.hpp @@ -80,6 +80,9 @@ namespace Nektar ePETScFullMatrix, ePETScStaticCond, ePETScMultiLevelStaticCond, + eSaenaFullMatrix, + eSaenaStaticCond, + eSaenaMultiLevelStaticCond, eSIZE_GlobalSysSolnType }; @@ -98,7 +101,10 @@ namespace Nektar "XxtMultiLevelStaticCond", "PETScFull", "PETScStaticCond", - "PETScMultiLevelStaticCond" + "PETScMultiLevelStaticCond", + "SaenaFull", + "SaenaStaticCond", + "SaenaMultiLevelStaticCond" }; /// Type of Galerkin projection. 
diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index 0819c9a28..41bad6265 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -43,6 +43,10 @@ #include #endif +#ifdef NEKTAR_USING_SAENA +#include +#endif + #include #include @@ -71,6 +75,10 @@ namespace Nektar "LinearPreconSolver", "PETSc", MultiRegions::eLinearPreconPETSc), + LibUtilities::SessionReader::RegisterEnumValue( + "LinearPreconSolver", + "Saena", + MultiRegions::eLinearPreconSaena), LibUtilities::SessionReader::RegisterEnumValue( "LinearPreconSolver", "Xxt", @@ -120,6 +128,16 @@ namespace Nektar NEKERROR(ErrorUtil::efatal, "Nektar++ has not been compiled with " "PETSc support."); +#endif + break; + } + case eLinearPreconSaena: + { + linSolveType = eSaenaFullMatrix; +#ifndef NEKTAR_USING_SAENA + NEKERROR(ErrorUtil::efatal, + "Nektar++ has not been compiled with " + "Saena support."); #endif break; } @@ -157,6 +175,16 @@ namespace Nektar #else ASSERTL0(false, "Nektar++ has not been compiled with " "PETSc support."); +#endif + } + case eLinearPreconSaena: + { +#ifdef NEKTAR_USING_SAENA + m_vertLinsys = MemoryManager:: + AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); +#else + ASSERTL0(false, "Nektar++ has not been compiled with " + "Saena support."); #endif } } diff --git a/library/MultiRegions/PreconditionerLinear.h b/library/MultiRegions/PreconditionerLinear.h index 16db50324..efc2e953b 100644 --- a/library/MultiRegions/PreconditionerLinear.h +++ b/library/MultiRegions/PreconditionerLinear.h @@ -48,7 +48,8 @@ namespace Nektar enum LinearPreconSolver { eLinearPreconXxt, - eLinearPreconPETSc + eLinearPreconPETSc, + eLinearPreconSaena }; class PreconditionerLinear; -- GitLab From c60ed5a94fc95a845440e003bc6775aa666a8566 Mon Sep 17 00:00:00 2001 From: David Moxey Date: Wed, 27 Apr 2022 14:58:48 +0100 Subject: [PATCH 02/13] Update to avoid compiler errors --- 
cmake/ThirdPartySaena.cmake | 8 ++++++-- library/MultiRegions/GlobalLinSys.cpp | 2 +- library/MultiRegions/GlobalLinSysSaena.cpp | 3 +-- library/MultiRegions/GlobalLinSysSaenaFull.cpp | 8 ++++---- library/MultiRegions/PreconditionerLinear.cpp | 2 ++ 5 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake index 1ddf1b3de..4989dcab5 100644 --- a/cmake/ThirdPartySaena.cmake +++ b/cmake/ThirdPartySaena.cmake @@ -31,8 +31,12 @@ IF (NEKTAR_USE_SAENA) TMP_DIR ${TPBUILD}/saena-tmp INSTALL_DIR ${TPDIST} BINARY_DIR ${TPBUILD}/saena - CONFIGURE_COMMAND "" - BUILD_COMMAND ./install.sh + CONFIGURE_COMMAND ${CMAKE_COMMAND} + -G ${CMAKE_GENERATOR} + -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER} + -DCMAKE_INSTALL_PREFIX:PATH=${TPDIST} + ${TPBUILD}/saena ) THIRDPARTY_LIBRARY(SAENA_LIBRARY SHARED saena diff --git a/library/MultiRegions/GlobalLinSys.cpp b/library/MultiRegions/GlobalLinSys.cpp index c656cf880..d640404a2 100644 --- a/library/MultiRegions/GlobalLinSys.cpp +++ b/library/MultiRegions/GlobalLinSys.cpp @@ -47,7 +47,7 @@ namespace Nektar { namespace MultiRegions { - std::string GlobalLinSys::lookupIds[12] = { + std::string GlobalLinSys::lookupIds[15] = { LibUtilities::SessionReader::RegisterEnumValue( "GlobalSysSoln", "DirectFull", MultiRegions::eDirectFullMatrix), diff --git a/library/MultiRegions/GlobalLinSysSaena.cpp b/library/MultiRegions/GlobalLinSysSaena.cpp index f1c09d8b6..c3f1347cb 100644 --- a/library/MultiRegions/GlobalLinSysSaena.cpp +++ b/library/MultiRegions/GlobalLinSysSaena.cpp @@ -85,7 +85,7 @@ namespace Nektar const AssemblyMapSharedPtr &locToGloMap, const int pNumDir) { - BOOST_IGNORE_UNUSED(locToGLoMap); + boost::ignore_unused(locToGloMap); // @TODO: shouldn't need to but we require a new RHS vector every // time this is called. 
@@ -250,7 +250,6 @@ namespace Nektar */ void GlobalLinSysSaena::SetUpSolver(NekDouble tolerance) { - m_step = 0; m_scale = false; m_opts.set_relative_tolerance(tolerance); // m_opts.set_dynamic_levels(false); diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.cpp b/library/MultiRegions/GlobalLinSysSaenaFull.cpp index 7570ac0c9..f7794ddc2 100644 --- a/library/MultiRegions/GlobalLinSysSaenaFull.cpp +++ b/library/MultiRegions/GlobalLinSysSaenaFull.cpp @@ -230,10 +230,10 @@ namespace Nektar // add local matrix contribution for(rBC = r.second;rBC; rBC = rBC->next) { - vExp->AddRobinEdgeContribution(rBC->m_robinID, - rBC->m_robinPrimitiveCoeffs, - pLocOutput + offset, - tmploc = tmp + offset); + vExp->AddRobinTraceContribution(rBC->m_robinID, + rBC->m_robinPrimitiveCoeffs, + pLocOutput + offset, + tmploc = tmp + offset); } } diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index 41bad6265..8989fc76a 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -176,6 +176,7 @@ namespace Nektar ASSERTL0(false, "Nektar++ has not been compiled with " "PETSc support."); #endif + break; } case eLinearPreconSaena: { @@ -186,6 +187,7 @@ namespace Nektar ASSERTL0(false, "Nektar++ has not been compiled with " "Saena support."); #endif + break; } } } -- GitLab From d436b305262a6c5fb0a5e4618ab5bf1941078a57 Mon Sep 17 00:00:00 2001 From: David Moxey Date: Wed, 27 Apr 2022 15:01:55 +0100 Subject: [PATCH 03/13] Update with Saena tag --- cmake/ThirdPartySaena.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake index 4989dcab5..1b06a9c6c 100644 --- a/cmake/ThirdPartySaena.cmake +++ b/cmake/ThirdPartySaena.cmake @@ -25,7 +25,7 @@ IF (NEKTAR_USE_SAENA) PREFIX ${TPSRC} STAMP_DIR ${TPBUILD}/stamp GIT_REPOSITORY https://github.com/mdave/Saena_Public.git - GIT_TAG 
298fc0249aa05fd03c27108615c8f2afe8c111bf + GIT_TAG a73e811b5debd10388aa7c7ec8bc198e6a3504ca DOWNLOAD_DIR ${TPSRC} SOURCE_DIR ${TPBUILD}/saena TMP_DIR ${TPBUILD}/saena-tmp -- GitLab From 0bf200fa48e03e9a6d713c1f9f710e92bc69531f Mon Sep 17 00:00:00 2001 From: David Moxey Date: Thu, 28 Apr 2022 09:35:09 +0100 Subject: [PATCH 04/13] Try to fix segfault --- library/MultiRegions/GlobalLinSysSaena.cpp | 49 ++++++++++--------- library/MultiRegions/GlobalLinSysSaena.h | 1 + .../MultiRegions/GlobalLinSysSaenaFull.cpp | 2 + 3 files changed, 29 insertions(+), 23 deletions(-) diff --git a/library/MultiRegions/GlobalLinSysSaena.cpp b/library/MultiRegions/GlobalLinSysSaena.cpp index c3f1347cb..952d94c7e 100644 --- a/library/MultiRegions/GlobalLinSysSaena.cpp +++ b/library/MultiRegions/GlobalLinSysSaena.cpp @@ -215,29 +215,6 @@ namespace Nektar m_matrix.set_comm(m_comm); m_matrix.add_duplicates(true); m_rhs.set_comm(m_comm); - - int nummodes = m_expList.lock()->GetFieldDefinitions()[0]->m_numModes[0]; - int p_order = nummodes - 1; - int prodim = m_expList.lock()->GetCoordim(0); - - m_matrix.set_p_order(p_order); - m_matrix.set_prodim(prodim); - - // set p_coarsen levels computation. subtract by a constant. - vector order_dif; - for(int i = 0; i < p_order - 1; ++i) - { - order_dif.emplace_back(1); - } - - // set number of multigrid levels - int max_h_level = 1; // h-multigrid levels - m_amg.set_multigrid_max_level( - static_cast(order_dif.size()) + max_h_level); - - m_amg.set_scale(m_scale); - m_amg.set_matrix( - &m_matrix, &m_opts, m_l2g, m_reorderedMap, m_bdydof, order_dif); } /** @@ -259,5 +236,31 @@ namespace Nektar // m_opts.set_preSmooth(3); // m_opts.set_postSmooth(3); } + + void GlobalLinSysSaena::SetUpMultigrid() + { + int nummodes = m_expList.lock()->GetFieldDefinitions()[0]->m_numModes[0]; + int p_order = nummodes - 1; + int prodim = m_expList.lock()->GetCoordim(0); + + m_matrix.set_p_order(p_order); + m_matrix.set_prodim(prodim); + + // set p_coarsen levels computation. 
subtract by a constant. + vector order_dif; + for(int i = 0; i < p_order - 1; ++i) + { + order_dif.emplace_back(1); + } + + // set number of multigrid levels + int max_h_level = 1; // h-multigrid levels + m_amg.set_multigrid_max_level( + static_cast(order_dif.size()) + max_h_level); + + m_amg.set_scale(m_scale); + m_amg.set_matrix( + &m_matrix, &m_opts, m_l2g, m_reorderedMap, m_bdydof, order_dif); + } } } diff --git a/library/MultiRegions/GlobalLinSysSaena.h b/library/MultiRegions/GlobalLinSysSaena.h index 80721a578..e79695da3 100644 --- a/library/MultiRegions/GlobalLinSysSaena.h +++ b/library/MultiRegions/GlobalLinSysSaena.h @@ -94,6 +94,7 @@ protected: void SetUpMatVec(); void SetUpSolver(NekDouble tolerance); + void SetUpMultigrid(); void CalculateReordering( const Array &glo2uniMap, const Array &glo2unique, diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.cpp b/library/MultiRegions/GlobalLinSysSaenaFull.cpp index f7794ddc2..d41093153 100644 --- a/library/MultiRegions/GlobalLinSysSaenaFull.cpp +++ b/library/MultiRegions/GlobalLinSysSaenaFull.cpp @@ -164,6 +164,8 @@ namespace Nektar t = double(tend - tbegin) / CLOCKS_PER_SEC; MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); if(!rank) printf("Saena matrix assembly time: %f\n", t_ave / nprocs); + + SetUpMultigrid(); } -- GitLab From d0ece8157b6c215d6c07d554b1996e991a0de11f Mon Sep 17 00:00:00 2001 From: David Moxey Date: Thu, 28 Apr 2022 10:26:59 +0100 Subject: [PATCH 05/13] Update Saena version --- cmake/ThirdPartySaena.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake index 1b06a9c6c..bde529e99 100644 --- a/cmake/ThirdPartySaena.cmake +++ b/cmake/ThirdPartySaena.cmake @@ -25,7 +25,7 @@ IF (NEKTAR_USE_SAENA) PREFIX ${TPSRC} STAMP_DIR ${TPBUILD}/stamp GIT_REPOSITORY https://github.com/mdave/Saena_Public.git - GIT_TAG a73e811b5debd10388aa7c7ec8bc198e6a3504ca + GIT_TAG fccf708a5cc5343260f0b1efddc88f653ca5dd01 
DOWNLOAD_DIR ${TPSRC} SOURCE_DIR ${TPBUILD}/saena TMP_DIR ${TPBUILD}/saena-tmp @@ -33,6 +33,7 @@ IF (NEKTAR_USE_SAENA) BINARY_DIR ${TPBUILD}/saena CONFIGURE_COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} + -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} -DCMAKE_C_COMPILER:FILEPATH=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER:FILEPATH=${CMAKE_CXX_COMPILER} -DCMAKE_INSTALL_PREFIX:PATH=${TPDIST} -- GitLab From ca8b28fbd1c2675f1c9873eae8f3928531f21b12 Mon Sep 17 00:00:00 2001 From: David Moxey Date: Thu, 28 Apr 2022 10:38:26 +0100 Subject: [PATCH 06/13] Update Saena tag --- cmake/ThirdPartySaena.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake index bde529e99..deb3ed413 100644 --- a/cmake/ThirdPartySaena.cmake +++ b/cmake/ThirdPartySaena.cmake @@ -25,7 +25,7 @@ IF (NEKTAR_USE_SAENA) PREFIX ${TPSRC} STAMP_DIR ${TPBUILD}/stamp GIT_REPOSITORY https://github.com/mdave/Saena_Public.git - GIT_TAG fccf708a5cc5343260f0b1efddc88f653ca5dd01 + GIT_TAG 0a6b9ddc9a3074488e41cf3e45d1eb090968a352 DOWNLOAD_DIR ${TPSRC} SOURCE_DIR ${TPBUILD}/saena TMP_DIR ${TPBUILD}/saena-tmp -- GitLab From 80dfcc62e7cda37b747aa8f366200bb3a06c90dd Mon Sep 17 00:00:00 2001 From: David Moxey Date: Fri, 29 Apr 2022 10:06:25 +0100 Subject: [PATCH 07/13] Fix for linear space solver --- library/MultiRegions/GlobalLinSysSaena.cpp | 2 +- library/MultiRegions/GlobalLinSysSaena.h | 6 ++++++ library/MultiRegions/PreconditionerLinear.cpp | 1 + 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/library/MultiRegions/GlobalLinSysSaena.cpp b/library/MultiRegions/GlobalLinSysSaena.cpp index 952d94c7e..9a28f9111 100644 --- a/library/MultiRegions/GlobalLinSysSaena.cpp +++ b/library/MultiRegions/GlobalLinSysSaena.cpp @@ -240,7 +240,7 @@ namespace Nektar void GlobalLinSysSaena::SetUpMultigrid() { int nummodes = m_expList.lock()->GetFieldDefinitions()[0]->m_numModes[0]; - int p_order = nummodes - 1; + int p_order = m_polyOrder == 0 ? 
nummodes - 1 : m_polyOrder; int prodim = m_expList.lock()->GetCoordim(0); m_matrix.set_p_order(p_order); diff --git a/library/MultiRegions/GlobalLinSysSaena.h b/library/MultiRegions/GlobalLinSysSaena.h index e79695da3..743bce54a 100644 --- a/library/MultiRegions/GlobalLinSysSaena.h +++ b/library/MultiRegions/GlobalLinSysSaena.h @@ -67,6 +67,11 @@ public: const AssemblyMapSharedPtr &locToGloMap, const int pNumDir); + void SetPolyOrder(int p) + { + m_polyOrder = p; + } + protected: /// Saena matrix object. saena::matrix m_matrix; @@ -89,6 +94,7 @@ protected: std::vector> m_l2g; /// flag to set the linear system to be scaled bool m_scale; + int m_polyOrder = 0; PreconditionerSharedPtr m_precon; diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index 8989fc76a..fe8a48fce 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -183,6 +183,7 @@ namespace Nektar #ifdef NEKTAR_USING_SAENA m_vertLinsys = MemoryManager:: AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); + m_vertLinsys->SetPolyOrder(1); #else ASSERTL0(false, "Nektar++ has not been compiled with " "Saena support."); -- GitLab From c29d86dc67924e76118fb435c974ff451fbbe705 Mon Sep 17 00:00:00 2001 From: David Moxey Date: Fri, 29 Apr 2022 10:10:23 +0100 Subject: [PATCH 08/13] Fix compiler error --- library/MultiRegions/PreconditionerLinear.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index fe8a48fce..66a5fe91d 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -181,9 +181,10 @@ namespace Nektar case eLinearPreconSaena: { #ifdef NEKTAR_USING_SAENA - m_vertLinsys = MemoryManager:: + auto vertLinsys = MemoryManager:: AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); - m_vertLinsys->SetPolyOrder(1); + 
vertLinsys->SetPolyOrder(1); + m_vertLinSys = vertLinSys; #else ASSERTL0(false, "Nektar++ has not been compiled with " "Saena support."); -- GitLab From dd840c1d87b781c8355fdaeeea22d8def1eb480c Mon Sep 17 00:00:00 2001 From: David Moxey Date: Fri, 29 Apr 2022 10:12:12 +0100 Subject: [PATCH 09/13] Fix more compiler errors --- CMakeLists.txt | 3 ++- library/MultiRegions/PreconditionerLinear.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c45a62a64..22a241a77 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -143,7 +143,7 @@ SET(NEKTAR_LIBRARY_TYPE "SHARED") # Set up RPATH SET(CMAKE_SKIP_BUILD_RPATH FALSE) -SET(CMAKE_BUILD_RPATH "${TP_LIB_DIR}") +SET(CMAKE_BUILD_RPATH "${TP_LIB_DIR}" "${TPDIST}/lib") SET(CMAKE_INSTALL_RPATH_USE_LINK_PATH FALSE) LIST(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/${LIB_DIR}" isSystemDir) @@ -152,6 +152,7 @@ IF("${isSystemDir}" STREQUAL "-1") "${CMAKE_INSTALL_PREFIX}/${LIB_DIR}" "${TP_LIB_DIR}/") ELSE() SET(CMAKE_INSTALL_RPATH "${TP_LIB_DIR}") + message(STATUS "${CMAKE_INSTALL_RPATH}") ENDIF() # Enable the use of @rpath in macOS install names so that we can use multiple diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index 66a5fe91d..0b04645bd 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -184,7 +184,7 @@ namespace Nektar auto vertLinsys = MemoryManager:: AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); vertLinsys->SetPolyOrder(1); - m_vertLinSys = vertLinSys; + m_vertLinsys = vertLinsys; #else ASSERTL0(false, "Nektar++ has not been compiled with " "Saena support."); -- GitLab From e10a094d44549b5fa4f27f6d047972b21afe8bcd Mon Sep 17 00:00:00 2001 From: David Moxey Date: Tue, 16 Aug 2022 20:38:55 +0100 Subject: [PATCH 10/13] Apply patch from Hari --- library/MultiRegions/GlobalLinSysSaena.cpp | 7 ++++++- 
library/MultiRegions/GlobalLinSysSaena.h | 3 ++- library/MultiRegions/GlobalLinSysSaenaFull.cpp | 5 +++-- library/MultiRegions/GlobalLinSysSaenaFull.h | 5 +++-- library/MultiRegions/PreconditionerLinear.cpp | 4 ++-- 5 files changed, 16 insertions(+), 8 deletions(-) diff --git a/library/MultiRegions/GlobalLinSysSaena.cpp b/library/MultiRegions/GlobalLinSysSaena.cpp index 9a28f9111..3a30b1949 100644 --- a/library/MultiRegions/GlobalLinSysSaena.cpp +++ b/library/MultiRegions/GlobalLinSysSaena.cpp @@ -51,9 +51,14 @@ namespace Nektar GlobalLinSysSaena::GlobalLinSysSaena( const GlobalLinSysKey &pKey, const std::weak_ptr &pExp, - const std::shared_ptr &pLocToGloMap) + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder) : GlobalLinSys(pKey, pExp, pLocToGloMap) { + if (pPolynomialOrder) { + // setup and use supplied + SetPolyOrder(pPolynomialOrder); + } } /** diff --git a/library/MultiRegions/GlobalLinSysSaena.h b/library/MultiRegions/GlobalLinSysSaena.h index 743bce54a..d9b334d2a 100644 --- a/library/MultiRegions/GlobalLinSysSaena.h +++ b/library/MultiRegions/GlobalLinSysSaena.h @@ -56,7 +56,8 @@ public: MULTI_REGIONS_EXPORT GlobalLinSysSaena( const GlobalLinSysKey &pKey, const std::weak_ptr &pExp, - const std::shared_ptr &pLocToGloMap); + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder=0); MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaena(); diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.cpp b/library/MultiRegions/GlobalLinSysSaenaFull.cpp index d41093153..1362d7445 100644 --- a/library/MultiRegions/GlobalLinSysSaenaFull.cpp +++ b/library/MultiRegions/GlobalLinSysSaenaFull.cpp @@ -60,9 +60,10 @@ namespace Nektar GlobalLinSysSaenaFull::GlobalLinSysSaenaFull( const GlobalLinSysKey &pLinSysKey, const std::weak_ptr &pExp, - const std::shared_ptr &pLocToGloMap) + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder) : GlobalLinSys (pLinSysKey, pExp, pLocToGloMap), - GlobalLinSysSaena(pLinSysKey, 
pExp, pLocToGloMap) + GlobalLinSysSaena(pLinSysKey, pExp, pLocToGloMap, pPolynomialOrder) // @hari - Change this constructor instead { // SET UP VECTORS AND MATRIX SetUpMatVec(); diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.h b/library/MultiRegions/GlobalLinSysSaenaFull.h index e54fcf8e6..094025c82 100644 --- a/library/MultiRegions/GlobalLinSysSaenaFull.h +++ b/library/MultiRegions/GlobalLinSysSaenaFull.h @@ -61,7 +61,7 @@ namespace Nektar const std::shared_ptr &pLocToGloMap) { return MemoryManager - ::AllocateSharedPtr(pLinSysKey, pExpList, pLocToGloMap); + ::AllocateSharedPtr(pLinSysKey, pExpList, pLocToGloMap); // check for def args for poly order } /// Name of class @@ -71,7 +71,8 @@ namespace Nektar MULTI_REGIONS_EXPORT GlobalLinSysSaenaFull( const GlobalLinSysKey &pLinSysKey, const std::weak_ptr &pExpList, - const std::shared_ptr &pLocToGloMap); + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder=0); MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaFull(); diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index 0b04645bd..dcf38ddf8 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -182,8 +182,8 @@ namespace Nektar { #ifdef NEKTAR_USING_SAENA auto vertLinsys = MemoryManager:: - AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); - vertLinsys->SetPolyOrder(1); + AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap, 1); + // vertLinsys->SetPolyOrder(1); m_vertLinsys = vertLinsys; #else ASSERTL0(false, "Nektar++ has not been compiled with " -- GitLab From ab2c5b04605f32acddff801119979ffd291917fa Mon Sep 17 00:00:00 2001 From: David Moxey Date: Thu, 6 Oct 2022 17:57:46 +0100 Subject: [PATCH 11/13] [formatting] Apply clang-format-11 --- .../AssemblyMap/AssemblyMapCG.cpp | 4707 ++++++++--------- library/MultiRegions/GlobalLinSys.cpp | 743 ++- library/MultiRegions/GlobalLinSysSaena.cpp | 407 +- 
library/MultiRegions/GlobalLinSysSaena.h | 49 +- .../MultiRegions/GlobalLinSysSaenaFull.cpp | 370 +- library/MultiRegions/GlobalLinSysSaenaFull.h | 82 +- .../GlobalLinSysSaenaStaticCond.cpp | 322 +- .../GlobalLinSysSaenaStaticCond.h | 113 +- library/MultiRegions/PreconditionerLinear.cpp | 420 +- library/MultiRegions/PreconditionerLinear.h | 111 +- 10 files changed, 3618 insertions(+), 3706 deletions(-) diff --git a/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp b/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp index 4e0104d84..e3b971672 100644 --- a/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp +++ b/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp @@ -32,2959 +32,2932 @@ // /////////////////////////////////////////////////////////////////////////////// -#include -#include +#include +#include #include #include #include -#include -#include +#include +#include #include #include +#include #include #include -#include using namespace std; namespace Nektar { - namespace MultiRegions - { - /** - * @class AssemblyMapCG - * Mappings are created for three possible global solution types: - * - Direct full matrix - * - Direct static condensation - * - Direct multi-level static condensation - * In the latter case, mappings are created recursively for the - * different levels of static condensation. - * - * These mappings are used by GlobalLinSys to generate the global - * system. 
- */ - - /** - * - */ - AssemblyMapCG::AssemblyMapCG( - const LibUtilities::SessionReaderSharedPtr &pSession, - const LibUtilities::CommSharedPtr &comm, const std::string variable) - : AssemblyMap(pSession, comm, variable) - { - pSession->LoadParameter("MaxStaticCondLevel", m_maxStaticCondLevel,100); - } - - int AssemblyMapCG::CreateGraph( - const ExpList &locExp, const BndCondExp &bndCondExp, - const Array &bndConditions, - const bool checkIfSystemSingular, const PeriodicMap &periodicVerts, - const PeriodicMap &periodicEdges, const PeriodicMap &periodicFaces, - DofGraph &graph, BottomUpSubStructuredGraphSharedPtr &bottomUpGraph, - set &extraDirVerts, set &extraDirEdges, - int &firstNonDirGraphVertId, int &nExtraDirichlet, int mdswitch) - { - int graphVertId = 0; - int vMaxVertId = -1; - int i, j, k, l, cnt; - int meshVertId, meshEdgeId, meshFaceId; - int meshVertId2, meshEdgeId2; - - LocalRegions::ExpansionSharedPtr exp, bndExp; - const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp()); - LibUtilities::CommSharedPtr vComm = m_comm->GetRowComm(); - - m_numLocalBndCondCoeffs = 0; - m_systemSingular = checkIfSystemSingular; - - for (i = 0; i < bndCondExp.size(); i++) - { - - m_numLocalBndCondCoeffs += bndCondExp[i]->GetNcoeffs(); - - if (bndConditions[0][i]->GetBoundaryConditionType() == - SpatialDomains::ePeriodic) +namespace MultiRegions +{ +/** + * @class AssemblyMapCG + * Mappings are created for three possible global solution types: + * - Direct full matrix + * - Direct static condensation + * - Direct multi-level static condensation + * In the latter case, mappings are created recursively for the + * different levels of static condensation. + * + * These mappings are used by GlobalLinSys to generate the global + * system. 
+ */ + +/** + * + */ +AssemblyMapCG::AssemblyMapCG( + const LibUtilities::SessionReaderSharedPtr &pSession, + const LibUtilities::CommSharedPtr &comm, const std::string variable) + : AssemblyMap(pSession, comm, variable) +{ + pSession->LoadParameter("MaxStaticCondLevel", m_maxStaticCondLevel, 100); +} + +int AssemblyMapCG::CreateGraph( + const ExpList &locExp, const BndCondExp &bndCondExp, + const Array &bndConditions, + const bool checkIfSystemSingular, const PeriodicMap &periodicVerts, + const PeriodicMap &periodicEdges, const PeriodicMap &periodicFaces, + DofGraph &graph, BottomUpSubStructuredGraphSharedPtr &bottomUpGraph, + set &extraDirVerts, set &extraDirEdges, + int &firstNonDirGraphVertId, int &nExtraDirichlet, int mdswitch) +{ + int graphVertId = 0; + int vMaxVertId = -1; + int i, j, k, l, cnt; + int meshVertId, meshEdgeId, meshFaceId; + int meshVertId2, meshEdgeId2; + + LocalRegions::ExpansionSharedPtr exp, bndExp; + const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp()); + LibUtilities::CommSharedPtr vComm = m_comm->GetRowComm(); + + m_numLocalBndCondCoeffs = 0; + m_systemSingular = checkIfSystemSingular; + + for (i = 0; i < bndCondExp.size(); i++) + { + + m_numLocalBndCondCoeffs += bndCondExp[i]->GetNcoeffs(); + + if (bndConditions[0][i]->GetBoundaryConditionType() == + SpatialDomains::ePeriodic) + { + continue; + } + + // Check to see if any value on boundary has Dirichlet + // value. note this is a vector to manage coupled + // solver but for scalar will just be a vector of size 11 + cnt = 0; + for (k = 0; k < bndConditions.size(); ++k) + { + if (bndConditions[k][i]->GetBoundaryConditionType() == + SpatialDomains::eDirichlet) { - continue; + cnt++; } - - // Check to see if any value on boundary has Dirichlet - // value. 
note this is a vector to manage coupled - // solver but for scalar will just be a vector of size 11 - cnt = 0; - for (k = 0; k < bndConditions.size(); ++k) + if (bndConditions[k][i]->GetBoundaryConditionType() != + SpatialDomains::eNeumann) { - if (bndConditions[k][i]->GetBoundaryConditionType() == - SpatialDomains::eDirichlet) - { - cnt++; - } - if (bndConditions[k][i]->GetBoundaryConditionType() != - SpatialDomains::eNeumann) - { - m_systemSingular = false; - } + m_systemSingular = false; } + } - // Find the maximum boundary vertex ID on this process. This is - // used later to pin a vertex if the system is singular. - for (j = 0; j < bndCondExp[i]->GetNumElmts(); ++j) + // Find the maximum boundary vertex ID on this process. This is + // used later to pin a vertex if the system is singular. + for (j = 0; j < bndCondExp[i]->GetNumElmts(); ++j) + { + bndExp = bndCondExp[i]->GetExp(j)->as(); + for (k = 0; k < bndExp->GetNverts(); ++k) { - bndExp = - bndCondExp[i]->GetExp(j)->as(); - for (k = 0; k < bndExp->GetNverts(); ++k) + if (vMaxVertId < bndExp->GetGeom()->GetVid(k)) { - if (vMaxVertId < bndExp->GetGeom()->GetVid(k)) - { - vMaxVertId = bndExp->GetGeom()->GetVid(k); - } + vMaxVertId = bndExp->GetGeom()->GetVid(k); } } + } - // If all boundaries are Dirichlet fill in graph - if (cnt == bndConditions.size()) + // If all boundaries are Dirichlet fill in graph + if (cnt == bndConditions.size()) + { + for (j = 0; j < bndCondExp[i]->GetNumElmts(); j++) { - for (j = 0; j < bndCondExp[i]->GetNumElmts(); j++) - { - bndExp = bndCondExp[i]->GetExp(j); + bndExp = bndCondExp[i]->GetExp(j); - for (k = 0; k < bndExp->GetNverts(); k++) + for (k = 0; k < bndExp->GetNverts(); k++) + { + meshVertId = bndExp->GetGeom()->GetVid(k); + if (graph[0].count(meshVertId) == 0) { - meshVertId = bndExp->GetGeom()->GetVid(k); - if (graph[0].count(meshVertId) == 0) - { - graph[0][meshVertId] = graphVertId++; - } + graph[0][meshVertId] = graphVertId++; } + } - const int bndDim = 
bndExp->GetNumBases(); - if (bndDim > 1) + const int bndDim = bndExp->GetNumBases(); + if (bndDim > 1) + { + for (k = 0; k < bndExp->GetNtraces(); k++) { - for (k = 0; k < bndExp->GetNtraces(); k++) + meshEdgeId = bndExp->GetGeom()->GetEid(k); + if (graph[1].count(meshEdgeId) == 0) { - meshEdgeId = bndExp->GetGeom()->GetEid(k); - if (graph[1].count(meshEdgeId) == 0) - { - graph[1][meshEdgeId] = graphVertId++; - } + graph[1][meshEdgeId] = graphVertId++; } } + } - // Possibility of a face in 3D or edge in 2D - meshFaceId = bndExp->GetGeom()->GetGlobalID(); - if (graph[bndDim].count(meshFaceId) == 0) - { - graph[bndDim][meshFaceId] = graphVertId++; - } - m_numLocalDirBndCoeffs += bndExp->GetNcoeffs(); + // Possibility of a face in 3D or edge in 2D + meshFaceId = bndExp->GetGeom()->GetGlobalID(); + if (graph[bndDim].count(meshFaceId) == 0) + { + graph[bndDim][meshFaceId] = graphVertId++; } + m_numLocalDirBndCoeffs += bndExp->GetNcoeffs(); } } + } - // Number of dirichlet edges and faces (not considering periodic - // BCs) - m_numDirEdges = graph[1].size(); - m_numDirFaces = graph[2].size(); - - /* - * The purpose of this routine is to deal with those degrees of - * freedom that are Dirichlet, but do not have a local Dirichlet - * boundary condition expansion set. - * - * For example, in 2D, consider a triangulation of a square into two - * triangles. Now imagine one edge of the square is Dirichlet and - * the problem is run on two processors. On one processor, one - * triangle vertex is Dirichlet, but doesn't know this since the - * Dirichlet composite lives on the other processor. - * - * When the global linear system is solved therefore, there is an - * inconsistency that at best leads to an inaccurate answer or a - * divergence of the system. - * - * This routine identifies such cases for 2D, and also for 3D where - * e.g. edges may have the same problem (consider an extrusion of - * the case above, for example). 
- */ - - // Collate information on Dirichlet vertices from all processes - int n = vComm->GetSize(); - int p = vComm->GetRank(); - - if (vComm->IsSerial()) - { - // for FieldConvert Comm this is true and it resets - // parallel processing back to serial case - n = 1; - p = 0; - } - // At this point, graph only contains information from Dirichlet - // boundaries. Therefore make a global list of the vert and edge - // information on all processors. - Array vertcounts(n, 0); - Array vertoffsets(n, 0); - Array edgecounts(n, 0); - Array edgeoffsets(n, 0); - vertcounts[p] = graph[0].size(); - edgecounts[p] = graph[1].size(); - vComm->AllReduce(vertcounts, LibUtilities::ReduceSum); - vComm->AllReduce(edgecounts, LibUtilities::ReduceSum); - - for (i = 1; i < n; ++i) - { - vertoffsets[i] = vertoffsets[i - 1] + vertcounts[i - 1]; - edgeoffsets[i] = edgeoffsets[i - 1] + edgecounts[i - 1]; - } - - int nTotVerts = Vmath::Vsum(n, vertcounts, 1); - int nTotEdges = Vmath::Vsum(n, edgecounts, 1); - - Array vertlist(nTotVerts, 0); - Array edgelist(nTotEdges, 0); - - // construct list of global ids of global vertices - i = 0; - for (auto &it : graph[0]) - { - vertlist[vertoffsets[p] + i++] = it.first; - } - - // construct list of global ids of global edges - i = 0; - for (auto &it : graph[1]) - { - edgelist[edgeoffsets[p] + i++] = it.first; - } - vComm->AllReduce(vertlist, LibUtilities::ReduceSum); - vComm->AllReduce(edgelist, LibUtilities::ReduceSum); - - // Now we have a list of all Dirichlet vertices and edges on all - // processors. - nExtraDirichlet = 0; - map extraDirVertIds, extraDirEdgeIds; - - // Ensure Dirchlet vertices are consistently recorded between - // processes (e.g. Dirichlet region meets Neumann region across a - // partition boundary requires vertex on partition to be Dirichlet). 
- // - // To do this we look over all elements and vertices in local - // partition and see if they match the values stored in the vertlist - // from other processors and if so record the meshVertId/meshEdgeId - // and the processor it comes from. - for (i = 0; i < n; ++i) - { - if (i == p) - { - continue; - } + // Number of dirichlet edges and faces (not considering periodic + // BCs) + m_numDirEdges = graph[1].size(); + m_numDirFaces = graph[2].size(); + + /* + * The purpose of this routine is to deal with those degrees of + * freedom that are Dirichlet, but do not have a local Dirichlet + * boundary condition expansion set. + * + * For example, in 2D, consider a triangulation of a square into two + * triangles. Now imagine one edge of the square is Dirichlet and + * the problem is run on two processors. On one processor, one + * triangle vertex is Dirichlet, but doesn't know this since the + * Dirichlet composite lives on the other processor. + * + * When the global linear system is solved therefore, there is an + * inconsistency that at best leads to an inaccurate answer or a + * divergence of the system. + * + * This routine identifies such cases for 2D, and also for 3D where + * e.g. edges may have the same problem (consider an extrusion of + * the case above, for example). + */ + + // Collate information on Dirichlet vertices from all processes + int n = vComm->GetSize(); + int p = vComm->GetRank(); + + if (vComm->IsSerial()) + { + // for FieldConvert Comm this is true and it resets + // parallel processing back to serial case + n = 1; + p = 0; + } + // At this point, graph only contains information from Dirichlet + // boundaries. Therefore make a global list of the vert and edge + // information on all processors. 
+ Array vertcounts(n, 0); + Array vertoffsets(n, 0); + Array edgecounts(n, 0); + Array edgeoffsets(n, 0); + vertcounts[p] = graph[0].size(); + edgecounts[p] = graph[1].size(); + vComm->AllReduce(vertcounts, LibUtilities::ReduceSum); + vComm->AllReduce(edgecounts, LibUtilities::ReduceSum); + + for (i = 1; i < n; ++i) + { + vertoffsets[i] = vertoffsets[i - 1] + vertcounts[i - 1]; + edgeoffsets[i] = edgeoffsets[i - 1] + edgecounts[i - 1]; + } - for (j = 0; j < locExpVector.size(); j++) - { - exp = locExpVector[j]; + int nTotVerts = Vmath::Vsum(n, vertcounts, 1); + int nTotEdges = Vmath::Vsum(n, edgecounts, 1); + + Array vertlist(nTotVerts, 0); + Array edgelist(nTotEdges, 0); + + // construct list of global ids of global vertices + i = 0; + for (auto &it : graph[0]) + { + vertlist[vertoffsets[p] + i++] = it.first; + } + + // construct list of global ids of global edges + i = 0; + for (auto &it : graph[1]) + { + edgelist[edgeoffsets[p] + i++] = it.first; + } + vComm->AllReduce(vertlist, LibUtilities::ReduceSum); + vComm->AllReduce(edgelist, LibUtilities::ReduceSum); + + // Now we have a list of all Dirichlet vertices and edges on all + // processors. + nExtraDirichlet = 0; + map extraDirVertIds, extraDirEdgeIds; + + // Ensure Dirchlet vertices are consistently recorded between + // processes (e.g. Dirichlet region meets Neumann region across a + // partition boundary requires vertex on partition to be Dirichlet). + // + // To do this we look over all elements and vertices in local + // partition and see if they match the values stored in the vertlist + // from other processors and if so record the meshVertId/meshEdgeId + // and the processor it comes from. 
+ for (i = 0; i < n; ++i) + { + if (i == p) + { + continue; + } + + for (j = 0; j < locExpVector.size(); j++) + { + exp = locExpVector[j]; - for (k = 0; k < exp->GetNverts(); k++) + for (k = 0; k < exp->GetNverts(); k++) + { + meshVertId = exp->GetGeom()->GetVid(k); + if (graph[0].count(meshVertId) == 0) { - meshVertId = exp->GetGeom()->GetVid(k); - if (graph[0].count(meshVertId) == 0) + for (l = 0; l < vertcounts[i]; ++l) { - for (l = 0; l < vertcounts[i]; ++l) + if (vertlist[vertoffsets[i] + l] == meshVertId) { - if (vertlist[vertoffsets[i] + l] == meshVertId) - { - extraDirVertIds[meshVertId] = i; - graph[0][meshVertId] = graphVertId++; - nExtraDirichlet++; - } + extraDirVertIds[meshVertId] = i; + graph[0][meshVertId] = graphVertId++; + nExtraDirichlet++; } } } + } - for (k = 0; k < exp->GetGeom()->GetNumEdges(); k++) + for (k = 0; k < exp->GetGeom()->GetNumEdges(); k++) + { + meshEdgeId = exp->GetGeom()->GetEid(k); + if (graph[1].count(meshEdgeId) == 0) { - meshEdgeId = exp->GetGeom()->GetEid(k); - if (graph[1].count(meshEdgeId) == 0) + for (l = 0; l < edgecounts[i]; ++l) { - for (l = 0; l < edgecounts[i]; ++l) + if (edgelist[edgeoffsets[i] + l] == meshEdgeId) { - if (edgelist[edgeoffsets[i] + l] == meshEdgeId) + extraDirEdgeIds[meshEdgeId] = i; + graph[1][meshEdgeId] = graphVertId++; + if (exp->GetGeom()->GetNumFaces()) { - extraDirEdgeIds[meshEdgeId] = i; - graph[1][meshEdgeId] = graphVertId++; - if (exp->GetGeom()->GetNumFaces()) - { - nExtraDirichlet += - exp->as() - ->GetEdgeNcoeffs(k) - - 2; - } - else - { - nExtraDirichlet += - exp->GetTraceNcoeffs(k) - 2; - } + nExtraDirichlet += + exp->as() + ->GetEdgeNcoeffs(k) - + 2; + } + else + { + nExtraDirichlet += exp->GetTraceNcoeffs(k) - 2; } } } } } } + } - // Low Energy preconditioner needs to know how many extra Dirichlet - // edges are on this process so store map in array. 
- m_extraDirEdges = Array(extraDirEdgeIds.size(), -1); - i = 0; - for (auto &it : extraDirEdgeIds) - { - meshEdgeId = it.first; - m_extraDirEdges[i++] = meshEdgeId; - } + // Low Energy preconditioner needs to know how many extra Dirichlet + // edges are on this process so store map in array. + m_extraDirEdges = Array(extraDirEdgeIds.size(), -1); + i = 0; + for (auto &it : extraDirEdgeIds) + { + meshEdgeId = it.first; + m_extraDirEdges[i++] = meshEdgeId; + } - // Now we have a list of all vertices and edges that are Dirichlet - // and not defined on the local partition as well as which processor - // they are stored on. - // - // Make a full list of all such entities on all processors and which - // processor they belong to. - for (i = 0; i < n; ++i) - { - vertcounts[i] = 0; - vertoffsets[i] = 0; - edgecounts[i] = 0; - edgeoffsets[i] = 0; - } + // Now we have a list of all vertices and edges that are Dirichlet + // and not defined on the local partition as well as which processor + // they are stored on. + // + // Make a full list of all such entities on all processors and which + // processor they belong to. 
+ for (i = 0; i < n; ++i) + { + vertcounts[i] = 0; + vertoffsets[i] = 0; + edgecounts[i] = 0; + edgeoffsets[i] = 0; + } - vertcounts[p] = extraDirVertIds.size(); - edgecounts[p] = extraDirEdgeIds.size(); - vComm->AllReduce(vertcounts, LibUtilities::ReduceSum); - vComm->AllReduce(edgecounts, LibUtilities::ReduceSum); - nTotVerts = Vmath::Vsum(n, vertcounts, 1); - nTotEdges = Vmath::Vsum(n, edgecounts, 1); + vertcounts[p] = extraDirVertIds.size(); + edgecounts[p] = extraDirEdgeIds.size(); + vComm->AllReduce(vertcounts, LibUtilities::ReduceSum); + vComm->AllReduce(edgecounts, LibUtilities::ReduceSum); + nTotVerts = Vmath::Vsum(n, vertcounts, 1); + nTotEdges = Vmath::Vsum(n, edgecounts, 1); - vertoffsets[0] = edgeoffsets[0] = 0; + vertoffsets[0] = edgeoffsets[0] = 0; - for (i = 1; i < n; ++i) - { - vertoffsets[i] = vertoffsets[i - 1] + vertcounts[i - 1]; - edgeoffsets[i] = edgeoffsets[i - 1] + edgecounts[i - 1]; - } + for (i = 1; i < n; ++i) + { + vertoffsets[i] = vertoffsets[i - 1] + vertcounts[i - 1]; + edgeoffsets[i] = edgeoffsets[i - 1] + edgecounts[i - 1]; + } - Array vertids(nTotVerts, 0); - Array edgeids(nTotEdges, 0); - Array vertprocs(nTotVerts, 0); - Array edgeprocs(nTotEdges, 0); + Array vertids(nTotVerts, 0); + Array edgeids(nTotEdges, 0); + Array vertprocs(nTotVerts, 0); + Array edgeprocs(nTotEdges, 0); - i = 0; - for (auto &it : extraDirVertIds) - { - vertids[vertoffsets[p] + i] = it.first; - vertprocs[vertoffsets[p] + i] = it.second; - ++i; - } + i = 0; + for (auto &it : extraDirVertIds) + { + vertids[vertoffsets[p] + i] = it.first; + vertprocs[vertoffsets[p] + i] = it.second; + ++i; + } - i = 0; - for (auto &it : extraDirEdgeIds) - { - edgeids[edgeoffsets[p] + i] = it.first; - edgeprocs[edgeoffsets[p] + i] = it.second; - ++i; - } + i = 0; + for (auto &it : extraDirEdgeIds) + { + edgeids[edgeoffsets[p] + i] = it.first; + edgeprocs[edgeoffsets[p] + i] = it.second; + ++i; + } - vComm->AllReduce(vertids, LibUtilities::ReduceSum); - 
vComm->AllReduce(vertprocs, LibUtilities::ReduceSum); - vComm->AllReduce(edgeids, LibUtilities::ReduceSum); - vComm->AllReduce(edgeprocs, LibUtilities::ReduceSum); + vComm->AllReduce(vertids, LibUtilities::ReduceSum); + vComm->AllReduce(vertprocs, LibUtilities::ReduceSum); + vComm->AllReduce(edgeids, LibUtilities::ReduceSum); + vComm->AllReduce(edgeprocs, LibUtilities::ReduceSum); - // Set up list of vertices that need to be shared to other - // partitions - for (i = 0; i < nTotVerts; ++i) + // Set up list of vertices that need to be shared to other + // partitions + for (i = 0; i < nTotVerts; ++i) + { + if (p == vertprocs[i]) // rank = vertproc[i] { - if (p == vertprocs[i]) // rank = vertproc[i] - { - extraDirVerts.insert(vertids[i]); - } + extraDirVerts.insert(vertids[i]); } + } - // Set up list of edges that need to be shared to other partitions - for (i = 0; i < nTotEdges; ++i) + // Set up list of edges that need to be shared to other partitions + for (i = 0; i < nTotEdges; ++i) + { + if (p == edgeprocs[i]) // rank = vertproc[i] { - if (p == edgeprocs[i]) // rank = vertproc[i] - { - extraDirEdges.insert(edgeids[i]); - } + extraDirEdges.insert(edgeids[i]); } + } - // Check between processes if the whole system is singular - int s = m_systemSingular ? 1 : 0; - vComm->AllReduce(s, LibUtilities::ReduceMin); - m_systemSingular = s == 1 ? true : false; + // Check between processes if the whole system is singular + int s = m_systemSingular ? 1 : 0; + vComm->AllReduce(s, LibUtilities::ReduceMin); + m_systemSingular = s == 1 ? 
true : false; - // Find the minimum boundary vertex ID on each process - Array bcminvertid(n, 0); - bcminvertid[p] = vMaxVertId; - vComm->AllReduce(bcminvertid, LibUtilities::ReduceMax); + // Find the minimum boundary vertex ID on each process + Array bcminvertid(n, 0); + bcminvertid[p] = vMaxVertId; + vComm->AllReduce(bcminvertid, LibUtilities::ReduceMax); - // Find the process rank with the minimum boundary vertex ID - int maxIdx = Vmath::Imax(n, bcminvertid, 1); + // Find the process rank with the minimum boundary vertex ID + int maxIdx = Vmath::Imax(n, bcminvertid, 1); - // If the system is singular, the process with the maximum - // number of BCs will set a Dirichlet vertex to make - // system non-singular. Note: we find the process with - // maximum boundary regions to ensure we do not try to set - // a Dirichlet vertex on a partition with no intersection - // with the boundary. - meshVertId = 0; + // If the system is singular, the process with the maximum + // number of BCs will set a Dirichlet vertex to make + // system non-singular. Note: we find the process with + // maximum boundary regions to ensure we do not try to set + // a Dirichlet vertex on a partition with no intersection + // with the boundary. + meshVertId = 0; - if (m_systemSingular && checkIfSystemSingular && maxIdx == p) + if (m_systemSingular && checkIfSystemSingular && maxIdx == p) + { + if (m_session->DefinesParameter("SingularVertex")) { - if (m_session->DefinesParameter("SingularVertex")) - { - m_session->LoadParameter("SingularVertex", meshVertId); - } - else if (vMaxVertId == -1) - { - // All boundaries are periodic. - meshVertId = locExpVector[0]->GetGeom()->GetVid(0); - } - else - { - // Set pinned vertex to that with minimum vertex ID to - // ensure consistency in parallel. - meshVertId = bcminvertid[p]; - } + m_session->LoadParameter("SingularVertex", meshVertId); + } + else if (vMaxVertId == -1) + { + // All boundaries are periodic. 
+ meshVertId = locExpVector[0]->GetGeom()->GetVid(0); + } + else + { + // Set pinned vertex to that with minimum vertex ID to + // ensure consistency in parallel. + meshVertId = bcminvertid[p]; + } - if (graph[0].count(meshVertId) == 0) - { - graph[0][meshVertId] = graphVertId++; - } + if (graph[0].count(meshVertId) == 0) + { + graph[0][meshVertId] = graphVertId++; } + } - vComm->AllReduce(meshVertId, LibUtilities::ReduceSum); + vComm->AllReduce(meshVertId, LibUtilities::ReduceSum); - // When running in parallel, we need to ensure that the singular - // mesh vertex is communicated to any periodic vertices, otherwise - // the system may diverge. - if (m_systemSingular && checkIfSystemSingular) + // When running in parallel, we need to ensure that the singular + // mesh vertex is communicated to any periodic vertices, otherwise + // the system may diverge. + if (m_systemSingular && checkIfSystemSingular) + { + // Firstly, we check that no other processors have this + // vertex. If they do, then we mark the vertex as also being + // Dirichlet. + if (maxIdx != p) { - // Firstly, we check that no other processors have this - // vertex. If they do, then we mark the vertex as also being - // Dirichlet. - if (maxIdx != p) + for (i = 0; i < locExpVector.size(); ++i) { - for (i = 0; i < locExpVector.size(); ++i) + for (j = 0; j < locExpVector[i]->GetNverts(); ++j) { - for (j = 0; j < locExpVector[i]->GetNverts(); ++j) + if (locExpVector[i]->GetGeom()->GetVid(j) != meshVertId) { - if (locExpVector[i]->GetGeom()->GetVid(j) != meshVertId) - { - continue; - } + continue; + } - if (graph[0].count(meshVertId) == 0) - { - graph[0][meshVertId] = graphVertId++; - } + if (graph[0].count(meshVertId) == 0) + { + graph[0][meshVertId] = graphVertId++; } } } + } - // In the case that meshVertId is periodic with other vertices, - // this process and all other processes need to make sure that - // the periodic vertices are also marked as Dirichlet. 
- int gId; + // In the case that meshVertId is periodic with other vertices, + // this process and all other processes need to make sure that + // the periodic vertices are also marked as Dirichlet. + int gId; - // At least one process (maxBCidx) will have already associated - // a graphVertId with meshVertId. Others won't even have any of - // the vertices. The logic below is designed to handle both - // cases. - if (graph[0].count(meshVertId) == 0) - { - gId = -1; - } - else - { - gId = graph[0][meshVertId]; - } + // At least one process (maxBCidx) will have already associated + // a graphVertId with meshVertId. Others won't even have any of + // the vertices. The logic below is designed to handle both + // cases. + if (graph[0].count(meshVertId) == 0) + { + gId = -1; + } + else + { + gId = graph[0][meshVertId]; + } - for (auto &pIt : periodicVerts) + for (auto &pIt : periodicVerts) + { + // Either the vertex is local to this processor (in which + // case it will be in the pIt.first position) or else + // meshVertId might be contained within another processor's + // vertex list. The if statement below covers both cases. If + // we find it, set as Dirichlet with the vertex id gId. + if (pIt.first == meshVertId) { - // Either the vertex is local to this processor (in which - // case it will be in the pIt.first position) or else - // meshVertId might be contained within another processor's - // vertex list. The if statement below covers both cases. If - // we find it, set as Dirichlet with the vertex id gId. - if (pIt.first == meshVertId) - { - gId = gId < 0 ? graphVertId++ : gId; - graph[0][meshVertId] = gId; + gId = gId < 0 ? 
graphVertId++ : gId; + graph[0][meshVertId] = gId; - for (i = 0; i < pIt.second.size(); ++i) + for (i = 0; i < pIt.second.size(); ++i) + { + if (pIt.second[i].isLocal) { - if (pIt.second[i].isLocal) - { - graph[0][pIt.second[i].id] = graph[0][meshVertId]; - } + graph[0][pIt.second[i].id] = graph[0][meshVertId]; } } - else + } + else + { + bool found = false; + for (i = 0; i < pIt.second.size(); ++i) { - bool found = false; - for (i = 0; i < pIt.second.size(); ++i) + if (pIt.second[i].id == meshVertId) { - if (pIt.second[i].id == meshVertId) - { - found = true; - break; - } + found = true; + break; } + } - if (found) - { - gId = gId < 0 ? graphVertId++ : gId; - graph[0][pIt.first] = gId; + if (found) + { + gId = gId < 0 ? graphVertId++ : gId; + graph[0][pIt.first] = gId; - for (i = 0; i < pIt.second.size(); ++i) + for (i = 0; i < pIt.second.size(); ++i) + { + if (pIt.second[i].isLocal) { - if (pIt.second[i].isLocal) - { - graph[0][pIt.second[i].id] = - graph[0][pIt.first]; - } + graph[0][pIt.second[i].id] = graph[0][pIt.first]; } } } } } + } - // Add extra dirichlet boundary conditions to count. - m_numLocalDirBndCoeffs += nExtraDirichlet; - firstNonDirGraphVertId = graphVertId; - - typedef boost::adjacency_list - BoostGraph; - BoostGraph boostGraphObj; - - vector> tempGraph(3); - map vwgts_map; - Array localVerts; - Array localEdges; - Array localFaces; - - int tempGraphVertId = 0; - int localVertOffset = 0; - int localEdgeOffset = 0; - int localFaceOffset = 0; - int nTotalVerts = 0; - int nTotalEdges = 0; - int nTotalFaces = 0; - int nVerts; - int nEdges; - int nFaces; - int vertCnt; - int edgeCnt; - int faceCnt; - - m_numNonDirVertexModes = 0; - m_numNonDirEdges = 0; - m_numNonDirFaces = 0; - m_numNonDirFaceModes = 0; - m_numNonDirFaceModes = 0; - m_numLocalBndCoeffs = 0; + // Add extra dirichlet boundary conditions to count. 
+ m_numLocalDirBndCoeffs += nExtraDirichlet; + firstNonDirGraphVertId = graphVertId; + + typedef boost::adjacency_list + BoostGraph; + BoostGraph boostGraphObj; + + vector> tempGraph(3); + map vwgts_map; + Array localVerts; + Array localEdges; + Array localFaces; + + int tempGraphVertId = 0; + int localVertOffset = 0; + int localEdgeOffset = 0; + int localFaceOffset = 0; + int nTotalVerts = 0; + int nTotalEdges = 0; + int nTotalFaces = 0; + int nVerts; + int nEdges; + int nFaces; + int vertCnt; + int edgeCnt; + int faceCnt; + + m_numNonDirVertexModes = 0; + m_numNonDirEdges = 0; + m_numNonDirFaces = 0; + m_numNonDirFaceModes = 0; + m_numNonDirFaceModes = 0; + m_numLocalBndCoeffs = 0; + + map EdgeSize; + map FaceSize; + + /// - Count verts, edges, face and add up edges and face sizes + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + nEdges = exp->GetGeom()->GetNumEdges(); + nFaces = exp->GetGeom()->GetNumFaces(); - map EdgeSize; - map FaceSize; + nTotalVerts += exp->GetNverts(); + nTotalEdges += nEdges; + nTotalFaces += nFaces; - /// - Count verts, edges, face and add up edges and face sizes - for (i = 0; i < locExpVector.size(); ++i) + for (j = 0; j < nEdges; ++j) { - exp = locExpVector[i]; - nEdges = exp->GetGeom()->GetNumEdges(); - nFaces = exp->GetGeom()->GetNumFaces(); - - nTotalVerts += exp->GetNverts(); - nTotalEdges += nEdges; - nTotalFaces += nFaces; + meshEdgeId = exp->GetGeom()->GetEid(j); + int nEdgeInt; - for (j = 0; j < nEdges; ++j) + if (nFaces) { - meshEdgeId = exp->GetGeom()->GetEid(j); - int nEdgeInt; - - if (nFaces) - { - nEdgeInt = - exp->as()->GetEdgeNcoeffs( - j) - - 2; - } - else - { - nEdgeInt = exp->GetTraceNcoeffs(j) - 2; - } - - if (EdgeSize.count(meshEdgeId) > 0) - { - EdgeSize[meshEdgeId] = min(EdgeSize[meshEdgeId], nEdgeInt); - } - else - { - EdgeSize[meshEdgeId] = nEdgeInt; - } + nEdgeInt = + exp->as()->GetEdgeNcoeffs(j) - 2; + } + else + { + nEdgeInt = exp->GetTraceNcoeffs(j) - 2; } - faceCnt = 0; - for (j = 
0; j < nFaces; ++j) + if (EdgeSize.count(meshEdgeId) > 0) { - meshFaceId = exp->GetGeom()->GetFid(j); - if (FaceSize.count(meshFaceId) > 0) - { - FaceSize[meshFaceId] = - min(FaceSize[meshFaceId], exp->GetTraceIntNcoeffs(j)); - } - else - { - FaceSize[meshFaceId] = exp->GetTraceIntNcoeffs(j); - } - FaceSize[meshFaceId] = exp->GetTraceIntNcoeffs(j); + EdgeSize[meshEdgeId] = min(EdgeSize[meshEdgeId], nEdgeInt); + } + else + { + EdgeSize[meshEdgeId] = nEdgeInt; } } - /// - Periodic vertices - for (auto &pIt : periodicVerts) + faceCnt = 0; + for (j = 0; j < nFaces; ++j) { - meshVertId = pIt.first; - - // This periodic vertex is joined to a Dirichlet condition. - if (graph[0].count(pIt.first) != 0) + meshFaceId = exp->GetGeom()->GetFid(j); + if (FaceSize.count(meshFaceId) > 0) { - for (i = 0; i < pIt.second.size(); ++i) - { - meshVertId2 = pIt.second[i].id; - if (graph[0].count(meshVertId2) == 0 && - pIt.second[i].isLocal) - { - graph[0][meshVertId2] = graph[0][meshVertId]; - } - } - continue; + FaceSize[meshFaceId] = + min(FaceSize[meshFaceId], exp->GetTraceIntNcoeffs(j)); + } + else + { + FaceSize[meshFaceId] = exp->GetTraceIntNcoeffs(j); } + FaceSize[meshFaceId] = exp->GetTraceIntNcoeffs(j); + } + } + + /// - Periodic vertices + for (auto &pIt : periodicVerts) + { + meshVertId = pIt.first; - // One of the attached vertices is Dirichlet. - bool isDirichlet = false; + // This periodic vertex is joined to a Dirichlet condition. + if (graph[0].count(pIt.first) != 0) + { for (i = 0; i < pIt.second.size(); ++i) { - if (!pIt.second[i].isLocal) - { - continue; - } - meshVertId2 = pIt.second[i].id; - if (graph[0].count(meshVertId2) > 0) + if (graph[0].count(meshVertId2) == 0 && pIt.second[i].isLocal) { - isDirichlet = true; - break; + graph[0][meshVertId2] = graph[0][meshVertId]; } } + continue; + } - if (isDirichlet) + // One of the attached vertices is Dirichlet. 
+ bool isDirichlet = false; + for (i = 0; i < pIt.second.size(); ++i) + { + if (!pIt.second[i].isLocal) { - graph[0][meshVertId] = graph[0][pIt.second[i].id]; - - for (j = 0; j < pIt.second.size(); ++j) - { - meshVertId2 = pIt.second[i].id; - if (j == i || !pIt.second[j].isLocal || - graph[0].count(meshVertId2) > 0) - { - continue; - } - - graph[0][meshVertId2] = graph[0][pIt.second[i].id]; - } - continue; } - // Otherwise, see if a vertex ID has already been set. - for (i = 0; i < pIt.second.size(); ++i) + meshVertId2 = pIt.second[i].id; + if (graph[0].count(meshVertId2) > 0) + { + isDirichlet = true; + break; + } + } + + if (isDirichlet) + { + graph[0][meshVertId] = graph[0][pIt.second[i].id]; + + for (j = 0; j < pIt.second.size(); ++j) { - if (!pIt.second[i].isLocal) + meshVertId2 = pIt.second[i].id; + if (j == i || !pIt.second[j].isLocal || + graph[0].count(meshVertId2) > 0) { continue; } - if (tempGraph[0].count(pIt.second[i].id) > 0) - { - break; - } + graph[0][meshVertId2] = graph[0][pIt.second[i].id]; } - if (i == pIt.second.size()) + continue; + } + + // Otherwise, see if a vertex ID has already been set. 
+ for (i = 0; i < pIt.second.size(); ++i) + { + if (!pIt.second[i].isLocal) { - boost::add_vertex(boostGraphObj); - tempGraph[0][meshVertId] = tempGraphVertId++; - m_numNonDirVertexModes++; + continue; } - else + + if (tempGraph[0].count(pIt.second[i].id) > 0) { - tempGraph[0][meshVertId] = tempGraph[0][pIt.second[i].id]; + break; } } - // Store the temporary graph vertex id's of all element edges and - // vertices in these 3 arrays below - localVerts = Array(nTotalVerts, -1); - localEdges = Array(nTotalEdges, -1); - localFaces = Array(nTotalFaces, -1); - - // Set up vertex numbering - for (i = 0; i < locExpVector.size(); ++i) + if (i == pIt.second.size()) { - exp = locExpVector[i]; - vertCnt = 0; - nVerts = exp->GetNverts(); - for (j = 0; j < nVerts; ++j) - { - meshVertId = exp->GetGeom()->GetVid(j); - if (graph[0].count(meshVertId) == 0) - { - if (tempGraph[0].count(meshVertId) == 0) - { - boost::add_vertex(boostGraphObj); - tempGraph[0][meshVertId] = tempGraphVertId++; - m_numNonDirVertexModes += 1; - } - localVerts[localVertOffset + vertCnt++] = - tempGraph[0][meshVertId]; - vwgts_map[tempGraph[0][meshVertId]] = 1; - } - } - - localVertOffset += nVerts; + boost::add_vertex(boostGraphObj); + tempGraph[0][meshVertId] = tempGraphVertId++; + m_numNonDirVertexModes++; } - - /// - Periodic edges - for (auto &pIt : periodicEdges) + else { - meshEdgeId = pIt.first; + tempGraph[0][meshVertId] = tempGraph[0][pIt.second[i].id]; + } + } + + // Store the temporary graph vertex id's of all element edges and + // vertices in these 3 arrays below + localVerts = Array(nTotalVerts, -1); + localEdges = Array(nTotalEdges, -1); + localFaces = Array(nTotalFaces, -1); - // This periodic edge is joined to a Dirichlet condition. 
- if (graph[1].count(pIt.first) != 0) + // Set up vertex numbering + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + vertCnt = 0; + nVerts = exp->GetNverts(); + for (j = 0; j < nVerts; ++j) + { + meshVertId = exp->GetGeom()->GetVid(j); + if (graph[0].count(meshVertId) == 0) { - for (i = 0; i < pIt.second.size(); ++i) + if (tempGraph[0].count(meshVertId) == 0) { - meshEdgeId2 = pIt.second[i].id; - if (graph[1].count(meshEdgeId2) == 0 && - pIt.second[i].isLocal) - { - graph[1][meshEdgeId2] = graph[1][meshEdgeId]; - } + boost::add_vertex(boostGraphObj); + tempGraph[0][meshVertId] = tempGraphVertId++; + m_numNonDirVertexModes += 1; } - continue; + localVerts[localVertOffset + vertCnt++] = + tempGraph[0][meshVertId]; + vwgts_map[tempGraph[0][meshVertId]] = 1; } + } + + localVertOffset += nVerts; + } + + /// - Periodic edges + for (auto &pIt : periodicEdges) + { + meshEdgeId = pIt.first; - // One of the attached edges is Dirichlet. - bool isDirichlet = false; + // This periodic edge is joined to a Dirichlet condition. + if (graph[1].count(pIt.first) != 0) + { for (i = 0; i < pIt.second.size(); ++i) { - if (!pIt.second[i].isLocal) - { - continue; - } - meshEdgeId2 = pIt.second[i].id; - if (graph[1].count(meshEdgeId2) > 0) + if (graph[1].count(meshEdgeId2) == 0 && pIt.second[i].isLocal) { - isDirichlet = true; - break; + graph[1][meshEdgeId2] = graph[1][meshEdgeId]; } } + continue; + } - if (isDirichlet) + // One of the attached edges is Dirichlet. + bool isDirichlet = false; + for (i = 0; i < pIt.second.size(); ++i) + { + if (!pIt.second[i].isLocal) { - graph[1][meshEdgeId] = graph[1][pIt.second[i].id]; - - for (j = 0; j < pIt.second.size(); ++j) - { - meshEdgeId2 = pIt.second[i].id; - if (j == i || !pIt.second[j].isLocal || - graph[1].count(meshEdgeId2) > 0) - { - continue; - } - - graph[1][meshEdgeId2] = graph[1][pIt.second[i].id]; - } - continue; } - // Otherwise, see if a edge ID has already been set. 
- for (i = 0; i < pIt.second.size(); ++i) + meshEdgeId2 = pIt.second[i].id; + if (graph[1].count(meshEdgeId2) > 0) + { + isDirichlet = true; + break; + } + } + + if (isDirichlet) + { + graph[1][meshEdgeId] = graph[1][pIt.second[i].id]; + + for (j = 0; j < pIt.second.size(); ++j) { - if (!pIt.second[i].isLocal) + meshEdgeId2 = pIt.second[i].id; + if (j == i || !pIt.second[j].isLocal || + graph[1].count(meshEdgeId2) > 0) { continue; } - if (tempGraph[1].count(pIt.second[i].id) > 0) - { - break; - } + graph[1][meshEdgeId2] = graph[1][pIt.second[i].id]; } - if (i == pIt.second.size()) + continue; + } + + // Otherwise, see if a edge ID has already been set. + for (i = 0; i < pIt.second.size(); ++i) + { + if (!pIt.second[i].isLocal) { - boost::add_vertex(boostGraphObj); - tempGraph[1][meshEdgeId] = tempGraphVertId++; - m_numNonDirEdgeModes += EdgeSize[meshEdgeId]; - m_numNonDirEdges++; + continue; } - else + + if (tempGraph[1].count(pIt.second[i].id) > 0) { - tempGraph[1][meshEdgeId] = tempGraph[1][pIt.second[i].id]; + break; } } - int nEdgeIntCoeffs, nFaceIntCoeffs; - - // Set up edge numbering - for (i = 0; i < locExpVector.size(); ++i) + if (i == pIt.second.size()) + { + boost::add_vertex(boostGraphObj); + tempGraph[1][meshEdgeId] = tempGraphVertId++; + m_numNonDirEdgeModes += EdgeSize[meshEdgeId]; + m_numNonDirEdges++; + } + else { - exp = locExpVector[i]; - edgeCnt = 0; - nEdges = exp->GetGeom()->GetNumEdges(); + tempGraph[1][meshEdgeId] = tempGraph[1][pIt.second[i].id]; + } + } + + int nEdgeIntCoeffs, nFaceIntCoeffs; + + // Set up edge numbering + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + edgeCnt = 0; + nEdges = exp->GetGeom()->GetNumEdges(); - for (j = 0; j < nEdges; ++j) + for (j = 0; j < nEdges; ++j) + { + meshEdgeId = exp->GetGeom()->GetEid(j); + nEdgeIntCoeffs = EdgeSize[meshEdgeId]; + if (graph[1].count(meshEdgeId) == 0) { - meshEdgeId = exp->GetGeom()->GetEid(j); - nEdgeIntCoeffs = EdgeSize[meshEdgeId]; - if 
(graph[1].count(meshEdgeId) == 0) + if (tempGraph[1].count(meshEdgeId) == 0) { - if (tempGraph[1].count(meshEdgeId) == 0) - { - boost::add_vertex(boostGraphObj); - tempGraph[1][meshEdgeId] = tempGraphVertId++; - m_numNonDirEdgeModes += nEdgeIntCoeffs; + boost::add_vertex(boostGraphObj); + tempGraph[1][meshEdgeId] = tempGraphVertId++; + m_numNonDirEdgeModes += nEdgeIntCoeffs; - m_numNonDirEdges++; - } - localEdges[localEdgeOffset + edgeCnt++] = - tempGraph[1][meshEdgeId]; - vwgts_map[tempGraph[1][meshEdgeId]] = nEdgeIntCoeffs; + m_numNonDirEdges++; } + localEdges[localEdgeOffset + edgeCnt++] = + tempGraph[1][meshEdgeId]; + vwgts_map[tempGraph[1][meshEdgeId]] = nEdgeIntCoeffs; } - - localEdgeOffset += nEdges; } - /// - Periodic faces - for (auto &pIt : periodicFaces) - { - if (!pIt.second[0].isLocal) - { - // The face mapped to is on another process. - meshFaceId = pIt.first; - ASSERTL0( - graph[2].count(meshFaceId) == 0, - "This periodic boundary edge has been specified before"); - boost::add_vertex(boostGraphObj); - tempGraph[2][meshFaceId] = tempGraphVertId++; - nFaceIntCoeffs = FaceSize[meshFaceId]; - m_numNonDirFaceModes += nFaceIntCoeffs; - m_numNonDirFaces++; - } - else if (pIt.first < pIt.second[0].id) - { - ASSERTL0( - graph[2].count(pIt.first) == 0, - "This periodic boundary face has been specified before"); - ASSERTL0( - graph[2].count(pIt.second[0].id) == 0, - "This periodic boundary face has been specified before"); + localEdgeOffset += nEdges; + } - boost::add_vertex(boostGraphObj); - tempGraph[2][pIt.first] = tempGraphVertId; - tempGraph[2][pIt.second[0].id] = tempGraphVertId++; - nFaceIntCoeffs = FaceSize[pIt.first]; - m_numNonDirFaceModes += nFaceIntCoeffs; - m_numNonDirFaces++; - } + /// - Periodic faces + for (auto &pIt : periodicFaces) + { + if (!pIt.second[0].isLocal) + { + // The face mapped to is on another process. 
+ meshFaceId = pIt.first; + ASSERTL0(graph[2].count(meshFaceId) == 0, + "This periodic boundary edge has been specified before"); + boost::add_vertex(boostGraphObj); + tempGraph[2][meshFaceId] = tempGraphVertId++; + nFaceIntCoeffs = FaceSize[meshFaceId]; + m_numNonDirFaceModes += nFaceIntCoeffs; + m_numNonDirFaces++; + } + else if (pIt.first < pIt.second[0].id) + { + ASSERTL0(graph[2].count(pIt.first) == 0, + "This periodic boundary face has been specified before"); + ASSERTL0(graph[2].count(pIt.second[0].id) == 0, + "This periodic boundary face has been specified before"); + + boost::add_vertex(boostGraphObj); + tempGraph[2][pIt.first] = tempGraphVertId; + tempGraph[2][pIt.second[0].id] = tempGraphVertId++; + nFaceIntCoeffs = FaceSize[pIt.first]; + m_numNonDirFaceModes += nFaceIntCoeffs; + m_numNonDirFaces++; } + } - // setup face numbering - for (i = 0; i < locExpVector.size(); ++i) + // setup face numbering + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + nFaces = exp->GetGeom()->GetNumFaces(); + faceCnt = 0; + for (j = 0; j < nFaces; ++j) { - exp = locExpVector[i]; - nFaces = exp->GetGeom()->GetNumFaces(); - faceCnt = 0; - for (j = 0; j < nFaces; ++j) + nFaceIntCoeffs = exp->GetTraceIntNcoeffs(j); + meshFaceId = exp->GetGeom()->GetFid(j); + if (graph[2].count(meshFaceId) == 0) { - nFaceIntCoeffs = exp->GetTraceIntNcoeffs(j); - meshFaceId = exp->GetGeom()->GetFid(j); - if (graph[2].count(meshFaceId) == 0) + if (tempGraph[2].count(meshFaceId) == 0) { - if (tempGraph[2].count(meshFaceId) == 0) - { - boost::add_vertex(boostGraphObj); - tempGraph[2][meshFaceId] = tempGraphVertId++; - m_numNonDirFaceModes += nFaceIntCoeffs; + boost::add_vertex(boostGraphObj); + tempGraph[2][meshFaceId] = tempGraphVertId++; + m_numNonDirFaceModes += nFaceIntCoeffs; - m_numNonDirFaces++; - } - localFaces[localFaceOffset + faceCnt++] = - tempGraph[2][meshFaceId]; - vwgts_map[tempGraph[2][meshFaceId]] = nFaceIntCoeffs; + m_numNonDirFaces++; } + 
localFaces[localFaceOffset + faceCnt++] = + tempGraph[2][meshFaceId]; + vwgts_map[tempGraph[2][meshFaceId]] = nFaceIntCoeffs; } - m_numLocalBndCoeffs += exp->NumBndryCoeffs(); - - localFaceOffset += nFaces; } + m_numLocalBndCoeffs += exp->NumBndryCoeffs(); - localVertOffset = 0; - localEdgeOffset = 0; - localFaceOffset = 0; - for (i = 0; i < locExpVector.size(); ++i) - { - exp = locExpVector[i]; - nVerts = exp->GetNverts(); - nEdges = exp->GetGeom()->GetNumEdges(); - nFaces = exp->GetGeom()->GetNumFaces(); + localFaceOffset += nFaces; + } + + localVertOffset = 0; + localEdgeOffset = 0; + localFaceOffset = 0; + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + nVerts = exp->GetNverts(); + nEdges = exp->GetGeom()->GetNumEdges(); + nFaces = exp->GetGeom()->GetNumFaces(); - // Now loop over all local faces, edges and vertices of this - // element and define that all other faces, edges and verices of - // this element are adjacent to them. + // Now loop over all local faces, edges and vertices of this + // element and define that all other faces, edges and verices of + // this element are adjacent to them. 
- // Vertices - for (j = 0; j < nVerts; j++) + // Vertices + for (j = 0; j < nVerts; j++) + { + if (localVerts[j + localVertOffset] == -1) + { + break; + } + // associate to other vertices + for (k = 0; k < nVerts; k++) { - if (localVerts[j + localVertOffset] == -1) + if (localVerts[k + localVertOffset] == -1) { break; } - // associate to other vertices - for (k = 0; k < nVerts; k++) - { - if (localVerts[k + localVertOffset] == -1) - { - break; - } - if (k != j) - { - boost::add_edge((size_t)localVerts[j + localVertOffset], - (size_t)localVerts[k + localVertOffset], - boostGraphObj); - } - } - // associate to other edges - for (k = 0; k < nEdges; k++) + if (k != j) { - if (localEdges[k + localEdgeOffset] == -1) - { - break; - } boost::add_edge((size_t)localVerts[j + localVertOffset], - (size_t)localEdges[k + localEdgeOffset], + (size_t)localVerts[k + localVertOffset], boostGraphObj); } - // associate to other faces - for (k = 0; k < nFaces; k++) + } + // associate to other edges + for (k = 0; k < nEdges; k++) + { + if (localEdges[k + localEdgeOffset] == -1) { - if (localFaces[k + localFaceOffset] == -1) - { - break; - } - boost::add_edge((size_t)localVerts[j + localVertOffset], - (size_t)localFaces[k + localFaceOffset], - boostGraphObj); + break; } + boost::add_edge((size_t)localVerts[j + localVertOffset], + (size_t)localEdges[k + localEdgeOffset], + boostGraphObj); } - - // Edges - for (j = 0; j < nEdges; j++) + // associate to other faces + for (k = 0; k < nFaces; k++) { - if (localEdges[j + localEdgeOffset] == -1) + if (localFaces[k + localFaceOffset] == -1) { break; } - // Associate to other edges - for (k = 0; k < nEdges; k++) + boost::add_edge((size_t)localVerts[j + localVertOffset], + (size_t)localFaces[k + localFaceOffset], + boostGraphObj); + } + } + + // Edges + for (j = 0; j < nEdges; j++) + { + if (localEdges[j + localEdgeOffset] == -1) + { + break; + } + // Associate to other edges + for (k = 0; k < nEdges; k++) + { + if (localEdges[k + 
localEdgeOffset] == -1) { - if (localEdges[k + localEdgeOffset] == -1) - { - break; - } - if (k != j) - { - boost::add_edge((size_t)localEdges[j + localEdgeOffset], - (size_t)localEdges[k + localEdgeOffset], - boostGraphObj); - } + break; } - // Associate to vertices - for (k = 0; k < nVerts; k++) + if (k != j) { - if (localVerts[k + localVertOffset] == -1) - { - break; - } boost::add_edge((size_t)localEdges[j + localEdgeOffset], - (size_t)localVerts[k + localVertOffset], + (size_t)localEdges[k + localEdgeOffset], boostGraphObj); } - // Associate to faces - for (k = 0; k < nFaces; k++) + } + // Associate to vertices + for (k = 0; k < nVerts; k++) + { + if (localVerts[k + localVertOffset] == -1) { - if (localFaces[k + localFaceOffset] == -1) - { - break; - } - boost::add_edge((size_t)localEdges[j + localEdgeOffset], - (size_t)localFaces[k + localFaceOffset], - boostGraphObj); + break; } + boost::add_edge((size_t)localEdges[j + localEdgeOffset], + (size_t)localVerts[k + localVertOffset], + boostGraphObj); } - - // Faces - for (j = 0; j < nFaces; j++) + // Associate to faces + for (k = 0; k < nFaces; k++) { - if (localFaces[j + localFaceOffset] == -1) + if (localFaces[k + localFaceOffset] == -1) { break; } - // Associate to other faces - for (k = 0; k < nFaces; k++) + boost::add_edge((size_t)localEdges[j + localEdgeOffset], + (size_t)localFaces[k + localFaceOffset], + boostGraphObj); + } + } + + // Faces + for (j = 0; j < nFaces; j++) + { + if (localFaces[j + localFaceOffset] == -1) + { + break; + } + // Associate to other faces + for (k = 0; k < nFaces; k++) + { + if (localFaces[k + localFaceOffset] == -1) { - if (localFaces[k + localFaceOffset] == -1) - { - break; - } - if (k != j) - { - boost::add_edge((size_t)localFaces[j + localFaceOffset], - (size_t)localFaces[k + localFaceOffset], - boostGraphObj); - } + break; } - // Associate to vertices - for (k = 0; k < nVerts; k++) + if (k != j) { - if (localVerts[k + localVertOffset] == -1) - { - break; - } 
boost::add_edge((size_t)localFaces[j + localFaceOffset], - (size_t)localVerts[k + localVertOffset], + (size_t)localFaces[k + localFaceOffset], boostGraphObj); } - // Associate to edges - for (k = 0; k < nEdges; k++) + } + // Associate to vertices + for (k = 0; k < nVerts; k++) + { + if (localVerts[k + localVertOffset] == -1) { - if (localEdges[k + localEdgeOffset] == -1) - { - break; - } - boost::add_edge((size_t)localFaces[j + localFaceOffset], - (size_t)localEdges[k + localEdgeOffset], - boostGraphObj); + break; } + boost::add_edge((size_t)localFaces[j + localFaceOffset], + (size_t)localVerts[k + localVertOffset], + boostGraphObj); + } + // Associate to edges + for (k = 0; k < nEdges; k++) + { + if (localEdges[k + localEdgeOffset] == -1) + { + break; + } + boost::add_edge((size_t)localFaces[j + localFaceOffset], + (size_t)localEdges[k + localEdgeOffset], + boostGraphObj); } - - localVertOffset += nVerts; - localEdgeOffset += nEdges; - localFaceOffset += nFaces; } - // Container to store vertices of the graph which correspond to - // degrees of freedom along the boundary and periodic BCs. - set partVerts; + localVertOffset += nVerts; + localEdgeOffset += nEdges; + localFaceOffset += nFaces; + } + + // Container to store vertices of the graph which correspond to + // degrees of freedom along the boundary and periodic BCs. + set partVerts; - if (m_solnType == eIterativeMultiLevelStaticCond || - m_solnType == eXxtMultiLevelStaticCond) - { - vector procVerts, procEdges, procFaces; - set foundVerts, foundEdges, foundFaces; + if (m_solnType == eIterativeMultiLevelStaticCond || + m_solnType == eXxtMultiLevelStaticCond) + { + vector procVerts, procEdges, procFaces; + set foundVerts, foundEdges, foundFaces; - // Loop over element and construct the procVerts and procEdges - // vectors, which store the geometry IDs of mesh vertices and - // edges respectively which are local to this process. 
- for (i = cnt = 0; i < locExpVector.size(); ++i) + // Loop over element and construct the procVerts and procEdges + // vectors, which store the geometry IDs of mesh vertices and + // edges respectively which are local to this process. + for (i = cnt = 0; i < locExpVector.size(); ++i) + { + int elmtid = i; + exp = locExpVector[elmtid]; + for (j = 0; j < exp->GetNverts(); ++j) { - int elmtid = i; - exp = locExpVector[elmtid]; - for (j = 0; j < exp->GetNverts(); ++j) + int vid = exp->GetGeom()->GetVid(j) + 1; + if (foundVerts.count(vid) == 0) { - int vid = exp->GetGeom()->GetVid(j) + 1; - if (foundVerts.count(vid) == 0) - { - procVerts.push_back(vid); - foundVerts.insert(vid); - } + procVerts.push_back(vid); + foundVerts.insert(vid); } + } - for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) - { - int eid = exp->GetGeom()->GetEid(j) + 1; + for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + { + int eid = exp->GetGeom()->GetEid(j) + 1; - if (foundEdges.count(eid) == 0) - { - procEdges.push_back(eid); - foundEdges.insert(eid); - } + if (foundEdges.count(eid) == 0) + { + procEdges.push_back(eid); + foundEdges.insert(eid); } + } - for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) - { - int fid = exp->GetGeom()->GetFid(j) + 1; + for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + { + int fid = exp->GetGeom()->GetFid(j) + 1; - if (foundFaces.count(fid) == 0) - { - procFaces.push_back(fid); - foundFaces.insert(fid); - } + if (foundFaces.count(fid) == 0) + { + procFaces.push_back(fid); + foundFaces.insert(fid); } } + } - int unique_verts = foundVerts.size(); - int unique_edges = foundEdges.size(); - int unique_faces = foundFaces.size(); + int unique_verts = foundVerts.size(); + int unique_edges = foundEdges.size(); + int unique_faces = foundFaces.size(); - bool verbose = m_session->DefinesCmdLineArgument("verbose"); + bool verbose = m_session->DefinesCmdLineArgument("verbose"); - // Now construct temporary GS objects. 
These will be used to - // populate the arrays tmp3 and tmp4 with the multiplicity of - // the vertices and edges respectively to identify those - // vertices and edges which are located on partition boundary. - Array vertArray(unique_verts, &procVerts[0]); - Gs::gs_data *tmp1 = Gs::Init(vertArray, vComm, verbose); - Array tmp4(unique_verts, 1.0); - Array tmp5(unique_edges, 1.0); - Array tmp6(unique_faces, 1.0); - Gs::Gather(tmp4, Gs::gs_add, tmp1); - Gs::Finalise(tmp1); + // Now construct temporary GS objects. These will be used to + // populate the arrays tmp3 and tmp4 with the multiplicity of + // the vertices and edges respectively to identify those + // vertices and edges which are located on partition boundary. + Array vertArray(unique_verts, &procVerts[0]); + Gs::gs_data *tmp1 = Gs::Init(vertArray, vComm, verbose); + Array tmp4(unique_verts, 1.0); + Array tmp5(unique_edges, 1.0); + Array tmp6(unique_faces, 1.0); + Gs::Gather(tmp4, Gs::gs_add, tmp1); + Gs::Finalise(tmp1); - if (unique_edges > 0) - { - Array edgeArray(unique_edges, &procEdges[0]); - Gs::gs_data *tmp2 = Gs::Init(edgeArray, vComm, verbose); - Gs::Gather(tmp5, Gs::gs_add, tmp2); - Gs::Finalise(tmp2); - } + if (unique_edges > 0) + { + Array edgeArray(unique_edges, &procEdges[0]); + Gs::gs_data *tmp2 = Gs::Init(edgeArray, vComm, verbose); + Gs::Gather(tmp5, Gs::gs_add, tmp2); + Gs::Finalise(tmp2); + } - if (unique_faces > 0) - { - Array faceArray(unique_faces, &procFaces[0]); - Gs::gs_data *tmp3 = Gs::Init(faceArray, vComm, verbose); - Gs::Gather(tmp6, Gs::gs_add, tmp3); - Gs::Finalise(tmp3); - } + if (unique_faces > 0) + { + Array faceArray(unique_faces, &procFaces[0]); + Gs::gs_data *tmp3 = Gs::Init(faceArray, vComm, verbose); + Gs::Gather(tmp6, Gs::gs_add, tmp3); + Gs::Finalise(tmp3); + } - // Finally, fill the partVerts set with all non-Dirichlet - // vertices which lie on a partition boundary. 
- for (i = 0; i < unique_verts; ++i) + // Finally, fill the partVerts set with all non-Dirichlet + // vertices which lie on a partition boundary. + for (i = 0; i < unique_verts; ++i) + { + if (tmp4[i] > 1.0) { - if (tmp4[i] > 1.0) + if (graph[0].count(procVerts[i] - 1) == 0) { - if (graph[0].count(procVerts[i] - 1) == 0) - { - partVerts.insert(tempGraph[0][procVerts[i] - 1]); - } + partVerts.insert(tempGraph[0][procVerts[i] - 1]); } } + } - for (i = 0; i < unique_edges; ++i) + for (i = 0; i < unique_edges; ++i) + { + if (tmp5[i] > 1.0) { - if (tmp5[i] > 1.0) + if (graph[1].count(procEdges[i] - 1) == 0) { - if (graph[1].count(procEdges[i] - 1) == 0) - { - partVerts.insert(tempGraph[1][procEdges[i] - 1]); - } + partVerts.insert(tempGraph[1][procEdges[i] - 1]); } } + } - for (i = 0; i < unique_faces; ++i) + for (i = 0; i < unique_faces; ++i) + { + if (tmp6[i] > 1.0) { - if (tmp6[i] > 1.0) + if (graph[2].count(procFaces[i] - 1) == 0) { - if (graph[2].count(procFaces[i] - 1) == 0) - { - partVerts.insert(tempGraph[2][procFaces[i] - 1]); - } + partVerts.insert(tempGraph[2][procFaces[i] - 1]); } } + } - // Now fill with all vertices on periodic BCs - for (auto &pIt : periodicVerts) + // Now fill with all vertices on periodic BCs + for (auto &pIt : periodicVerts) + { + if (graph[0].count(pIt.first) == 0) { - if (graph[0].count(pIt.first) == 0) - { - partVerts.insert(tempGraph[0][pIt.first]); - } + partVerts.insert(tempGraph[0][pIt.first]); } - for (auto &pIt : periodicEdges) + } + for (auto &pIt : periodicEdges) + { + if (graph[1].count(pIt.first) == 0) { - if (graph[1].count(pIt.first) == 0) - { - partVerts.insert(tempGraph[1][pIt.first]); - } + partVerts.insert(tempGraph[1][pIt.first]); } - for (auto &pIt : periodicFaces) + } + for (auto &pIt : periodicFaces) + { + if (graph[2].count(pIt.first) == 0) { - if (graph[2].count(pIt.first) == 0) - { - partVerts.insert(tempGraph[2][pIt.first]); - } + partVerts.insert(tempGraph[2][pIt.first]); } } + } - int nGraphVerts = 
tempGraphVertId; - Array perm(nGraphVerts); - Array iperm(nGraphVerts); - Array vwgts(nGraphVerts); - ASSERTL1(vwgts_map.size() == nGraphVerts, "Non matching dimensions"); - for (i = 0; i < nGraphVerts; ++i) - { - vwgts[i] = vwgts_map[i]; - } + int nGraphVerts = tempGraphVertId; + Array perm(nGraphVerts); + Array iperm(nGraphVerts); + Array vwgts(nGraphVerts); + ASSERTL1(vwgts_map.size() == nGraphVerts, "Non matching dimensions"); + for (i = 0; i < nGraphVerts; ++i) + { + vwgts[i] = vwgts_map[i]; + } - if (nGraphVerts) + if (nGraphVerts) + { + switch (m_solnType) { - switch (m_solnType) + case eDirectFullMatrix: + case eIterativeFull: + case eIterativeStaticCond: + case ePETScStaticCond: + case ePETScFullMatrix: + case eSaenaStaticCond: + case eSaenaFullMatrix: + case eXxtFullMatrix: + case eXxtStaticCond: { - case eDirectFullMatrix: - case eIterativeFull: - case eIterativeStaticCond: - case ePETScStaticCond: - case ePETScFullMatrix: - case eSaenaStaticCond: - case eSaenaFullMatrix: - case eXxtFullMatrix: - case eXxtStaticCond: - { - NoReordering(boostGraphObj,perm,iperm); - break; - } + NoReordering(boostGraphObj, perm, iperm); + break; + } - case eDirectStaticCond: - { - CuthillMckeeReordering(boostGraphObj,perm,iperm); - break; - } + case eDirectStaticCond: + { + CuthillMckeeReordering(boostGraphObj, perm, iperm); + break; + } - case ePETScMultiLevelStaticCond: - case eDirectMultiLevelStaticCond: - case eIterativeMultiLevelStaticCond: - case eSaenaMultiLevelStaticCond: - case eXxtMultiLevelStaticCond: - { - MultiLevelBisectionReordering(boostGraphObj, perm, - iperm, bottomUpGraph, - partVerts, mdswitch); - break; - } - default: - { - ASSERTL0( - false, - "Unrecognised solution type: " + - std::string(GlobalSysSolnTypeMap[m_solnType])); - } - } - } - - // For parallel multi-level static condensation determine the lowest - // static condensation level amongst processors. 
- if ((m_solnType == eDirectMultiLevelStaticCond || - m_solnType == ePETScMultiLevelStaticCond || - m_solnType == eIterativeMultiLevelStaticCond || - m_solnType == eXxtMultiLevelStaticCond) && bottomUpGraph) - { - m_lowestStaticCondLevel = bottomUpGraph->GetNlevels()-1; - vComm->AllReduce(m_lowestStaticCondLevel, - LibUtilities::ReduceMax); - } - else - { - m_lowestStaticCondLevel = 0; - } - - /** - * STEP 4: Fill the #graph[0] and - * #graph[1] with the optimal ordering from boost. - */ - for(auto &mapIt : tempGraph[0]) - { - graph[0][mapIt.first] = iperm[mapIt.second] + graphVertId; - } - for(auto &mapIt : tempGraph[1]) + case ePETScMultiLevelStaticCond: + case eDirectMultiLevelStaticCond: + case eIterativeMultiLevelStaticCond: + case eSaenaMultiLevelStaticCond: + case eXxtMultiLevelStaticCond: { - graph[1][mapIt.first] = iperm[mapIt.second] + graphVertId; + MultiLevelBisectionReordering(boostGraphObj, perm, iperm, + bottomUpGraph, partVerts, + mdswitch); + break; } - for(auto &mapIt : tempGraph[2]) + default: { - graph[2][mapIt.first] = iperm[mapIt.second] + graphVertId; + ASSERTL0(false, + "Unrecognised solution type: " + + std::string(GlobalSysSolnTypeMap[m_solnType])); } - - return nGraphVerts; + } } - /** - * - */ - AssemblyMapCG::AssemblyMapCG( - const LibUtilities::SessionReaderSharedPtr &pSession, - const int numLocalCoeffs, const ExpList &locExp, - const BndCondExp &bndCondExp, const BndCond &bndConditions, - const bool checkIfSystemSingular, const std::string variable, - const PeriodicMap &periodicVerts, const PeriodicMap &periodicEdges, - const PeriodicMap &periodicFaces) - : AssemblyMap(pSession, locExp.GetComm(), variable) + // For parallel multi-level static condensation determine the lowest + // static condensation level amongst processors. 
+ if ((m_solnType == eDirectMultiLevelStaticCond || + m_solnType == ePETScMultiLevelStaticCond || + m_solnType == eIterativeMultiLevelStaticCond || + m_solnType == eXxtMultiLevelStaticCond) && + bottomUpGraph) { - int i, j, k; - int p, q, numModes0, numModes1; - int cnt = 0; - int meshVertId, meshEdgeId, meshEdgeId2, meshFaceId, meshFaceId2; - int globalId; - int nEdgeInteriorCoeffs; - int firstNonDirGraphVertId; - LibUtilities::CommSharedPtr vComm = m_comm->GetRowComm(); - LocalRegions::ExpansionSharedPtr exp, bndExp; - StdRegions::Orientation edgeOrient; - StdRegions::Orientation faceOrient; - Array edgeInteriorMap; - Array edgeInteriorSign; - Array faceInteriorMap; - Array faceInteriorSign; - - const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp()); - - bool verbose = m_session->DefinesCmdLineArgument("verbose"); - - m_signChange = false; + m_lowestStaticCondLevel = bottomUpGraph->GetNlevels() - 1; + vComm->AllReduce(m_lowestStaticCondLevel, LibUtilities::ReduceMax); + } + else + { + m_lowestStaticCondLevel = 0; + } - // Stores vertex, edge and face reordered vertices. - DofGraph graph(3); - DofGraph dofs(3); - vector> faceModes(2); - map faceType; + /** + * STEP 4: Fill the #graph[0] and + * #graph[1] with the optimal ordering from boost. 
+ */ + for (auto &mapIt : tempGraph[0]) + { + graph[0][mapIt.first] = iperm[mapIt.second] + graphVertId; + } + for (auto &mapIt : tempGraph[1]) + { + graph[1][mapIt.first] = iperm[mapIt.second] + graphVertId; + } + for (auto &mapIt : tempGraph[2]) + { + graph[2][mapIt.first] = iperm[mapIt.second] + graphVertId; + } - set extraDirVerts, extraDirEdges; - BottomUpSubStructuredGraphSharedPtr bottomUpGraph; + return nGraphVerts; +} + +/** + * + */ +AssemblyMapCG::AssemblyMapCG( + const LibUtilities::SessionReaderSharedPtr &pSession, + const int numLocalCoeffs, const ExpList &locExp, + const BndCondExp &bndCondExp, const BndCond &bndConditions, + const bool checkIfSystemSingular, const std::string variable, + const PeriodicMap &periodicVerts, const PeriodicMap &periodicEdges, + const PeriodicMap &periodicFaces) + : AssemblyMap(pSession, locExp.GetComm(), variable) +{ + int i, j, k; + int p, q, numModes0, numModes1; + int cnt = 0; + int meshVertId, meshEdgeId, meshEdgeId2, meshFaceId, meshFaceId2; + int globalId; + int nEdgeInteriorCoeffs; + int firstNonDirGraphVertId; + LibUtilities::CommSharedPtr vComm = m_comm->GetRowComm(); + LocalRegions::ExpansionSharedPtr exp, bndExp; + StdRegions::Orientation edgeOrient; + StdRegions::Orientation faceOrient; + Array edgeInteriorMap; + Array edgeInteriorSign; + Array faceInteriorMap; + Array faceInteriorSign; + + const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp()); + + bool verbose = m_session->DefinesCmdLineArgument("verbose"); + + m_signChange = false; + + // Stores vertex, edge and face reordered vertices. + DofGraph graph(3); + DofGraph dofs(3); + vector> faceModes(2); + map faceType; + + set extraDirVerts, extraDirEdges; + BottomUpSubStructuredGraphSharedPtr bottomUpGraph; + + // Construct list of number of degrees of freedom for each vertex, + // edge and face. 
+ for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; - // Construct list of number of degrees of freedom for each vertex, - // edge and face. - for (i = 0; i < locExpVector.size(); ++i) + for (j = 0; j < exp->GetNverts(); ++j) { - exp = locExpVector[i]; + dofs[0][exp->GetGeom()->GetVid(j)] = 1; + } - for (j = 0; j < exp->GetNverts(); ++j) + for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + { + int nEdgeInt; + if (exp->GetGeom()->GetNumFaces()) { - dofs[0][exp->GetGeom()->GetVid(j)] = 1; + nEdgeInt = + exp->as()->GetEdgeNcoeffs(j) - 2; } - - for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + else { - int nEdgeInt; - if (exp->GetGeom()->GetNumFaces()) - { - nEdgeInt = - exp->as()->GetEdgeNcoeffs( - j) - - 2; - } - else - { - nEdgeInt = exp->GetTraceNcoeffs(j) - 2; - } + nEdgeInt = exp->GetTraceNcoeffs(j) - 2; + } - if (dofs[1].count(exp->GetGeom()->GetEid(j)) > 0) - { - if (dofs[1][exp->GetGeom()->GetEid(j)] != nEdgeInt) - { - ASSERTL0((exp->GetBasisType(0) == - LibUtilities::eModified_A) || - (exp->GetBasisType(1) == - LibUtilities::eModified_B) || - (exp->GetBasisType(2) == - LibUtilities::eModified_C) || - (exp->GetBasisType(2) == - LibUtilities::eModifiedPyr_C), - "CG with variable order only available with " - "modal expansion"); - } - dofs[1][exp->GetGeom()->GetEid(j)] = - min(dofs[1][exp->GetGeom()->GetEid(j)], nEdgeInt); - } - else + if (dofs[1].count(exp->GetGeom()->GetEid(j)) > 0) + { + if (dofs[1][exp->GetGeom()->GetEid(j)] != nEdgeInt) { - dofs[1][exp->GetGeom()->GetEid(j)] = nEdgeInt; + ASSERTL0( + (exp->GetBasisType(0) == LibUtilities::eModified_A) || + (exp->GetBasisType(1) == + LibUtilities::eModified_B) || + (exp->GetBasisType(2) == + LibUtilities::eModified_C) || + (exp->GetBasisType(2) == + LibUtilities::eModifiedPyr_C), + "CG with variable order only available with " + "modal expansion"); } + dofs[1][exp->GetGeom()->GetEid(j)] = + min(dofs[1][exp->GetGeom()->GetEid(j)], nEdgeInt); + } + else + { + 
dofs[1][exp->GetGeom()->GetEid(j)] = nEdgeInt; } + } - for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + { + faceOrient = exp->GetGeom()->GetForient(j); + meshFaceId = exp->GetGeom()->GetFid(j); + exp->GetTraceNumModes(j, numModes0, numModes1, faceOrient); + + if (faceModes[0].count(meshFaceId) > 0) { - faceOrient = exp->GetGeom()->GetForient(j); - meshFaceId = exp->GetGeom()->GetFid(j); - exp->GetTraceNumModes(j, numModes0, numModes1, faceOrient); + faceModes[0][meshFaceId] = + min(faceModes[0][meshFaceId], numModes0); - if (faceModes[0].count(meshFaceId) > 0) - { - faceModes[0][meshFaceId] = - min(faceModes[0][meshFaceId], numModes0); + faceModes[1][meshFaceId] = + min(faceModes[1][meshFaceId], numModes1); + } + else + { + faceModes[0][meshFaceId] = numModes0; + faceModes[1][meshFaceId] = numModes1; - faceModes[1][meshFaceId] = - min(faceModes[1][meshFaceId], numModes1); - } - else - { - faceModes[0][meshFaceId] = numModes0; - faceModes[1][meshFaceId] = numModes1; - - // Get shape of this face - SpatialDomains::Geometry3DSharedPtr geom; - geom = - std::dynamic_pointer_cast( - exp->GetGeom()); - faceType[meshFaceId] = geom->GetFace(j)->GetShapeType(); - } + // Get shape of this face + SpatialDomains::Geometry3DSharedPtr geom; + geom = std::dynamic_pointer_cast( + exp->GetGeom()); + faceType[meshFaceId] = geom->GetFace(j)->GetShapeType(); } } + } - // Add non-local periodic dofs to the map - for (auto &pIt : periodicEdges) + // Add non-local periodic dofs to the map + for (auto &pIt : periodicEdges) + { + for (i = 0; i < pIt.second.size(); ++i) { - for (i = 0; i < pIt.second.size(); ++i) + meshEdgeId2 = pIt.second[i].id; + if (dofs[1].count(meshEdgeId2) == 0) { - meshEdgeId2 = pIt.second[i].id; - if (dofs[1].count(meshEdgeId2) == 0) - { - dofs[1][meshEdgeId2] = 1e6; - } + dofs[1][meshEdgeId2] = 1e6; } } - for (auto &pIt : periodicFaces) + } + for (auto &pIt : periodicFaces) + { + for (i = 0; i < 
pIt.second.size(); ++i) { - for (i = 0; i < pIt.second.size(); ++i) + meshFaceId2 = pIt.second[i].id; + if (faceModes[0].count(meshFaceId2) == 0) { - meshFaceId2 = pIt.second[i].id; - if (faceModes[0].count(meshFaceId2) == 0) - { - faceModes[0][meshFaceId2] = 1e6; - faceModes[1][meshFaceId2] = 1e6; - } + faceModes[0][meshFaceId2] = 1e6; + faceModes[1][meshFaceId2] = 1e6; } } + } - // Now use information from all partitions to determine the correct - // size + // Now use information from all partitions to determine the correct + // size - // edges - Array edgeId(dofs[1].size()); - Array edgeDof(dofs[1].size()); - i = 0; - for (auto &dofIt : dofs[1]) - { - edgeId[i] = dofIt.first + 1; - edgeDof[i++] = (NekDouble)dofIt.second; - } - Gs::gs_data *tmp = Gs::Init(edgeId, vComm, verbose); - Gs::Gather(edgeDof, Gs::gs_min, tmp); - Gs::Finalise(tmp); - for (i = 0; i < dofs[1].size(); i++) + // edges + Array edgeId(dofs[1].size()); + Array edgeDof(dofs[1].size()); + i = 0; + for (auto &dofIt : dofs[1]) + { + edgeId[i] = dofIt.first + 1; + edgeDof[i++] = (NekDouble)dofIt.second; + } + Gs::gs_data *tmp = Gs::Init(edgeId, vComm, verbose); + Gs::Gather(edgeDof, Gs::gs_min, tmp); + Gs::Finalise(tmp); + for (i = 0; i < dofs[1].size(); i++) + { + dofs[1][edgeId[i] - 1] = (int)(edgeDof[i] + 0.5); + } + // Periodic edges + for (auto &pIt : periodicEdges) + { + meshEdgeId = pIt.first; + for (i = 0; i < pIt.second.size(); ++i) { - dofs[1][edgeId[i] - 1] = (int)(edgeDof[i] + 0.5); + meshEdgeId2 = pIt.second[i].id; + if (dofs[1][meshEdgeId2] < dofs[1][meshEdgeId]) + { + dofs[1][meshEdgeId] = dofs[1][meshEdgeId2]; + } } - // Periodic edges - for (auto &pIt : periodicEdges) + } + // faces + Array faceId(faceModes[0].size()); + Array faceP(faceModes[0].size()); + Array faceQ(faceModes[0].size()); + + i = 0; + for (auto dofIt = faceModes[0].begin(), dofIt2 = faceModes[1].begin(); + dofIt != faceModes[0].end(); dofIt++, dofIt2++, i++) + { + faceId[i] = dofIt->first + 1; + faceP[i] = 
(NekDouble)dofIt->second; + faceQ[i] = (NekDouble)dofIt2->second; + } + Gs::gs_data *tmp2 = Gs::Init(faceId, vComm, verbose); + Gs::Gather(faceP, Gs::gs_min, tmp2); + Gs::Gather(faceQ, Gs::gs_min, tmp2); + Gs::Finalise(tmp2); + for (i = 0; i < faceModes[0].size(); i++) + { + faceModes[0][faceId[i] - 1] = (int)(faceP[i] + 0.5); + faceModes[1][faceId[i] - 1] = (int)(faceQ[i] + 0.5); + } + // Periodic faces + for (auto &pIt : periodicFaces) + { + meshFaceId = pIt.first; + for (i = 0; i < pIt.second.size(); ++i) { - meshEdgeId = pIt.first; - for (i = 0; i < pIt.second.size(); ++i) + meshFaceId2 = pIt.second[i].id; + if (faceModes[0][meshFaceId2] < faceModes[0][meshFaceId]) { - meshEdgeId2 = pIt.second[i].id; - if (dofs[1][meshEdgeId2] < dofs[1][meshEdgeId]) - { - dofs[1][meshEdgeId] = dofs[1][meshEdgeId2]; - } + faceModes[0][meshFaceId] = faceModes[0][meshFaceId2]; + } + if (faceModes[1][meshFaceId2] < faceModes[1][meshFaceId]) + { + faceModes[1][meshFaceId] = faceModes[1][meshFaceId2]; } } - // faces - Array faceId(faceModes[0].size()); - Array faceP(faceModes[0].size()); - Array faceQ(faceModes[0].size()); - - i = 0; - for (auto dofIt = faceModes[0].begin(), dofIt2 = faceModes[1].begin(); - dofIt != faceModes[0].end(); dofIt++, dofIt2++, i++) + } + // Calculate number of dof in each face + int P, Q; + for (i = 0; i < faceModes[0].size(); i++) + { + P = faceModes[0][faceId[i] - 1]; + Q = faceModes[1][faceId[i] - 1]; + if (faceType[faceId[i] - 1] == LibUtilities::eQuadrilateral) { - faceId[i] = dofIt->first + 1; - faceP[i] = (NekDouble)dofIt->second; - faceQ[i] = (NekDouble)dofIt2->second; + // Quad face + dofs[2][faceId[i] - 1] = + LibUtilities::StdQuadData::getNumberOfCoefficients(P, Q) - + LibUtilities::StdQuadData::getNumberOfBndCoefficients(P, Q); } - Gs::gs_data *tmp2 = Gs::Init(faceId, vComm, verbose); - Gs::Gather(faceP, Gs::gs_min, tmp2); - Gs::Gather(faceQ, Gs::gs_min, tmp2); - Gs::Finalise(tmp2); - for (i = 0; i < faceModes[0].size(); i++) + else { - 
faceModes[0][faceId[i] - 1] = (int)(faceP[i] + 0.5); - faceModes[1][faceId[i] - 1] = (int)(faceQ[i] + 0.5); + // Tri face + dofs[2][faceId[i] - 1] = + LibUtilities::StdTriData::getNumberOfCoefficients(P, Q) - + LibUtilities::StdTriData::getNumberOfBndCoefficients(P, Q); } - // Periodic faces - for (auto &pIt : periodicFaces) + } + + Array bndCondVec(1, bndConditions); + + // Note that nExtraDirichlet is not used in the logic below; it just + // needs to be set so that the coupled solver in + // IncNavierStokesSolver can work. + int nExtraDirichlet; + int mdswitch; + m_session->LoadParameter("MDSwitch", mdswitch, 10); + + int nGraphVerts = CreateGraph( + locExp, bndCondExp, bndCondVec, checkIfSystemSingular, periodicVerts, + periodicEdges, periodicFaces, graph, bottomUpGraph, extraDirVerts, + extraDirEdges, firstNonDirGraphVertId, nExtraDirichlet, mdswitch); + + /* + * Set up an array which contains the offset information of the + * different graph vertices. + * + * This basically means to identify to how many global degrees of + * freedom the individual graph vertices correspond. Obviously, + * the graph vertices corresponding to the mesh-vertices account + * for a single global DOF. However, the graph vertices + * corresponding to the element edges correspond to N-2 global DOF + * where N is equal to the number of boundary modes on this edge. 
+ */ + Array graphVertOffset( + graph[0].size() + graph[1].size() + graph[2].size() + 1, 0); + + graphVertOffset[0] = 0; + + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + + for (j = 0; j < exp->GetNverts(); ++j) { - meshFaceId = pIt.first; - for (i = 0; i < pIt.second.size(); ++i) - { - meshFaceId2 = pIt.second[i].id; - if (faceModes[0][meshFaceId2] < faceModes[0][meshFaceId]) - { - faceModes[0][meshFaceId] = faceModes[0][meshFaceId2]; - } - if (faceModes[1][meshFaceId2] < faceModes[1][meshFaceId]) - { - faceModes[1][meshFaceId] = faceModes[1][meshFaceId2]; - } - } + meshVertId = exp->GetGeom()->GetVid(j); + graphVertOffset[graph[0][meshVertId] + 1] = 1; } - // Calculate number of dof in each face - int P, Q; - for (i = 0; i < faceModes[0].size(); i++) + + for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) { - P = faceModes[0][faceId[i] - 1]; - Q = faceModes[1][faceId[i] - 1]; - if (faceType[faceId[i] - 1] == LibUtilities::eQuadrilateral) + if (exp->GetGeom()->GetNumFaces()) // 3D version { - // Quad face - dofs[2][faceId[i] - 1] = - LibUtilities::StdQuadData::getNumberOfCoefficients(P, Q) - - LibUtilities::StdQuadData::getNumberOfBndCoefficients(P, Q); + nEdgeInteriorCoeffs = + exp->as()->GetEdgeNcoeffs(j) - 2; } else { - // Tri face - dofs[2][faceId[i] - 1] = - LibUtilities::StdTriData::getNumberOfCoefficients(P, Q) - - LibUtilities::StdTriData::getNumberOfBndCoefficients(P, Q); + nEdgeInteriorCoeffs = exp->GetTraceNcoeffs(j) - 2; } - } - - Array bndCondVec(1, bndConditions); + meshEdgeId = exp->GetGeom()->GetEid(j); + graphVertOffset[graph[1][meshEdgeId] + 1] = dofs[1][meshEdgeId]; - // Note that nExtraDirichlet is not used in the logic below; it just - // needs to be set so that the coupled solver in - // IncNavierStokesSolver can work. 
- int nExtraDirichlet; - int mdswitch; - m_session->LoadParameter("MDSwitch", mdswitch, 10); + // Need a sign vector for modal expansions if nEdgeCoeffs + // >=3 (not 4 because of variable order case) + if (nEdgeInteriorCoeffs && + (exp->GetBasisType(0) == LibUtilities::eModified_A)) + { + m_signChange = true; + } + } - int nGraphVerts = - CreateGraph(locExp, bndCondExp, bndCondVec, checkIfSystemSingular, - periodicVerts, periodicEdges, periodicFaces, graph, - bottomUpGraph, extraDirVerts, extraDirEdges, - firstNonDirGraphVertId, nExtraDirichlet, mdswitch); + for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + { + meshFaceId = exp->GetGeom()->GetFid(j); + graphVertOffset[graph[2][meshFaceId] + 1] = dofs[2][meshFaceId]; + } + } - /* - * Set up an array which contains the offset information of the - * different graph vertices. - * - * This basically means to identify to how many global degrees of - * freedom the individual graph vertices correspond. Obviously, - * the graph vertices corresponding to the mesh-vertices account - * for a single global DOF. However, the graph vertices - * corresponding to the element edges correspond to N-2 global DOF - * where N is equal to the number of boundary modes on this edge. 
- */ - Array graphVertOffset( - graph[0].size() + graph[1].size() + graph[2].size() + 1, 0); + for (i = 1; i < graphVertOffset.size(); i++) + { + graphVertOffset[i] += graphVertOffset[i - 1]; + } - graphVertOffset[0] = 0; + // Allocate the proper amount of space for the class-data + m_numLocalCoeffs = numLocalCoeffs; + m_numGlobalDirBndCoeffs = graphVertOffset[firstNonDirGraphVertId]; + m_localToGlobalMap = Array(m_numLocalCoeffs, -1); + m_localToGlobalBndMap = Array(m_numLocalBndCoeffs, -1); + m_localToLocalBndMap = Array(m_numLocalBndCoeffs, -1); + m_localToLocalIntMap = + Array(m_numLocalCoeffs - m_numLocalBndCoeffs, -1); + m_bndCondCoeffsToLocalCoeffsMap = + Array(m_numLocalBndCondCoeffs, -1); + + // If required, set up the sign-vector + if (m_signChange) + { + m_localToGlobalSign = Array(m_numLocalCoeffs, 1.0); + m_localToGlobalBndSign = + Array(m_numLocalBndCoeffs, 1.0); + m_bndCondCoeffsToLocalCoeffsSign = + Array(m_numLocalBndCondCoeffs, 1.0); + } - for (i = 0; i < locExpVector.size(); ++i) - { - exp = locExpVector[i]; + m_staticCondLevel = 0; + m_numPatches = locExpVector.size(); + m_numLocalBndCoeffsPerPatch = Array(m_numPatches); + m_numLocalIntCoeffsPerPatch = Array(m_numPatches); + for (i = 0; i < m_numPatches; ++i) + { + m_numLocalBndCoeffsPerPatch[i] = + (unsigned int)locExpVector[i]->NumBndryCoeffs(); + m_numLocalIntCoeffsPerPatch[i] = + (unsigned int)locExpVector[i]->GetNcoeffs() - + locExpVector[i]->NumBndryCoeffs(); + } - for (j = 0; j < exp->GetNverts(); ++j) - { - meshVertId = exp->GetGeom()->GetVid(j); - graphVertOffset[graph[0][meshVertId] + 1] = 1; - } + /** + * STEP 6: Now, all ingredients are ready to set up the actual + * local to global mapping. + * + * The remainder of the map consists of the element-interior + * degrees of freedom. This leads to the block-diagonal submatrix + * as each element-interior mode is globally orthogonal to modes + * in all other elements. 
+ */ + cnt = 0; + + // Loop over all the elements in the domain + int cntbdry = 0; + int cntint = 0; + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + cnt = locExp.GetCoeff_Offset(i); - for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) - { - if (exp->GetGeom()->GetNumFaces()) // 3D version - { - nEdgeInteriorCoeffs = - exp->as()->GetEdgeNcoeffs( - j) - - 2; - } - else - { - nEdgeInteriorCoeffs = exp->GetTraceNcoeffs(j) - 2; - } - meshEdgeId = exp->GetGeom()->GetEid(j); - graphVertOffset[graph[1][meshEdgeId] + 1] = dofs[1][meshEdgeId]; + int nbdry = exp->NumBndryCoeffs(); + int nint = exp->GetNcoeffs() - nbdry; - // Need a sign vector for modal expansions if nEdgeCoeffs - // >=3 (not 4 because of variable order case) - if (nEdgeInteriorCoeffs && - (exp->GetBasisType(0) == LibUtilities::eModified_A)) - { - m_signChange = true; - } - } + Array bmap(nbdry); + Array imap(nint); - for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) - { - meshFaceId = exp->GetGeom()->GetFid(j); - graphVertOffset[graph[2][meshFaceId] + 1] = dofs[2][meshFaceId]; - } - } + exp->GetBoundaryMap(bmap); + exp->GetInteriorMap(imap); - for (i = 1; i < graphVertOffset.size(); i++) + for (j = 0; j < nbdry; ++j) { - graphVertOffset[i] += graphVertOffset[i - 1]; + m_localToLocalBndMap[cntbdry++] = cnt + bmap[j]; } - // Allocate the proper amount of space for the class-data - m_numLocalCoeffs = numLocalCoeffs; - m_numGlobalDirBndCoeffs = graphVertOffset[firstNonDirGraphVertId]; - m_localToGlobalMap = Array(m_numLocalCoeffs, -1); - m_localToGlobalBndMap = Array(m_numLocalBndCoeffs, -1); - m_localToLocalBndMap = Array(m_numLocalBndCoeffs, -1); - m_localToLocalIntMap = - Array(m_numLocalCoeffs - m_numLocalBndCoeffs, -1); - m_bndCondCoeffsToLocalCoeffsMap = - Array(m_numLocalBndCondCoeffs, -1); - - // If required, set up the sign-vector - if (m_signChange) + for (j = 0; j < nint; ++j) { - m_localToGlobalSign = Array(m_numLocalCoeffs, 1.0); - m_localToGlobalBndSign = - 
Array(m_numLocalBndCoeffs, 1.0); - m_bndCondCoeffsToLocalCoeffsSign = - Array(m_numLocalBndCondCoeffs, 1.0); + m_localToLocalIntMap[cntint++] = cnt + imap[j]; } - m_staticCondLevel = 0; - m_numPatches = locExpVector.size(); - m_numLocalBndCoeffsPerPatch = Array(m_numPatches); - m_numLocalIntCoeffsPerPatch = Array(m_numPatches); - for (i = 0; i < m_numPatches; ++i) + for (j = 0; j < exp->GetNverts(); ++j) { - m_numLocalBndCoeffsPerPatch[i] = - (unsigned int)locExpVector[i]->NumBndryCoeffs(); - m_numLocalIntCoeffsPerPatch[i] = - (unsigned int)locExpVector[i]->GetNcoeffs() - - locExpVector[i]->NumBndryCoeffs(); - } + meshVertId = exp->GetGeom()->GetVid(j); - /** - * STEP 6: Now, all ingredients are ready to set up the actual - * local to global mapping. - * - * The remainder of the map consists of the element-interior - * degrees of freedom. This leads to the block-diagonal submatrix - * as each element-interior mode is globally orthogonal to modes - * in all other elements. - */ - cnt = 0; + // Set the global DOF for vertex j of element i + m_localToGlobalMap[cnt + exp->GetVertexMap(j)] = + graphVertOffset[graph[0][meshVertId]]; + } - // Loop over all the elements in the domain - int cntbdry = 0; - int cntint = 0; - for (i = 0; i < locExpVector.size(); ++i) + for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) { - exp = locExpVector[i]; - cnt = locExp.GetCoeff_Offset(i); - - int nbdry = exp->NumBndryCoeffs(); - int nint = exp->GetNcoeffs() - nbdry; - - Array bmap(nbdry); - Array imap(nint); + if (exp->GetGeom()->GetNumFaces()) + { + nEdgeInteriorCoeffs = + exp->as()->GetEdgeNcoeffs(j) - 2; + } + else + { + nEdgeInteriorCoeffs = exp->GetTraceNcoeffs(j) - 2; + } + edgeOrient = exp->GetGeom()->GetEorient(j); + meshEdgeId = exp->GetGeom()->GetEid(j); - exp->GetBoundaryMap(bmap); - exp->GetInteriorMap(imap); + auto pIt = periodicEdges.find(meshEdgeId); - for (j = 0; j < nbdry; ++j) + // See if this edge is periodic. 
If it is, then we map all + // edges to the one with lowest ID, and align all + // coefficients to this edge orientation. + if (pIt != periodicEdges.end()) { - m_localToLocalBndMap[cntbdry++] = cnt + bmap[j]; + pair idOrient = + DeterminePeriodicEdgeOrientId(meshEdgeId, edgeOrient, + pIt->second); + edgeOrient = idOrient.second; } - for (j = 0; j < nint; ++j) + if (exp->GetGeom()->GetNumFaces()) { - m_localToLocalIntMap[cntint++] = cnt + imap[j]; + exp->as() + ->GetEdgeInteriorToElementMap(j, edgeInteriorMap, + edgeInteriorSign, edgeOrient); } - - for (j = 0; j < exp->GetNverts(); ++j) + else { - meshVertId = exp->GetGeom()->GetVid(j); - - // Set the global DOF for vertex j of element i - m_localToGlobalMap[cnt + exp->GetVertexMap(j)] = - graphVertOffset[graph[0][meshVertId]]; + exp->GetTraceInteriorToElementMap(j, edgeInteriorMap, + edgeInteriorSign, edgeOrient); } - for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + // Set the global DOF's for the interior modes of edge j + for (k = 0; k < dofs[1][meshEdgeId]; ++k) { - if (exp->GetGeom()->GetNumFaces()) - { - nEdgeInteriorCoeffs = - exp->as()->GetEdgeNcoeffs( - j) - - 2; - } - else - { - nEdgeInteriorCoeffs = exp->GetTraceNcoeffs(j) - 2; - } - edgeOrient = exp->GetGeom()->GetEorient(j); - meshEdgeId = exp->GetGeom()->GetEid(j); - - auto pIt = periodicEdges.find(meshEdgeId); - - // See if this edge is periodic. If it is, then we map all - // edges to the one with lowest ID, and align all - // coefficients to this edge orientation. 
- if (pIt != periodicEdges.end()) - { - pair idOrient = - DeterminePeriodicEdgeOrientId(meshEdgeId, edgeOrient, - pIt->second); - edgeOrient = idOrient.second; - } - - if (exp->GetGeom()->GetNumFaces()) - { - exp->as() - ->GetEdgeInteriorToElementMap( - j, edgeInteriorMap, edgeInteriorSign, edgeOrient); - } - else - { - exp->GetTraceInteriorToElementMap( - j, edgeInteriorMap, edgeInteriorSign, edgeOrient); - } + m_localToGlobalMap[cnt + edgeInteriorMap[k]] = + graphVertOffset[graph[1][meshEdgeId]] + k; + } + for (k = dofs[1][meshEdgeId]; k < nEdgeInteriorCoeffs; ++k) + { + m_localToGlobalMap[cnt + edgeInteriorMap[k]] = 0; + } - // Set the global DOF's for the interior modes of edge j + // Fill the sign vector if required + if (m_signChange) + { for (k = 0; k < dofs[1][meshEdgeId]; ++k) { - m_localToGlobalMap[cnt + edgeInteriorMap[k]] = - graphVertOffset[graph[1][meshEdgeId]] + k; + m_localToGlobalSign[cnt + edgeInteriorMap[k]] = + (NekDouble)edgeInteriorSign[k]; } for (k = dofs[1][meshEdgeId]; k < nEdgeInteriorCoeffs; ++k) { - m_localToGlobalMap[cnt + edgeInteriorMap[k]] = 0; - } - - // Fill the sign vector if required - if (m_signChange) - { - for (k = 0; k < dofs[1][meshEdgeId]; ++k) - { - m_localToGlobalSign[cnt + edgeInteriorMap[k]] = - (NekDouble)edgeInteriorSign[k]; - } - for (k = dofs[1][meshEdgeId]; k < nEdgeInteriorCoeffs; ++k) - { - m_localToGlobalSign[cnt + edgeInteriorMap[k]] = 0.0; - } + m_localToGlobalSign[cnt + edgeInteriorMap[k]] = 0.0; } } + } - for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) - { - faceOrient = exp->GetGeom()->GetForient(j); - meshFaceId = exp->GetGeom()->GetFid(j); + for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + { + faceOrient = exp->GetGeom()->GetForient(j); + meshFaceId = exp->GetGeom()->GetFid(j); - auto pIt = periodicFaces.find(meshFaceId); + auto pIt = periodicFaces.find(meshFaceId); - if (pIt != periodicFaces.end() && - meshFaceId == min(meshFaceId, pIt->second[0].id)) - { - faceOrient = 
DeterminePeriodicFaceOrient( - faceOrient, pIt->second[0].orient); - } + if (pIt != periodicFaces.end() && + meshFaceId == min(meshFaceId, pIt->second[0].id)) + { + faceOrient = DeterminePeriodicFaceOrient(faceOrient, + pIt->second[0].orient); + } - exp->GetTraceInteriorToElementMap(j, faceInteriorMap, - faceInteriorSign, faceOrient); + exp->GetTraceInteriorToElementMap(j, faceInteriorMap, + faceInteriorSign, faceOrient); - // Set the global DOF's for the interior modes of face j - exp->GetTraceNumModes(j, numModes0, numModes1, faceOrient); - switch (faceType[meshFaceId]) + // Set the global DOF's for the interior modes of face j + exp->GetTraceNumModes(j, numModes0, numModes1, faceOrient); + switch (faceType[meshFaceId]) + { + case LibUtilities::eQuadrilateral: { - case LibUtilities::eQuadrilateral: + int kLoc = 0; + k = 0; + for (q = 2; q < numModes1; q++) { - int kLoc=0; - k = 0; - for( q = 2; q < numModes1; q++) + for (p = 2; p < numModes0; p++) { - for( p = 2; p < numModes0; p++) + if ((p < faceModes[0][meshFaceId]) && + (q < faceModes[1][meshFaceId])) { - if( (p < faceModes[0][meshFaceId]) && - (q < faceModes[1][meshFaceId])) + m_localToGlobalMap[cnt + + faceInteriorMap[kLoc]] = + graphVertOffset[graph[2][meshFaceId]] + k; + if (m_signChange) { - m_localToGlobalMap[cnt+faceInteriorMap[kLoc]] = - graphVertOffset[graph[2][meshFaceId]]+k; - if(m_signChange) - { - m_localToGlobalSign[cnt+faceInteriorMap[kLoc]] = - (NekDouble) faceInteriorSign[kLoc]; - } - k++; + m_localToGlobalSign[cnt + + faceInteriorMap[kLoc]] = + (NekDouble)faceInteriorSign[kLoc]; } - else + k++; + } + else + { + m_localToGlobalMap[cnt + + faceInteriorMap[kLoc]] = 0; + if (m_signChange) { - m_localToGlobalMap[cnt+faceInteriorMap[kLoc]] = 0; - if(m_signChange) - { - m_localToGlobalSign[cnt+faceInteriorMap[kLoc]] = 0.0; - } + m_localToGlobalSign[cnt + + faceInteriorMap[kLoc]] = + 0.0; } - kLoc++; } + kLoc++; } } - break; - case LibUtilities::eTriangle: + } + break; + case 
LibUtilities::eTriangle: + { + int kLoc = 0; + k = 0; + for (p = 2; p < numModes0; p++) { - int kLoc=0; - k = 0; - for( p = 2; p < numModes0; p++) + for (q = 1; q < numModes1 - p; q++) { - for( q = 1; q < numModes1-p; q++) + if ((p < faceModes[0][meshFaceId]) && + (p + q < faceModes[1][meshFaceId])) { - if( (p < faceModes[0][meshFaceId]) && - (p+q < faceModes[1][meshFaceId])) + m_localToGlobalMap[cnt + + faceInteriorMap[kLoc]] = + graphVertOffset[graph[2][meshFaceId]] + k; + if (m_signChange) { - m_localToGlobalMap[cnt+faceInteriorMap[kLoc]] = - graphVertOffset[graph[2][meshFaceId]]+k; - if(m_signChange) - { - m_localToGlobalSign[cnt+faceInteriorMap[kLoc]] = - (NekDouble) faceInteriorSign[kLoc]; - } - k++; + m_localToGlobalSign[cnt + + faceInteriorMap[kLoc]] = + (NekDouble)faceInteriorSign[kLoc]; } - else + k++; + } + else + { + m_localToGlobalMap[cnt + + faceInteriorMap[kLoc]] = 0; + if (m_signChange) { - m_localToGlobalMap[cnt+faceInteriorMap[kLoc]] = 0; - if(m_signChange) - { - m_localToGlobalSign[cnt+faceInteriorMap[kLoc]] = 0.0; - } + m_localToGlobalSign[cnt + + faceInteriorMap[kLoc]] = + 0.0; } - kLoc++; } + kLoc++; } } - break; - default: - ASSERTL0(false,"Shape not recognised"); - break; - } } + break; + default: + ASSERTL0(false, "Shape not recognised"); + break; } + } + } - // Set up the mapping for the boundary conditions - // Set up boundary mapping - map > traceToElmtTraceMap; - int id; - - for(cnt = i = 0; i < locExpVector.size(); ++i) - { - exp = locExpVector[i]; + // Set up the mapping for the boundary conditions + // Set up boundary mapping + map> traceToElmtTraceMap; + int id; - for(j = 0; j < exp->GetNtraces(); ++j) - { - id = exp->GetGeom()->GetTid(j); - - traceToElmtTraceMap[id] = pair(i,j); - } - } + for (cnt = i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; - Array maparray; - Array signarray; - map> GloDirBndCoeffToLocalCoeff; - set CoeffOnDirTrace; - - cnt = 0; - int offset = 0; - for(i = 0; i < bndCondExp.size(); i++) - { 
- set foundExtraVerts, foundExtraEdges; - for(j = 0; j < bndCondExp[i]->GetNumElmts(); j++) - { - bndExp = bndCondExp[i]->GetExp(j); - cnt = offset + bndCondExp[i]->GetCoeff_Offset(j); + for (j = 0; j < exp->GetNtraces(); ++j) + { + id = exp->GetGeom()->GetTid(j); - int id = bndExp->GetGeom()->GetGlobalID(); + traceToElmtTraceMap[id] = pair(i, j); + } + } - ASSERTL1(traceToElmtTraceMap.count(id) > 0, - "Failed to find trace id"); + Array maparray; + Array signarray; + map> GloDirBndCoeffToLocalCoeff; + set CoeffOnDirTrace; - int eid = traceToElmtTraceMap[id].first; - int tid = traceToElmtTraceMap[id].second; + cnt = 0; + int offset = 0; + for (i = 0; i < bndCondExp.size(); i++) + { + set foundExtraVerts, foundExtraEdges; + for (j = 0; j < bndCondExp[i]->GetNumElmts(); j++) + { + bndExp = bndCondExp[i]->GetExp(j); + cnt = offset + bndCondExp[i]->GetCoeff_Offset(j); - exp = locExpVector[eid]; - int dim = exp->GetShapeDimension(); - - if(dim == 1) - { - m_bndCondCoeffsToLocalCoeffsMap [cnt] = - locExp.GetCoeff_Offset(eid) + exp->GetVertexMap(tid); - } - else - { - if(dim == 2) - { - exp->GetTraceToElementMap(tid, - maparray,signarray, - exp->GetGeom()-> - GetEorient(tid), - bndExp->GetBasisNumModes(0)); - } - else if (dim == 3) - { - exp->GetTraceToElementMap(tid, maparray,signarray, - exp->GetGeom()-> - GetForient(tid), - bndExp->GetBasisNumModes(0), - bndExp->GetBasisNumModes(1)); - } - - for(k = 0; k < bndExp->GetNcoeffs(); k++) - { - m_bndCondCoeffsToLocalCoeffsMap [cnt+k] = - locExp.GetCoeff_Offset(eid) + maparray[k]; - if(m_signChange) - { - m_bndCondCoeffsToLocalCoeffsSign[cnt+k] = signarray[k]; - } - } + int id = bndExp->GetGeom()->GetGlobalID(); - } + ASSERTL1(traceToElmtTraceMap.count(id) > 0, + "Failed to find trace id"); - // we now need some information to work out how to - // handle vertices and edges that are only just - // touching a dirichlet boundary (and not the - // whole edge/face) - - for(k = 0; k < bndExp->GetNcoeffs(); k++) - { - int locid = 
m_bndCondCoeffsToLocalCoeffsMap [cnt+k]; - int gloid = m_localToGlobalMap[locid]; - NekDouble sign = 1.0; + int eid = traceToElmtTraceMap[id].first; + int tid = traceToElmtTraceMap[id].second; - if(m_signChange) - { - sign = m_bndCondCoeffsToLocalCoeffsSign[cnt+k]; - } + exp = locExpVector[eid]; + int dim = exp->GetShapeDimension(); - if (bndConditions[i]->GetBoundaryConditionType() == - SpatialDomains::eDirichlet) - { - CoeffOnDirTrace.insert(locid); - - // store the local id and sign from global id - // back to local space; - GloDirBndCoeffToLocalCoeff[gloid] = - pair (locid,sign); - } + if (dim == 1) + { + m_bndCondCoeffsToLocalCoeffsMap[cnt] = + locExp.GetCoeff_Offset(eid) + exp->GetVertexMap(tid); + } + else + { + if (dim == 2) + { + exp->GetTraceToElementMap(tid, maparray, signarray, + exp->GetGeom()->GetEorient(tid), + bndExp->GetBasisNumModes(0)); + } + else if (dim == 3) + { + exp->GetTraceToElementMap(tid, maparray, signarray, + exp->GetGeom()->GetForient(tid), + bndExp->GetBasisNumModes(0), + bndExp->GetBasisNumModes(1)); + } + + for (k = 0; k < bndExp->GetNcoeffs(); k++) + { + m_bndCondCoeffsToLocalCoeffsMap[cnt + k] = + locExp.GetCoeff_Offset(eid) + maparray[k]; + if (m_signChange) + { + m_bndCondCoeffsToLocalCoeffsSign[cnt + k] = + signarray[k]; } } - offset += bndCondExp[i]->GetNcoeffs(); } - - globalId = Vmath::Vmax(m_numLocalCoeffs,&m_localToGlobalMap[0],1)+1; - m_numGlobalBndCoeffs = globalId; - // Set up a mapping list of Dirichlet Local Dofs that - // arise due to one vertex or edge just touching a - // Dirichlet boundary and need the value from another - // local coeff that has been filled by the boundary - // coeffs. 
+ // we now need some information to work out how to + // handle vertices and edges that are only just + // touching a dirichlet boundary (and not the + // whole edge/face) - Array gloParaDirBnd(m_numGlobalBndCoeffs,-1.0); - - Array bndmap; - cnt = 0; - for(i = 0; i < locExpVector.size(); ++i) + for (k = 0; k < bndExp->GetNcoeffs(); k++) { - int gloid; - - exp = locExpVector[i]; + int locid = m_bndCondCoeffsToLocalCoeffsMap[cnt + k]; + int gloid = m_localToGlobalMap[locid]; + NekDouble sign = 1.0; - exp->GetBoundaryMap(bndmap); - - for(j = 0; j < bndmap.size(); ++j) + if (m_signChange) { - k = cnt + bndmap[j]; - - if(CoeffOnDirTrace.count(k) == 0) - { - gloid = m_localToGlobalMap[k]; - - if(gloid < m_numGlobalDirBndCoeffs) // point on Dir BC - { - if(GloDirBndCoeffToLocalCoeff.count(gloid)) - { - int locid = GloDirBndCoeffToLocalCoeff[gloid]. - first; - NekDouble sign = 1.0; - - if(m_signChange) - { - sign = m_localToGlobalSign[locid]* - m_localToGlobalSign[k]; - } - - ExtraDirDof DirDofs(k,locid,sign); - // could make same `structure as extraDirDof - m_copyLocalDirDofs.insert(DirDofs); - } - else // else could be on another parallel partition. - { - gloParaDirBnd[gloid] = gloid; - } - } - } - } - - cnt += exp->GetNcoeffs(); - } - - /* - * The boundary condition mapping is generated from the same vertex - * renumbering. 
- */ - cnt=0; - for(i = 0; i < m_numLocalCoeffs; ++i) - { - if(m_localToGlobalMap[i] == -1) - { - m_localToGlobalMap[i] = globalId++; + sign = m_bndCondCoeffsToLocalCoeffsSign[cnt + k]; } - else + + if (bndConditions[i]->GetBoundaryConditionType() == + SpatialDomains::eDirichlet) { - if(m_signChange) - { - m_localToGlobalBndSign[cnt]=m_localToGlobalSign[i]; - } - m_localToGlobalBndMap[cnt++]=m_localToGlobalMap[i]; + CoeffOnDirTrace.insert(locid); + + // store the local id and sign from global id + // back to local space; + GloDirBndCoeffToLocalCoeff[gloid] = + pair(locid, sign); } } + } + offset += bndCondExp[i]->GetNcoeffs(); + } - m_numGlobalCoeffs = globalId; + globalId = Vmath::Vmax(m_numLocalCoeffs, &m_localToGlobalMap[0], 1) + 1; + m_numGlobalBndCoeffs = globalId; - SetUpUniversalC0ContMap(locExp, periodicVerts, periodicEdges, - periodicFaces); + // Set up a mapping list of Dirichlet Local Dofs that + // arise due to one vertex or edge just touching a + // Dirichlet boundary and need the value from another + // local coeff that has been filled by the boundary + // coeffs. - // Now that universal map is setup reset gloParaDirBnd to - // 0 if no point communicated or universal value of not - // equal to -1.0 - for(i = 0; i < m_numGlobalBndCoeffs; ++i) - { - int gloid = gloParaDirBnd[i]; - if(gloid == -1) - { - gloParaDirBnd[i] = 0.0; - } - else - { - gloParaDirBnd[i] = m_globalToUniversalMap[gloid]; - } - } - - // Use parallel boundary communication to set parallel - // dirichlet values on all processors Needs to be after - // SetupUuniversialC0ContMap - Gs::Gather(gloParaDirBnd,Gs::gs_max,m_bndGsh); - - // copy global ids back to local values in partition to - //initialise gs communicator. 
- Array paraDirBnd(m_numLocalCoeffs); - for(i = 0; i < numLocalCoeffs; ++i) - { - paraDirBnd[i] = 0.0; + Array gloParaDirBnd(m_numGlobalBndCoeffs, -1.0); - int id = m_localToGlobalMap[i]; + Array bndmap; + cnt = 0; + for (i = 0; i < locExpVector.size(); ++i) + { + int gloid; - if(id >= m_numGlobalDirBndCoeffs) - { - continue; - } + exp = locExpVector[i]; - paraDirBnd[i] = gloParaDirBnd[id]; + exp->GetBoundaryMap(bndmap); - if(gloParaDirBnd[id] > 0.0) - { - // gather any sign changes due to edge modes - if(m_signChange) - { - if(m_localToGlobalSign[i] < 0) - { - m_parallelDirBndSign.insert(i); - } - } - } - } + for (j = 0; j < bndmap.size(); ++j) + { + k = cnt + bndmap[j]; - m_dirBndGsh = Gs::Init(paraDirBnd,vComm,verbose); - - // Set up the local to global map for the next level when using - // multi-level static condensation - if ((m_solnType == eDirectMultiLevelStaticCond || - m_solnType == eIterativeMultiLevelStaticCond || - m_solnType == eXxtMultiLevelStaticCond || - m_solnType == ePETScMultiLevelStaticCond) && nGraphVerts) + if (CoeffOnDirTrace.count(k) == 0) { - if (m_staticCondLevel < (bottomUpGraph->GetNlevels()-1)) + gloid = m_localToGlobalMap[k]; + + if (gloid < m_numGlobalDirBndCoeffs) // point on Dir BC { - Array vwgts_perm( - graph[0].size() + graph[1].size() + graph[2].size() - - firstNonDirGraphVertId); - - for (i = 0; i < locExpVector.size(); ++i) + if (GloDirBndCoeffToLocalCoeff.count(gloid)) { - exp = locExpVector[i]; - - for (j = 0; j < exp->GetNverts(); ++j) - { - meshVertId = exp->GetGeom()->GetVid(j); - - if (graph[0][meshVertId] >= firstNonDirGraphVertId) - { - vwgts_perm[graph[0][meshVertId] - - firstNonDirGraphVertId] = - dofs[0][meshVertId]; - } - } + int locid = GloDirBndCoeffToLocalCoeff[gloid].first; + NekDouble sign = 1.0; - for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + if (m_signChange) { - meshEdgeId = exp->GetGeom()->GetEid(j); - - if (graph[1][meshEdgeId] >= firstNonDirGraphVertId) - { - vwgts_perm[graph[1][meshEdgeId] - - 
firstNonDirGraphVertId] = - dofs[1][meshEdgeId]; - } + sign = m_localToGlobalSign[locid] * + m_localToGlobalSign[k]; } - for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) - { - meshFaceId = exp->GetGeom()->GetFid(j); - - if (graph[2][meshFaceId] >= firstNonDirGraphVertId) - { - vwgts_perm[graph[2][meshFaceId] - - firstNonDirGraphVertId] = - dofs[2][meshFaceId]; - } - } + ExtraDirDof DirDofs(k, locid, sign); + // could make same `structure as extraDirDof + m_copyLocalDirDofs.insert(DirDofs); + } + else // else could be on another parallel partition. + { + gloParaDirBnd[gloid] = gloid; } - - bottomUpGraph->ExpandGraphWithVertexWeights(vwgts_perm); - m_nextLevelLocalToGlobalMap = MemoryManager:: - AllocateSharedPtr(this, bottomUpGraph); - } - } - - m_hash = hash_range(m_localToGlobalMap.begin(), - m_localToGlobalMap.end()); - - // Add up hash values if parallel - int hash = m_hash; - vComm->AllReduce(hash, LibUtilities::ReduceSum); - m_hash = hash; - - CalculateBndSystemBandWidth(); - CalculateFullSystemBandWidth(); - } - - /** - * - */ - AssemblyMapCG::~AssemblyMapCG() - { - Gs::Finalise(m_gsh); - Gs::Finalise(m_bndGsh); - } - - /** - * @brief Determine orientation of an edge to its periodic equivalents, - * as well as the ID of the representative edge. - * - * Since an edge may be periodic with more than one other edge (e.g. a - * periodic cube has sets of four periodic edges in each coordinate - * direction), we have to define a 'representative' edge. In this - * assembly map we define it to be the one with the minimum ID. This - * routine is set up to calculate the orientation of a given edge with - * ID @p meshEdgeId with respect to the edge ID. - * - * @param meshEdgeId ID of a periodic edge. - * @param edgeOrient Edge orientation of meshEdgeId with respect to - * its parent element. - * @param periodicEdges The map of all periodic edges. 
- * - * @return Pair containing the ID of the periodic edge and the - * orientation of @p meshEdgeID with respect to this edge. - */ - pair DeterminePeriodicEdgeOrientId( - int meshEdgeId, - StdRegions::Orientation edgeOrient, - const vector &periodicEdges) - { - int minId = periodicEdges[0].id; - int minIdK = 0; - int k; - - for (k = 1; k < periodicEdges.size(); ++k) - { - if (periodicEdges[k].id < minId) - { - minId = min(minId, periodicEdges[k].id); - minIdK = k; } } + } - minId = min(minId, meshEdgeId); + cnt += exp->GetNcoeffs(); + } - if (meshEdgeId != minId) + /* + * The boundary condition mapping is generated from the same vertex + * renumbering. + */ + cnt = 0; + for (i = 0; i < m_numLocalCoeffs; ++i) + { + if (m_localToGlobalMap[i] == -1) + { + m_localToGlobalMap[i] = globalId++; + } + else + { + if (m_signChange) { - if (periodicEdges[minIdK].orient == StdRegions::eBackwards) - { - // Swap edge orientation - edgeOrient = (edgeOrient == StdRegions::eForwards) ? - StdRegions::eBackwards : StdRegions::eForwards; - } + m_localToGlobalBndSign[cnt] = m_localToGlobalSign[i]; } - - return make_pair(minId, edgeOrient); + m_localToGlobalBndMap[cnt++] = m_localToGlobalMap[i]; } + } - /** - * @brief Determine relative orientation between two faces. - * - * Given the orientation of a local element to its local face, defined - * as @p faceOrient, and @p perFaceOrient which states the alignment of - * one periodic face to the other global face, this routine determines - * the orientation that takes this local element face to the - * global/unique face. - * - * @param faceOrient Orientation of the face with respect to its - * parent element. - * @param perFaceOrient Orientation of the representative/global face. - * - * @return Orientation between the two faces. 
- */ - StdRegions::Orientation DeterminePeriodicFaceOrient( - StdRegions::Orientation faceOrient, - StdRegions::Orientation perFaceOrient) + m_numGlobalCoeffs = globalId; + + SetUpUniversalC0ContMap(locExp, periodicVerts, periodicEdges, + periodicFaces); + + // Now that universal map is setup reset gloParaDirBnd to + // 0 if no point communicated or universal value of not + // equal to -1.0 + for (i = 0; i < m_numGlobalBndCoeffs; ++i) + { + int gloid = gloParaDirBnd[i]; + if (gloid == -1) { - StdRegions::Orientation returnval = faceOrient; + gloParaDirBnd[i] = 0.0; + } + else + { + gloParaDirBnd[i] = m_globalToUniversalMap[gloid]; + } + } - if(perFaceOrient != StdRegions::eDir1FwdDir1_Dir2FwdDir2) - { - int tmp1 = (int)faceOrient - 5; - int tmp2 = (int)perFaceOrient - 5; + // Use parallel boundary communication to set parallel + // dirichlet values on all processors Needs to be after + // SetupUuniversialC0ContMap + Gs::Gather(gloParaDirBnd, Gs::gs_max, m_bndGsh); - int flipDir1Map [8] = {2,3,0,1,6,7,4,5}; - int flipDir2Map [8] = {1,0,3,2,5,4,7,6}; - int transposeMap[8] = {4,5,6,7,0,2,1,3}; + // copy global ids back to local values in partition to + // initialise gs communicator. + Array paraDirBnd(m_numLocalCoeffs); + for (i = 0; i < numLocalCoeffs; ++i) + { + paraDirBnd[i] = 0.0; - // Transpose orientation - if (tmp2 > 3) - { - tmp1 = transposeMap[tmp1]; - } + int id = m_localToGlobalMap[i]; - // Reverse orientation in direction 1. - if (tmp2 == 2 || tmp2 == 3 || tmp2 == 6 || tmp2 == 7) - { - tmp1 = flipDir1Map[tmp1]; - } + if (id >= m_numGlobalDirBndCoeffs) + { + continue; + } - // Reverse orientation in direction 2 - if (tmp2 % 2 == 1) - { - tmp1 = flipDir2Map[tmp1]; - } + paraDirBnd[i] = gloParaDirBnd[id]; - returnval = (StdRegions::Orientation)(tmp1+5); - } - return returnval; - } - - - /** - * Sets up the global to universal mapping of degrees of freedom across - * processors. 
- */ - void AssemblyMapCG::SetUpUniversalC0ContMap( - const ExpList &locExp, - const PeriodicMap &perVerts, - const PeriodicMap &perEdges, - const PeriodicMap &perFaces) - { - LocalRegions::ExpansionSharedPtr exp; - int nVert = 0; - int nEdge = 0; - int nFace = 0; - int maxEdgeDof = 0; - int maxFaceDof = 0; - int maxIntDof = 0; - int dof = 0; - int cnt; - int i,j,k,l; - int meshVertId; - int meshEdgeId; - int meshFaceId; - int elementId; - int vGlobalId; - int maxBndGlobalId = 0; - StdRegions::Orientation edgeOrient; - StdRegions::Orientation faceOrient; - Array edgeInteriorMap; - Array edgeInteriorSign; - Array faceInteriorMap; - Array faceInteriorSign; - Array interiorMap; - - const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp()); - LibUtilities::CommSharedPtr vCommRow = m_comm->GetRowComm(); - const bool verbose = locExp.GetSession()->DefinesCmdLineArgument("verbose"); - - m_globalToUniversalMap = Nektar::Array(m_numGlobalCoeffs, -1); - m_globalToUniversalMapUnique = Nektar::Array(m_numGlobalCoeffs, -1); - m_globalToUniversalBndMap = Nektar::Array(m_numGlobalBndCoeffs, -1); - m_globalToUniversalBndMapUnique = Nektar::Array(m_numGlobalBndCoeffs, -1); - - // Loop over all the elements in the domain to gather mesh data - for(i = 0; i < locExpVector.size(); ++i) + if (gloParaDirBnd[id] > 0.0) + { + // gather any sign changes due to edge modes + if (m_signChange) { - exp = locExpVector[i]; - - int nv = exp->GetNverts(); - int ne = exp->GetGeom()->GetNumEdges(); - int nf = exp->GetGeom()->GetNumFaces(); - - nVert += nv; - nEdge += ne; - nFace += nf; - - // Loop over all edges (and vertices) of element i - for(j = 0; j < ne; ++j) - { - if(nf) - { - dof = exp->as()-> - GetEdgeNcoeffs(j)-2; - } - else - { - dof = exp->GetTraceNcoeffs(j)-2; - } - - maxEdgeDof = (dof > maxEdgeDof ? dof : maxEdgeDof); - } - for(j = 0; j < nf; ++j) + if (m_localToGlobalSign[i] < 0) { - dof = exp->GetTraceIntNcoeffs(j); - maxFaceDof = (dof > maxFaceDof ? 
dof : maxFaceDof); + m_parallelDirBndSign.insert(i); } - exp->GetInteriorMap(interiorMap); - dof = interiorMap.size(); - maxIntDof = (dof > maxIntDof ? dof : maxIntDof); } + } + } + + m_dirBndGsh = Gs::Init(paraDirBnd, vComm, verbose); - // Tell other processes about how many dof we have - vCommRow->AllReduce(nVert, LibUtilities::ReduceSum); - vCommRow->AllReduce(nEdge, LibUtilities::ReduceSum); - vCommRow->AllReduce(nFace, LibUtilities::ReduceSum); - vCommRow->AllReduce(maxEdgeDof, LibUtilities::ReduceMax); - vCommRow->AllReduce(maxFaceDof, LibUtilities::ReduceMax); - vCommRow->AllReduce(maxIntDof, LibUtilities::ReduceMax); + // Set up the local to global map for the next level when using + // multi-level static condensation + if ((m_solnType == eDirectMultiLevelStaticCond || + m_solnType == eIterativeMultiLevelStaticCond || + m_solnType == eXxtMultiLevelStaticCond || + m_solnType == ePETScMultiLevelStaticCond) && + nGraphVerts) + { + if (m_staticCondLevel < (bottomUpGraph->GetNlevels() - 1)) + { + Array vwgts_perm(graph[0].size() + graph[1].size() + + graph[2].size() - + firstNonDirGraphVertId); - // Assemble global to universal mapping for this process - for(i = 0; i < locExpVector.size(); ++i) + for (i = 0; i < locExpVector.size(); ++i) { exp = locExpVector[i]; - cnt = locExp.GetCoeff_Offset(i); - int nf = exp->GetGeom()->GetNumFaces(); - - // Loop over all vertices of element i - for(j = 0; j < exp->GetNverts(); ++j) + for (j = 0; j < exp->GetNverts(); ++j) { meshVertId = exp->GetGeom()->GetVid(j); - vGlobalId = m_localToGlobalMap[cnt+exp->GetVertexMap(j)]; - auto pIt = perVerts.find(meshVertId); - if (pIt != perVerts.end()) + if (graph[0][meshVertId] >= firstNonDirGraphVertId) { - for (k = 0; k < pIt->second.size(); ++k) - { - meshVertId = min(meshVertId, pIt->second[k].id); - } + vwgts_perm[graph[0][meshVertId] - + firstNonDirGraphVertId] = + dofs[0][meshVertId]; } - - m_globalToUniversalMap[vGlobalId] = meshVertId + 1; - m_globalToUniversalBndMap[vGlobalId] 
= - m_globalToUniversalMap[vGlobalId]; - maxBndGlobalId = (vGlobalId > maxBndGlobalId ? - vGlobalId : maxBndGlobalId); } - // Loop over all edges of element i - for(j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) { meshEdgeId = exp->GetGeom()->GetEid(j); - auto pIt = perEdges.find(meshEdgeId); - edgeOrient = exp->GetGeom()->GetEorient(j); - if (pIt != perEdges.end()) + if (graph[1][meshEdgeId] >= firstNonDirGraphVertId) { - pair idOrient = - DeterminePeriodicEdgeOrientId( - meshEdgeId, edgeOrient, pIt->second); - meshEdgeId = idOrient.first; - edgeOrient = idOrient.second; + vwgts_perm[graph[1][meshEdgeId] - + firstNonDirGraphVertId] = + dofs[1][meshEdgeId]; } + } - if(nf) // 3D version - { - exp->as()-> - GetEdgeInteriorToElementMap(j,edgeInteriorMap, - edgeInteriorSign,edgeOrient); - dof = exp->as()-> - GetEdgeNcoeffs(j)-2; - } - else // 2D version - { - exp->GetTraceInteriorToElementMap(j,edgeInteriorMap, - edgeInteriorSign,edgeOrient); - dof = exp->GetTraceNcoeffs(j)-2; - } + for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + { + meshFaceId = exp->GetGeom()->GetFid(j); - // Set the global DOF's for the interior modes of edge j - // for varP, ignore modes with sign == 0 - for(k = 0, l = 0; k < dof; ++k) + if (graph[2][meshFaceId] >= firstNonDirGraphVertId) { - if (m_signChange) - { - if (m_localToGlobalSign[cnt+edgeInteriorMap[k]]==0) - { - continue; - } - } - vGlobalId = m_localToGlobalMap[cnt+edgeInteriorMap[k]]; - m_globalToUniversalMap[vGlobalId] - = nVert + meshEdgeId * maxEdgeDof + l + 1; - m_globalToUniversalBndMap[vGlobalId]= - m_globalToUniversalMap[vGlobalId]; - maxBndGlobalId = (vGlobalId > maxBndGlobalId ? 
- vGlobalId : maxBndGlobalId); - l++; + vwgts_perm[graph[2][meshFaceId] - + firstNonDirGraphVertId] = + dofs[2][meshFaceId]; } } + } - // Loop over all faces of element i - for(j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) - { - faceOrient = exp->GetGeom()->GetForient(j); + bottomUpGraph->ExpandGraphWithVertexWeights(vwgts_perm); + m_nextLevelLocalToGlobalMap = + MemoryManager::AllocateSharedPtr(this, + bottomUpGraph); + } + } - meshFaceId = exp->GetGeom()->GetFid(j); + m_hash = hash_range(m_localToGlobalMap.begin(), m_localToGlobalMap.end()); - auto pIt = perFaces.find(meshFaceId); - if (pIt != perFaces.end()) - { - if(meshFaceId == min(meshFaceId, pIt->second[0].id)) - { - faceOrient = DeterminePeriodicFaceOrient - (faceOrient,pIt->second[0].orient); - } - meshFaceId = min(meshFaceId, pIt->second[0].id); - } + // Add up hash values if parallel + int hash = m_hash; + vComm->AllReduce(hash, LibUtilities::ReduceSum); + m_hash = hash; + CalculateBndSystemBandWidth(); + CalculateFullSystemBandWidth(); +} - exp->GetTraceInteriorToElementMap(j,faceInteriorMap, - faceInteriorSign,faceOrient); - dof = exp->GetTraceIntNcoeffs(j); +/** + * + */ +AssemblyMapCG::~AssemblyMapCG() +{ + Gs::Finalise(m_gsh); + Gs::Finalise(m_bndGsh); +} + +/** + * @brief Determine orientation of an edge to its periodic equivalents, + * as well as the ID of the representative edge. + * + * Since an edge may be periodic with more than one other edge (e.g. a + * periodic cube has sets of four periodic edges in each coordinate + * direction), we have to define a 'representative' edge. In this + * assembly map we define it to be the one with the minimum ID. This + * routine is set up to calculate the orientation of a given edge with + * ID @p meshEdgeId with respect to the edge ID. + * + * @param meshEdgeId ID of a periodic edge. + * @param edgeOrient Edge orientation of meshEdgeId with respect to + * its parent element. + * @param periodicEdges The map of all periodic edges. 
+ * + * @return Pair containing the ID of the periodic edge and the + * orientation of @p meshEdgeID with respect to this edge. + */ +pair DeterminePeriodicEdgeOrientId( + int meshEdgeId, StdRegions::Orientation edgeOrient, + const vector &periodicEdges) +{ + int minId = periodicEdges[0].id; + int minIdK = 0; + int k; - for(k = 0, l = 0; k < dof; ++k) - { - if (m_signChange) - { - if (m_localToGlobalSign[cnt+faceInteriorMap[k]]==0) - { - continue; - } - } - vGlobalId = m_localToGlobalMap[cnt+faceInteriorMap[k]]; - m_globalToUniversalMap[vGlobalId] - = nVert + nEdge*maxEdgeDof + meshFaceId * maxFaceDof - + l + 1; - m_globalToUniversalBndMap[vGlobalId]= - m_globalToUniversalMap[vGlobalId]; - - maxBndGlobalId = (vGlobalId > maxBndGlobalId ? - vGlobalId : maxBndGlobalId); - l++; - } - } + for (k = 1; k < periodicEdges.size(); ++k) + { + if (periodicEdges[k].id < minId) + { + minId = min(minId, periodicEdges[k].id); + minIdK = k; + } + } - // Add interior DOFs to complete universal numbering - exp->GetInteriorMap(interiorMap); - dof = interiorMap.size(); - elementId = (exp->GetGeom())->GetGlobalID(); - for (k = 0; k < dof; ++k) - { - vGlobalId = m_localToGlobalMap[cnt+interiorMap[k]]; - m_globalToUniversalMap[vGlobalId] - = nVert + nEdge*maxEdgeDof + nFace*maxFaceDof + elementId*maxIntDof + k + 1; - } - } + minId = min(minId, meshEdgeId); - // Set up the GSLib universal assemble mapping - // Internal DOF do not participate in any data - // exchange, so we keep these set to the special GSLib id=0 so - // they are ignored. - Nektar::Array tmp(m_numGlobalCoeffs); - Vmath::Zero(m_numGlobalCoeffs, tmp, 1); - Nektar::Array tmp2(m_numGlobalBndCoeffs, tmp); - for (unsigned int i = 0; i < m_numGlobalBndCoeffs; ++i) - { - tmp[i] = m_globalToUniversalMap[i]; - } + if (meshEdgeId != minId) + { + if (periodicEdges[minIdK].orient == StdRegions::eBackwards) + { + // Swap edge orientation + edgeOrient = (edgeOrient == StdRegions::eForwards) + ? 
StdRegions::eBackwards + : StdRegions::eForwards; + } + } - m_gsh = Gs::Init(tmp, vCommRow, verbose); - m_bndGsh = Gs::Init(tmp2, vCommRow, verbose); - Gs::Unique(tmp, vCommRow); - for (unsigned int i = 0; i < m_numGlobalCoeffs; ++i) - { - m_globalToUniversalMapUnique[i] = (tmp[i] >= 0 ? 1 : 0); - } - for (unsigned int i = 0; i < m_numGlobalBndCoeffs; ++i) - { - m_globalToUniversalBndMapUnique[i] = (tmp2[i] >= 0 ? 1 : 0); - } + return make_pair(minId, edgeOrient); +} + +/** + * @brief Determine relative orientation between two faces. + * + * Given the orientation of a local element to its local face, defined + * as @p faceOrient, and @p perFaceOrient which states the alignment of + * one periodic face to the other global face, this routine determines + * the orientation that takes this local element face to the + * global/unique face. + * + * @param faceOrient Orientation of the face with respect to its + * parent element. + * @param perFaceOrient Orientation of the representative/global face. + * + * @return Orientation between the two faces. + */ +StdRegions::Orientation DeterminePeriodicFaceOrient( + StdRegions::Orientation faceOrient, StdRegions::Orientation perFaceOrient) +{ + StdRegions::Orientation returnval = faceOrient; + + if (perFaceOrient != StdRegions::eDir1FwdDir1_Dir2FwdDir2) + { + int tmp1 = (int)faceOrient - 5; + int tmp2 = (int)perFaceOrient - 5; + + int flipDir1Map[8] = {2, 3, 0, 1, 6, 7, 4, 5}; + int flipDir2Map[8] = {1, 0, 3, 2, 5, 4, 7, 6}; + int transposeMap[8] = {4, 5, 6, 7, 0, 2, 1, 3}; + + // Transpose orientation + if (tmp2 > 3) + { + tmp1 = transposeMap[tmp1]; } - /** - * @brief Construct an AssemblyMapCG object which corresponds to the - * linear space of the current object. - * - * This function is used to create a linear-space assembly map, which is - * then used in the linear space preconditioner in the conjugate - * gradient solve. 
- */ - AssemblyMapSharedPtr AssemblyMapCG::v_LinearSpaceMap( - const ExpList &locexp, GlobalSysSolnType solnType) + // Reverse orientation in direction 1. + if (tmp2 == 2 || tmp2 == 3 || tmp2 == 6 || tmp2 == 7) { - AssemblyMapCGSharedPtr returnval; + tmp1 = flipDir1Map[tmp1]; + } - int i, j; - int nverts = 0; - const std::shared_ptr exp - = locexp.GetExp(); - int nelmts = exp->size(); - const bool verbose = locexp.GetSession()->DefinesCmdLineArgument("verbose"); + // Reverse orientation in direction 2 + if (tmp2 % 2 == 1) + { + tmp1 = flipDir2Map[tmp1]; + } - // Get Default Map and turn off any searched values. - returnval = MemoryManager - ::AllocateSharedPtr(m_session,locexp.GetComm()); - returnval->m_solnType = solnType; - returnval->m_preconType = eNull; - returnval->m_maxStaticCondLevel = 0; - returnval->m_signChange = false; - returnval->m_comm = m_comm; + returnval = (StdRegions::Orientation)(tmp1 + 5); + } + return returnval; +} + +/** + * Sets up the global to universal mapping of degrees of freedom across + * processors. 
+ */ +void AssemblyMapCG::SetUpUniversalC0ContMap(const ExpList &locExp, + const PeriodicMap &perVerts, + const PeriodicMap &perEdges, + const PeriodicMap &perFaces) +{ + LocalRegions::ExpansionSharedPtr exp; + int nVert = 0; + int nEdge = 0; + int nFace = 0; + int maxEdgeDof = 0; + int maxFaceDof = 0; + int maxIntDof = 0; + int dof = 0; + int cnt; + int i, j, k, l; + int meshVertId; + int meshEdgeId; + int meshFaceId; + int elementId; + int vGlobalId; + int maxBndGlobalId = 0; + StdRegions::Orientation edgeOrient; + StdRegions::Orientation faceOrient; + Array edgeInteriorMap; + Array edgeInteriorSign; + Array faceInteriorMap; + Array faceInteriorSign; + Array interiorMap; + + const LocalRegions::ExpansionVector &locExpVector = *(locExp.GetExp()); + LibUtilities::CommSharedPtr vCommRow = m_comm->GetRowComm(); + const bool verbose = locExp.GetSession()->DefinesCmdLineArgument("verbose"); + + m_globalToUniversalMap = Nektar::Array(m_numGlobalCoeffs, -1); + m_globalToUniversalMapUnique = + Nektar::Array(m_numGlobalCoeffs, -1); + m_globalToUniversalBndMap = + Nektar::Array(m_numGlobalBndCoeffs, -1); + m_globalToUniversalBndMapUnique = + Nektar::Array(m_numGlobalBndCoeffs, -1); + + // Loop over all the elements in the domain to gather mesh data + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + + int nv = exp->GetNverts(); + int ne = exp->GetGeom()->GetNumEdges(); + int nf = exp->GetGeom()->GetNumFaces(); - // Count the number of vertices - for (i = 0; i < nelmts; ++i) + nVert += nv; + nEdge += ne; + nFace += nf; + + // Loop over all edges (and vertices) of element i + for (j = 0; j < ne; ++j) + { + if (nf) + { + dof = + exp->as()->GetEdgeNcoeffs(j) - 2; + } + else { - nverts += (*exp)[i]->GetNverts(); + dof = exp->GetTraceNcoeffs(j) - 2; } - returnval->m_numLocalCoeffs = nverts; - returnval->m_localToGlobalMap = Array(nverts, -1); + maxEdgeDof = (dof > maxEdgeDof ? 
dof : maxEdgeDof); + } + for (j = 0; j < nf; ++j) + { + dof = exp->GetTraceIntNcoeffs(j); + maxFaceDof = (dof > maxFaceDof ? dof : maxFaceDof); + } + exp->GetInteriorMap(interiorMap); + dof = interiorMap.size(); + maxIntDof = (dof > maxIntDof ? dof : maxIntDof); + } - // Store original global ids in this map - returnval->m_localToGlobalBndMap = Array(nverts, -1); + // Tell other processes about how many dof we have + vCommRow->AllReduce(nVert, LibUtilities::ReduceSum); + vCommRow->AllReduce(nEdge, LibUtilities::ReduceSum); + vCommRow->AllReduce(nFace, LibUtilities::ReduceSum); + vCommRow->AllReduce(maxEdgeDof, LibUtilities::ReduceMax); + vCommRow->AllReduce(maxFaceDof, LibUtilities::ReduceMax); + vCommRow->AllReduce(maxIntDof, LibUtilities::ReduceMax); - int cnt = 0; - int cnt1 = 0; - Array GlobCoeffs(m_numGlobalCoeffs, -1); + // Assemble global to universal mapping for this process + for (i = 0; i < locExpVector.size(); ++i) + { + exp = locExpVector[i]; + cnt = locExp.GetCoeff_Offset(i); + + int nf = exp->GetGeom()->GetNumFaces(); + + // Loop over all vertices of element i + for (j = 0; j < exp->GetNverts(); ++j) + { + meshVertId = exp->GetGeom()->GetVid(j); + vGlobalId = m_localToGlobalMap[cnt + exp->GetVertexMap(j)]; - // Set up local to global map; - for (i = 0; i < nelmts; ++i) + auto pIt = perVerts.find(meshVertId); + if (pIt != perVerts.end()) { - for (j = 0; j < (*exp)[i]->GetNverts(); ++j) + for (k = 0; k < pIt->second.size(); ++k) { - returnval->m_localToGlobalMap[cnt] = - returnval->m_localToGlobalBndMap[cnt] = - m_localToGlobalMap[cnt1 + (*exp)[i]->GetVertexMap(j,true)]; - GlobCoeffs[returnval->m_localToGlobalMap[cnt]] = 1; - - // Set up numLocalDirBndCoeffs - if ((returnval->m_localToGlobalMap[cnt]) < - m_numGlobalDirBndCoeffs) - { - returnval->m_numLocalDirBndCoeffs++; - } - cnt++; + meshVertId = min(meshVertId, pIt->second[k].id); } - cnt1 += (*exp)[i]->GetNcoeffs(); } - cnt = 0; - // Reset global numbering and count number of dofs - for (i = 0; i < 
m_numGlobalCoeffs; ++i) + m_globalToUniversalMap[vGlobalId] = meshVertId + 1; + m_globalToUniversalBndMap[vGlobalId] = + m_globalToUniversalMap[vGlobalId]; + maxBndGlobalId = + (vGlobalId > maxBndGlobalId ? vGlobalId : maxBndGlobalId); + } + + // Loop over all edges of element i + for (j = 0; j < exp->GetGeom()->GetNumEdges(); ++j) + { + meshEdgeId = exp->GetGeom()->GetEid(j); + auto pIt = perEdges.find(meshEdgeId); + edgeOrient = exp->GetGeom()->GetEorient(j); + + if (pIt != perEdges.end()) { - if (GlobCoeffs[i] != -1) - { - GlobCoeffs[i] = cnt++; - } + pair idOrient = + DeterminePeriodicEdgeOrientId(meshEdgeId, edgeOrient, + pIt->second); + meshEdgeId = idOrient.first; + edgeOrient = idOrient.second; } - // Set up number of globalCoeffs; - returnval->m_numGlobalCoeffs = cnt; + if (nf) // 3D version + { + exp->as() + ->GetEdgeInteriorToElementMap(j, edgeInteriorMap, + edgeInteriorSign, edgeOrient); + dof = + exp->as()->GetEdgeNcoeffs(j) - 2; + } + else // 2D version + { + exp->GetTraceInteriorToElementMap(j, edgeInteriorMap, + edgeInteriorSign, edgeOrient); + dof = exp->GetTraceNcoeffs(j) - 2; + } - // Set up number of global Dirichlet boundary coefficients - for (i = 0; i < m_numGlobalDirBndCoeffs; ++i) + // Set the global DOF's for the interior modes of edge j + // for varP, ignore modes with sign == 0 + for (k = 0, l = 0; k < dof; ++k) { - if (GlobCoeffs[i] != -1) + if (m_signChange) { - returnval->m_numGlobalDirBndCoeffs++; + if (m_localToGlobalSign[cnt + edgeInteriorMap[k]] == 0) + { + continue; + } } + vGlobalId = m_localToGlobalMap[cnt + edgeInteriorMap[k]]; + m_globalToUniversalMap[vGlobalId] = + nVert + meshEdgeId * maxEdgeDof + l + 1; + m_globalToUniversalBndMap[vGlobalId] = + m_globalToUniversalMap[vGlobalId]; + maxBndGlobalId = + (vGlobalId > maxBndGlobalId ? 
vGlobalId : maxBndGlobalId); + l++; } + } - // Set up global to universal map - if (m_globalToUniversalMap.size()) - { - LibUtilities::CommSharedPtr vCommRow - = m_session->GetComm()->GetRowComm(); - int nglocoeffs = returnval->m_numGlobalCoeffs; - returnval->m_globalToUniversalMap - = Array (nglocoeffs); - returnval->m_globalToUniversalMapUnique - = Array (nglocoeffs); - - // Reset local to global map and setup universal map - for (i = 0; i < nverts; ++i) - { - cnt = returnval->m_localToGlobalMap[i]; - returnval->m_localToGlobalMap[i] = GlobCoeffs[cnt]; + // Loop over all faces of element i + for (j = 0; j < exp->GetGeom()->GetNumFaces(); ++j) + { + faceOrient = exp->GetGeom()->GetForient(j); - returnval->m_globalToUniversalMap[GlobCoeffs[cnt]] = - m_globalToUniversalMap[cnt]; - } + meshFaceId = exp->GetGeom()->GetFid(j); - Nektar::Array tmp(nglocoeffs); - Vmath::Zero(nglocoeffs, tmp, 1); - for (unsigned int i = 0; i < nglocoeffs; ++i) - { - tmp[i] = returnval->m_globalToUniversalMap[i]; - } - returnval->m_gsh = Gs::Init(tmp, vCommRow, verbose); - Gs::Unique(tmp, vCommRow); - for (unsigned int i = 0; i < nglocoeffs; ++i) - { - returnval->m_globalToUniversalMapUnique[i] - = (tmp[i] >= 0 ? 1 : 0); - } - } - else // not sure this option is ever needed. + auto pIt = perFaces.find(meshFaceId); + if (pIt != perFaces.end()) { - for (i = 0; i < nverts; ++i) + if (meshFaceId == min(meshFaceId, pIt->second[0].id)) { - cnt = returnval->m_localToGlobalMap[i]; - returnval->m_localToGlobalMap[i] = GlobCoeffs[cnt]; + faceOrient = DeterminePeriodicFaceOrient( + faceOrient, pIt->second[0].orient); } + meshFaceId = min(meshFaceId, pIt->second[0].id); } - return returnval; - } - - /** - * The bandwidth calculated here corresponds to what is referred to as - * half-bandwidth. If the elements of the matrix are designated as - * a_ij, it corresponds to the maximum value of |i-j| for non-zero - * a_ij. As a result, the value also corresponds to the number of - * sub- or super-diagonals. 
- * - * The bandwith can be calculated elementally as it corresponds to the - * maximal elemental bandwith (i.e. the maximal difference in global - * DOF index for every element). - * - * We caluclate here the bandwith of the full global system. - */ - void AssemblyMapCG::CalculateFullSystemBandWidth() - { - int i,j; - int cnt = 0; - int locSize; - int maxId; - int minId; - int bwidth = -1; - for(i = 0; i < m_numPatches; ++i) - { - locSize = m_numLocalBndCoeffsPerPatch[i]+m_numLocalIntCoeffsPerPatch[i]; - maxId = -1; - minId = m_numLocalCoeffs+1; - for(j = 0; j < locSize; j++) + exp->GetTraceInteriorToElementMap(j, faceInteriorMap, + faceInteriorSign, faceOrient); + dof = exp->GetTraceIntNcoeffs(j); + + for (k = 0, l = 0; k < dof; ++k) + { + if (m_signChange) { - if(m_localToGlobalMap[cnt+j] >= m_numGlobalDirBndCoeffs) + if (m_localToGlobalSign[cnt + faceInteriorMap[k]] == 0) { - if(m_localToGlobalMap[cnt+j] > maxId) - { - maxId = m_localToGlobalMap[cnt+j]; - } - - if(m_localToGlobalMap[cnt+j] < minId) - { - minId = m_localToGlobalMap[cnt+j]; - } + continue; } } - bwidth = (bwidth>(maxId-minId))?bwidth:(maxId-minId); + vGlobalId = m_localToGlobalMap[cnt + faceInteriorMap[k]]; + m_globalToUniversalMap[vGlobalId] = nVert + nEdge * maxEdgeDof + + meshFaceId * maxFaceDof + + l + 1; + m_globalToUniversalBndMap[vGlobalId] = + m_globalToUniversalMap[vGlobalId]; - cnt+=locSize; + maxBndGlobalId = + (vGlobalId > maxBndGlobalId ? 
vGlobalId : maxBndGlobalId); + l++; } - - m_fullSystemBandWidth = bwidth; } - - int AssemblyMapCG::v_GetLocalToGlobalMap(const int i) const + // Add interior DOFs to complete universal numbering + exp->GetInteriorMap(interiorMap); + dof = interiorMap.size(); + elementId = (exp->GetGeom())->GetGlobalID(); + for (k = 0; k < dof; ++k) { - return m_localToGlobalMap[i]; + vGlobalId = m_localToGlobalMap[cnt + interiorMap[k]]; + m_globalToUniversalMap[vGlobalId] = nVert + nEdge * maxEdgeDof + + nFace * maxFaceDof + + elementId * maxIntDof + k + 1; } + } + + // Set up the GSLib universal assemble mapping + // Internal DOF do not participate in any data + // exchange, so we keep these set to the special GSLib id=0 so + // they are ignored. + Nektar::Array tmp(m_numGlobalCoeffs); + Vmath::Zero(m_numGlobalCoeffs, tmp, 1); + Nektar::Array tmp2(m_numGlobalBndCoeffs, tmp); + for (unsigned int i = 0; i < m_numGlobalBndCoeffs; ++i) + { + tmp[i] = m_globalToUniversalMap[i]; + } - int AssemblyMapCG::v_GetGlobalToUniversalMap(const int i) const + m_gsh = Gs::Init(tmp, vCommRow, verbose); + m_bndGsh = Gs::Init(tmp2, vCommRow, verbose); + Gs::Unique(tmp, vCommRow); + for (unsigned int i = 0; i < m_numGlobalCoeffs; ++i) + { + m_globalToUniversalMapUnique[i] = (tmp[i] >= 0 ? 1 : 0); + } + for (unsigned int i = 0; i < m_numGlobalBndCoeffs; ++i) + { + m_globalToUniversalBndMapUnique[i] = (tmp2[i] >= 0 ? 1 : 0); + } +} + +/** + * @brief Construct an AssemblyMapCG object which corresponds to the + * linear space of the current object. + * + * This function is used to create a linear-space assembly map, which is + * then used in the linear space preconditioner in the conjugate + * gradient solve. 
+ */ +AssemblyMapSharedPtr AssemblyMapCG::v_LinearSpaceMap(const ExpList &locexp, + GlobalSysSolnType solnType) +{ + AssemblyMapCGSharedPtr returnval; + + int i, j; + int nverts = 0; + const std::shared_ptr exp = locexp.GetExp(); + int nelmts = exp->size(); + const bool verbose = locexp.GetSession()->DefinesCmdLineArgument("verbose"); + + // Get Default Map and turn off any searched values. + returnval = MemoryManager::AllocateSharedPtr( + m_session, locexp.GetComm()); + returnval->m_solnType = solnType; + returnval->m_preconType = eNull; + returnval->m_maxStaticCondLevel = 0; + returnval->m_signChange = false; + returnval->m_comm = m_comm; + + // Count the number of vertices + for (i = 0; i < nelmts; ++i) + { + nverts += (*exp)[i]->GetNverts(); + } + + returnval->m_numLocalCoeffs = nverts; + returnval->m_localToGlobalMap = Array(nverts, -1); + + // Store original global ids in this map + returnval->m_localToGlobalBndMap = Array(nverts, -1); + + int cnt = 0; + int cnt1 = 0; + Array GlobCoeffs(m_numGlobalCoeffs, -1); + + // Set up local to global map; + for (i = 0; i < nelmts; ++i) + { + for (j = 0; j < (*exp)[i]->GetNverts(); ++j) { - return m_globalToUniversalMap[i]; + returnval->m_localToGlobalMap[cnt] = + returnval->m_localToGlobalBndMap[cnt] = + m_localToGlobalMap[cnt1 + (*exp)[i]->GetVertexMap(j, true)]; + GlobCoeffs[returnval->m_localToGlobalMap[cnt]] = 1; + + // Set up numLocalDirBndCoeffs + if ((returnval->m_localToGlobalMap[cnt]) < m_numGlobalDirBndCoeffs) + { + returnval->m_numLocalDirBndCoeffs++; + } + cnt++; } + cnt1 += (*exp)[i]->GetNcoeffs(); + } - int AssemblyMapCG::v_GetGlobalToUniversalMapUnique(const int i) const + cnt = 0; + // Reset global numbering and count number of dofs + for (i = 0; i < m_numGlobalCoeffs; ++i) + { + if (GlobCoeffs[i] != -1) { - return m_globalToUniversalMapUnique[i]; + GlobCoeffs[i] = cnt++; } + } + + // Set up number of globalCoeffs; + returnval->m_numGlobalCoeffs = cnt; - const Array& - 
AssemblyMapCG::v_GetLocalToGlobalMap(void) + // Set up number of global Dirichlet boundary coefficients + for (i = 0; i < m_numGlobalDirBndCoeffs; ++i) + { + if (GlobCoeffs[i] != -1) { - return m_localToGlobalMap; + returnval->m_numGlobalDirBndCoeffs++; } + } + + // Set up global to universal map + if (m_globalToUniversalMap.size()) + { + LibUtilities::CommSharedPtr vCommRow = + m_session->GetComm()->GetRowComm(); + int nglocoeffs = returnval->m_numGlobalCoeffs; + returnval->m_globalToUniversalMap = Array(nglocoeffs); + returnval->m_globalToUniversalMapUnique = Array(nglocoeffs); - const Array& - AssemblyMapCG::v_GetGlobalToUniversalMap(void) + // Reset local to global map and setup universal map + for (i = 0; i < nverts; ++i) { - return m_globalToUniversalMap; + cnt = returnval->m_localToGlobalMap[i]; + returnval->m_localToGlobalMap[i] = GlobCoeffs[cnt]; + + returnval->m_globalToUniversalMap[GlobCoeffs[cnt]] = + m_globalToUniversalMap[cnt]; } - const Array& - AssemblyMapCG::v_GetGlobalToUniversalMapUnique(void) + Nektar::Array tmp(nglocoeffs); + Vmath::Zero(nglocoeffs, tmp, 1); + for (unsigned int i = 0; i < nglocoeffs; ++i) { - return m_globalToUniversalMapUnique; + tmp[i] = returnval->m_globalToUniversalMap[i]; } - - NekDouble AssemblyMapCG::v_GetLocalToGlobalSign( - const int i) const + returnval->m_gsh = Gs::Init(tmp, vCommRow, verbose); + Gs::Unique(tmp, vCommRow); + for (unsigned int i = 0; i < nglocoeffs; ++i) { - if(m_signChange) - { - return m_localToGlobalSign[i]; - } - else - { - return 1.0; - } + returnval->m_globalToUniversalMapUnique[i] = (tmp[i] >= 0 ? 1 : 0); } - - const Array& AssemblyMapCG::v_GetLocalToGlobalSign() const + } + else // not sure this option is ever needed. 
+ { + for (i = 0; i < nverts; ++i) { - return m_localToGlobalSign; + cnt = returnval->m_localToGlobalMap[i]; + returnval->m_localToGlobalMap[i] = GlobCoeffs[cnt]; } + } - void AssemblyMapCG::v_LocalToGlobal( - const Array& loc, - Array& global, - bool useComm) const + return returnval; +} + +/** + * The bandwidth calculated here corresponds to what is referred to as + * half-bandwidth. If the elements of the matrix are designated as + * a_ij, it corresponds to the maximum value of |i-j| for non-zero + * a_ij. As a result, the value also corresponds to the number of + * sub- or super-diagonals. + * + * The bandwith can be calculated elementally as it corresponds to the + * maximal elemental bandwith (i.e. the maximal difference in global + * DOF index for every element). + * + * We caluclate here the bandwith of the full global system. + */ +void AssemblyMapCG::CalculateFullSystemBandWidth() +{ + int i, j; + int cnt = 0; + int locSize; + int maxId; + int minId; + int bwidth = -1; + for (i = 0; i < m_numPatches; ++i) + { + locSize = + m_numLocalBndCoeffsPerPatch[i] + m_numLocalIntCoeffsPerPatch[i]; + maxId = -1; + minId = m_numLocalCoeffs + 1; + for (j = 0; j < locSize; j++) { - Array local; - if(global.data() == loc.data()) + if (m_localToGlobalMap[cnt + j] >= m_numGlobalDirBndCoeffs) { - local = Array(m_numLocalCoeffs,loc.data()); - } - else - { - local = loc; // create reference + if (m_localToGlobalMap[cnt + j] > maxId) + { + maxId = m_localToGlobalMap[cnt + j]; + } + + if (m_localToGlobalMap[cnt + j] < minId) + { + minId = m_localToGlobalMap[cnt + j]; + } } + } + bwidth = (bwidth > (maxId - minId)) ? 
bwidth : (maxId - minId); + cnt += locSize; + } - if(m_signChange) - { - Vmath::Scatr(m_numLocalCoeffs, m_localToGlobalSign.get(), local.get(), m_localToGlobalMap.get(), global.get()); - } - else - { - Vmath::Scatr(m_numLocalCoeffs, local.get(), m_localToGlobalMap.get(), global.get()); - } + m_fullSystemBandWidth = bwidth; +} - // ensure all values are unique by calling a max - if(useComm) - { - Gs::Gather(global, Gs::gs_max, m_gsh); - } - } +int AssemblyMapCG::v_GetLocalToGlobalMap(const int i) const +{ + return m_localToGlobalMap[i]; +} +int AssemblyMapCG::v_GetGlobalToUniversalMap(const int i) const +{ + return m_globalToUniversalMap[i]; +} - void AssemblyMapCG::v_LocalToGlobal( - const NekVector& loc, - NekVector< NekDouble>& global, - bool useComm) const - { - LocalToGlobal(loc.GetPtr(),global.GetPtr(),useComm); - } +int AssemblyMapCG::v_GetGlobalToUniversalMapUnique(const int i) const +{ + return m_globalToUniversalMapUnique[i]; +} - void AssemblyMapCG::v_GlobalToLocal( - const Array& global, - Array& loc) const - { - Array glo; - if(global.data() == loc.data()) - { - glo = Array(m_numGlobalCoeffs,global.data()); - } - else - { - glo = global; // create reference - } +const Array &AssemblyMapCG::v_GetLocalToGlobalMap(void) +{ + return m_localToGlobalMap; +} +const Array &AssemblyMapCG::v_GetGlobalToUniversalMap(void) +{ + return m_globalToUniversalMap; +} - if(m_signChange) - { - Vmath::Gathr(m_numLocalCoeffs, m_localToGlobalSign.get(), glo.get(), m_localToGlobalMap.get(), loc.get()); - } - else - { - Vmath::Gathr(m_numLocalCoeffs, glo.get(), m_localToGlobalMap.get(), loc.get()); - } - } +const Array &AssemblyMapCG::v_GetGlobalToUniversalMapUnique( + void) +{ + return m_globalToUniversalMapUnique; +} - void AssemblyMapCG::v_GlobalToLocal( - const NekVector& global, - NekVector< NekDouble>& loc) const - { - GlobalToLocal(global.GetPtr(),loc.GetPtr()); - } +NekDouble AssemblyMapCG::v_GetLocalToGlobalSign(const int i) const +{ + if (m_signChange) + { + return 
m_localToGlobalSign[i]; + } + else + { + return 1.0; + } +} - void AssemblyMapCG::v_Assemble( - const Array &loc, - Array &global) const - { - Array local; - if(global.data() == loc.data()) - { - local = Array(m_numLocalCoeffs,loc.data()); - } - else - { - local = loc; // create reference - } +const Array &AssemblyMapCG::v_GetLocalToGlobalSign() const +{ + return m_localToGlobalSign; +} - Vmath::Zero(m_numGlobalCoeffs, global.get(), 1); +void AssemblyMapCG::v_LocalToGlobal(const Array &loc, + Array &global, + bool useComm) const +{ + Array local; + if (global.data() == loc.data()) + { + local = Array(m_numLocalCoeffs, loc.data()); + } + else + { + local = loc; // create reference + } - if(m_signChange) - { - Vmath::Assmb(m_numLocalCoeffs, m_localToGlobalSign.get(), local.get(), m_localToGlobalMap.get(), global.get()); - } - else - { - Vmath::Assmb(m_numLocalCoeffs, local.get(), m_localToGlobalMap.get(), global.get()); - } - UniversalAssemble(global); - } + if (m_signChange) + { + Vmath::Scatr(m_numLocalCoeffs, m_localToGlobalSign.get(), local.get(), + m_localToGlobalMap.get(), global.get()); + } + else + { + Vmath::Scatr(m_numLocalCoeffs, local.get(), m_localToGlobalMap.get(), + global.get()); + } - void AssemblyMapCG::v_Assemble( - const NekVector& loc, - NekVector< NekDouble>& global) const - { - Assemble(loc.GetPtr(),global.GetPtr()); - } + // ensure all values are unique by calling a max + if (useComm) + { + Gs::Gather(global, Gs::gs_max, m_gsh); + } +} - void AssemblyMapCG::v_UniversalAssemble( - Array& pGlobal) const - { - Gs::Gather(pGlobal, Gs::gs_add, m_gsh); - } +void AssemblyMapCG::v_LocalToGlobal(const NekVector &loc, + NekVector &global, + bool useComm) const +{ + LocalToGlobal(loc.GetPtr(), global.GetPtr(), useComm); +} - void AssemblyMapCG::v_UniversalAssemble( - NekVector< NekDouble>& pGlobal) const - { - UniversalAssemble(pGlobal.GetPtr()); - } +void AssemblyMapCG::v_GlobalToLocal(const Array &global, + Array &loc) const +{ + Array glo; + if 
(global.data() == loc.data()) + { + glo = Array(m_numGlobalCoeffs, global.data()); + } + else + { + glo = global; // create reference + } - void AssemblyMapCG::v_UniversalAssemble( - Array& pGlobal, - int offset) const - { - Array tmp(offset); - Vmath::Vcopy(offset, pGlobal, 1, tmp, 1); - UniversalAssemble(pGlobal); - Vmath::Vcopy(offset, tmp, 1, pGlobal, 1); - } + if (m_signChange) + { + Vmath::Gathr(m_numLocalCoeffs, m_localToGlobalSign.get(), glo.get(), + m_localToGlobalMap.get(), loc.get()); + } + else + { + Vmath::Gathr(m_numLocalCoeffs, glo.get(), m_localToGlobalMap.get(), + loc.get()); + } +} - int AssemblyMapCG::v_GetFullSystemBandWidth() const - { - return m_fullSystemBandWidth; - } +void AssemblyMapCG::v_GlobalToLocal(const NekVector &global, + NekVector &loc) const +{ + GlobalToLocal(global.GetPtr(), loc.GetPtr()); +} - int AssemblyMapCG::v_GetNumNonDirVertexModes() const - { - return m_numNonDirVertexModes; - } +void AssemblyMapCG::v_Assemble(const Array &loc, + Array &global) const +{ + Array local; + if (global.data() == loc.data()) + { + local = Array(m_numLocalCoeffs, loc.data()); + } + else + { + local = loc; // create reference + } - int AssemblyMapCG::v_GetNumNonDirEdgeModes() const - { - return m_numNonDirEdgeModes; - } + Vmath::Zero(m_numGlobalCoeffs, global.get(), 1); - int AssemblyMapCG::v_GetNumNonDirFaceModes() const - { - return m_numNonDirFaceModes; - } + if (m_signChange) + { + Vmath::Assmb(m_numLocalCoeffs, m_localToGlobalSign.get(), local.get(), + m_localToGlobalMap.get(), global.get()); + } + else + { + Vmath::Assmb(m_numLocalCoeffs, local.get(), m_localToGlobalMap.get(), + global.get()); + } + UniversalAssemble(global); +} - int AssemblyMapCG::v_GetNumDirEdges() const - { - return m_numDirEdges; - } +void AssemblyMapCG::v_Assemble(const NekVector &loc, + NekVector &global) const +{ + Assemble(loc.GetPtr(), global.GetPtr()); +} - int AssemblyMapCG::v_GetNumDirFaces() const - { - return m_numDirFaces; - } +void 
AssemblyMapCG::v_UniversalAssemble(Array &pGlobal) const +{ + Gs::Gather(pGlobal, Gs::gs_add, m_gsh); +} - int AssemblyMapCG::v_GetNumNonDirEdges() const - { - return m_numNonDirEdges; - } +void AssemblyMapCG::v_UniversalAssemble(NekVector &pGlobal) const +{ + UniversalAssemble(pGlobal.GetPtr()); +} - int AssemblyMapCG::v_GetNumNonDirFaces() const - { - return m_numNonDirFaces; - } +void AssemblyMapCG::v_UniversalAssemble(Array &pGlobal, + int offset) const +{ + Array tmp(offset); + Vmath::Vcopy(offset, pGlobal, 1, tmp, 1); + UniversalAssemble(pGlobal); + Vmath::Vcopy(offset, tmp, 1, pGlobal, 1); +} - const Array& AssemblyMapCG::v_GetExtraDirEdges() - { - return m_extraDirEdges; - } - } // namespace -} // namespace +int AssemblyMapCG::v_GetFullSystemBandWidth() const +{ + return m_fullSystemBandWidth; +} + +int AssemblyMapCG::v_GetNumNonDirVertexModes() const +{ + return m_numNonDirVertexModes; +} + +int AssemblyMapCG::v_GetNumNonDirEdgeModes() const +{ + return m_numNonDirEdgeModes; +} + +int AssemblyMapCG::v_GetNumNonDirFaceModes() const +{ + return m_numNonDirFaceModes; +} + +int AssemblyMapCG::v_GetNumDirEdges() const +{ + return m_numDirEdges; +} + +int AssemblyMapCG::v_GetNumDirFaces() const +{ + return m_numDirFaces; +} + +int AssemblyMapCG::v_GetNumNonDirEdges() const +{ + return m_numNonDirEdges; +} + +int AssemblyMapCG::v_GetNumNonDirFaces() const +{ + return m_numNonDirFaces; +} + +const Array &AssemblyMapCG::v_GetExtraDirEdges() +{ + return m_extraDirEdges; +} +} // namespace MultiRegions +} // namespace Nektar diff --git a/library/MultiRegions/GlobalLinSys.cpp b/library/MultiRegions/GlobalLinSys.cpp index d640404a2..75257be73 100644 --- a/library/MultiRegions/GlobalLinSys.cpp +++ b/library/MultiRegions/GlobalLinSys.cpp @@ -34,423 +34,398 @@ #include +#include +#include +#include #include #include -#include -#include -#include -#include #include +#include namespace Nektar { - namespace MultiRegions - { - std::string GlobalLinSys::lookupIds[15] = { - 
LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "DirectFull", - MultiRegions::eDirectFullMatrix), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "DirectStaticCond", - MultiRegions::eDirectStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "DirectMultiLevelStaticCond", - MultiRegions::eDirectMultiLevelStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "IterativeFull", - MultiRegions::eIterativeFull), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "IterativeStaticCond", - MultiRegions::eIterativeStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "IterativeMultiLevelStaticCond", - MultiRegions::eIterativeMultiLevelStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "XxtFull", - MultiRegions::eXxtFullMatrix), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "XxtStaticCond", - MultiRegions::eXxtStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "XxtMultiLevelStaticCond", - MultiRegions::eXxtMultiLevelStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "PETScFull", - MultiRegions::ePETScFullMatrix), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "PETScStaticCond", - MultiRegions::ePETScStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "PETScMultiLevelStaticCond", - MultiRegions::ePETScMultiLevelStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "SaenaFull", - MultiRegions::eSaenaFullMatrix), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "SaenaStaticCond", - MultiRegions::eSaenaStaticCond), - LibUtilities::SessionReader::RegisterEnumValue( - "GlobalSysSoln", "SaenaMultiLevelStaticCond", - MultiRegions::eSaenaMultiLevelStaticCond) - }; +namespace MultiRegions +{ +std::string GlobalLinSys::lookupIds[15] = 
{ + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "DirectFull", MultiRegions::eDirectFullMatrix), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "DirectStaticCond", MultiRegions::eDirectStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "DirectMultiLevelStaticCond", + MultiRegions::eDirectMultiLevelStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "IterativeFull", MultiRegions::eIterativeFull), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "IterativeStaticCond", + MultiRegions::eIterativeStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "IterativeMultiLevelStaticCond", + MultiRegions::eIterativeMultiLevelStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "XxtFull", MultiRegions::eXxtFullMatrix), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "XxtStaticCond", MultiRegions::eXxtStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "XxtMultiLevelStaticCond", + MultiRegions::eXxtMultiLevelStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "PETScFull", MultiRegions::ePETScFullMatrix), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "PETScStaticCond", MultiRegions::ePETScStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "PETScMultiLevelStaticCond", + MultiRegions::ePETScMultiLevelStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "SaenaFull", MultiRegions::eSaenaFullMatrix), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "SaenaStaticCond", MultiRegions::eSaenaStaticCond), + LibUtilities::SessionReader::RegisterEnumValue( + "GlobalSysSoln", "SaenaMultiLevelStaticCond", + MultiRegions::eSaenaMultiLevelStaticCond)}; #ifdef NEKTAR_USE_SCOTCH - std::string GlobalLinSys::def = 
LibUtilities::SessionReader:: - RegisterDefaultSolverInfo("GlobalSysSoln", - "DirectMultiLevelStaticCond"); +std::string GlobalLinSys::def = + LibUtilities::SessionReader::RegisterDefaultSolverInfo( + "GlobalSysSoln", "DirectMultiLevelStaticCond"); #else - std::string GlobalLinSys::def = LibUtilities::SessionReader:: - RegisterDefaultSolverInfo("GlobalSysSoln", - "DirectStaticCond"); +std::string GlobalLinSys::def = + LibUtilities::SessionReader::RegisterDefaultSolverInfo("GlobalSysSoln", + "DirectStaticCond"); #endif - /** - * @class GlobalLinSys - * - * Consider the linear system - * \f$\boldsymbol{M\hat{u}}_g=\boldsymbol{\hat{f}}\f$. - * Distinguishing between the boundary and interior components of - * \f$\boldsymbol{\hat{u}}_g\f$ and \f$\boldsymbol{\hat{f}}\f$ using - * \f$\boldsymbol{\hat{u}}_b\f$,\f$\boldsymbol{\hat{u}}_i\f$ and - * \f$\boldsymbol{\hat{f}}_b\f$,\f$\boldsymbol{\hat{f}}_i\f$ - * respectively, this system can be split into its constituent parts as - * \f[\left[\begin{array}{cc} - * \boldsymbol{M}_b&\boldsymbol{M}_{c1}\\ - * \boldsymbol{M}_{c2}&\boldsymbol{M}_i\\ - * \end{array}\right] - * \left[\begin{array}{c} - * \boldsymbol{\hat{u}_b}\\ - * \boldsymbol{\hat{u}_i}\\ - * \end{array}\right]= - * \left[\begin{array}{c} - * \boldsymbol{\hat{f}_b}\\ - * \boldsymbol{\hat{f}_i}\\ - * \end{array}\right]\f] - * where \f$\boldsymbol{M}_b\f$ represents the components of - * \f$\boldsymbol{M}\f$ resulting from boundary-boundary mode - * interactions, - * \f$\boldsymbol{M}_{c1}\f$ and \f$\boldsymbol{M}_{c2}\f$ represent the - * components resulting from coupling between the boundary-interior - * modes, and \f$\boldsymbol{M}_i\f$ represents the components of - * \f$\boldsymbol{M}\f$ resulting from interior-interior mode - * interactions. 
- * - * The solution of the linear system can now be determined in two steps: - * \f{eqnarray*} - * \mathrm{step 1:}&\quad&(\boldsymbol{M}_b-\boldsymbol{M}_{c1} - * \boldsymbol{M}_i^{-1}\boldsymbol{M}_{c2}) \boldsymbol{\hat{u}_b} = - * \boldsymbol{\hat{f}}_b - \boldsymbol{M}_{c1}\boldsymbol{M}_i^{-1} - * \boldsymbol{\hat{f}}_i,\nonumber \\ - * \mathrm{step 2:}&\quad&\boldsymbol{\hat{u}_i}=\boldsymbol{M}_i^{-1} - * \left( \boldsymbol{\hat{f}}_i - * - \boldsymbol{M}_{c2}\boldsymbol{\hat{u}_b} - * \right). \nonumber \\ \f} - * As the inverse of \f$\boldsymbol{M}_i^{-1}\f$ is - * \f[ \boldsymbol{M}_i^{-1} = \left [\underline{\boldsymbol{M}^e_i} - * \right ]^{-1} = \underline{[\boldsymbol{M}^e_i]}^{-1} \f] - * and the following operations can be evaluated as, - * \f{eqnarray*} - * \boldsymbol{M}_{c1}\boldsymbol{M}_i^{-1}\boldsymbol{\hat{f}}_i & - * =& \boldsymbol{\mathcal{A}}_b^T \underline{\boldsymbol{M}^e_{c1}} - * \underline{[\boldsymbol{M}^e_i]}^{-1} \boldsymbol{\hat{f}}_i \\ - * \boldsymbol{M}_{c2} \boldsymbol{\hat{u}_b} &=& - * \underline{\boldsymbol{M}^e_{c2}} \boldsymbol{\mathcal{A}}_b - * \boldsymbol{\hat{u}_b}.\f} - * where \f$\boldsymbol{\mathcal{A}}_b \f$ is the permutation matrix - * which scatters from global to local degrees of freedom, only the - * following four matrices should be constructed: - * - \f$\underline{[\boldsymbol{M}^e_i]}^{-1}\f$ - * - \f$\underline{\boldsymbol{M}^e_{c1}} - * \underline{[\boldsymbol{M}^e_i]}^{-1}\f$ - * - \f$\underline{\boldsymbol{M}^e_{c2}}\f$ - * - The Schur complement: \f$\boldsymbol{M}_{\mathrm{Schur}}= - * \quad\boldsymbol{M}_b-\boldsymbol{M}_{c1}\boldsymbol{M}_i^{-1} - * \boldsymbol{M}_{c2}\f$ - * - * The first three matrices are just a concatenation of the - * corresponding local matrices and they can be created as such. They - * also allow for an elemental evaluation of the operations concerned. 
- * - * The global Schur complement however should be assembled from the - * concatenation of the local elemental Schur complements, that is, - * \f[ \boldsymbol{M}_{\mathrm{Schur}}=\boldsymbol{M}_b - * - \boldsymbol{M}_{c1} - * \boldsymbol{M}_i^{-1} \boldsymbol{M}_{c2} = - * \boldsymbol{\mathcal{A}}_b^T \left [\underline{\boldsymbol{M}^e_b - - * \boldsymbol{M}^e_{c1} [\boldsymbol{M}^e_i]^{-1} - * (\boldsymbol{M}^e_{c2})} \right ] \boldsymbol{\mathcal{A}}_b \f] - * and it is the only matrix operation that need to be evaluated on a - * global level when using static condensation. - * However, due to the size and sparsity of the matrix - * \f$\boldsymbol{\mathcal{A}}_b\f$, it is more efficient to assemble - * the global Schur matrix using the mapping array bmap\f$[e][i]\f$ - * contained in the input argument \a locToGloMap. The global Schur - * complement is then constructed as: - * \f[\boldsymbol{M}_{\mathrm{Schur}}\left[\mathrm{\a bmap}[e][i]\right] - * \left[\mathrm{\a bmap}[e][j]\right]=\mathrm{\a bsign}[e][i]\cdot - * \mathrm{\a bsign}[e][j] - * \cdot\boldsymbol{M}^e_{\mathrm{Schur}}[i][j]\f] - * All four matrices are stored in the \a GlobalLinSys returned by this - * function. - */ - - /** - * Given a block matrix, construct a global matrix system according to - * a local to global mapping. #m_linSys is constructed by - * AssembleFullMatrix(). - * @param pkey Associated linear system key. - * @param locToGloMap Local to global mapping. - */ - GlobalLinSys::GlobalLinSys(const GlobalLinSysKey &pKey, - const std::weak_ptr &pExpList, - const std::shared_ptr - &pLocToGloMap): - m_linSysKey(pKey), - m_expList(pExpList), - m_robinBCInfo(m_expList.lock()->GetRobinBCInfo()), - m_verbose(m_expList.lock()->GetSession()-> - DefinesCmdLineArgument("verbose")) - { - boost::ignore_unused(pLocToGloMap); - } +/** + * @class GlobalLinSys + * + * Consider the linear system + * \f$\boldsymbol{M\hat{u}}_g=\boldsymbol{\hat{f}}\f$. 
+ * Distinguishing between the boundary and interior components of + * \f$\boldsymbol{\hat{u}}_g\f$ and \f$\boldsymbol{\hat{f}}\f$ using + * \f$\boldsymbol{\hat{u}}_b\f$,\f$\boldsymbol{\hat{u}}_i\f$ and + * \f$\boldsymbol{\hat{f}}_b\f$,\f$\boldsymbol{\hat{f}}_i\f$ + * respectively, this system can be split into its constituent parts as + * \f[\left[\begin{array}{cc} + * \boldsymbol{M}_b&\boldsymbol{M}_{c1}\\ + * \boldsymbol{M}_{c2}&\boldsymbol{M}_i\\ + * \end{array}\right] + * \left[\begin{array}{c} + * \boldsymbol{\hat{u}_b}\\ + * \boldsymbol{\hat{u}_i}\\ + * \end{array}\right]= + * \left[\begin{array}{c} + * \boldsymbol{\hat{f}_b}\\ + * \boldsymbol{\hat{f}_i}\\ + * \end{array}\right]\f] + * where \f$\boldsymbol{M}_b\f$ represents the components of + * \f$\boldsymbol{M}\f$ resulting from boundary-boundary mode + * interactions, + * \f$\boldsymbol{M}_{c1}\f$ and \f$\boldsymbol{M}_{c2}\f$ represent the + * components resulting from coupling between the boundary-interior + * modes, and \f$\boldsymbol{M}_i\f$ represents the components of + * \f$\boldsymbol{M}\f$ resulting from interior-interior mode + * interactions. + * + * The solution of the linear system can now be determined in two steps: + * \f{eqnarray*} + * \mathrm{step 1:}&\quad&(\boldsymbol{M}_b-\boldsymbol{M}_{c1} + * \boldsymbol{M}_i^{-1}\boldsymbol{M}_{c2}) \boldsymbol{\hat{u}_b} = + * \boldsymbol{\hat{f}}_b - \boldsymbol{M}_{c1}\boldsymbol{M}_i^{-1} + * \boldsymbol{\hat{f}}_i,\nonumber \\ + * \mathrm{step 2:}&\quad&\boldsymbol{\hat{u}_i}=\boldsymbol{M}_i^{-1} + * \left( \boldsymbol{\hat{f}}_i + * - \boldsymbol{M}_{c2}\boldsymbol{\hat{u}_b} + * \right). 
\nonumber \\ \f} + * As the inverse of \f$\boldsymbol{M}_i^{-1}\f$ is + * \f[ \boldsymbol{M}_i^{-1} = \left [\underline{\boldsymbol{M}^e_i} + * \right ]^{-1} = \underline{[\boldsymbol{M}^e_i]}^{-1} \f] + * and the following operations can be evaluated as, + * \f{eqnarray*} + * \boldsymbol{M}_{c1}\boldsymbol{M}_i^{-1}\boldsymbol{\hat{f}}_i & + * =& \boldsymbol{\mathcal{A}}_b^T \underline{\boldsymbol{M}^e_{c1}} + * \underline{[\boldsymbol{M}^e_i]}^{-1} \boldsymbol{\hat{f}}_i \\ + * \boldsymbol{M}_{c2} \boldsymbol{\hat{u}_b} &=& + * \underline{\boldsymbol{M}^e_{c2}} \boldsymbol{\mathcal{A}}_b + * \boldsymbol{\hat{u}_b}.\f} + * where \f$\boldsymbol{\mathcal{A}}_b \f$ is the permutation matrix + * which scatters from global to local degrees of freedom, only the + * following four matrices should be constructed: + * - \f$\underline{[\boldsymbol{M}^e_i]}^{-1}\f$ + * - \f$\underline{\boldsymbol{M}^e_{c1}} + * \underline{[\boldsymbol{M}^e_i]}^{-1}\f$ + * - \f$\underline{\boldsymbol{M}^e_{c2}}\f$ + * - The Schur complement: \f$\boldsymbol{M}_{\mathrm{Schur}}= + * \quad\boldsymbol{M}_b-\boldsymbol{M}_{c1}\boldsymbol{M}_i^{-1} + * \boldsymbol{M}_{c2}\f$ + * + * The first three matrices are just a concatenation of the + * corresponding local matrices and they can be created as such. They + * also allow for an elemental evaluation of the operations concerned. + * + * The global Schur complement however should be assembled from the + * concatenation of the local elemental Schur complements, that is, + * \f[ \boldsymbol{M}_{\mathrm{Schur}}=\boldsymbol{M}_b + * - \boldsymbol{M}_{c1} + * \boldsymbol{M}_i^{-1} \boldsymbol{M}_{c2} = + * \boldsymbol{\mathcal{A}}_b^T \left [\underline{\boldsymbol{M}^e_b - + * \boldsymbol{M}^e_{c1} [\boldsymbol{M}^e_i]^{-1} + * (\boldsymbol{M}^e_{c2})} \right ] \boldsymbol{\mathcal{A}}_b \f] + * and it is the only matrix operation that need to be evaluated on a + * global level when using static condensation. 
+ * However, due to the size and sparsity of the matrix + * \f$\boldsymbol{\mathcal{A}}_b\f$, it is more efficient to assemble + * the global Schur matrix using the mapping array bmap\f$[e][i]\f$ + * contained in the input argument \a locToGloMap. The global Schur + * complement is then constructed as: + * \f[\boldsymbol{M}_{\mathrm{Schur}}\left[\mathrm{\a bmap}[e][i]\right] + * \left[\mathrm{\a bmap}[e][j]\right]=\mathrm{\a bsign}[e][i]\cdot + * \mathrm{\a bsign}[e][j] + * \cdot\boldsymbol{M}^e_{\mathrm{Schur}}[i][j]\f] + * All four matrices are stored in the \a GlobalLinSys returned by this + * function. + */ + +/** + * Given a block matrix, construct a global matrix system according to + * a local to global mapping. #m_linSys is constructed by + * AssembleFullMatrix(). + * @param pkey Associated linear system key. + * @param locToGloMap Local to global mapping. + */ +GlobalLinSys::GlobalLinSys(const GlobalLinSysKey &pKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + : m_linSysKey(pKey), m_expList(pExpList), + m_robinBCInfo(m_expList.lock()->GetRobinBCInfo()), + m_verbose( + m_expList.lock()->GetSession()->DefinesCmdLineArgument("verbose")) +{ + boost::ignore_unused(pLocToGloMap); +} - /** - * - */ - GlobalLinSysFactory& GetGlobalLinSysFactory() - { - static GlobalLinSysFactory instance; - return instance; - } +/** + * + */ +GlobalLinSysFactory &GetGlobalLinSysFactory() +{ + static GlobalLinSysFactory instance; + return instance; +} + +/** + * @brief Create a preconditioner object from the parameters defined in + * the supplied assembly map. + * + * @param asmMap Assembly map used to construct the global system. 
+ */ +PreconditionerSharedPtr GlobalLinSys::CreatePrecon(AssemblyMapSharedPtr asmMap) +{ + PreconditionerType pType = asmMap->GetPreconType(); + std::string PreconType = MultiRegions::PreconditionerTypeMap[pType]; + return GetPreconFactory().CreateInstance(PreconType, GetSharedThisPtr(), + asmMap); +} + +/** + * @brief Get the number of blocks in this system. + * + * At the top level this corresponds to the number of elements in the + * expansion list. + */ +int GlobalLinSys::v_GetNumBlocks() +{ + return m_expList.lock()->GetExpSize(); +} - /** - * @brief Create a preconditioner object from the parameters defined in - * the supplied assembly map. - * - * @param asmMap Assembly map used to construct the global system. - */ - PreconditionerSharedPtr GlobalLinSys::CreatePrecon(AssemblyMapSharedPtr - asmMap) - { - PreconditionerType pType = asmMap->GetPreconType(); - std::string PreconType = MultiRegions::PreconditionerTypeMap[pType]; - return GetPreconFactory().CreateInstance( - PreconType, GetSharedThisPtr(), asmMap); - } +/** + Assemble the matrix key for each block n +**/ + +LocalRegions::MatrixKey GlobalLinSys::GetBlockMatrixKey(unsigned int n) +{ + std::shared_ptr expList = m_expList.lock(); + int cnt = 0; - /** - * @brief Get the number of blocks in this system. - * - * At the top level this corresponds to the number of elements in the - * expansion list. 
- */ - int GlobalLinSys::v_GetNumBlocks() + LocalRegions::ExpansionSharedPtr vExp = expList->GetExp(n); + + // need to be initialised with zero size for non variable + // coefficient case + StdRegions::VarCoeffMap vVarCoeffMap; + + StdRegions::ConstFactorMap vConstFactorMap = m_linSysKey.GetConstFactors(); + + // setup variable factors + if (m_linSysKey.GetNVarFactors() > 0) + { + if (m_linSysKey.GetVarFactors().count( + StdRegions::eFactorSVVDiffCoeff) != 0) { - return m_expList.lock()->GetExpSize(); - } + vConstFactorMap[StdRegions::eFactorSVVDiffCoeff] = + m_linSysKey.GetVarFactors(StdRegions::eFactorSVVDiffCoeff)[n]; + ASSERTL1(m_linSysKey.GetConstFactors().count( + StdRegions::eFactorSVVCutoffRatio), + "VarCoeffSVVCuroffRatio is set but " + " not FactorSVVCutoffRatio"); - /** - Assemble the matrix key for each block n - **/ - - LocalRegions::MatrixKey GlobalLinSys::GetBlockMatrixKey(unsigned int n) - { - - std::shared_ptr expList = m_expList.lock(); - int cnt = 0; - - LocalRegions::ExpansionSharedPtr vExp = expList->GetExp( n ); - - // need to be initialised with zero size for non variable - // coefficient case - StdRegions::VarCoeffMap vVarCoeffMap; - - StdRegions::ConstFactorMap vConstFactorMap = - m_linSysKey.GetConstFactors(); - - // setup variable factors - if(m_linSysKey.GetNVarFactors() > 0) - { - if(m_linSysKey.GetVarFactors(). - count(StdRegions::eFactorSVVDiffCoeff) != 0) - { - vConstFactorMap[StdRegions::eFactorSVVDiffCoeff] = - m_linSysKey.GetVarFactors( - StdRegions::eFactorSVVDiffCoeff)[n]; - - ASSERTL1(m_linSysKey.GetConstFactors(). - count(StdRegions::eFactorSVVCutoffRatio), - "VarCoeffSVVCuroffRatio is set but " - " not FactorSVVCutoffRatio"); - - vConstFactorMap[StdRegions::eFactorSVVCutoffRatio] = - m_linSysKey.GetVarFactors( - StdRegions::eFactorSVVCutoffRatio)[n]; - - } - - if(m_linSysKey.GetVarFactors(). 
- count(StdRegions::eFactorSVVPowerKerDiffCoeff) != 0) - { - vConstFactorMap[StdRegions::eFactorSVVPowerKerDiffCoeff] = - m_linSysKey.GetVarFactors( - StdRegions::eFactorSVVPowerKerDiffCoeff)[n]; - } - - if(m_linSysKey.GetVarFactors(). - count(StdRegions::eFactorSVVDGKerDiffCoeff) != 0) - { - vConstFactorMap[StdRegions::eFactorSVVDGKerDiffCoeff] = - m_linSysKey.GetVarFactors( - StdRegions::eFactorSVVDGKerDiffCoeff)[n]; - } - } - - // retrieve variable coefficients - if(m_linSysKey.GetNVarCoeffs() > 0) - { - cnt = expList->GetPhys_Offset(n); - - for (auto &x : m_linSysKey.GetVarCoeffs()) - { - vVarCoeffMap[x.first] = x.second + cnt; - } - } - - - LocalRegions::MatrixKey matkey(m_linSysKey.GetMatrixType(), - vExp->DetShapeType(), - *vExp, - vConstFactorMap, - vVarCoeffMap); - return matkey; + vConstFactorMap[StdRegions::eFactorSVVCutoffRatio] = + m_linSysKey.GetVarFactors(StdRegions::eFactorSVVCutoffRatio)[n]; } - /** - * @brief Retrieves the block matrix from n-th expansion using the - * matrix key provided by the #m_linSysKey. - * - * @param n Number of the expansion. - * @return Block matrix for the specified expansion. - */ - DNekScalMatSharedPtr GlobalLinSys::v_GetBlock(unsigned int n) + if (m_linSysKey.GetVarFactors().count( + StdRegions::eFactorSVVPowerKerDiffCoeff) != 0) { - LocalRegions::ExpansionSharedPtr vExp = m_expList.lock()->GetExp( n ); - DNekScalMatSharedPtr loc_mat; - loc_mat = vExp->GetLocMatrix(GetBlockMatrixKey(n)); - - // apply robin boundary conditions to the matrix. - if(m_robinBCInfo.count(n) != 0) // add robin mass matrix - { - RobinBCInfoSharedPtr rBC; - - // declare local matrix from scaled matrix. 
- int rows = loc_mat->GetRows(); - int cols = loc_mat->GetColumns(); - const NekDouble *dat = loc_mat->GetRawPtr(); - DNekMatSharedPtr new_mat = MemoryManager:: - AllocateSharedPtr(rows,cols,dat); - Blas::Dscal(rows*cols,loc_mat->Scale(),new_mat->GetRawPtr(),1); - - // add local matrix contribution - for(rBC = m_robinBCInfo.find(n)->second;rBC; rBC = rBC->next) - { - vExp->AddRobinMassMatrix( - rBC->m_robinID, rBC->m_robinPrimitiveCoeffs, new_mat); - } - - // redeclare loc_mat to point to new_mat plus the scalar. - loc_mat = MemoryManager::AllocateSharedPtr( - 1.0, new_mat); - } - - // finally return the matrix. - return loc_mat; + vConstFactorMap[StdRegions::eFactorSVVPowerKerDiffCoeff] = + m_linSysKey.GetVarFactors( + StdRegions::eFactorSVVPowerKerDiffCoeff)[n]; } - /** - * @brief Retrieves a the static condensation block matrices from n-th - * expansion using the matrix key provided by the #m_linSysKey. - * - * @param n Number of the expansion - * @return 2x2 Block matrix holding the static condensation - * matrices for the n-th expansion. - */ - DNekScalBlkMatSharedPtr GlobalLinSys::v_GetStaticCondBlock( - unsigned int n) + if (m_linSysKey.GetVarFactors().count( + StdRegions::eFactorSVVDGKerDiffCoeff) != 0) { - - LocalRegions::ExpansionSharedPtr vExp = m_expList.lock()->GetExp( n ); - DNekScalBlkMatSharedPtr loc_mat; - loc_mat = vExp->GetLocStaticCondMatrix(GetBlockMatrixKey(n)); - - if(m_robinBCInfo.count(n) != 0) // add robin mass matrix - { - DNekScalMatSharedPtr tmp_mat; - RobinBCInfoSharedPtr rBC; - - tmp_mat = loc_mat->GetBlock(0,0); - - // declare local matrix from scaled matrix. 
- int rows = tmp_mat->GetRows(); - int cols = tmp_mat->GetColumns(); - const NekDouble *dat = tmp_mat->GetRawPtr(); - DNekMatSharedPtr new_mat = MemoryManager:: - AllocateSharedPtr(rows, cols, dat); - Blas::Dscal(rows*cols,tmp_mat->Scale(),new_mat->GetRawPtr(),1); - - // add local matrix contribution - for(rBC = m_robinBCInfo.find(n)->second;rBC; rBC = rBC->next) - { - vExp->AddRobinMassMatrix( - rBC->m_robinID, rBC->m_robinPrimitiveCoeffs, new_mat); - } - - // redeclare loc_mat to point to new_mat plus the scalar. - tmp_mat = MemoryManager::AllocateSharedPtr( - 1.0, new_mat); - DNekScalBlkMatSharedPtr new_loc_mat; - unsigned int exp_size[] = {tmp_mat->GetRows(), loc_mat->GetBlock(1,1)->GetRows()}; - unsigned int nblks = 2; - new_loc_mat = MemoryManager::AllocateSharedPtr(nblks, nblks, exp_size, exp_size); - - - new_loc_mat->SetBlock(0,0,tmp_mat); - new_loc_mat->SetBlock(0,1,loc_mat->GetBlock(0,1)); - new_loc_mat->SetBlock(1,0,loc_mat->GetBlock(1,0)); - new_loc_mat->SetBlock(1,1,loc_mat->GetBlock(1,1)); - loc_mat = new_loc_mat; - } - - return loc_mat; + vConstFactorMap[StdRegions::eFactorSVVDGKerDiffCoeff] = + m_linSysKey.GetVarFactors( + StdRegions::eFactorSVVDGKerDiffCoeff)[n]; } + } - /** - * @brief Releases the static condensation block matrices from NekManager - * of n-th expansion using the matrix key provided by the #m_linSysKey. 
- * - * @param n Number of the expansion - */ - void GlobalLinSys::v_DropStaticCondBlock(unsigned int n) + // retrieve variable coefficients + if (m_linSysKey.GetNVarCoeffs() > 0) + { + cnt = expList->GetPhys_Offset(n); + + for (auto &x : m_linSysKey.GetVarCoeffs()) { - LocalRegions::ExpansionSharedPtr vExp = m_expList.lock()->GetExp( n ); - vExp->DropLocStaticCondMatrix(GetBlockMatrixKey(n)); + vVarCoeffMap[x.first] = x.second + cnt; } + } + + LocalRegions::MatrixKey matkey(m_linSysKey.GetMatrixType(), + vExp->DetShapeType(), *vExp, vConstFactorMap, + vVarCoeffMap); + return matkey; +} + +/** + * @brief Retrieves the block matrix from n-th expansion using the + * matrix key provided by the #m_linSysKey. + * + * @param n Number of the expansion. + * @return Block matrix for the specified expansion. + */ +DNekScalMatSharedPtr GlobalLinSys::v_GetBlock(unsigned int n) +{ + LocalRegions::ExpansionSharedPtr vExp = m_expList.lock()->GetExp(n); + DNekScalMatSharedPtr loc_mat; + loc_mat = vExp->GetLocMatrix(GetBlockMatrixKey(n)); - void GlobalLinSys::v_InitObject() + // apply robin boundary conditions to the matrix. + if (m_robinBCInfo.count(n) != 0) // add robin mass matrix + { + RobinBCInfoSharedPtr rBC; + + // declare local matrix from scaled matrix. + int rows = loc_mat->GetRows(); + int cols = loc_mat->GetColumns(); + const NekDouble *dat = loc_mat->GetRawPtr(); + DNekMatSharedPtr new_mat = + MemoryManager::AllocateSharedPtr(rows, cols, dat); + Blas::Dscal(rows * cols, loc_mat->Scale(), new_mat->GetRawPtr(), 1); + + // add local matrix contribution + for (rBC = m_robinBCInfo.find(n)->second; rBC; rBC = rBC->next) { - NEKERROR(ErrorUtil::efatal, "Method does not exist" ); + vExp->AddRobinMassMatrix(rBC->m_robinID, + rBC->m_robinPrimitiveCoeffs, new_mat); } - void GlobalLinSys::v_Initialise( - const std::shared_ptr& pLocToGloMap) + // redeclare loc_mat to point to new_mat plus the scalar. 
+ loc_mat = MemoryManager::AllocateSharedPtr(1.0, new_mat); + } + + // finally return the matrix. + return loc_mat; +} + +/** + * @brief Retrieves a the static condensation block matrices from n-th + * expansion using the matrix key provided by the #m_linSysKey. + * + * @param n Number of the expansion + * @return 2x2 Block matrix holding the static condensation + * matrices for the n-th expansion. + */ +DNekScalBlkMatSharedPtr GlobalLinSys::v_GetStaticCondBlock(unsigned int n) +{ + + LocalRegions::ExpansionSharedPtr vExp = m_expList.lock()->GetExp(n); + DNekScalBlkMatSharedPtr loc_mat; + loc_mat = vExp->GetLocStaticCondMatrix(GetBlockMatrixKey(n)); + + if (m_robinBCInfo.count(n) != 0) // add robin mass matrix + { + DNekScalMatSharedPtr tmp_mat; + RobinBCInfoSharedPtr rBC; + + tmp_mat = loc_mat->GetBlock(0, 0); + + // declare local matrix from scaled matrix. + int rows = tmp_mat->GetRows(); + int cols = tmp_mat->GetColumns(); + const NekDouble *dat = tmp_mat->GetRawPtr(); + DNekMatSharedPtr new_mat = + MemoryManager::AllocateSharedPtr(rows, cols, dat); + Blas::Dscal(rows * cols, tmp_mat->Scale(), new_mat->GetRawPtr(), 1); + + // add local matrix contribution + for (rBC = m_robinBCInfo.find(n)->second; rBC; rBC = rBC->next) { - boost::ignore_unused(pLocToGloMap); - NEKERROR(ErrorUtil::efatal, "Method does not exist" ); + vExp->AddRobinMassMatrix(rBC->m_robinID, + rBC->m_robinPrimitiveCoeffs, new_mat); } - } //end of namespace -} //end of namespace + // redeclare loc_mat to point to new_mat plus the scalar. 
+ tmp_mat = MemoryManager::AllocateSharedPtr(1.0, new_mat); + DNekScalBlkMatSharedPtr new_loc_mat; + unsigned int exp_size[] = {tmp_mat->GetRows(), + loc_mat->GetBlock(1, 1)->GetRows()}; + unsigned int nblks = 2; + new_loc_mat = MemoryManager::AllocateSharedPtr( + nblks, nblks, exp_size, exp_size); + + new_loc_mat->SetBlock(0, 0, tmp_mat); + new_loc_mat->SetBlock(0, 1, loc_mat->GetBlock(0, 1)); + new_loc_mat->SetBlock(1, 0, loc_mat->GetBlock(1, 0)); + new_loc_mat->SetBlock(1, 1, loc_mat->GetBlock(1, 1)); + loc_mat = new_loc_mat; + } + + return loc_mat; +} + +/** + * @brief Releases the static condensation block matrices from NekManager + * of n-th expansion using the matrix key provided by the #m_linSysKey. + * + * @param n Number of the expansion + */ +void GlobalLinSys::v_DropStaticCondBlock(unsigned int n) +{ + LocalRegions::ExpansionSharedPtr vExp = m_expList.lock()->GetExp(n); + vExp->DropLocStaticCondMatrix(GetBlockMatrixKey(n)); +} + +void GlobalLinSys::v_InitObject() +{ + NEKERROR(ErrorUtil::efatal, "Method does not exist"); +} + +void GlobalLinSys::v_Initialise( + const std::shared_ptr &pLocToGloMap) +{ + boost::ignore_unused(pLocToGloMap); + NEKERROR(ErrorUtil::efatal, "Method does not exist"); +} +} // namespace MultiRegions +} // namespace Nektar diff --git a/library/MultiRegions/GlobalLinSysSaena.cpp b/library/MultiRegions/GlobalLinSysSaena.cpp index 3a30b1949..47f61ec26 100644 --- a/library/MultiRegions/GlobalLinSysSaena.cpp +++ b/library/MultiRegions/GlobalLinSysSaena.cpp @@ -33,239 +33,236 @@ // /////////////////////////////////////////////////////////////////////////////// +#include #include #include -#include using namespace std; namespace Nektar { - namespace MultiRegions +namespace MultiRegions +{ +/** + * @class GlobalLinSysSaena + * + * Solves a linear system using Saena. 
+ */ +GlobalLinSysSaena::GlobalLinSysSaena( + const GlobalLinSysKey &pKey, const std::weak_ptr &pExp, + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder) + : GlobalLinSys(pKey, pExp, pLocToGloMap) +{ + if (pPolynomialOrder) { - /** - * @class GlobalLinSysSaena - * - * Solves a linear system using Saena. - */ - GlobalLinSysSaena::GlobalLinSysSaena( - const GlobalLinSysKey &pKey, - const std::weak_ptr &pExp, - const std::shared_ptr &pLocToGloMap, - const unsigned int pPolynomialOrder) - : GlobalLinSys(pKey, pExp, pLocToGloMap) - { - if (pPolynomialOrder) { - // setup and use supplied - SetPolyOrder(pPolynomialOrder); - } - } + // setup and use supplied + SetPolyOrder(pPolynomialOrder); + } +} - /** - * @brief Clean up Saena objects. - * - * Note that if SessionReader::Finalize is called before the end of the - * program, Saena may have been finalized already, at which point we - * cannot deallocate our objects. If that's the case we do nothing and - * let the kernel clear up after us. - */ - GlobalLinSysSaena::~GlobalLinSysSaena() - { - } +/** + * @brief Clean up Saena objects. + * + * Note that if SessionReader::Finalize is called before the end of the + * program, Saena may have been finalized already, at which point we + * cannot deallocate our objects. If that's the case we do nothing and + * let the kernel clear up after us. + */ +GlobalLinSysSaena::~GlobalLinSysSaena() +{ +} - /** - * @brief Solve linear system using Saena. - * - * The general strategy being a Saena solve is to: - * - * - Copy values into the Saena vector #m_b - * - Solve the system #m_ksp and place result into #m_x. - * - Scatter results back into #m_locVec using #m_ctx scatter object. - * - Copy from #m_locVec to output array #pOutput. 
- */ - void GlobalLinSysSaena::v_SolveLinearSystem( - const int pNumRows, - const Array &pInput, - Array &pOutput, - const AssemblyMapSharedPtr &locToGloMap, - const int pNumDir) - { - boost::ignore_unused(locToGloMap); +/** + * @brief Solve linear system using Saena. + * + * The general strategy being a Saena solve is to: + * + * - Copy values into the Saena vector #m_b + * - Solve the system #m_ksp and place result into #m_x. + * - Scatter results back into #m_locVec using #m_ctx scatter object. + * - Copy from #m_locVec to output array #pOutput. + */ +void GlobalLinSysSaena::v_SolveLinearSystem( + const int pNumRows, const Array &pInput, + Array &pOutput, const AssemblyMapSharedPtr &locToGloMap, + const int pNumDir) +{ + boost::ignore_unused(locToGloMap); - // @TODO: shouldn't need to but we require a new RHS vector every - // time this is called. - saena::vector m_rhs; - m_rhs.set_comm(m_comm); + // @TODO: shouldn't need to but we require a new RHS vector every + // time this is called. + saena::vector m_rhs; + m_rhs.set_comm(m_comm); - const int nHomDofs = pNumRows - pNumDir; + const int nHomDofs = pNumRows - pNumDir; - m_rhs.set(&m_reorderedMap[0], &pInput[pNumDir], nHomDofs); - m_rhs.assemble(); - m_amg.set_rhs(m_rhs); + m_rhs.set(&m_reorderedMap[0], &pInput[pNumDir], nHomDofs); + m_rhs.assemble(); + m_amg.set_rhs(m_rhs); - // Temporary solution storage? - NekDouble *sol = nullptr; + // Temporary solution storage? + NekDouble *sol = nullptr; - // Solve with pCG method - m_amg.solve_pCG(sol, &m_opts); + // Solve with pCG method + m_amg.solve_pCG(sol, &m_opts); - Vmath::Vcopy(nHomDofs, sol, 1, &pOutput[pNumDir], 1); + Vmath::Vcopy(nHomDofs, sol, 1, &pOutput[pNumDir], 1); - if(sol != nullptr) - { - free(sol); - sol = nullptr; - } - } + if (sol != nullptr) + { + free(sol); + sol = nullptr; + } +} - /** - * @brief Calculate a reordering of universal IDs for Saena. 
- * - * Saena requires a unique, contiguous index of all global and universal - * degrees of freedom which represents its position inside the - * matrix. Presently Gs does not guarantee this, so this routine - * constructs a new universal mapping. - * - * @param glo2uniMap Global to universal map - * @param glo2unique Global to unique map - * @param pLocToGloMap Assembly map for this system - */ - void GlobalLinSysSaena::CalculateReordering( - const Array &glo2uniMap, - const Array &glo2unique, - const AssemblyMapSharedPtr &pLocToGloMap) - { - LibUtilities::CommSharedPtr vComm - = m_expList.lock()->GetSession()->GetComm(); - - const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); - const int nHomDofs = glo2uniMap.size() - nDirDofs; - const int nProc = vComm->GetSize(); - const int rank = vComm->GetRank(); - - int n, cnt; - - // Count number of unique degrees of freedom on each process. - m_nLocal = Vmath::Vsum(nHomDofs, glo2unique + nDirDofs, 1); - m_reorderedMap.resize(nHomDofs); - - // Reduce coefficient counts across all processors. - Array localCounts(nProc, 0), localOffset(nProc, 0); - localCounts[rank] = nHomDofs; - vComm->AllReduce(localCounts, LibUtilities::ReduceSum); - - for (n = 1; n < nProc; ++n) - { - localOffset[n] = localOffset[n-1] + localCounts[n-1]; - } - - int totHomDofs = Vmath::Vsum(nProc, localCounts, 1); - vector allUniIds(totHomDofs, 0); - - // Assemble list of universal IDs - for (n = 0; n < nHomDofs; ++n) - { - int gid = n + nDirDofs; - allUniIds[n + localOffset[rank]] = glo2uniMap[gid]; - } - - // Reduce this across processors so that each process has a list of - // all universal IDs. - vComm->AllReduce(allUniIds, LibUtilities::ReduceSum); - std::sort(allUniIds.begin(), allUniIds.end()); - map uniIdReorder; - - // Renumber starting from 0. - for (cnt = n = 0; n < allUniIds.size(); ++n) - { - if (uniIdReorder.count(allUniIds[n]) > 0) - { - continue; - } - - uniIdReorder[allUniIds[n]] = cnt++; - } - - // Populate reordering map. 
- for (n = 0; n < nHomDofs; ++n) - { - int gid = n + nDirDofs; - int uniId = glo2uniMap[gid]; - ASSERTL0(uniIdReorder.count(uniId) > 0, "Error in ordering"); - m_reorderedMap[n] = uniIdReorder[uniId]; - } - - m_bdydof = nDirDofs; - } +/** + * @brief Calculate a reordering of universal IDs for Saena. + * + * Saena requires a unique, contiguous index of all global and universal + * degrees of freedom which represents its position inside the + * matrix. Presently Gs does not guarantee this, so this routine + * constructs a new universal mapping. + * + * @param glo2uniMap Global to universal map + * @param glo2unique Global to unique map + * @param pLocToGloMap Assembly map for this system + */ +void GlobalLinSysSaena::CalculateReordering( + const Array &glo2uniMap, + const Array &glo2unique, + const AssemblyMapSharedPtr &pLocToGloMap) +{ + LibUtilities::CommSharedPtr vComm = + m_expList.lock()->GetSession()->GetComm(); - /** - * @brief Construct Saena matrix and vector handles. - * - * @todo Preallocation should be done at this point, since presently - * matrix allocation takes a significant amount of time. - * - * @param nGlobal Number of global degrees of freedom in the system (on - * this processor) - * @param nDir Number of Dirichlet degrees of freedom (on this - * processor). - */ - void GlobalLinSysSaena::SetUpMatVec() - { - LibUtilities::CommSharedPtr comm = - m_expList.lock()->GetSession()->GetComm(); - auto mpiComm = std::dynamic_pointer_cast< - LibUtilities::CommMpi>(comm); - - m_comm = mpiComm->GetComm(); - m_matrix.set_comm(m_comm); - m_matrix.add_duplicates(true); - m_rhs.set_comm(m_comm); - } + const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + const int nHomDofs = glo2uniMap.size() - nDirDofs; + const int nProc = vComm->GetSize(); + const int rank = vComm->GetRank(); - /** - * @brief Set up KSP solver object. - * - * This is reasonably generic setup -- most solver types can be changed - * using the ? file. 
- * - * @param tolerance Residual tolerance to converge to. - */ - void GlobalLinSysSaena::SetUpSolver(NekDouble tolerance) - { - m_scale = false; - m_opts.set_relative_tolerance(tolerance); - // m_opts.set_dynamic_levels(false); - // m_opts.set_max_lev(5); - // m_opts.set_vcycle_num(400); - // m_opts.set_smoother("chebyshev"); // chebyshev, jacobi - // m_opts.set_preSmooth(3); - // m_opts.set_postSmooth(3); - } + int n, cnt; - void GlobalLinSysSaena::SetUpMultigrid() + // Count number of unique degrees of freedom on each process. + m_nLocal = Vmath::Vsum(nHomDofs, glo2unique + nDirDofs, 1); + m_reorderedMap.resize(nHomDofs); + + // Reduce coefficient counts across all processors. + Array localCounts(nProc, 0), localOffset(nProc, 0); + localCounts[rank] = nHomDofs; + vComm->AllReduce(localCounts, LibUtilities::ReduceSum); + + for (n = 1; n < nProc; ++n) { - int nummodes = m_expList.lock()->GetFieldDefinitions()[0]->m_numModes[0]; - int p_order = m_polyOrder == 0 ? nummodes - 1 : m_polyOrder; - int prodim = m_expList.lock()->GetCoordim(0); + localOffset[n] = localOffset[n - 1] + localCounts[n - 1]; + } + + int totHomDofs = Vmath::Vsum(nProc, localCounts, 1); + vector allUniIds(totHomDofs, 0); + + // Assemble list of universal IDs + for (n = 0; n < nHomDofs; ++n) + { + int gid = n + nDirDofs; + allUniIds[n + localOffset[rank]] = glo2uniMap[gid]; + } - m_matrix.set_p_order(p_order); - m_matrix.set_prodim(prodim); + // Reduce this across processors so that each process has a list of + // all universal IDs. + vComm->AllReduce(allUniIds, LibUtilities::ReduceSum); + std::sort(allUniIds.begin(), allUniIds.end()); + map uniIdReorder; - // set p_coarsen levels computation. subtract by a constant. - vector order_dif; - for(int i = 0; i < p_order - 1; ++i) + // Renumber starting from 0. 
+ for (cnt = n = 0; n < allUniIds.size(); ++n) + { + if (uniIdReorder.count(allUniIds[n]) > 0) { - order_dif.emplace_back(1); + continue; } - // set number of multigrid levels - int max_h_level = 1; // h-multigrid levels - m_amg.set_multigrid_max_level( - static_cast(order_dif.size()) + max_h_level); + uniIdReorder[allUniIds[n]] = cnt++; + } - m_amg.set_scale(m_scale); - m_amg.set_matrix( - &m_matrix, &m_opts, m_l2g, m_reorderedMap, m_bdydof, order_dif); + // Populate reordering map. + for (n = 0; n < nHomDofs; ++n) + { + int gid = n + nDirDofs; + int uniId = glo2uniMap[gid]; + ASSERTL0(uniIdReorder.count(uniId) > 0, "Error in ordering"); + m_reorderedMap[n] = uniIdReorder[uniId]; } + + m_bdydof = nDirDofs; +} + +/** + * @brief Construct Saena matrix and vector handles. + * + * @todo Preallocation should be done at this point, since presently + * matrix allocation takes a significant amount of time. + * + * @param nGlobal Number of global degrees of freedom in the system (on + * this processor) + * @param nDir Number of Dirichlet degrees of freedom (on this + * processor). + */ +void GlobalLinSysSaena::SetUpMatVec() +{ + LibUtilities::CommSharedPtr comm = + m_expList.lock()->GetSession()->GetComm(); + auto mpiComm = std::dynamic_pointer_cast(comm); + + m_comm = mpiComm->GetComm(); + m_matrix.set_comm(m_comm); + m_matrix.add_duplicates(true); + m_rhs.set_comm(m_comm); +} + +/** + * @brief Set up KSP solver object. + * + * This is reasonably generic setup -- most solver types can be changed + * using the ? file. + * + * @param tolerance Residual tolerance to converge to. 
+ */ +void GlobalLinSysSaena::SetUpSolver(NekDouble tolerance) +{ + m_scale = false; + m_opts.set_relative_tolerance(tolerance); + // m_opts.set_dynamic_levels(false); + // m_opts.set_max_lev(5); + // m_opts.set_vcycle_num(400); + // m_opts.set_smoother("chebyshev"); // chebyshev, jacobi + // m_opts.set_preSmooth(3); + // m_opts.set_postSmooth(3); +} + +void GlobalLinSysSaena::SetUpMultigrid() +{ + int nummodes = m_expList.lock()->GetFieldDefinitions()[0]->m_numModes[0]; + int p_order = m_polyOrder == 0 ? nummodes - 1 : m_polyOrder; + int prodim = m_expList.lock()->GetCoordim(0); + + m_matrix.set_p_order(p_order); + m_matrix.set_prodim(prodim); + + // set p_coarsen levels computation. subtract by a constant. + vector order_dif; + for (int i = 0; i < p_order - 1; ++i) + { + order_dif.emplace_back(1); } + + // set number of multigrid levels + int max_h_level = 1; // h-multigrid levels + m_amg.set_multigrid_max_level(static_cast(order_dif.size()) + + max_h_level); + + m_amg.set_scale(m_scale); + m_amg.set_matrix(&m_matrix, &m_opts, m_l2g, m_reorderedMap, m_bdydof, + order_dif); } +} // namespace MultiRegions +} // namespace Nektar diff --git a/library/MultiRegions/GlobalLinSysSaena.h b/library/MultiRegions/GlobalLinSysSaena.h index d9b334d2a..15e696fdb 100644 --- a/library/MultiRegions/GlobalLinSysSaena.h +++ b/library/MultiRegions/GlobalLinSysSaena.h @@ -35,8 +35,8 @@ #ifndef NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENA_H #define NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENA_H -#include #include +#include #include #include @@ -54,19 +54,17 @@ class GlobalLinSysSaena : virtual public GlobalLinSys public: /// Constructor for full direct matrix solve. 
MULTI_REGIONS_EXPORT GlobalLinSysSaena( - const GlobalLinSysKey &pKey, - const std::weak_ptr &pExp, - const std::shared_ptr &pLocToGloMap, - const unsigned int pPolynomialOrder=0); + const GlobalLinSysKey &pKey, const std::weak_ptr &pExp, + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder = 0); MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaena(); - virtual void v_SolveLinearSystem( - const int pNumRows, - const Array &pInput, - Array &pOutput, - const AssemblyMapSharedPtr &locToGloMap, - const int pNumDir); + virtual void v_SolveLinearSystem(const int pNumRows, + const Array &pInput, + Array &pOutput, + const AssemblyMapSharedPtr &locToGloMap, + const int pNumDir); void SetPolyOrder(int p) { @@ -75,39 +73,38 @@ public: protected: /// Saena matrix object. - saena::matrix m_matrix; + saena::matrix m_matrix; /// Saena vector to store rhs - saena::vector m_rhs; + saena::vector m_rhs; /// Saena object for options - saena::options m_opts; + saena::options m_opts; /// Saena object that represents solver system. - saena::amg m_amg; + saena::amg m_amg; /// Reordering that takes universal IDs to a unique row in the Saena /// matrix. @see GlobalLinSysSaena::CalculateReordering - std::vector m_reorderedMap; + std::vector m_reorderedMap; /// MPI communicator - MPI_Comm m_comm; + MPI_Comm m_comm; /// Number of unique degrees of freedom on this process. 
- int m_nLocal; + int m_nLocal; /// Number of boundary degrees of freedom - int m_bdydof; + int m_bdydof; /// Mesh information std::vector> m_l2g; /// flag to set the linear system to be scaled bool m_scale; - int m_polyOrder = 0; + int m_polyOrder = 0; PreconditionerSharedPtr m_precon; void SetUpMatVec(); void SetUpSolver(NekDouble tolerance); void SetUpMultigrid(); - void CalculateReordering( - const Array &glo2uniMap, - const Array &glo2unique, - const AssemblyMapSharedPtr &pLocToGloMap); + void CalculateReordering(const Array &glo2uniMap, + const Array &glo2unique, + const AssemblyMapSharedPtr &pLocToGloMap); }; -} -} +} // namespace MultiRegions +} // namespace Nektar #endif diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.cpp b/library/MultiRegions/GlobalLinSysSaenaFull.cpp index 1362d7445..1cb8944a3 100644 --- a/library/MultiRegions/GlobalLinSysSaenaFull.cpp +++ b/library/MultiRegions/GlobalLinSysSaenaFull.cpp @@ -33,231 +33,231 @@ // /////////////////////////////////////////////////////////////////////////////// -#include #include +#include using namespace std; namespace Nektar { - namespace MultiRegions +namespace MultiRegions +{ +/** + * @class GlobalLinSysSaenaFull + */ + +/** + * Registers the class with the Factory. + */ +string GlobalLinSysSaenaFull::className = + GetGlobalLinSysFactory().RegisterCreatorFunction( + "SaenaFull", GlobalLinSysSaenaFull::create, "Saena Full Matrix."); + +/// Constructor for full direct matrix solve. 
+GlobalLinSysSaenaFull::GlobalLinSysSaenaFull( + const GlobalLinSysKey &pLinSysKey, const std::weak_ptr &pExp, + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder) + : GlobalLinSys(pLinSysKey, pExp, pLocToGloMap), + GlobalLinSysSaena( + pLinSysKey, pExp, pLocToGloMap, + pPolynomialOrder) // @hari - Change this constructor instead +{ + // SET UP VECTORS AND MATRIX + SetUpMatVec(); + + int rank = 0, nprocs = 0; + MPI_Comm_size(m_comm, &nprocs); + MPI_Comm_rank(m_comm, &rank); + + auto tbegin = clock(); + + const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + + int i, j, n, cnt, gid1, gid2, loc_lda; + NekDouble sign1, sign2, value; + DNekScalMatSharedPtr loc_mat; + + // CALCULATE REORDERING MAPPING + CalculateReordering(pLocToGloMap->GetGlobalToUniversalMap(), + pLocToGloMap->GetGlobalToUniversalMapUnique(), + pLocToGloMap); + + // STORE MESH INFO TO BE PASSED TO SAENA + // int total_elm = this->GetExp()->size(); + auto ExpTmp = m_expList.lock()->GetExp(); + int total_elm = ExpTmp->size(); + // std::cout << total_elm << "\n"; + + int counter = 0; + vector dof_elems; + for (i = 0; i < total_elm; ++i) { - /** - * @class GlobalLinSysSaenaFull - */ - - /** - * Registers the class with the Factory. - */ - string GlobalLinSysSaenaFull::className - = GetGlobalLinSysFactory().RegisterCreatorFunction( - "SaenaFull", - GlobalLinSysSaenaFull::create, - "Saena Full Matrix."); - - - /// Constructor for full direct matrix solve. 
- GlobalLinSysSaenaFull::GlobalLinSysSaenaFull( - const GlobalLinSysKey &pLinSysKey, - const std::weak_ptr &pExp, - const std::shared_ptr &pLocToGloMap, - const unsigned int pPolynomialOrder) - : GlobalLinSys (pLinSysKey, pExp, pLocToGloMap), - GlobalLinSysSaena(pLinSysKey, pExp, pLocToGloMap, pPolynomialOrder) // @hari - Change this constructor instead + // std::cout << ExpTmp->at(i)->GetNcoeffs() << std::endl; + for (j = 0; j < ExpTmp->at(i)->GetNcoeffs(); ++j) { - // SET UP VECTORS AND MATRIX - SetUpMatVec(); - - int rank = 0, nprocs = 0; - MPI_Comm_size(m_comm, &nprocs); - MPI_Comm_rank(m_comm, &rank); - - auto tbegin = clock(); - - const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); - - int i, j, n, cnt, gid1, gid2, loc_lda; - NekDouble sign1, sign2, value; - DNekScalMatSharedPtr loc_mat; - - // CALCULATE REORDERING MAPPING - CalculateReordering(pLocToGloMap->GetGlobalToUniversalMap(), - pLocToGloMap->GetGlobalToUniversalMapUnique(), - pLocToGloMap); - - // STORE MESH INFO TO BE PASSED TO SAENA -// int total_elm = this->GetExp()->size(); - auto ExpTmp = m_expList.lock()->GetExp(); - int total_elm = ExpTmp->size(); -// std::cout << total_elm << "\n"; - - int counter = 0; - vector dof_elems; - for (i = 0; i < total_elm; ++i){ -// std::cout << ExpTmp->at(i)->GetNcoeffs() << std::endl; - for (j = 0; j < ExpTmp->at(i)->GetNcoeffs(); ++j){ -// printf("%i\t", pLocToGloMap->GetLocalToGlobalMap()[counter]); - dof_elems.emplace_back(pLocToGloMap->GetLocalToGlobalMap()[counter] + 1); - ++counter; - } - m_l2g.emplace_back(dof_elems); - dof_elems.clear(); - } + // printf("%i\t", + // pLocToGloMap->GetLocalToGlobalMap()[counter]); + dof_elems.emplace_back( + pLocToGloMap->GetLocalToGlobalMap()[counter] + 1); + ++counter; + } + m_l2g.emplace_back(dof_elems); + dof_elems.clear(); + } - auto tend = clock(); - auto t = double(tend - tbegin) / CLOCKS_PER_SEC; - double t_ave = 0.0; - MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); - if(!rank) printf("Saena 
mesh info generation time: %f\n", t_ave / nprocs); + auto tend = clock(); + auto t = double(tend - tbegin) / CLOCKS_PER_SEC; + double t_ave = 0.0; + MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); + if (!rank) + printf("Saena mesh info generation time: %f\n", t_ave / nprocs); - // CONSTRUCT KSP OBJECT - SetUpSolver(pLocToGloMap->GetIterativeTolerance()); + // CONSTRUCT KSP OBJECT + SetUpSolver(pLocToGloMap->GetIterativeTolerance()); - tbegin = clock(); + tbegin = clock(); - m_matrix.erase_no_shrink_to_fit(); + m_matrix.erase_no_shrink_to_fit(); - // POPULATE MATRIX - for(n = cnt = 0; n < m_expList.lock()->GetNumElmts(); ++n) - { - loc_mat = GetBlock(n); - loc_lda = loc_mat->GetRows(); + // POPULATE MATRIX + for (n = cnt = 0; n < m_expList.lock()->GetNumElmts(); ++n) + { + loc_mat = GetBlock(n); + loc_lda = loc_mat->GetRows(); - for(i = 0; i < loc_lda; ++i) + for (i = 0; i < loc_lda; ++i) + { + gid1 = pLocToGloMap->GetLocalToGlobalMap(cnt + i) - nDirDofs; + sign1 = pLocToGloMap->GetLocalToGlobalSign(cnt + i); + if (gid1 >= 0) + { + int gid1ro = m_reorderedMap[gid1]; + for (j = 0; j < loc_lda; ++j) { - gid1 = pLocToGloMap->GetLocalToGlobalMap(cnt+i) - nDirDofs; - sign1 = pLocToGloMap->GetLocalToGlobalSign(cnt + i); - if(gid1 >= 0) + gid2 = + pLocToGloMap->GetLocalToGlobalMap(cnt + j) - nDirDofs; + sign2 = pLocToGloMap->GetLocalToGlobalSign(cnt + j); + if (gid2 >= 0) { - int gid1ro = m_reorderedMap[gid1]; - for(j = 0; j < loc_lda; ++j) - { - gid2 = pLocToGloMap->GetLocalToGlobalMap(cnt + j) - - nDirDofs; - sign2 = pLocToGloMap->GetLocalToGlobalSign(cnt + j); - if(gid2 >= 0) - { - int gid2ro = m_reorderedMap[gid2]; - value = sign1*sign2*(*loc_mat)(i,j); - m_matrix.set(gid1ro, gid2ro, value); - } - } + int gid2ro = m_reorderedMap[gid2]; + value = sign1 * sign2 * (*loc_mat)(i, j); + m_matrix.set(gid1ro, gid2ro, value); } } - cnt += loc_lda; } + } + cnt += loc_lda; + } - // timing - tend = clock(); - t = double(tend - tbegin) / CLOCKS_PER_SEC; - MPI_Reduce(&t, 
&t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); - if(!rank) printf("nektar assembly time: %f\n", t_ave / nprocs); + // timing + tend = clock(); + t = double(tend - tbegin) / CLOCKS_PER_SEC; + MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); + if (!rank) + printf("nektar assembly time: %f\n", t_ave / nprocs); - tbegin = clock(); + tbegin = clock(); - // ASSEMBLE MATRIX -// m_matrix.set_num_threads(1); - m_matrix.assemble(m_scale); -// m_matrix.assemble_writeToFile("matrix_folder"); + // ASSEMBLE MATRIX + // m_matrix.set_num_threads(1); + m_matrix.assemble(m_scale); + // m_matrix.assemble_writeToFile("matrix_folder"); - // timing - tend = clock(); - t = double(tend - tbegin) / CLOCKS_PER_SEC; - MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); - if(!rank) printf("Saena matrix assembly time: %f\n", t_ave / nprocs); + // timing + tend = clock(); + t = double(tend - tbegin) / CLOCKS_PER_SEC; + MPI_Reduce(&t, &t_ave, 1, MPI_DOUBLE, MPI_SUM, 0, m_comm); + if (!rank) + printf("Saena matrix assembly time: %f\n", t_ave / nprocs); - SetUpMultigrid(); - } + SetUpMultigrid(); +} +GlobalLinSysSaenaFull::~GlobalLinSysSaenaFull() +{ +} - GlobalLinSysSaenaFull::~GlobalLinSysSaenaFull() - { +/** + * Solve the linear system using a full global matrix system. 
+ */ +void GlobalLinSysSaenaFull::v_Solve( + const Array &pLocInput, + Array &pLocOutput, + const AssemblyMapSharedPtr &pLocToGloMap, + const Array &pDirForcing) +{ + std::shared_ptr expList = m_expList.lock(); + bool dirForcCalculated = (bool)pDirForcing.size(); + int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + int nGlobDofs = pLocToGloMap->GetNumGlobalCoeffs(); + int nLocDofs = pLocToGloMap->GetNumLocalCoeffs(); - } + // m_locToGloMap = pLocToGloMap; // required for DoMatrixMultiply + + Array tmp(nLocDofs); + Array tmp1(nLocDofs); + Array global(nGlobDofs, 0.0); + int nDirTotal = nDirDofs; + expList->GetComm()->GetRowComm()->AllReduce(nDirTotal, + LibUtilities::ReduceSum); - /** - * Solve the linear system using a full global matrix system. - */ - void GlobalLinSysSaenaFull::v_Solve( - const Array &pLocInput, - Array &pLocOutput, - const AssemblyMapSharedPtr &pLocToGloMap, - const Array &pDirForcing) + if (nDirTotal) + { + // calculate the dirichlet forcing + if (dirForcCalculated) { - std::shared_ptr expList = m_expList.lock(); - bool dirForcCalculated = (bool) pDirForcing.size(); - int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); - int nGlobDofs = pLocToGloMap->GetNumGlobalCoeffs(); - int nLocDofs = pLocToGloMap->GetNumLocalCoeffs(); + // assume pDirForcing is in local space + ASSERTL0( + pDirForcing.size() >= nLocDofs, + "DirForcing is not of sufficient size. Is it in local space?"); + Vmath::Vsub(nLocDofs, pLocInput, 1, pDirForcing, 1, tmp1, 1); + } + else + { + // Calculate the dirichlet forcing and substract it + // from the rhs + expList->GeneralMatrixOp(m_linSysKey, pLocOutput, tmp); -// m_locToGloMap = pLocToGloMap; // required for DoMatrixMultiply + // Apply robin boundary conditions to the solution. 
+ for (auto &r : m_robinBCInfo) // add robin mass matrix + { + RobinBCInfoSharedPtr rBC; + Array tmploc; - Array tmp(nLocDofs); - Array tmp1(nLocDofs); - Array global(nGlobDofs,0.0); + int n = r.first; - int nDirTotal = nDirDofs; - expList->GetComm()->GetRowComm() - ->AllReduce(nDirTotal, LibUtilities::ReduceSum); + int offset = expList->GetCoeff_Offset(n); + LocalRegions::ExpansionSharedPtr vExp = expList->GetExp(n); - if(nDirTotal) - { - // calculate the dirichlet forcing - if(dirForcCalculated) + // add local matrix contribution + for (rBC = r.second; rBC; rBC = rBC->next) { - // assume pDirForcing is in local space - ASSERTL0(pDirForcing.size() >= nLocDofs, - "DirForcing is not of sufficient size. Is it in local space?"); - Vmath::Vsub(nLocDofs, pLocInput, 1, - pDirForcing, 1,tmp1, 1); + vExp->AddRobinTraceContribution( + rBC->m_robinID, rBC->m_robinPrimitiveCoeffs, + pLocOutput + offset, tmploc = tmp + offset); } - else - { - // Calculate the dirichlet forcing and substract it - // from the rhs - expList->GeneralMatrixOp( - m_linSysKey, pLocOutput, tmp); - - // Apply robin boundary conditions to the solution. 
- for(auto &r : m_robinBCInfo) // add robin mass matrix - { - RobinBCInfoSharedPtr rBC; - Array tmploc; - - int n = r.first; - - int offset = expList->GetCoeff_Offset(n); - LocalRegions::ExpansionSharedPtr vExp = expList->GetExp(n); - - // add local matrix contribution - for(rBC = r.second;rBC; rBC = rBC->next) - { - vExp->AddRobinTraceContribution(rBC->m_robinID, - rBC->m_robinPrimitiveCoeffs, - pLocOutput + offset, - tmploc = tmp + offset); - } - } + } - Vmath::Vsub(nLocDofs, pLocInput, 1, tmp, 1, tmp1, 1); - } + Vmath::Vsub(nLocDofs, pLocInput, 1, tmp, 1, tmp1, 1); + } - pLocToGloMap->Assemble(tmp1,tmp); + pLocToGloMap->Assemble(tmp1, tmp); - SolveLinearSystem(nGlobDofs,tmp, global, pLocToGloMap, nDirDofs); + SolveLinearSystem(nGlobDofs, tmp, global, pLocToGloMap, nDirDofs); - pLocToGloMap->GlobalToLocal(global,tmp); + pLocToGloMap->GlobalToLocal(global, tmp); - // Add back initial and boundary condition - Vmath::Vadd(nLocDofs, tmp, 1, pLocOutput, 1, pLocOutput, 1); - } - else - { - pLocToGloMap->Assemble(pLocInput,tmp); - SolveLinearSystem(nGlobDofs, tmp,global, pLocToGloMap); - pLocToGloMap->GlobalToLocal(global,pLocOutput); - } - } + // Add back initial and boundary condition + Vmath::Vadd(nLocDofs, tmp, 1, pLocOutput, 1, pLocOutput, 1); + } + else + { + pLocToGloMap->Assemble(pLocInput, tmp); + SolveLinearSystem(nGlobDofs, tmp, global, pLocToGloMap); + pLocToGloMap->GlobalToLocal(global, pLocOutput); } } +} // namespace MultiRegions +} // namespace Nektar diff --git a/library/MultiRegions/GlobalLinSysSaenaFull.h b/library/MultiRegions/GlobalLinSysSaenaFull.h index 094025c82..84364d85a 100644 --- a/library/MultiRegions/GlobalLinSysSaenaFull.h +++ b/library/MultiRegions/GlobalLinSysSaenaFull.h @@ -36,57 +36,55 @@ #ifndef NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENAFULL_H #define NEKTAR_LIB_MULTIREGIONS_GLOBALLINSYSSAENAFULL_H -#include -#include #include +#include +#include namespace Nektar { - namespace MultiRegions - { - // Forward declarations - - //class 
AssemblyMapDG; - class ExpList; +namespace MultiRegions +{ +// Forward declarations - /// A global linear system. - class GlobalLinSysSaenaFull : public GlobalLinSysSaena - { - public: +// class AssemblyMapDG; +class ExpList; - /// Creates an instance of this class - static GlobalLinSysSharedPtr create( - const GlobalLinSysKey &pLinSysKey, - const std::weak_ptr &pExpList, - const std::shared_ptr &pLocToGloMap) - { - return MemoryManager - ::AllocateSharedPtr(pLinSysKey, pExpList, pLocToGloMap); // check for def args for poly order - } +/// A global linear system. +class GlobalLinSysSaenaFull : public GlobalLinSysSaena +{ +public: + /// Creates an instance of this class + static GlobalLinSysSharedPtr create( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + { + return MemoryManager::AllocateSharedPtr( + pLinSysKey, pExpList, + pLocToGloMap); // check for def args for poly order + } - /// Name of class - MULTI_REGIONS_EXPORT static std::string className; + /// Name of class + MULTI_REGIONS_EXPORT static std::string className; - /// Constructor for full direct matrix solve. - MULTI_REGIONS_EXPORT GlobalLinSysSaenaFull( - const GlobalLinSysKey &pLinSysKey, - const std::weak_ptr &pExpList, - const std::shared_ptr &pLocToGloMap, - const unsigned int pPolynomialOrder=0); + /// Constructor for full direct matrix solve. + MULTI_REGIONS_EXPORT GlobalLinSysSaenaFull( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap, + const unsigned int pPolynomialOrder = 0); - MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaFull(); + MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaFull(); - private: - /// Solve the linear system for given input and output vectors - /// using a specified local to global map. 
- virtual void v_Solve( - const Array &in, - Array &out, - const AssemblyMapSharedPtr &locToGloMap, - const Array &dirForcing - = NullNekDouble1DArray); - }; - } -} +private: + /// Solve the linear system for given input and output vectors + /// using a specified local to global map. + virtual void v_Solve( + const Array &in, Array &out, + const AssemblyMapSharedPtr &locToGloMap, + const Array &dirForcing = NullNekDouble1DArray); +}; +} // namespace MultiRegions +} // namespace Nektar #endif diff --git a/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp b/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp index d530a313d..c30a7810d 100644 --- a/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp +++ b/library/MultiRegions/GlobalLinSysSaenaStaticCond.cpp @@ -43,189 +43,181 @@ using namespace std; namespace Nektar { - namespace MultiRegions - { - /** - * @class GlobalLinSysSaena - * - * Solves a linear system using single- or multi-level static - * condensation. - */ - - /** - * Registers the class with the Factory. 
- */ - string GlobalLinSysSaenaStaticCond::className - = GetGlobalLinSysFactory().RegisterCreatorFunction( - "SaenaStaticCond", - GlobalLinSysSaenaStaticCond::create, - "Saena static condensation."); - - string GlobalLinSysSaenaStaticCond::className2 - = GetGlobalLinSysFactory().RegisterCreatorFunction( - "SaenaMultiLevelStaticCond", - GlobalLinSysSaenaStaticCond::create, - "Saena multi-level static condensation."); - - /** - * For a matrix system of the form @f[ - * \left[ \begin{array}{cc} - * \boldsymbol{A} & \boldsymbol{B}\\ - * \boldsymbol{C} & \boldsymbol{D} - * \end{array} \right] - * \left[ \begin{array}{c} \boldsymbol{x_1}\\ \boldsymbol{x_2} - * \end{array}\right] - * = \left[ \begin{array}{c} \boldsymbol{y_1}\\ \boldsymbol{y_2} - * \end{array}\right], - * @f] - * where @f$\boldsymbol{D}@f$ and - * @f$(\boldsymbol{A-BD^{-1}C})@f$ are invertible, store and assemble - * a static condensation system, according to a given local to global - * mapping. #m_linSys is constructed by AssembleSchurComplement(). - * @param mKey Associated matrix key. - * @param pLocMatSys LocalMatrixSystem - * @param locToGloMap Local to global mapping. 
- */ - GlobalLinSysSaenaStaticCond::GlobalLinSysSaenaStaticCond( - const GlobalLinSysKey &pKey, - const std::weak_ptr &pExpList, - const std::shared_ptr &pLocToGloMap) - : GlobalLinSys (pKey, pExpList, pLocToGloMap), - GlobalLinSysSaena (pKey, pExpList, pLocToGloMap), - GlobalLinSysStaticCond(pKey, pExpList, pLocToGloMap) - { - std::cout << __func__ << std::endl; - - ASSERTL1((pKey.GetGlobalSysSolnType()==eSaenaStaticCond)|| - (pKey.GetGlobalSysSolnType()==eSaenaMultiLevelStaticCond), - "This constructor is only valid when using static " - "condensation"); - ASSERTL1(pKey.GetGlobalSysSolnType() - == pLocToGloMap->GetGlobalSysSolnType(), - "The local to global map is not set up for the requested " - "solution type"); - } +namespace MultiRegions +{ +/** + * @class GlobalLinSysSaena + * + * Solves a linear system using single- or multi-level static + * condensation. + */ + +/** + * Registers the class with the Factory. + */ +string GlobalLinSysSaenaStaticCond::className = + GetGlobalLinSysFactory().RegisterCreatorFunction( + "SaenaStaticCond", GlobalLinSysSaenaStaticCond::create, + "Saena static condensation."); + +string GlobalLinSysSaenaStaticCond::className2 = + GetGlobalLinSysFactory().RegisterCreatorFunction( + "SaenaMultiLevelStaticCond", GlobalLinSysSaenaStaticCond::create, + "Saena multi-level static condensation."); + +/** + * For a matrix system of the form @f[ + * \left[ \begin{array}{cc} + * \boldsymbol{A} & \boldsymbol{B}\\ + * \boldsymbol{C} & \boldsymbol{D} + * \end{array} \right] + * \left[ \begin{array}{c} \boldsymbol{x_1}\\ \boldsymbol{x_2} + * \end{array}\right] + * = \left[ \begin{array}{c} \boldsymbol{y_1}\\ \boldsymbol{y_2} + * \end{array}\right], + * @f] + * where @f$\boldsymbol{D}@f$ and + * @f$(\boldsymbol{A-BD^{-1}C})@f$ are invertible, store and assemble + * a static condensation system, according to a given local to global + * mapping. #m_linSys is constructed by AssembleSchurComplement(). + * @param mKey Associated matrix key. 
+ * @param pLocMatSys LocalMatrixSystem + * @param locToGloMap Local to global mapping. + */ +GlobalLinSysSaenaStaticCond::GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &pKey, const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + : GlobalLinSys(pKey, pExpList, pLocToGloMap), + GlobalLinSysSaena(pKey, pExpList, pLocToGloMap), + GlobalLinSysStaticCond(pKey, pExpList, pLocToGloMap) +{ + std::cout << __func__ << std::endl; + + ASSERTL1((pKey.GetGlobalSysSolnType() == eSaenaStaticCond) || + (pKey.GetGlobalSysSolnType() == eSaenaMultiLevelStaticCond), + "This constructor is only valid when using static " + "condensation"); + ASSERTL1(pKey.GetGlobalSysSolnType() == + pLocToGloMap->GetGlobalSysSolnType(), + "The local to global map is not set up for the requested " + "solution type"); +} - /** - * - */ - GlobalLinSysSaenaStaticCond::GlobalLinSysSaenaStaticCond( - const GlobalLinSysKey &pKey, - const std::weak_ptr &pExpList, - const DNekScalBlkMatSharedPtr pSchurCompl, - const DNekScalBlkMatSharedPtr pBinvD, - const DNekScalBlkMatSharedPtr pC, - const DNekScalBlkMatSharedPtr pInvD, - const std::shared_ptr &pLocToGloMap, - const PreconditionerSharedPtr pPrecon) - : GlobalLinSys (pKey, pExpList, pLocToGloMap), - GlobalLinSysSaena (pKey, pExpList, pLocToGloMap), - GlobalLinSysStaticCond(pKey, pExpList, pLocToGloMap) - { - std::cout << __func__ << std::endl; +/** + * + */ +GlobalLinSysSaenaStaticCond::GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &pKey, const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &pLocToGloMap, + const PreconditionerSharedPtr pPrecon) + : GlobalLinSys(pKey, pExpList, pLocToGloMap), + GlobalLinSysSaena(pKey, pExpList, pLocToGloMap), + GlobalLinSysStaticCond(pKey, pExpList, pLocToGloMap) +{ + std::cout << __func__ << std::endl; - m_schurCompl = pSchurCompl; - 
m_BinvD = pBinvD; - m_C = pC; - m_invD = pInvD; - m_precon = pPrecon; - } + m_schurCompl = pSchurCompl; + m_BinvD = pBinvD; + m_C = pC; + m_invD = pInvD; + m_precon = pPrecon; +} - /** - * - */ - GlobalLinSysSaenaStaticCond::~GlobalLinSysSaenaStaticCond() - { +/** + * + */ +GlobalLinSysSaenaStaticCond::~GlobalLinSysSaenaStaticCond() +{ +} - } +/** + * Assemble the schur complement matrix from the block matrices stored + * in #m_blkMatrices and the given local to global mapping information. + * @param locToGloMap Local to global mapping information. + */ +void GlobalLinSysSaenaStaticCond::v_AssembleSchurComplement( + AssemblyMapSharedPtr pLocToGloMap) +{ + std::cout << __func__ << std::endl; - /** - * Assemble the schur complement matrix from the block matrices stored - * in #m_blkMatrices and the given local to global mapping information. - * @param locToGloMap Local to global mapping information. - */ - void GlobalLinSysSaenaStaticCond::v_AssembleSchurComplement( - AssemblyMapSharedPtr pLocToGloMap) - { - std::cout << __func__ << std::endl; + int i, j, n, cnt, gid1, gid2, loc_lda; + NekDouble sign1, sign2, value; - int i, j, n, cnt, gid1, gid2, loc_lda; - NekDouble sign1, sign2, value; + const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); - const int nDirDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + DNekScalBlkMatSharedPtr SchurCompl = m_schurCompl; + DNekScalBlkMatSharedPtr BinvD = m_BinvD; + DNekScalBlkMatSharedPtr C = m_C; + DNekScalBlkMatSharedPtr invD = m_invD; + DNekScalMatSharedPtr loc_mat; - DNekScalBlkMatSharedPtr SchurCompl = m_schurCompl; - DNekScalBlkMatSharedPtr BinvD = m_BinvD; - DNekScalBlkMatSharedPtr C = m_C; - DNekScalBlkMatSharedPtr invD = m_invD; - DNekScalMatSharedPtr loc_mat; + // CALCULATE REORDERING MAPPING + CalculateReordering(pLocToGloMap->GetGlobalToUniversalBndMap(), + pLocToGloMap->GetGlobalToUniversalBndMapUnique(), + pLocToGloMap); - // CALCULATE REORDERING MAPPING - 
CalculateReordering(pLocToGloMap->GetGlobalToUniversalBndMap(), - pLocToGloMap->GetGlobalToUniversalBndMapUnique(), - pLocToGloMap); + // SET UP VECTORS AND MATRIX + // SetUpMatVec(pLocToGloMap->GetNumGlobalBndCoeffs(), nDirDofs); + SetUpMatVec(); - // SET UP VECTORS AND MATRIX -// SetUpMatVec(pLocToGloMap->GetNumGlobalBndCoeffs(), nDirDofs); - SetUpMatVec(); + // CONSTRUCT KSP OBJECT + SetUpSolver(pLocToGloMap->GetIterativeTolerance()); - // CONSTRUCT KSP OBJECT - SetUpSolver(pLocToGloMap->GetIterativeTolerance()); + // POPULATE MATRIX + for (n = cnt = 0; n < m_schurCompl->GetNumberOfBlockRows(); ++n) + { + loc_mat = m_schurCompl->GetBlock(n, n); + loc_lda = loc_mat->GetRows(); - // POPULATE MATRIX - for(n = cnt = 0; n < m_schurCompl->GetNumberOfBlockRows(); ++n) + for (i = 0; i < loc_lda; ++i) + { + gid1 = pLocToGloMap->GetLocalToGlobalBndMap(cnt + i) - nDirDofs; + sign1 = pLocToGloMap->GetLocalToGlobalBndSign(cnt + i); + if (gid1 >= 0) { - loc_mat = m_schurCompl->GetBlock(n,n); - loc_lda = loc_mat->GetRows(); - - for(i = 0; i < loc_lda; ++i) + int gid1ro = m_reorderedMap[gid1]; + for (j = 0; j < loc_lda; ++j) { - gid1 = pLocToGloMap->GetLocalToGlobalBndMap(cnt + i)-nDirDofs; - sign1 = pLocToGloMap->GetLocalToGlobalBndSign(cnt + i); - if(gid1 >= 0) + gid2 = pLocToGloMap->GetLocalToGlobalBndMap(cnt + j) - + nDirDofs; + sign2 = pLocToGloMap->GetLocalToGlobalBndSign(cnt + j); + if (gid2 >= 0) { - int gid1ro = m_reorderedMap[gid1]; - for(j = 0; j < loc_lda; ++j) - { - gid2 = pLocToGloMap->GetLocalToGlobalBndMap(cnt + j) - - nDirDofs; - sign2 = pLocToGloMap->GetLocalToGlobalBndSign(cnt + j); - if(gid2 >= 0) - { - int gid2ro = m_reorderedMap[gid2]; - value = sign1*sign2*(*loc_mat)(i,j); - m_matrix.set(gid1ro, gid2ro, value); - } - } + int gid2ro = m_reorderedMap[gid2]; + value = sign1 * sign2 * (*loc_mat)(i, j); + m_matrix.set(gid1ro, gid2ro, value); } } - cnt += loc_lda; } - - m_matrix.assemble(); - } - - GlobalLinSysStaticCondSharedPtr 
GlobalLinSysSaenaStaticCond::v_Recurse( - const GlobalLinSysKey &mkey, - const std::weak_ptr &pExpList, - const DNekScalBlkMatSharedPtr pSchurCompl, - const DNekScalBlkMatSharedPtr pBinvD, - const DNekScalBlkMatSharedPtr pC, - const DNekScalBlkMatSharedPtr pInvD, - const std::shared_ptr &l2gMap) - { -// GlobalLinSysSaenaStaticCondSharedPtr sys = MemoryManager< -// GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( -// mkey, pExpList, pSchurCompl, pBinvD, pC, pInvD, l2gMap, -// m_precon); - GlobalLinSysSaenaStaticCondSharedPtr sys = MemoryManager< - GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( - mkey, pExpList, pSchurCompl, pBinvD, pC, pInvD, l2gMap); - - std::cout << __func__ << std::endl; - - sys->Initialise(l2gMap); - return sys; } + cnt += loc_lda; } + + m_matrix.assemble(); +} + +GlobalLinSysStaticCondSharedPtr GlobalLinSysSaenaStaticCond::v_Recurse( + const GlobalLinSysKey &mkey, const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &l2gMap) +{ + // GlobalLinSysSaenaStaticCondSharedPtr sys = MemoryManager< + // GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( + // mkey, pExpList, pSchurCompl, pBinvD, pC, pInvD, + // l2gMap, m_precon); + GlobalLinSysSaenaStaticCondSharedPtr sys = + MemoryManager::AllocateSharedPtr( + mkey, pExpList, pSchurCompl, pBinvD, pC, pInvD, l2gMap); + + std::cout << __func__ << std::endl; + + sys->Initialise(l2gMap); + return sys; } +} // namespace MultiRegions +} // namespace Nektar diff --git a/library/MultiRegions/GlobalLinSysSaenaStaticCond.h b/library/MultiRegions/GlobalLinSysSaenaStaticCond.h index a8c1d4124..e550fc4f8 100644 --- a/library/MultiRegions/GlobalLinSysSaenaStaticCond.h +++ b/library/MultiRegions/GlobalLinSysSaenaStaticCond.h @@ -42,73 +42,66 @@ namespace Nektar { - namespace MultiRegions - { - // Forward declarations - class ExpList; - class 
GlobalLinSysSaenaStaticCond; - - typedef std::shared_ptr - GlobalLinSysSaenaStaticCondSharedPtr; +namespace MultiRegions +{ +// Forward declarations +class ExpList; +class GlobalLinSysSaenaStaticCond; - /// A global linear system. - class GlobalLinSysSaenaStaticCond : virtual public GlobalLinSysSaena, - virtual public GlobalLinSysStaticCond - { - public: - /// Creates an instance of this class - static GlobalLinSysSharedPtr create( - const GlobalLinSysKey &pLinSysKey, - const std::weak_ptr &pExpList, - const std::shared_ptr &pLocToGloMap) - { - GlobalLinSysSharedPtr p = MemoryManager< - GlobalLinSysSaenaStaticCond>::AllocateSharedPtr( - pLinSysKey, pExpList, pLocToGloMap); - p->InitObject(); - return p; - } +typedef std::shared_ptr + GlobalLinSysSaenaStaticCondSharedPtr; - /// Name of class - MULTI_REGIONS_EXPORT static std::string className; - static std::string className2; +/// A global linear system. +class GlobalLinSysSaenaStaticCond : virtual public GlobalLinSysSaena, + virtual public GlobalLinSysStaticCond +{ +public: + /// Creates an instance of this class + static GlobalLinSysSharedPtr create( + const GlobalLinSysKey &pLinSysKey, + const std::weak_ptr &pExpList, + const std::shared_ptr &pLocToGloMap) + { + GlobalLinSysSharedPtr p = + MemoryManager::AllocateSharedPtr( + pLinSysKey, pExpList, pLocToGloMap); + p->InitObject(); + return p; + } - /// Constructor for full direct matrix solve. - MULTI_REGIONS_EXPORT GlobalLinSysSaenaStaticCond( - const GlobalLinSysKey &mkey, - const std::weak_ptr &pExpList, - const std::shared_ptr &locToGloMap); + /// Name of class + MULTI_REGIONS_EXPORT static std::string className; + static std::string className2; - /// Constructor for full direct matrix solve. 
- MULTI_REGIONS_EXPORT GlobalLinSysSaenaStaticCond( - const GlobalLinSysKey &mkey, - const std::weak_ptr &pExpList, - const DNekScalBlkMatSharedPtr pSchurCompl, - const DNekScalBlkMatSharedPtr pBinvD, - const DNekScalBlkMatSharedPtr pC, - const DNekScalBlkMatSharedPtr pInvD, - const std::shared_ptr &locToGloMap, - const PreconditionerSharedPtr pPrecon = - PreconditionerSharedPtr()); + /// Constructor for full direct matrix solve. + MULTI_REGIONS_EXPORT GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &mkey, const std::weak_ptr &pExpList, + const std::shared_ptr &locToGloMap); - MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaStaticCond(); + /// Constructor for full direct matrix solve. + MULTI_REGIONS_EXPORT GlobalLinSysSaenaStaticCond( + const GlobalLinSysKey &mkey, const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &locToGloMap, + const PreconditionerSharedPtr pPrecon = PreconditionerSharedPtr()); - protected: - /// Assemble the Schur complement matrix. - virtual void v_AssembleSchurComplement( - std::shared_ptr locToGloMap); + MULTI_REGIONS_EXPORT virtual ~GlobalLinSysSaenaStaticCond(); - virtual GlobalLinSysStaticCondSharedPtr v_Recurse( - const GlobalLinSysKey &mkey, - const std::weak_ptr &pExpList, - const DNekScalBlkMatSharedPtr pSchurCompl, - const DNekScalBlkMatSharedPtr pBinvD, - const DNekScalBlkMatSharedPtr pC, - const DNekScalBlkMatSharedPtr pInvD, - const std::shared_ptr &locToGloMap); +protected: + /// Assemble the Schur complement matrix. 
+ virtual void v_AssembleSchurComplement( + std::shared_ptr locToGloMap); - }; - } -} + virtual GlobalLinSysStaticCondSharedPtr v_Recurse( + const GlobalLinSysKey &mkey, const std::weak_ptr &pExpList, + const DNekScalBlkMatSharedPtr pSchurCompl, + const DNekScalBlkMatSharedPtr pBinvD, const DNekScalBlkMatSharedPtr pC, + const DNekScalBlkMatSharedPtr pInvD, + const std::shared_ptr &locToGloMap); +}; +} // namespace MultiRegions +} // namespace Nektar #endif diff --git a/library/MultiRegions/PreconditionerLinear.cpp b/library/MultiRegions/PreconditionerLinear.cpp index dcf38ddf8..60302ab51 100644 --- a/library/MultiRegions/PreconditionerLinear.cpp +++ b/library/MultiRegions/PreconditionerLinear.cpp @@ -33,11 +33,11 @@ /////////////////////////////////////////////////////////////////////////////// #include -#include -#include -#include #include +#include #include +#include +#include #ifdef NEKTAR_USING_PETSC #include @@ -54,255 +54,243 @@ using namespace std; namespace Nektar { - namespace MultiRegions - { - /** - * Registers the class with the Factory. - */ +namespace MultiRegions +{ +/** + * Registers the class with the Factory. 
+ */ + +string PreconditionerLinear::className1 = + GetPreconFactory().RegisterCreatorFunction( + "FullLinearSpace", PreconditionerLinear::create, + "Full Linear space inverse Preconditioning"); + +std::string PreconditionerLinear::solveType = + LibUtilities::SessionReader::RegisterDefaultSolverInfo("LinearPreconSolver", + "Xxt"); +std::string PreconditionerLinear::solveTypeIds[] = { + LibUtilities::SessionReader::RegisterEnumValue( + "LinearPreconSolver", "PETSc", MultiRegions::eLinearPreconPETSc), + LibUtilities::SessionReader::RegisterEnumValue( + "LinearPreconSolver", "Saena", MultiRegions::eLinearPreconSaena), + LibUtilities::SessionReader::RegisterEnumValue( + "LinearPreconSolver", "Xxt", MultiRegions::eLinearPreconXxt)}; + +/** + * @class PreconditionerLinear + * + * This class implements preconditioning for the conjugate + * gradient matrix solver. + */ + +PreconditionerLinear::PreconditionerLinear( + const std::shared_ptr &plinsys, + const AssemblyMapSharedPtr &pLocToGloMap) + : Preconditioner(plinsys, pLocToGloMap) +{ +} - string PreconditionerLinear::className1 - = GetPreconFactory().RegisterCreatorFunction( - "FullLinearSpace", - PreconditionerLinear::create, - "Full Linear space inverse Preconditioning"); +void PreconditionerLinear::v_InitObject() +{ +} - std::string PreconditionerLinear::solveType = - LibUtilities::SessionReader::RegisterDefaultSolverInfo( - "LinearPreconSolver", - "Xxt"); - std::string PreconditionerLinear::solveTypeIds[] = { - LibUtilities::SessionReader::RegisterEnumValue( - "LinearPreconSolver", - "PETSc", - MultiRegions::eLinearPreconPETSc), - LibUtilities::SessionReader::RegisterEnumValue( - "LinearPreconSolver", - "Saena", - MultiRegions::eLinearPreconSaena), - LibUtilities::SessionReader::RegisterEnumValue( - "LinearPreconSolver", - "Xxt", - MultiRegions::eLinearPreconXxt) - }; +void PreconditionerLinear::v_BuildPreconditioner() +{ + GlobalSysSolnType sType = m_locToGloMap.lock()->GetGlobalSysSolnType(); + ASSERTL0(sType == 
eIterativeStaticCond || sType == ePETScStaticCond, + "This type of preconditioning is not implemented " + "for this solver"); - /** - * @class PreconditionerLinear - * - * This class implements preconditioning for the conjugate - * gradient matrix solver. - */ + std::shared_ptr expList = + ((m_linsys.lock())->GetLocMat()).lock(); - PreconditionerLinear::PreconditionerLinear( - const std::shared_ptr &plinsys, - const AssemblyMapSharedPtr &pLocToGloMap) - : Preconditioner(plinsys, pLocToGloMap) - { - } + LinearPreconSolver solveType = + expList->GetSession()->GetSolverInfoAsEnum( + "LinearPreconSolver"); - void PreconditionerLinear::v_InitObject() - { - } + GlobalSysSolnType linSolveType; - void PreconditionerLinear::v_BuildPreconditioner() + switch (solveType) + { + case eLinearPreconPETSc: { - GlobalSysSolnType sType = m_locToGloMap.lock()->GetGlobalSysSolnType(); - ASSERTL0(sType == eIterativeStaticCond || sType == ePETScStaticCond, - "This type of preconditioning is not implemented " - "for this solver"); - - std::shared_ptr - expList=((m_linsys.lock())->GetLocMat()).lock(); - - LinearPreconSolver solveType = - expList->GetSession()->GetSolverInfoAsEnum( - "LinearPreconSolver"); - - GlobalSysSolnType linSolveType; - - switch(solveType) - { - case eLinearPreconPETSc: - { - linSolveType = ePETScFullMatrix; + linSolveType = ePETScFullMatrix; #ifndef NEKTAR_USING_PETSC - NEKERROR(ErrorUtil::efatal, - "Nektar++ has not been compiled with " - "PETSc support."); + NEKERROR(ErrorUtil::efatal, "Nektar++ has not been compiled with " + "PETSc support."); #endif - break; - } - case eLinearPreconSaena: - { - linSolveType = eSaenaFullMatrix; + break; + } + case eLinearPreconSaena: + { + linSolveType = eSaenaFullMatrix; #ifndef NEKTAR_USING_SAENA - NEKERROR(ErrorUtil::efatal, - "Nektar++ has not been compiled with " - "Saena support."); + NEKERROR(ErrorUtil::efatal, "Nektar++ has not been compiled with " + "Saena support."); #endif - break; - } - case eLinearPreconXxt: - 
default: - { - linSolveType = eXxtFullMatrix; - break; - } - } + break; + } + case eLinearPreconXxt: + default: + { + linSolveType = eXxtFullMatrix; + break; + } + } - m_vertLocToGloMap = m_locToGloMap.lock()->LinearSpaceMap(*expList, linSolveType); + m_vertLocToGloMap = + m_locToGloMap.lock()->LinearSpaceMap(*expList, linSolveType); - // Generate linear solve system. - StdRegions::MatrixType mType = - m_linsys.lock()->GetKey().GetMatrixType() == StdRegions::eMass ? - StdRegions::ePreconLinearSpaceMass : - StdRegions::ePreconLinearSpace; + // Generate linear solve system. + StdRegions::MatrixType mType = + m_linsys.lock()->GetKey().GetMatrixType() == StdRegions::eMass + ? StdRegions::ePreconLinearSpaceMass + : StdRegions::ePreconLinearSpace; - GlobalLinSysKey preconKey(mType, m_vertLocToGloMap, (m_linsys.lock())->GetKey().GetConstFactors()); + GlobalLinSysKey preconKey(mType, m_vertLocToGloMap, + (m_linsys.lock())->GetKey().GetConstFactors()); - switch(solveType) - { - case eLinearPreconXxt: - { - m_vertLinsys = MemoryManager:: - AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); - break; - } - case eLinearPreconPETSc: - { + switch (solveType) + { + case eLinearPreconXxt: + { + m_vertLinsys = + MemoryManager::AllocateSharedPtr( + preconKey, expList, m_vertLocToGloMap); + break; + } + case eLinearPreconPETSc: + { #ifdef NEKTAR_USING_PETSC - m_vertLinsys = MemoryManager:: - AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap); + m_vertLinsys = + MemoryManager::AllocateSharedPtr( + preconKey, expList, m_vertLocToGloMap); #else - ASSERTL0(false, "Nektar++ has not been compiled with " - "PETSc support."); + ASSERTL0(false, "Nektar++ has not been compiled with " + "PETSc support."); #endif - break; - } - case eLinearPreconSaena: - { + break; + } + case eLinearPreconSaena: + { #ifdef NEKTAR_USING_SAENA - auto vertLinsys = MemoryManager:: - AllocateSharedPtr(preconKey,expList,m_vertLocToGloMap, 1); - // vertLinsys->SetPolyOrder(1); - m_vertLinsys = vertLinsys; + 
auto vertLinsys = + MemoryManager::AllocateSharedPtr( + preconKey, expList, m_vertLocToGloMap, 1); + // vertLinsys->SetPolyOrder(1); + m_vertLinsys = vertLinsys; #else - ASSERTL0(false, "Nektar++ has not been compiled with " - "Saena support."); + ASSERTL0(false, "Nektar++ has not been compiled with " + "Saena support."); #endif - break; - } - } - } - - /** - * - */ - void PreconditionerLinear::v_DoPreconditioner( - const Array& pInput, - Array& pOutput) - { - v_DoPreconditionerWithNonVertOutput(pInput,pOutput,NullNekDouble1DArray, - NullNekDouble1DArray); + break; } + } +} - /** - * - */ - void PreconditionerLinear::v_DoPreconditionerWithNonVertOutput( - const Array& pInput, - Array& pOutput, - const Array& pNonVertOutput, - Array& pVertForce) - { - GlobalSysSolnType solvertype=m_locToGloMap.lock()->GetGlobalSysSolnType(); - switch(solvertype) - { - case MultiRegions::eIterativeStaticCond: - case MultiRegions::ePETScStaticCond: - { - int i,val; - int nloc = m_vertLocToGloMap->GetNumLocalCoeffs(); - int nglo = m_vertLocToGloMap->GetNumGlobalCoeffs(); - // mapping from full space to vertices - Array LocToGloBnd = m_vertLocToGloMap->GetLocalToGlobalBndMap(); +/** + * + */ +void PreconditionerLinear::v_DoPreconditioner( + const Array &pInput, Array &pOutput) +{ + v_DoPreconditionerWithNonVertOutput(pInput, pOutput, NullNekDouble1DArray, + NullNekDouble1DArray); +} - // Global to local for linear solver (different from above) - Array LocToGlo = m_vertLocToGloMap->GetLocalToGlobalMap(); +/** + * + */ +void PreconditionerLinear::v_DoPreconditionerWithNonVertOutput( + const Array &pInput, Array &pOutput, + const Array &pNonVertOutput, + Array &pVertForce) +{ + GlobalSysSolnType solvertype = m_locToGloMap.lock()->GetGlobalSysSolnType(); + switch (solvertype) + { + case MultiRegions::eIterativeStaticCond: + case MultiRegions::ePETScStaticCond: + { + int i, val; + int nloc = m_vertLocToGloMap->GetNumLocalCoeffs(); + int nglo = m_vertLocToGloMap->GetNumGlobalCoeffs(); + // 
mapping from full space to vertices + Array LocToGloBnd = + m_vertLocToGloMap->GetLocalToGlobalBndMap(); - // number of Dir coeffs in from full problem - int nDirFull = m_locToGloMap.lock()->GetNumGlobalDirBndCoeffs(); + // Global to local for linear solver (different from above) + Array LocToGlo = + m_vertLocToGloMap->GetLocalToGlobalMap(); - Array In(nglo,0.0); - Array Out(nglo,0.0); + // number of Dir coeffs in from full problem + int nDirFull = m_locToGloMap.lock()->GetNumGlobalDirBndCoeffs(); - // Gather rhs - for(i = 0; i < nloc; ++i) - { - val = LocToGloBnd[i]; - if(val >= nDirFull) - { - In[LocToGlo[i]] = pInput[val-nDirFull]; - } - } + Array In(nglo, 0.0); + Array Out(nglo, 0.0); - // Do solve without enforcing any boundary conditions. - m_vertLinsys->SolveLinearSystem( - m_vertLocToGloMap->GetNumGlobalCoeffs(), - In,Out,m_vertLocToGloMap, - m_vertLocToGloMap->GetNumGlobalDirBndCoeffs()); + // Gather rhs + for (i = 0; i < nloc; ++i) + { + val = LocToGloBnd[i]; + if (val >= nDirFull) + { + In[LocToGlo[i]] = pInput[val - nDirFull]; + } + } + // Do solve without enforcing any boundary conditions. 
+ m_vertLinsys->SolveLinearSystem( + m_vertLocToGloMap->GetNumGlobalCoeffs(), In, Out, + m_vertLocToGloMap, + m_vertLocToGloMap->GetNumGlobalDirBndCoeffs()); - if(pNonVertOutput != NullNekDouble1DArray) - { - ASSERTL1(pNonVertOutput.size() >= pOutput.size(),"Non Vert output is not of sufficient length"); - Vmath::Vcopy(pOutput.size(),pNonVertOutput,1,pOutput,1); - } - else - { - //Copy input to output as a unit preconditioner on - //any other value - Vmath::Vcopy(pInput.size(),pInput,1,pOutput,1); - } + if (pNonVertOutput != NullNekDouble1DArray) + { + ASSERTL1(pNonVertOutput.size() >= pOutput.size(), + "Non Vert output is not of sufficient length"); + Vmath::Vcopy(pOutput.size(), pNonVertOutput, 1, pOutput, 1); + } + else + { + // Copy input to output as a unit preconditioner on + // any other value + Vmath::Vcopy(pInput.size(), pInput, 1, pOutput, 1); + } - if(pVertForce != NullNekDouble1DArray) + if (pVertForce != NullNekDouble1DArray) + { + Vmath::Zero(pVertForce.size(), pVertForce, 1); + // Scatter back soln from linear solve + for (i = 0; i < nloc; ++i) + { + val = LocToGloBnd[i]; + if (val >= nDirFull) { - Vmath::Zero(pVertForce.size(),pVertForce,1); - // Scatter back soln from linear solve - for(i = 0; i < nloc; ++i) - { - val = LocToGloBnd[i]; - if(val >= nDirFull) - { - pOutput[val-nDirFull] = Out[LocToGlo[i]]; - // copy vertex forcing into this vector - pVertForce[val-nDirFull] = In[LocToGlo[i]]; - } - } + pOutput[val - nDirFull] = Out[LocToGlo[i]]; + // copy vertex forcing into this vector + pVertForce[val - nDirFull] = In[LocToGlo[i]]; } - else + } + } + else + { + // Scatter back soln from linear solve + for (i = 0; i < nloc; ++i) + { + val = LocToGloBnd[i]; + if (val >= nDirFull) { - // Scatter back soln from linear solve - for(i = 0; i < nloc; ++i) - { - val = LocToGloBnd[i]; - if(val >= nDirFull) - { - pOutput[val-nDirFull] = Out[LocToGlo[i]]; - } - } + pOutput[val - nDirFull] = Out[LocToGlo[i]]; } } - break; - default: - ASSERTL0(0,"Unsupported 
solver type"); - break; - } + } } + break; + default: + ASSERTL0(0, "Unsupported solver type"); + break; } } - - - - - - +} // namespace MultiRegions +} // namespace Nektar diff --git a/library/MultiRegions/PreconditionerLinear.h b/library/MultiRegions/PreconditionerLinear.h index efc2e953b..224f2f328 100644 --- a/library/MultiRegions/PreconditionerLinear.h +++ b/library/MultiRegions/PreconditionerLinear.h @@ -33,76 +33,75 @@ /////////////////////////////////////////////////////////////////////////////// #ifndef NEKTAR_LIB_MULTIREGIONS_PRECONDITIONERLINEAR_H #define NEKTAR_LIB_MULTIREGIONS_PRECONDITIONERLINEAR_H +#include +#include +#include #include -#include #include -#include -#include -#include - +#include namespace Nektar { - namespace MultiRegions - { - enum LinearPreconSolver - { - eLinearPreconXxt, - eLinearPreconPETSc, - eLinearPreconSaena - }; +namespace MultiRegions +{ +enum LinearPreconSolver +{ + eLinearPreconXxt, + eLinearPreconPETSc, + eLinearPreconSaena +}; - class PreconditionerLinear; - typedef std::shared_ptr PreconditionerLinearSharedPtr; +class PreconditionerLinear; +typedef std::shared_ptr PreconditionerLinearSharedPtr; - class PreconditionerLinear: public Preconditioner - { - public: - /// Creates an instance of this class - static PreconditionerSharedPtr create( - const std::shared_ptr &plinsys, - const std::shared_ptr &pLocToGloMap) - { - PreconditionerSharedPtr p = MemoryManager::AllocateSharedPtr(plinsys,pLocToGloMap); - p->InitObject(); - return p; - } +class PreconditionerLinear : public Preconditioner +{ +public: + /// Creates an instance of this class + static PreconditionerSharedPtr create( + const std::shared_ptr &plinsys, + const std::shared_ptr &pLocToGloMap) + { + PreconditionerSharedPtr p = + MemoryManager::AllocateSharedPtr( + plinsys, pLocToGloMap); + p->InitObject(); + return p; + } - /// Name of class - static std::string className1; + /// Name of class + static std::string className1; - MULTI_REGIONS_EXPORT 
PreconditionerLinear( - const std::shared_ptr &plinsys, - const AssemblyMapSharedPtr &pLocToGloMap); + MULTI_REGIONS_EXPORT PreconditionerLinear( + const std::shared_ptr &plinsys, + const AssemblyMapSharedPtr &pLocToGloMap); - MULTI_REGIONS_EXPORT - virtual ~PreconditionerLinear() {} - - protected: - GlobalLinSysSharedPtr m_vertLinsys; - std::shared_ptr m_vertLocToGloMap; + MULTI_REGIONS_EXPORT + virtual ~PreconditionerLinear() + { + } - private: - static std::string solveType; - static std::string solveTypeIds[]; +protected: + GlobalLinSysSharedPtr m_vertLinsys; + std::shared_ptr m_vertLocToGloMap; - virtual void v_InitObject(); +private: + static std::string solveType; + static std::string solveTypeIds[]; + virtual void v_InitObject(); - virtual void v_DoPreconditionerWithNonVertOutput( - const Array& pInput, - Array& pOutput, - const Array& pNonVertOutput, - Array& pVertForce); - - virtual void v_DoPreconditioner( - const Array& pInput, - Array& pOutput); - - virtual void v_BuildPreconditioner(); + virtual void v_DoPreconditionerWithNonVertOutput( + const Array &pInput, Array &pOutput, + const Array &pNonVertOutput, + Array &pVertForce); - }; - } -} + virtual void v_DoPreconditioner(const Array &pInput, + Array &pOutput); + + virtual void v_BuildPreconditioner(); +}; +} // namespace MultiRegions +} // namespace Nektar #endif -- GitLab From 5fc1df18398e2abddc255b7efd560c0bbebc3c6e Mon Sep 17 00:00:00 2001 From: David Moxey Date: Thu, 6 Oct 2022 18:03:36 +0100 Subject: [PATCH 12/13] Change CMake options --- cmake/ThirdPartySaena.cmake | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake index deb3ed413..3fa93deb5 100644 --- a/cmake/ThirdPartySaena.cmake +++ b/cmake/ThirdPartySaena.cmake @@ -6,9 +6,12 @@ # ######################################################################## -CMAKE_DEPENDENT_OPTION(NEKTAR_USE_SAENA - "Enable Saena parallel matrix solver support." 
OFF - "NEKTAR_USE_MKL;NEKTAR_USE_MPI" ON) +OPTION(NEKTAR_USE_SAENA "Enable Saena parallel matrix solver support." OFF) + +# Deactivate if forced ON but NEKTAR_USE_MKL or NEKTAR_USE_MPI are OFF +CMAKE_DEPENDENT_OPTION (NEKTAR_USE_SAENA + "Enable saena parallel matrix solver support." ${NEKTAR_USE_SAENA} + "NEKTAR_USE_MKL;NEKTAR_USE_MPI" OFF) IF (NEKTAR_USE_SAENA) SET(BUILD_SAENA ON) -- GitLab From 99c23ef85c549601479df1a2cf38ca77437ad8f2 Mon Sep 17 00:00:00 2001 From: David Moxey Date: Mon, 9 Jan 2023 09:54:28 +0000 Subject: [PATCH 13/13] Update version of Saena --- cmake/ThirdPartySaena.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/ThirdPartySaena.cmake b/cmake/ThirdPartySaena.cmake index 3fa93deb5..2b3ffc4ea 100644 --- a/cmake/ThirdPartySaena.cmake +++ b/cmake/ThirdPartySaena.cmake @@ -27,8 +27,8 @@ IF (NEKTAR_USE_SAENA) saena PREFIX ${TPSRC} STAMP_DIR ${TPBUILD}/stamp - GIT_REPOSITORY https://github.com/mdave/Saena_Public.git - GIT_TAG 0a6b9ddc9a3074488e41cf3e45d1eb090968a352 + GIT_REPOSITORY https://github.com/paralab/Saena_Public.git + GIT_TAG 8f49d89347d931ef21a26979246c955d1d1de4fb DOWNLOAD_DIR ${TPSRC} SOURCE_DIR ${TPBUILD}/saena TMP_DIR ${TPBUILD}/saena-tmp -- GitLab