diff --git a/.gitignore b/.gitignore index 1f5dcc33c16a7ae52938f0416c551d8df56d878e..59247d9be06a3a0d1cef9139eb04b0a3063bd9d0 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ build .ccls-cache/ a.out .ccls-cache/ +.cache diff --git a/CMakeLists.txt b/CMakeLists.txt index 68eeedc03150049799d821021525da3c7aa9a25b..d84013d68b779719205485b4d3cb3a91a5be98c5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,11 +2,25 @@ cmake_minimum_required(VERSION 3.0) project(Redesign CXX) option(NEKTAR_USE_CUDA "Enable CUDA support" FALSE) +option(NEKTAR_USE_SIMD "Enable SIMD support, if available" TRUE) set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) -set(SRC LibUtilities/ErrorUtil.cpp Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operators/BwdTrans/BwdTransImpl.cpp) +# Default install location: build/dist +IF (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) + SET(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/dist CACHE PATH "" FORCE) +ENDIF() + +find_package(Nektar++ REQUIRED) +include_directories(SYSTEM ${NEKTAR++_INCLUDE_DIRS} ${NEKTAR++_TP_INCLUDE_DIRS}) +link_directories(${NEKTAR++_LIBRARY_DIRS} ${NEKTAR++_TP_LIBRARY_DIRS}) +message(STATUS "Found Nektar++: version ${NEKTAR++_VERSION}") + +set(CMAKE_INSTALL_RPATH "${NEKTAR++_LIBRARY_DIRS}") + +set(SRC Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operators/BwdTrans/BwdTransImpl.cpp) if (NEKTAR_USE_CUDA) enable_language(CUDA) @@ -14,13 +28,26 @@ if (NEKTAR_USE_CUDA) set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu) endif() +if (NEKTAR_USE_SIMD) + add_compile_definitions(NEKTAR_ENABLE_SIMD_AVX512 NEKTAR_ENABLE_SIMD_AVX2 NEKTAR_ENABLE_SIMD_SSE2) + add_compile_options("-march=native") +endif() + +# add_compile_options("-fsanitize=address,undefined") +# add_link_options("-fsanitize=address,undefined") + +add_compile_definitions(NEKTAR_FULLDEBUG) # Test ASSERT/WARNING macros + add_library(Operators SHARED ${SRC}) +target_link_libraries(Operators PUBLIC ${NEKTAR++_LIBRARIES} ${NEKTAR++_TP_LIBRARIES}) target_include_directories(Operators PRIVATE "${CMAKE_SOURCE_DIR}") +target_compile_definitions(Operators PUBLIC ${NEKTAR++_DEFINITIONS}) set(TEST_SRC main.cpp) add_executable(main ${TEST_SRC}) target_link_libraries(main Operators) target_include_directories(main PRIVATE "${CMAKE_SOURCE_DIR}") +target_compile_definitions(main PUBLIC ${NEKTAR++_DEFINITIONS}) find_package(Doxygen) add_custom_target(doc COMMAND ${DOXYGEN_EXECUTABLE} ${CMAKE_SOURCE_DIR}/Doxyfile) diff --git a/Field.hpp b/Field.hpp index 52f8d0905812bc7a5fd0cb46517f71f4c3dba524..a5dc600fc765a54c9cfa6bae38ee41c8d99bd58f 100644 --- a/Field.hpp +++ b/Field.hpp @@ -1,23 +1,20 @@ #pragma once +#include <StdRegions/StdExpansion.h> +#include <StdRegions/StdRegions.hpp> #include <array> #include <iostream> #include <memory> +#include <numeric> #include <stdexcept> #include <string> #include <vector> -#include "LibUtilities/ErrorUtil.hpp" #include "MemoryRegionCPU.hpp" - -/** - * @brief Element shape enum used when defining Blocks of elements. - */ -enum class ShapeType -{ - eQuadrilateral, - eTriangle -}; +#include <LibUtilities/BasicUtils/ErrorUtil.hpp> +#include <LibUtilities/BasicUtils/ShapeType.hpp> +#include <LibUtilities/BasicUtils/SharedArray.hpp> +#include <StdRegions/StdExpansion.h> /** * @brief Captures the structure of a block of elements of identical shape @@ -25,14 +22,20 @@ enum class ShapeType */ struct BlockAttributes { - ShapeType shape; - std::array<int, 3> dofs; + BlockAttributes(size_t num_elements, size_t num_pts) + : num_elements(num_elements), num_pts(num_pts), + block_size(num_elements * num_pts) + { + } + size_t num_elements; + size_t num_pts; + size_t block_size; }; /** * @brief Possible states for Field data. - * + * * These identify the mathematical representation of the field data. The two * main states are *Phys*, representing the field at the quadrature points, * and *Coeff*, representing the field in terms of its spectral/hp element @@ -51,9 +54,10 @@ static constexpr FieldState DefaultState = FieldState::Phys; * @tparam TType The floating-point representation used by the field. * @tparam TState A FieldState value representing the state of the field. */ -template <typename TType = double, FieldState TState = DefaultState> -class Field +template <typename TType = double, FieldState TState = DefaultState> class Field { + using ExpansionSharedPtr = Nektar::StdRegions::StdExpansionSharedPtr; + public: Field(const Field &) = delete; virtual ~Field() = default; @@ -61,21 +65,22 @@ public: /** * @brief Construct a new Field object by moving storage from an existing * Field object. - * - * @param rhs + * + * @param rhs */ Field(Field &&rhs) : m_storage(std::move(rhs.m_storage)), block_attributes(std::move(rhs.block_attributes)), - component_names(std::move(component_names)) + component_names(std::move(rhs.component_names)), + m_curVecWidth(rhs.m_curVecWidth) { } /** * @brief Move assignment operator. - * - * @param rhs - * @return Field& + * + * @param rhs + * @return Field& */ Field &operator=(Field &&rhs) { @@ -88,27 +93,22 @@ public: /** * @brief Static templated creation method. - * + * * @tparam TMemoryRegion Type of memory region to use * @param blocks Field data specification. * @param num_components Number of components for a vector field. - * @return Field<TType, TState> + * @return Field<TType, TState> */ template <template <typename> class TMemoryRegion = MemoryRegionCPU> - static Field<TType, TState> create(std::vector<BlockAttributes> &blocks, + static Field<TType, TState> create(std::vector<BlockAttributes> blocks, int num_components = 1) { - auto field = Field(blocks, num_components); + auto field = Field(std::move(blocks), num_components); - size_t storage_size = 0; - for (int i = 0; i < blocks.size(); ++i) - { - auto &block = blocks[i]; - size_t blockSize = block.num_elements * block.dofs[0] * - block.dofs[1] * - block.dofs[2]; // wasteful but an upper bound - storage_size += blockSize; - } + size_t storage_size = std::accumulate( + field.block_attributes.begin(), field.block_attributes.end(), 0, + [](size_t acc, const BlockAttributes &block) + { return acc + block.block_size; }); // Create new TMemoryRegion and polymorphically store as MemoryRegionCPU field.m_storage = std::make_unique<TMemoryRegion<TType>>( @@ -117,14 +117,32 @@ public: return field; } + template <template <typename> class TMemoryRegion = MemoryRegionCPU> + static Field<TType, TState> fromArray( + std::vector<BlockAttributes> blocks, + Nektar::Array<Nektar::OneD, TType> const &array) + { + auto field = create<MemoryRegionCPU>(blocks); + + ASSERTL0( + field.GetStorage().size() == array.size(), + "Array size does not match size calculated from provided elements.") + + std::copy(array.begin(), array.end(), field.GetStorage().GetCPUPtr()); + + field.template GetStorage<TMemoryRegion>(); // convert storage to + // TMemoryRegion + return field; + } + /** * @brief Get the underlying storage of the field as the requested type. * @return MemoryRegion storage converted to the requested type - * + * * This routine performs MemoryRegion conversions if necessary to enable * casting of, for example a CUDA memory region to a CPU memory region to * support the use of a CPU-only operator if necessary. - * + * * A runtime warning is provided if a transfer of data from device to host * is required to achieve the conversion. */ @@ -172,30 +190,166 @@ public: } } + /** + * @brief Reshapes the storage to a prescribed vector width. + * + * This routine reorders the elemental data to interleave elements to a + * prescribed vector width VW. This therefore puts the same DOF for groups + * of VW elements contiguously in memory, enabling the efficient use of + * vectorised instructions. At the moment this routine reshapes from VW0 to + * VW1 by first reshaping from VW0 to a vector width of 1, and then to VW1. + * + * @tparam VectorWidth Target vector width. + * @tparam Align Memory alignment to use. + */ + template <size_t VectorWidth, + size_t Align = __STDCPP_DEFAULT_NEW_ALIGNMENT__> + void ReshapeStorage() + { + // No reshape required, early return + if (m_curVecWidth == VectorWidth) + return; + + ReshapeToScalar<Align>(); + + // Early return if "scalar" shape is required + if (VectorWidth == 1) + return; + + // New memory region with requested alignment + MemoryRegionCPU<TType> reshapedStorage(m_storage->size(), Align); + + TType *inptr = m_storage->GetCPUPtr(); + TType *outptr = reshapedStorage.GetCPUPtr(); + + for (const auto &block : block_attributes) + { + ASSERTL1( + block.num_elements % VectorWidth == 0, + "Number of elements not divisible by VectorWidth, padding not " + "implemented yet."); + + const size_t numMetaBlocks = block.num_elements / VectorWidth; + for (size_t metaBlock = 0; metaBlock < numMetaBlocks; ++metaBlock) + { + InterleaveFromScalar<VectorWidth>(inptr, block.num_pts, outptr); + inptr += block.num_pts * VectorWidth; + outptr += block.num_pts * VectorWidth; + } + } + + m_curVecWidth = VectorWidth; + m_storage = std::make_unique<MemoryRegionCPU<TType>>( + std::move(reshapedStorage)); + } + + std::vector<BlockAttributes> const &GetBlocks() const + { + return block_attributes; + } + /** * @brief Gets the number of components for a vector field. - * - * @return size_t + * + * @return size_t */ size_t GetNumComponents() { return component_names.size(); } + Nektar::Array<Nektar::OneD, TType> toArray() const + { + return Nektar::Array<Nektar::OneD, TType>(m_storage->size(), + m_storage->GetCPUPtr()); + } + private: + /** + * @brief Reshapes the current storage interleaving to a non-interleaved + * arrangement. + */ + template <size_t Align = __STDCPP_DEFAULT_NEW_ALIGNMENT__> + void ReshapeToScalar() + { + if (m_curVecWidth == 1) + return; + + MemoryRegionCPU<TType> reshapedStorage(m_storage->size(), Align); + + TType *inptr = m_storage->GetCPUPtr(); + TType *outptr = reshapedStorage.GetCPUPtr(); + + for (const auto &block : block_attributes) + { + const size_t numMetaBlocks = block.num_elements / m_curVecWidth; + + for (size_t metaBlock = 0; metaBlock < numMetaBlocks; ++metaBlock) + { + Deinterleave(inptr, block.num_pts, outptr); + inptr += m_curVecWidth * block.num_pts; + outptr += m_curVecWidth * block.num_pts; + } + } + + m_curVecWidth = 1; + m_storage = std::make_unique<MemoryRegionCPU<TType>>( + std::move(reshapedStorage)); + } + + /** + * @brief Interleave the data block to the given vector width + * @tparam VectorWidth Target vector width. + * @param in Input array with VW of 1. + * @param dataLen Length of data blocks to be interleaved. + * @param out Output array of VW specified by VectorWidth. + */ + template <size_t VectorWidth> + void InterleaveFromScalar(const TType *in, size_t dataLen, TType *out) + { + // TODO: SIMD this + for (size_t idx = 0; idx < dataLen; ++idx) + { + for (size_t vecElem = 0; vecElem < VectorWidth; ++vecElem) + { + out[idx * VectorWidth + vecElem] = in[vecElem * dataLen + idx]; + } + } + } + + /** + * @brief Deinterleave data from the current vector width to be + * non-interleaved. + * @param in Input array + * @param dataLen Length of a block of data (e.g. an element) + * @param out Output array + */ + void Deinterleave(const TType *in, size_t dataLen, TType *out) + { + for (size_t idx = 0; idx < dataLen; ++idx) + { + for (size_t vecElem = 0; vecElem < m_curVecWidth; ++vecElem) + { + out[vecElem * dataLen + idx] = + in[idx * m_curVecWidth + vecElem]; + } + } + } + /** * @brief Construct a new Field object. - * + * * @param blocks Field data layout specification * @param num_components Number of components for a vector field. */ - Field(std::vector<BlockAttributes> &blocks, int num_components = 1) - : block_attributes(blocks) + Field(std::vector<BlockAttributes> blocks, int num_components = 1) + : block_attributes(std::move(blocks)) { } std::unique_ptr<MemoryRegionCPU<TType>> m_storage; - // std::vector<MemoryView<TType>> m_views; std::vector<BlockAttributes> block_attributes; std::vector<std::string> component_names; + + size_t m_curVecWidth = 1; }; diff --git a/LibUtilities/ErrorUtil.cpp b/LibUtilities/ErrorUtil.cpp deleted file mode 100644 index 56c2bee0a72e1a41abe2195491db184431b38862..0000000000000000000000000000000000000000 --- a/LibUtilities/ErrorUtil.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "ErrorUtil.hpp" - -namespace Nektar -{ - std::ostream *ErrorUtil::m_outStream = &std::cerr; - bool ErrorUtil::m_printBacktrace = true; -} \ No newline at end of file diff --git a/LibUtilities/ErrorUtil.hpp b/LibUtilities/ErrorUtil.hpp deleted file mode 100644 index 44977d2d82643a14f4106d19158e0b0b46a94c5a..0000000000000000000000000000000000000000 --- a/LibUtilities/ErrorUtil.hpp +++ /dev/null @@ -1,279 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -// File ErrorUtil.hpp -// -// For more information, please see: http://www.nektar.info -// -// The MIT License -// -// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), -// Department of Aeronautics, Imperial College London (UK), and Scientific -// Computing and Imaging Institute, University of Utah (USA). -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. -// -// Description: error related utilities -// -/////////////////////////////////////////////////////////////////////////////// -#ifndef ERRORUTIL_HPP -#define ERRORUTIL_HPP - -#include <iostream> -#include <stdexcept> -#include <string> - -#include <boost/core/ignore_unused.hpp> - -// #include <LibUtilities/LibUtilitiesDeclspec.h> -#define LIB_UTILITIES_EXPORT - -#if defined(NEKTAR_USE_MPI) -#include <mpi.h> -#endif - -#ifndef _WIN32 -#include <execinfo.h> -#endif - -namespace Nektar -{ - -class ErrorUtil -{ -public: - class NekError : public std::runtime_error - { - public: - NekError(const std::string &message) : std::runtime_error(message) - { - } - }; - - enum ErrType - { - efatal, - ewarning - }; - - inline static void SetErrorStream(std::ostream &o) - { - m_outStream = &o; - } - - inline static void SetPrintBacktrace(bool b) - { - m_printBacktrace = b; - } - - inline static bool HasCustomErrorStream() - { - return m_outStream != &std::cerr; - } - - inline static void Error(ErrType type, const char *routine, int lineNumber, - const char *msg, unsigned int level, - bool DoComm = false) - { - boost::ignore_unused(DoComm); - - // The user of outStream is primarily for the unit tests. The unit - // tests often generate errors on purpose to make sure invalid usage is - // flagged appropriately. Printing the error messages to cerr made the - // unit test output hard to parse. - - std::string baseMsg = - "Level " + std::to_string(level) + " assertion violation\n"; -#if defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) - baseMsg += "Where : " + std::string(routine) + "[" + - std::to_string(lineNumber) + "]\nMessage : "; -#else - boost::ignore_unused(routine, lineNumber); -#endif - baseMsg += std::string(msg); - - // Default rank is zero. If MPI used and initialised, populate with - // the correct rank. Messages are only printed on rank zero. - int rank = 0; -#if defined(NEKTAR_USE_MPI) && !defined(NEKTAR_USE_CWIPI) - int flag = 0; - if (DoComm) - { - MPI_Initialized(&flag); - if (flag) - { - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - } - } -#else - boost::ignore_unused(DoComm); -#endif - - std::string btMessage(""); -#if defined(NEKTAR_FULLDEBUG) -#ifndef _WIN32 - if (m_printBacktrace) - { - void *btArray[40]; - int btSize; - char **btStrings; - - btSize = backtrace(btArray, 40); - btStrings = backtrace_symbols(btArray, btSize); - - for (int i = 0; i < btSize; ++i) - { - btMessage += std::string(btStrings[i]) + "\n"; - } - free(btStrings); - } -#endif -#endif - - switch (type) - { - case efatal: - if (!rank) - { - if (m_printBacktrace) - { - (*m_outStream) << btMessage; - } - (*m_outStream) << "Fatal : " << baseMsg << std::endl; - } - -#if defined(NEKTAR_USE_MPI) && !defined(NEKTAR_USE_CWIPI) - if (DoComm) - { - if (flag) - { - MPI_Barrier(MPI_COMM_WORLD); - } - } -#endif - throw NekError(baseMsg); - break; - case ewarning: - if (!rank) - { - if (m_printBacktrace) - { - (*m_outStream) << btMessage; - } - (*m_outStream) << "Warning : " << baseMsg << std::endl; - } - break; - default: - (*m_outStream) - << "Unknown warning type: " << baseMsg << std::endl; - } - } - - inline static void Error(ErrType type, const char *routine, int lineNumber, - const std::string &msg, unsigned int level) - { - Error(type, routine, lineNumber, msg.c_str(), level); - } - - inline static void Error(ErrType type, const char *routine, int lineNumber, - const char *msg) - { - Error(type, routine, lineNumber, msg, 0); - } - -private: - LIB_UTILITIES_EXPORT static std::ostream *m_outStream; - LIB_UTILITIES_EXPORT static bool m_printBacktrace; -}; - -/// Assert Level 0 -- Fundamental assert which -/// is used whether in FULLDEBUG, DEBUG or OPT -/// compilation mode. This level assert is -/// considered code critical, even under -/// optimized compilation. - -#define NEKERROR(type, msg) \ - Nektar::ErrorUtil::Error(type, __FILE__, __LINE__, msg, 0); - -#define ROOTONLY_NEKERROR(type, msg) \ - Nektar::ErrorUtil::Error(type, __FILE__, __LINE__, msg, 0, true); - -#define ASSERTL0(condition, msg) \ - if (!(condition)) \ - { \ - Nektar::ErrorUtil::Error(Nektar::ErrorUtil::efatal, __FILE__, \ - __LINE__, msg, 0); \ - } - -#define WARNINGL0(condition, msg) \ - if (!(condition)) \ - { \ - Nektar::ErrorUtil::Error(Nektar::ErrorUtil::ewarning, __FILE__, \ - __LINE__, msg, 0); \ - } - -/// Assert Level 1 -- Debugging which is used whether in FULLDEBUG or -/// DEBUG compilation mode. This level assert is designed for aiding -/// in standard debug (-g) mode -#if defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) - -#define ASSERTL1(condition, msg) \ - if (!(condition)) \ - { \ - Nektar::ErrorUtil::Error(Nektar::ErrorUtil::efatal, __FILE__, \ - __LINE__, msg, 1); \ - } - -#define WARNINGL1(condition, msg) \ - if (!(condition)) \ - { \ - Nektar::ErrorUtil::Error(Nektar::ErrorUtil::ewarning, __FILE__, \ - __LINE__, msg, 1); \ - } - -#else // defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) -#define ASSERTL1(condition, msg) -#define WARNINGL1(condition, msg) -#endif // defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) - -/// Assert Level 2 -- Debugging which is used FULLDEBUG compilation -/// mode. This level assert is designed to provide addition safety -/// checks within the code (such as bounds checking, etc.). -#ifdef NEKTAR_FULLDEBUG - -#define ASSERTL2(condition, msg) \ - if (!(condition)) \ - { \ - Nektar::ErrorUtil::Error(Nektar::ErrorUtil::efatal, __FILE__, \ - __LINE__, msg, 2); \ - } -#define WARNINGL2(condition, msg) \ - if (!(condition)) \ - { \ - Nektar::ErrorUtil::Error(Nektar::ErrorUtil::ewarning, __FILE__, \ - __LINE__, msg, 2); \ - } - -#else // NEKTAR_FULLDEBUG -#define ASSERTL2(condition, msg) -#define WARNINGL2(condition, msg) -#endif // NEKTAR_FULLDEBUG - -} // namespace Nektar - -#endif // ERRORUTIL_HPP diff --git a/LibUtilities/NekFactory.hpp b/LibUtilities/NekFactory.hpp deleted file mode 100644 index c725a7eefc38c1bda85ee4a93ffef55cae338757..0000000000000000000000000000000000000000 --- a/LibUtilities/NekFactory.hpp +++ /dev/null @@ -1,316 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -// File: NekFactory.hpp -// -// For more information, please see: http://www.nektar.info -// -// The MIT License -// -// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), -// Department of Aeronautics, Imperial College London (UK), and Scientific -// Computing and Imaging Institute, University of Utah (USA). -// -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. -// -// Description: Factory pattern class for Nektar -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef NEKTAR_LIBUTILITIES_BASICUTILS_NEKFACTORY -#define NEKTAR_LIBUTILITIES_BASICUTILS_NEKFACTORY - -#include <functional> -#include <iostream> -#include <map> -#include <memory> -#include <sstream> -#include <string> - -#ifdef NEKTAR_USE_THREAD_SAFETY -#include <boost/thread/locks.hpp> -#include <boost/thread/shared_mutex.hpp> -#endif - -#include "ErrorUtil.hpp" - -namespace Nektar -{ -namespace LibUtilities -{ - -#ifdef NEKTAR_USE_THREAD_SAFETY -// Generate parameter typenames with default type of 'none' -typedef boost::unique_lock<boost::shared_mutex> WriteLock; -typedef boost::shared_lock<boost::shared_mutex> ReadLock; -#endif - -/** - * @class NekFactory - * - * @brief Provides a generic Factory class. - * - * Implements a generic object factory. Class-types which use an arbitrary - * number of parameters may be used via C++ variadic templating. - * - * To allow a class to be instantiated by the factory, the following are - * required in each class definition (in the case of a single parameter): - * - * \code - * static [baseclass]* create([paramtype1] &P) { - * return new [derivedclass](P); - * } - * static std::string className; - * \endcode - * - * and outside the class definition in the implementation: - * - * \code - * std::string [derivedclass]::className - * = Factory<std::string,[baseclass],[paramtype1]>:: - * RegisterCreatorFunction("[derivedclass]", - * [derivedclass]::create,"Description"); - * \endcode - * - * The assignment of the static variable className is done through the call to - * RegisterCreatorFunction, which registers the class with the factory prior to - * the start of the main() routine. - * - * To create an instance of a derived class, for instance: - * \code - * [baseclass]* var_name = - * Factory<std::string,[baseclass],[paramtype1]> - * ::CreateInstance("[derivedclass]",Param1); - * \endcode - */ -template <typename tKey, // reference tag (e.g. string, int) - typename tBase, // base class - typename... tParam> -class NekFactory -{ -public: - /// Comparison predicator of key - typedef std::less<tKey> tPredicator; - /// Shared pointer to an object of baseclass type. - typedef std::unique_ptr<tBase> tBaseUniquePtr; - /// CreatorFunction type which takes parameter and returns base class shared - /// pointer. - typedef std::function<tBaseUniquePtr(tParam...)> CreatorFunction; - - /// Define a struct to hold the information about a module. - struct ModuleEntry - { - ModuleEntry(CreatorFunction pFunc, const std::string pDesc) - : m_func(pFunc), m_desc(pDesc) - { - } - - /// Function used to create instance of class. - CreatorFunction m_func; - /// Description of class for use in listing available classes. - std::string m_desc; - }; - - /// Factory map between key and module data. - typedef std::map<tKey, ModuleEntry, tPredicator> TMapFactory; - -public: - NekFactory() = default; - - /** - * @brief Create an instance of the class referred to by \c idKey. - * - * Searches the factory's map for the given key and returns a shared - * base class pointer to a new instance of the associated class. - * @param idKey Key of class to create. - * @param x Parameter to pass to class constructor. - * @returns Base class pointer to new instance. - */ - tBaseUniquePtr CreateInstance(tKey idKey, tParam... args) - { -#ifdef NEKTAR_USE_THREAD_SAFETY - ReadLock vReadLock(m_mutex); -#endif - - // Now try and find the key in the map. - auto it = getMapFactory()->find(idKey); - - // If successful, check the CreatorFunction is defined and - // create a new instance of the class. - if (it != getMapFactory()->end()) - { - ModuleEntry *tmp = &(it->second); -#ifdef NEKTAR_USE_THREAD_SAFETY - vReadLock.unlock(); -#endif - - if (tmp->m_func) - { - try - { - return tmp->m_func(args...); - } - catch (const std::string &s) - { - std::stringstream errstr; - errstr << "Unable to create module: " << idKey << "\n"; - errstr << s; - NEKERROR(ErrorUtil::efatal, errstr.str()); - } - } - } - - // If we get this far, the key doesn't exist, so throw an error. - std::stringstream errstr; - errstr << "No such module: " << idKey << std::endl; - PrintAvailableClasses(errstr); - NEKERROR(ErrorUtil::efatal, errstr.str()); - return tBaseUniquePtr(); - } - - /** - * @brief Register a class with the factory. - * - * This function is called by each class in a static context (prior - * to the execution of main()) and creates an entry for the class - * in the factory's map. - * @param idKey Key used to reference the class. - * @param classCreator Function to call to create an instance - * of this class. - * @param pDesc Optional description of class. - * @returns The given key \c idKey. - */ - tKey RegisterCreatorFunction(tKey idKey, CreatorFunction classCreator, - std::string pDesc = "") - { -#ifdef NEKTAR_USE_THREAD_SAFETY - WriteLock vWriteLock(m_mutex); -#endif - - ModuleEntry e(classCreator, pDesc); - getMapFactory()->insert(std::pair<tKey, ModuleEntry>(idKey, e)); - return idKey; - } - - /** - * @brief Checks if a particular module is available. - */ - bool ModuleExists(tKey idKey) - { -#ifdef NEKTAR_USE_THREAD_SAFETY - ReadLock vReadLock(m_mutex); -#endif - - // Now try and find the key in the map. - auto it = getMapFactory()->find(idKey); - - if (it != getMapFactory()->end()) - { - return true; - } - return false; - } - - /** - * @brief Prints the available classes to stdout. - */ - void PrintAvailableClasses(std::ostream &pOut = std::cout) - { -#ifdef NEKTAR_USE_THREAD_SAFETY - ReadLock vReadLock(m_mutex); -#endif - - pOut << std::endl << "Available classes: " << std::endl; - for (auto &it : *getMapFactory()) - { - pOut << " " << it.first; - if (it.second.m_desc != "") - { - pOut << ":" << std::endl - << " " << it.second.m_desc << std::endl; - } - else - { - pOut << std::endl; - } - } - } - - /** - * @brief Retrieves a key, given a description - */ - tKey GetKey(std::string pDesc) - { -#ifdef NEKTAR_USE_THREAD_SAFETY - ReadLock vReadLock(m_mutex); -#endif - - for (auto &it : *getMapFactory()) - { - if (it.second.m_desc == pDesc) - { - return it.first; - } - } - std::string errstr = "Module '" + pDesc + "' is not known."; - ASSERTL0(false, errstr); - } - - /** - * @brief Returns the description of a class - */ - std::string GetClassDescription(tKey idKey) - { -#ifdef NEKTAR_USE_THREAD_SAFETY - ReadLock vReadLock(m_mutex); -#endif - - // Now try and find the key in the map. - auto it = getMapFactory()->find(idKey); - - std::stringstream errstr; - errstr << "No such module: " << idKey << std::endl; - ASSERTL0(it != getMapFactory()->end(), errstr.str()); - return it->second.m_desc; - } - -protected: - /** - * @brief Ensure the factory's map is created. - * @returns The factory's map. - */ - TMapFactory *getMapFactory() - { - return &mMapFactory; - } - -private: - NekFactory(const NekFactory &rhs); - NekFactory &operator=(const NekFactory &rhs); - - TMapFactory mMapFactory; - -#ifdef NEKTAR_USE_THREAD_SAFETY - boost::shared_mutex m_mutex; -#endif -}; - -} // namespace LibUtilities -} // namespace Nektar - -#endif diff --git a/MemoryRegionCPU.hpp b/MemoryRegionCPU.hpp index 1a3c5d89ea602115efda41806054287066ab257a..877f27cf47d1a9afa7a70332954e374a089f66af 100644 --- a/MemoryRegionCPU.hpp +++ b/MemoryRegionCPU.hpp @@ -1,51 +1,64 @@ #pragma once +#include <new> + /** * @brief Stores underlying data for a Field on the CPU. - * + * * It acts as a holder for a contiguous block of memory, allocated on the - * host system. - * + * host system. + * * This class also acts as a base class for device-aware builds. */ template <typename TData> class MemoryRegionCPU { public: - MemoryRegionCPU() = delete; - MemoryRegionCPU(const MemoryRegionCPU &rhs) = delete; + MemoryRegionCPU() = delete; + MemoryRegionCPU(const MemoryRegionCPU &rhs) = delete; + MemoryRegionCPU &operator=(const MemoryRegionCPU &rhs) = delete; MemoryRegionCPU(MemoryRegionCPU &&rhs) { - m_host = rhs.m_host; - m_size = rhs.m_size; - rhs.m_host = nullptr; + m_host = rhs.m_host; + m_size = rhs.m_size; + m_alignment = rhs.m_alignment; + rhs.m_host = nullptr; } - MemoryRegionCPU(size_t n) + MemoryRegionCPU(size_t n, + size_t alignment = __STDCPP_DEFAULT_NEW_ALIGNMENT__) { - m_host = new TData[n]; - m_size = n; + // C++17 aligned new + m_host = new (std::align_val_t(alignment)) TData[n]; + m_alignment = alignment; + m_size = n; } virtual ~MemoryRegionCPU() { if (m_host != nullptr) { - delete[] m_host; + operator delete[](m_host, std::align_val_t(m_alignment)); m_host = nullptr; } } - void operator=(MemoryRegionCPU &&rhs) + MemoryRegionCPU &operator=(MemoryRegionCPU &&rhs) { - m_host = rhs.m_host; - m_size = rhs.m_size; - rhs.m_host = nullptr; + if (m_host) + operator delete[](m_host, std::align_val_t(m_alignment)); + + m_host = rhs.m_host; + m_size = rhs.m_size; + m_alignment = rhs.m_alignment; + rhs.m_host = nullptr; + + return *this; } /** * @brief Get the pointer to the CPU memory. - * + * * This is a virtual function so that subclasses can move memory to the * CPU from a device if needed. */ @@ -56,7 +69,7 @@ public: /** * @brief Move memory to the CPU. - * + * * This is a virtual function so that subclasses can move memory to the * CPU from a device if needed. */ @@ -70,6 +83,7 @@ public: } protected: - TData *m_host = nullptr; - size_t m_size = 0; + TData *m_host = nullptr; + size_t m_size = 0; + size_t m_alignment = __STDCPP_DEFAULT_NEW_ALIGNMENT__; }; diff --git a/MemoryRegionCUDA.hpp b/MemoryRegionCUDA.hpp index e634cc1dbea0ae23cea6098efba4afc11e80276f..2187341bee8d1e5033656e0746b0c576cd9fd40f 100644 --- a/MemoryRegionCUDA.hpp +++ b/MemoryRegionCUDA.hpp @@ -91,7 +91,7 @@ protected: private: void initFromSize(size_t n); - TData *m_device = nullptr; ///< Device memory pointer - size_t m_size = 0; ///< Device storage size - bool m_ondevice = false; ///< Flag indicating if data is on device + TData *m_device = nullptr; ///< Device memory pointer + size_t m_size = 0; ///< Device storage size + bool m_ondevice = false; ///< Flag indicating if data is on device }; diff --git a/Operators/BwdTrans/BwdTransCUDA.hpp b/Operators/BwdTrans/BwdTransCUDA.hpp index 1d3abfd4031aee536c213882f3115269b1779ab6..236504ed9661509dd20979cf7c9240f0ceac7e93 100644 --- a/Operators/BwdTrans/BwdTransCUDA.hpp +++ b/Operators/BwdTrans/BwdTransCUDA.hpp @@ -9,6 +9,11 @@ template <typename TData> class OperatorBwdTransImpl<TData, ImplCUDA> : public OperatorBwdTrans<TData> { public: + OperatorBwdTransImpl(const MultiRegions::ExpListSharedPtr& expansionList) + : OperatorBwdTrans<TData>(std::move(expansionList)) + { + } + void apply(Field<TData, FieldState::Coeff> &in, Field<TData, FieldState::Phys> &out) override { @@ -17,9 +22,11 @@ public: std::cout << "Op bwd trans CUDA\n"; } - static std::unique_ptr<Operator<TData>> instantiate() + static std::unique_ptr<Operator<TData>> instantiate( + MultiRegions::ExpListSharedPtr expansionList) { - return std::make_unique<OperatorBwdTransImpl<TData, ImplCUDA>>(); + return std::make_unique<OperatorBwdTransImpl<TData, ImplCUDA>>( + std::move(expansionList)); } static std::string className; diff --git a/Operators/BwdTrans/BwdTransImpl.cpp b/Operators/BwdTrans/BwdTransImpl.cpp index 07df19524ed28c4e9e9ac1a287f883d833144b09..d2290d59c497bbf205b527775b964f8bb516c4eb 100644 --- a/Operators/BwdTrans/BwdTransImpl.cpp +++ b/Operators/BwdTrans/BwdTransImpl.cpp @@ -1,5 +1,6 @@ -#include "BwdTransSumFac.hpp" +#include "BwdTransStdMat.hpp" #include "BwdTransMatFree.hpp" +#include "BwdTransSumFac.hpp" namespace Nektar::Operators::detail { @@ -14,7 +15,13 @@ std::string OperatorBwdTransImpl<double, ImplMatFree>::className = template <> std::string OperatorBwdTransImpl<double, ImplSumFac>::className = GetOperatorFactory<double>().RegisterCreatorFunction( - "BwdTransSumFac", - OperatorBwdTransImpl<double, ImplSumFac>::instantiate, "..."); + "BwdTransSumFac", OperatorBwdTransImpl<double, ImplSumFac>::instantiate, + "..."); + +template <> +std::string OperatorBwdTransImpl<double, ImplStdMat>::className = + GetOperatorFactory<double>().RegisterCreatorFunction( + "BwdTransStdMat", OperatorBwdTransImpl<double, ImplStdMat>::instantiate, + "..."); -} \ No newline at end of file +} // namespace Nektar::Operators::detail diff --git a/Operators/BwdTrans/BwdTransMatFree.hpp b/Operators/BwdTrans/BwdTransMatFree.hpp index 5bb5f2e65c602b8972a1d2836f8e75c470a2df26..182aa6355b6d600dbab0fc9077be3bff5a54ad75 100644 --- a/Operators/BwdTrans/BwdTransMatFree.hpp +++ b/Operators/BwdTrans/BwdTransMatFree.hpp @@ -8,6 +8,11 @@ template <typename TData> class OperatorBwdTransImpl<TData, ImplMatFree> : public OperatorBwdTrans<TData> { public: + OperatorBwdTransImpl(const MultiRegions::ExpListSharedPtr& expansionList) + : OperatorBwdTrans<TData>(std::move(expansionList)) + { + } + void apply(Field<TData, FieldState::Coeff> &in, Field<TData, FieldState::Phys> &out) override { @@ -21,9 +26,11 @@ public: std::cout << "Op bwd trans mat free\n"; } - static std::unique_ptr<Operator<TData>> instantiate() + static std::unique_ptr<Operator<TData>> instantiate( + const MultiRegions::ExpListSharedPtr& expansionList) { - return std::make_unique<OperatorBwdTransImpl<TData, ImplMatFree>>(); + return std::make_unique<OperatorBwdTransImpl<TData, ImplMatFree>>( + std::move(expansionList)); } static std::string className; diff --git a/Operators/BwdTrans/BwdTransMatFreeKernels.hpp b/Operators/BwdTrans/BwdTransMatFreeKernels.hpp index d9138db6f9d6869ebe156ef0e9ce7c0ce911097f..9bbe9e5543448af991841d127f83d197b9d9b18a 100644 --- a/Operators/BwdTrans/BwdTransMatFreeKernels.hpp +++ b/Operators/BwdTrans/BwdTransMatFreeKernels.hpp @@ -1,5 +1,4 @@ -namespace Nektar::Operators::detail::matfree { +namespace Nektar::Operators::detail::matfree +{ - - -} \ No newline at end of file +} diff --git a/Operators/BwdTrans/BwdTransStdMat.hpp b/Operators/BwdTrans/BwdTransStdMat.hpp new file mode 100644 index 0000000000000000000000000000000000000000..053dc419f7f476f2b5189fac0d3eb2ef01c6722d --- /dev/null +++ b/Operators/BwdTrans/BwdTransStdMat.hpp @@ -0,0 +1,54 @@ +#include "Operators/OperatorBwdTrans.hpp" +#include <StdRegions/StdExpansion.h> + +namespace Nektar::Operators::detail +{ + +// sum-factorisation implementation +template <typename TData> +class OperatorBwdTransImpl<TData, ImplStdMat> : public OperatorBwdTrans<TData> +{ +public: + OperatorBwdTransImpl(const MultiRegions::ExpListSharedPtr& expansionList) + : OperatorBwdTrans<TData>(std::move(expansionList)) + { + } + + void apply(Field<TData, FieldState::Coeff> &in, + Field<TData, FieldState::Phys> &out) override + { + auto const *inptr = in.GetStorage().GetCPUPtr(); + auto *outptr = out.GetStorage().GetCPUPtr(); + + for (size_t block_idx = 0; block_idx < in.GetBlocks().size(); + ++block_idx) + { + auto const expPtr = this->m_expansionList->GetExp(block_idx); + + Nektar::StdRegions::StdMatrixKey key( + StdRegions::eBwdTrans, expPtr->DetShapeType(), *expPtr); + auto const matPtr = expPtr->GetStdMatrix(key); + + auto const &block = in.GetBlocks()[block_idx]; + + Blas::Dgemm('N', 'N', matPtr->GetRows(), block.num_elements, + matPtr->GetColumns(), 1.0, matPtr->GetRawPtr(), + matPtr->GetRows(), inptr, block.num_pts, 0.0, outptr, + expPtr->GetTotPoints()); + + inptr += block.block_size; + outptr += expPtr->GetTotPoints() * block.num_elements; + } + } + + static std::unique_ptr<Operator<TData>> instantiate( + const MultiRegions::ExpListSharedPtr& expansionList) + { + return std::make_unique<OperatorBwdTransImpl<TData, ImplStdMat>>( + std::move(expansionList)); + } + + static std::string className; +}; + +} // namespace Nektar::Operators::detail diff --git a/Operators/BwdTrans/BwdTransSumFac.hpp b/Operators/BwdTrans/BwdTransSumFac.hpp index d436bcd89298cc4abb684ceb0fed3ff193adcefc..0eda71e86c1dfd697387ff8b5e27e5328d00aeee 100644 --- a/Operators/BwdTrans/BwdTransSumFac.hpp +++ b/Operators/BwdTrans/BwdTransSumFac.hpp @@ -2,24 +2,31 @@ namespace Nektar::Operators::detail { - + // sum-factorisation implementation template <typename TData> class OperatorBwdTransImpl<TData, ImplSumFac> : public OperatorBwdTrans<TData> { public: + OperatorBwdTransImpl(const MultiRegions::ExpListSharedPtr& expansionList) + : OperatorBwdTrans<TData>(std::move(expansionList)) + { + } + void apply(Field<TData, FieldState::Coeff> &in, Field<TData, FieldState::Phys> &out) override { std::cout << "Op bwd trans sum fac\n"; } - static std::unique_ptr<Operator<TData>> instantiate() + static std::unique_ptr<Operator<TData>> instantiate( + const MultiRegions::ExpListSharedPtr& expansionList) { - return std::make_unique<OperatorBwdTransImpl<TData, ImplSumFac>>(); + return std::make_unique<OperatorBwdTransImpl<TData, ImplSumFac>>( + std::move(expansionList)); } static std::string className; }; -} \ No newline at end of file +} // namespace Nektar::Operators::detail diff --git a/Operators/Operator.cpp b/Operators/Operator.cpp index 4ff199615c11d57067b52d209bb6c668a0cfea4a..def9aed3988c5340e9111609775520e553909952 100644 --- a/Operators/Operator.cpp +++ b/Operators/Operator.cpp @@ -2,9 +2,8 @@ namespace Nektar::Operators { - -template< typename TData> -OperatorFactory<TData> &GetOperatorFactory() + +template <typename TData> OperatorFactory<TData> &GetOperatorFactory() { static OperatorFactory<TData> instance; return instance; @@ -12,4 +11,4 @@ OperatorFactory<TData> &GetOperatorFactory() template OperatorFactory<double> &GetOperatorFactory(); -} \ No newline at end of file +} // namespace Nektar::Operators diff --git a/Operators/Operator.hpp b/Operators/Operator.hpp index 1b7b0062dc2551ee55a9e588f4cbed6686afc9f5..54e0ea7000d5df450282f41e7a4ea9e41ede715b 100644 --- a/Operators/Operator.hpp +++ b/Operators/Operator.hpp @@ -2,7 +2,9 @@ #include <string> -#include "LibUtilities/NekFactory.hpp" +#include <LibUtilities/BasicUtils/NekFactory.hpp> +#include <MultiRegions/ExpList.h> +//#include <StdRegions/StdExpansion.h> namespace Nektar::Operators { @@ -11,41 +13,56 @@ namespace Nektar::Operators // allow extension by users without modifying library using default_fp_type = double; -// Implementation types -struct ImplLocMat; +// Core implementation types +struct ImplStdMat; struct ImplSumFac; struct ImplMatFree; struct ImplCUDA; // Forward-declare the Operator base class so we can define the factory -template< typename TData> class Operator; +template <typename TData> class Operator; // Typename alias for the factory -template< typename TData> +template <typename TData> using OperatorFactory = - Nektar::LibUtilities::NekFactory<std::string, Operator<TData>>; + Nektar::LibUtilities::NekFactory<std::string, Operator<TData>, + const MultiRegions::ExpListSharedPtr&>; // Operator factory singleton -template< typename TData> -OperatorFactory<TData> &GetOperatorFactory(); +template <typename TData> OperatorFactory<TData> &GetOperatorFactory(); -template <typename TData> -class Operator +template <typename TData> class Operator { public: - template< typename TDescriptor> - static std::unique_ptr<typename TDescriptor::class_name> create(std::string pKey = "") + virtual ~Operator() = default; + + Operator(const MultiRegions::ExpListSharedPtr& expansionList) + : m_expansionList(std::move(expansionList)) + { + } + + template <typename TDescriptor> + static std::shared_ptr<typename TDescriptor::class_name> create( + const MultiRegions::ExpListSharedPtr& expansionList, + std::string pKey = "") { std::string key = TDescriptor::key; - if (pKey.empty()) { + if (pKey.empty()) + { key += TDescriptor::default_impl; } - else { + else + { key += pKey; } - auto x = GetOperatorFactory<TData>().CreateInstance(key); - return std::unique_ptr<typename TDescriptor::class_name>(static_cast<typename TDescriptor::class_name*>(x.release())); + + return std::static_pointer_cast<typename TDescriptor::class_name>( + GetOperatorFactory<TData>().CreateInstance( + key, std::move(expansionList))); } + +protected: + MultiRegions::ExpListSharedPtr m_expansionList; }; -} \ No newline at end of file +} // namespace Nektar::Operators diff --git a/Operators/OperatorBwdTrans.cpp b/Operators/OperatorBwdTrans.cpp index 37aa2f859b78a8ee17d6c7fe04bfabd4f0103126..58ccec285115fde0e8edad5b742452ec078e5225 100644 --- a/Operators/OperatorBwdTrans.cpp +++ b/Operators/OperatorBwdTrans.cpp @@ -2,8 +2,8 @@ namespace Nektar::Operators { - -template<> const std::string BwdTrans<>::key = "BwdTrans"; -template<> const std::string BwdTrans<>::default_impl = "MatFree"; -} \ No newline at end of file +template <> const std::string BwdTrans<>::key = "BwdTrans"; +template <> const std::string BwdTrans<>::default_impl = "MatFree"; + +} // namespace Nektar::Operators diff --git a/Operators/OperatorBwdTrans.hpp b/Operators/OperatorBwdTrans.hpp index a95b67a4c15a83dd1038af68a385d6c23876ab30..d5108b4232d39b0745a16c73c311e0e700476b2f 100644 --- a/Operators/OperatorBwdTrans.hpp +++ b/Operators/OperatorBwdTrans.hpp @@ -8,39 +8,47 @@ namespace Nektar::Operators // BwdTrans base class // Defines the apply operator to enforce apply parameter types -template <typename TData> -class OperatorBwdTrans : public Operator<TData> +template <typename TData> class OperatorBwdTrans : public Operator<TData> { public: + virtual ~OperatorBwdTrans() = default; + + OperatorBwdTrans(const MultiRegions::ExpListSharedPtr& expansionList) + : Operator<TData>(std::move(expansionList)) + { + } + virtual void apply(Field<TData, FieldState::Coeff> &in, Field<TData, FieldState::Phys> &out) = 0; virtual void operator()(Field<TData, FieldState::Coeff> &in, - Field<TData, FieldState::Phys> &out) + Field<TData, FieldState::Phys> &out) { apply(in, out); } }; // Descriptor / traits class for BwdTrans -template<typename TData = default_fp_type> -struct BwdTrans { +template <typename TData = default_fp_type> struct BwdTrans +{ using class_name = OperatorBwdTrans<TData>; - using FieldIn = Field<TData, FieldState::Coeff>; - using FieldOut = Field<TData, FieldState::Phys>; + using FieldIn = Field<TData, FieldState::Coeff>; + using FieldOut = Field<TData, FieldState::Phys>; static const std::string key; static const std::string default_impl; - static std::unique_ptr<class_name> create(std::string pKey = "") + static std::shared_ptr<class_name> create( + const MultiRegions::ExpListSharedPtr& expansionList, + std::string pKey = "") { - return Operator<TData>::template create<BwdTrans>(pKey); + return Operator<TData>::template create<BwdTrans>( + std::move(expansionList), pKey); } }; -namespace detail { +namespace detail +{ // Template for BwdTrans implementations -template <typename TData, typename Op> -class OperatorBwdTransImpl; -} - +template <typename TData, typename Op> class OperatorBwdTransImpl; +} // namespace detail -} +} // namespace Nektar::Operators diff --git a/main.cpp b/main.cpp index 3f14fba40462ccc790c1e8d6728d1658c27540f0..a47fcb3910e37932fb0569d214cb8f55a4f89131 100644 --- a/main.cpp +++ b/main.cpp @@ -4,10 +4,10 @@ * @brief Demonstrator program for the new Field class. * @version 0.1 * @date 2023-02-13 - * + * * @copyright Copyright (c) 2023 Imperial College London, University of Utah, * Kings College London - * + * */ #include <iostream> #include <memory> @@ -15,24 +15,72 @@ #include "Field.hpp" #include "Operators/OperatorBwdTrans.hpp" +#include <LibUtilities/BasicUtils/SessionReader.h> +//#include <LibUtilities/SimdLib/tinysimd.hpp> +#include <SpatialDomains/MeshGraph.h> +#include <MultiRegions/ExpList.h> + #ifdef NEKTAR_USE_CUDA #include "MemoryRegionCUDA.hpp" #endif using namespace Nektar::Operators; +using namespace Nektar::LibUtilities; +using namespace Nektar; + +template <typename TType, FieldState State> +std::ostream &operator<<(std::ostream &stream, Field<TType, State> &f) +{ + auto *x = f.GetStorage().GetCPUPtr(); + for (size_t i = 0; i < f.GetStorage().size(); ++i) + { + stream << x[i] << ' '; + } + stream << std::endl; + + return stream; +} + +std::vector<BlockAttributes> GetBlockAttributes( + FieldState state, + const MultiRegions::ExpListSharedPtr explist) +{ + const int n = explist->GetNumElmts(); + std::map<std::tuple<LibUtilities::ShapeType,unsigned int,unsigned int>,size_t> blockList; + for (int i = 0; i < explist->GetNumElmts(); ++i) + { + auto e = explist->GetExp(i); + blockList[{e->DetShapeType(),e->GetNcoeffs(),e->GetTotPoints()}]++; + } + std::vector<BlockAttributes> blockAttr; + for (auto &x : blockList) + { + auto val = state == FieldState::Phys ? std::get<2>(x.first) : std::get<1>(x.first); + blockAttr.push_back( { x.second, val } ); + } + return blockAttr; +} -int main() +int main(int argc, char *argv[]) { - // Define the structure of the computational domain. In this case, we - // suppose it consists of two composites: 10 quadrilaterals and 20 - // triangles, each with 4 modes / 4 quadrature points. - std::vector<BlockAttributes> blocks = { - {ShapeType::eQuadrilateral, {4, 4, 1}, 10}, - {ShapeType::eTriangle, {4, 4, 1}, 20}}; + // Initialise a session, graph and create an expansion list + LibUtilities::SessionReaderSharedPtr session; + SpatialDomains::MeshGraphSharedPtr graph; + MultiRegions::ExpListSharedPtr explist; + + session = LibUtilities::SessionReader::CreateInstance(argc, argv); + graph = SpatialDomains::MeshGraph::Read(session); + explist = MemoryManager<MultiRegions::ExpList>::AllocateSharedPtr + (session, graph); + + // Generate a blocks definition from the expansion list for each state + auto blocks_phys = GetBlockAttributes(FieldState::Phys, explist); + auto blocks_coeff = GetBlockAttributes(FieldState::Coeff, explist); // Create two Field objects with a MemoryRegionCPU backend by default - auto in = Field<double, FieldState::Coeff>::create(blocks); - auto out = Field<double, FieldState::Phys >::create(blocks); + auto in = Field<double, FieldState::Coeff>::create(blocks_coeff); + auto in2 = Field<double, FieldState::Coeff>::create(blocks_coeff); + auto out = Field<double, FieldState::Phys >::create(blocks_phys); // Populate the field with some data. In this case, we just grab a pointer // to the memory on the CPU and populate the array with values. Operators, @@ -44,13 +92,66 @@ int main() // implicitly transfer the data from the GPU to the host. The intention is // that all operators support multiple implementations and the appropriate // choice is made based on the backend selected. - double *x = in.GetStorage().GetCPUPtr(); - const size_t n = in.GetStorage().size(); - for (size_t i = 0; i < n; ++i) + double *x = in.GetStorage().GetCPUPtr(); + for (auto const &block : blocks_coeff) + { + for (size_t el = 0; el < block.num_elements; ++el) + { + for (size_t coeff = 0; coeff < block.num_pts; ++coeff) + { + // Each element is the index of the degree of freedom + // this is useful for testing reshapes + *(x++) = coeff; + } + } + } + + memset(out.GetStorage().GetCPUPtr(), 0, out.GetStorage().size()*sizeof(double)); + + std::cout << "Initial shape:\n" << in << std::endl << std::endl; + + // Test out field reshaping + in.ReshapeStorage<4>(); + std::cout << "Reshaped to 4:\n" << in << std::endl << std::endl; + +#ifdef NEKTAR_ENABLE_SIMD_AVX2 + // Test out SIMD instructions + using vec_t = tinysimd::simd<double>; + + // Reshape into vec_t::width, with the right memory alignment requirements + in.ReshapeStorage<vec_t::width, vec_t::alignment>(); + std::cout << "SIMD In:\n" << in << std::endl << std::endl; + + in2.ReshapeStorage<vec_t::width, vec_t::alignment>(); + std::cout << "SIMD Out (before):\n" << out << std::endl << std::endl; + + // Reinterpret casting from double to vector type allows for efficient + // conversion to SIMD intrinsics + vec_t::vectorType *inptr = + reinterpret_cast<vec_t::vectorType *>(in.GetStorage().GetCPUPtr()); + vec_t::scalarType *in2ptr = in2.GetStorage().GetCPUPtr(); + + // Loop over each block in the field and square each element + for (auto const &block : blocks_coeff) { - x[i] = i; + const size_t numMetaBlocks = block.num_elements / vec_t::width; + for (size_t metaBlock = 0; metaBlock < numMetaBlocks * block.num_pts; + ++metaBlock) + { + (vec_t(inptr[metaBlock]) * vec_t(inptr[metaBlock])).store(in2ptr); + in2ptr += vec_t::width; + } + + inptr += numMetaBlocks * block.num_pts; } + // Back to non-interleaved for non-SIMD Operators + in.ReshapeStorage<1>(); + in2.ReshapeStorage<1>(); + + std::cout << "Out:\n" << in2 << std::endl; +#endif + // Operators are instantiated using a Factory pattern. First lets check // which operators have been registered with the Factory. GetOperatorFactory<double>().PrintAvailableClasses(); @@ -58,49 +159,71 @@ int main() // We can create a BwdTrans operator (default implementation) // Default implementation might be provided through configuration options, // benchmarking, etc eventually. - auto bt = BwdTrans<>::create(); + auto bt = BwdTrans<>::create(explist); // ...and then apply it bt->apply(in, out); // We can combine these for one-shot operations - BwdTrans<>::create()->apply(in, out); + BwdTrans<>::create(explist)->apply(in, out); // We can also explicitly select the implementation of the operator to use - BwdTrans<>::create("SumFac")->apply(in, out); + BwdTrans<>::create(explist, "SumFac")->apply(in, out); // Let's display the result - double *y = out.GetStorage().GetCPUPtr(); - for (size_t i = 0; i < n; ++i) + std::cout << out << std::endl; + + in = Field<double, FieldState::Coeff>::create(blocks_coeff); + auto &inStorage = in.GetStorage(); + std::fill(inStorage.GetCPUPtr(), inStorage.GetCPUPtr() + inStorage.size(), + 0); + out = Field<double, FieldState::Phys>::create(blocks_phys); + + auto &outStorage = out.GetStorage(); + std::fill(outStorage.GetCPUPtr(), + outStorage.GetCPUPtr() + outStorage.size(), 0); + + for (auto const &block : in.GetBlocks()) { - std::cout << y[i] << std::endl; + for (size_t el = 0; el < block.num_elements; ++el) + { + in.GetStorage().GetCPUPtr()[el * block.num_pts] = 1; + } } + std::cout << in << std::endl; + + BwdTrans<>::create(explist, "StdMat")->apply(in, out); + + std::cout << out << std::endl; + + #ifdef NEKTAR_USE_CUDA // Test CUDA MemoryRegion // Create two Fields with memory on the GPU - in = Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(blocks); - out = Field<double, FieldState::Phys >::create<MemoryRegionCUDA>(blocks); + in = Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(blocks_coeff); + out = Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(blocks_phys); // Perform the BwdTrans on the fields using the CUDA implementation // Since this is a CUDA operator, acting on CUDA fields, everything happens // on the GPU. - BwdTrans<>::create("CUDA")->apply(in, out); + BwdTrans<>::create(explist, "CUDA")->apply(in, out); // Test the GPU-backed fields with a CPU operator // This should show a debug warning due to the implicit conversion. The // purpose of this is to allow us to transition the code to the new // infrastructure and add CUDA operators, without the need to add ALL CUDA // operators before we can test anything. - BwdTrans<>::create("MatFree")->apply(in, out); + BwdTrans<>::create(explist, "MatFree")->apply(in, out); // Create two CPU backed fields and use them with a GPU operator // This call implicitly converts the fields to a GPU backend and warns the // user about that fact in = Field<double, FieldState::Coeff>::create(blocks); out = Field<double, FieldState::Phys>::create(blocks); - BwdTrans<>::create("CUDA")->apply(in, out); + BwdTrans<>::create(explist, "CUDA")->apply(in, out); #endif + std::cout << "END" << std::endl; return 0; } diff --git a/square.geo b/square.geo new file mode 100644 index 0000000000000000000000000000000000000000..ee9a66908d7bda260df522c03ab39f086ee5affd --- /dev/null +++ b/square.geo @@ -0,0 +1,13 @@ +width=1; +height=1; +res=4.0; // number of elements per unit length + +Point(1) = {-width/2,-height/2,0,0.1}; +l[] = Extrude {width,0,0} { + Point{1}; Layers{width*res}; Recombine; +}; +s[] = Extrude {0,height,0} { + Line{l[1]}; Layers{height*res}; Recombine; +}; +Physical Surface(0) = {s[1]}; +Physical Line(1) = {1,2,3,4}; diff --git a/square.xml b/square.xml new file mode 100644 index 0000000000000000000000000000000000000000..63241c7866d35354fcb64c61d9edbac4379d3b72 --- /dev/null +++ b/square.xml @@ -0,0 +1,39 @@ +<?xml version="1.0" encoding="utf-8" ?> +<NEKTAR> + <GEOMETRY DIM="2" SPACE="2"> + <VERTEX COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx1j8sVgjAABIOIgIrK324o3RYoIaV4yi7guBfey2SySwjbxM/+G0K24+sPPyFfxXN83/yMvlOg71y4f0knJfeLV+hH8Rp98+tx0GHnDbn/847cadhX/4P7xZ/oed8Lufe1f3pTOvZ1r+d+8QF3e9+I3Psm5M7Mvu69uV/8C2/eNsUA</VERTEX> + <EDGE COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx1kEkSglAMBQEVZxxBHEARh/vf0I29oKvyN6l+FUI6STJ8aVCzoI6CyhurH54EnIuZN1U/fTMxfXMx8xbB90sx36+Ceet/5T7MLcTM3Yi5x1Y5/9uJ8d4rZ4+DGL+jcvYrxXhX2p+9T2L2rgOPs3J8LmI8rsrxvInxaJTj34rxu2t/7vIQc5dOjMdTOffqxXi8lHPHtxiPj3Lu+xXj9wOocAa/</EDGE> + <ELEMENT> + <Q COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx10EcOwjAABdFQ0kkPvYTO/W/IZrxgJLx50kSyvxJFv2eGc1z86UuMMdH30FPMMNe9oRdY4krvVFhjo12xeoud7knUexy0N1Ufca29Yf8Gt7jT3lx9jwftLdSPeNLeUv2MF+0N/3HCK960t1K/40N7a/UnvrS3UX/jR3u/iOQFQAAA</Q> + </ELEMENT> + <COMPOSITE> + <C ID="0"> Q[0-15] </C> + <C ID="1"> E[0,13,22,31,11,21,30,39,3,6,9,12,32,34,36,38] </C> + </COMPOSITE> + <DOMAIN> C[0] </DOMAIN> + </GEOMETRY> + <Metadata> + <Provenance> + <GitBranch></GitBranch> + <GitSHA1>88b1cb3be29cc9090d8a14af19c26decd88345db</GitSHA1> + <Hostname>kingscross</Hostname> + <NektarVersion>5.1.0</NektarVersion> + <Timestamp>27-Apr-2023 15:32:08</Timestamp> + </Provenance> + <NekMeshCommandLine>square.msh square.xml </NekMeshCommandLine> + </Metadata> + <EXPANSIONS> + <E COMPOSITE="C[0]" NUMMODES="4" TYPE="MODIFIED" FIELDS="u" /> + </EXPANSIONS> + <CONDITIONS> + <SOLVERINFO> + <I PROPERTY="EQTYPE" VALUE="Helmholtz" /> + </SOLVERINFO> + <VARIABLES> + <V ID="0">u</V> + </VARIABLES> + <FUNCTION NAME="Forcing"> + <E VAR="u" VALUE="0" /> + </FUNCTION> + </CONDITIONS> +</NEKTAR>