Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • nektar/redesign-prototypes
1 result
Show changes
Commits on Source (6)
Showing with 1782 additions and 202 deletions
...@@ -5,6 +5,7 @@ builds ...@@ -5,6 +5,7 @@ builds
a.out a.out
.ccls-cache/ .ccls-cache/
.cache .cache
*.opt
.* .*
!.gitignore !.gitignore
!.gitlab-ci.yml !.gitlab-ci.yml
......
...@@ -10,6 +10,10 @@ set(CMAKE_CXX_STANDARD 17) ...@@ -10,6 +10,10 @@ set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
set(CMAKE_CUDA_ARCHITECTURES 86)
endif()
# Default install location: build/dist # Default install location: build/dist
IF (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) IF (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
SET(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/dist CACHE PATH "" FORCE) SET(CMAKE_INSTALL_PREFIX ${CMAKE_BINARY_DIR}/dist CACHE PATH "" FORCE)
......
...@@ -76,7 +76,7 @@ public: ...@@ -76,7 +76,7 @@ public:
void HostToDevice(); void HostToDevice();
void DeviceToHost(); void DeviceToHost();
bool GetOnDevice() const bool IsOnDevice() const
{ {
return m_ondevice; return m_ondevice;
} }
......
#include "MemoryRegionCUDA.hpp" #include "MemoryRegionCUDA.hpp"
#include "Operators/BwdTrans/BwdTransCUDAKernels.cuh"
#include "Operators/OperatorBwdTrans.hpp" #include "Operators/OperatorBwdTrans.hpp"
#include "Operators/OperatorHelper.cuh"
namespace Nektar::Operators::detail namespace Nektar::Operators::detail
{ {
// Matrix-free implementation template <typename TData>
void BwdTrans1DKernel(const size_t gridSize, const size_t blockSize,
const size_t nm0, const size_t nq0, const size_t nElmts,
const TData *basis0, const TData *in, TData *out);
template <typename TData>
void BwdTrans1DKernel_QP(const size_t nm0, const size_t nq0,
const size_t nElmts, const TData *basis0,
const TData *in, TData *out);
template <typename TData>
void BwdTrans2DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nq0, const size_t nq1,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1, const TData *in,
TData *out);
template <typename TData>
void BwdTrans2DKernel_QP(LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nq0, const size_t nq1,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1,
const TData *in, TData *out);
template <typename TData>
void BwdTrans3DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nm2, const size_t nq0,
const size_t nq1, const size_t nq2, const size_t nElmts,
const bool correct, const TData *basis0,
const TData *basis1, const TData *basis2, const TData *in,
TData *out);
template <typename TData>
void BwdTrans3DKernel_QP(LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nm2, const size_t nq0,
const size_t nq1, const size_t nq2,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1,
const TData *basis2, const TData *in, TData *out);
// BwdTrans implementation
template <typename TData> template <typename TData>
class OperatorBwdTransImpl<TData, ImplCUDA> : public OperatorBwdTrans<TData> class OperatorBwdTransImpl<TData, ImplCUDA> : public OperatorBwdTrans<TData>
{ {
...@@ -12,14 +56,102 @@ public: ...@@ -12,14 +56,102 @@ public:
OperatorBwdTransImpl(const MultiRegions::ExpListSharedPtr &expansionList) OperatorBwdTransImpl(const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorBwdTrans<TData>(expansionList) : OperatorBwdTrans<TData>(expansionList)
{ {
m_basis = GetBasisDataCUDA<TData>(expansionList);
}
~OperatorBwdTransImpl()
{
for (auto &basis : m_basis)
{
for (size_t i = 0; i < basis.second.size(); i++)
{
cudaFree(basis.second[i]);
}
}
} }
void apply(Field<TData, FieldState::Coeff> &in, void apply(Field<TData, FieldState::Coeff> &in,
Field<TData, FieldState::Phys> &out) override Field<TData, FieldState::Phys> &out) override
{ {
TData *x = in.template GetStorage<MemoryRegionCUDA>().GetGPUPtr(); // Copy memory to GPU, if necessary and get raw pointers.
TData *y = out.template GetStorage<MemoryRegionCUDA>().GetGPUPtr(); auto const *inptr =
std::cout << "Op bwd trans CUDA\n"; in.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
auto *outptr = out.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
// Initialize index.
size_t expIdx = 0;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(
3, LibUtilities::NullBasisKey);
// Loop over the blocks.
for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
++block_idx)
{
// Determine shape and type of the element.
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = in.GetBlocks()[block_idx].num_elements;
auto nmTot = expPtr->GetNcoeffs();
auto nqTot = expPtr->GetTotPoints();
auto shape = expPtr->DetShapeType();
// Deterime CUDA grid parameters.
m_gridSize = nElmts / m_blockSize;
m_gridSize += (nElmts % m_blockSize == 0) ? 0 : 1;
// Flag for collapsed coordinate correction.
bool correct = expPtr->GetBasis(0)->GetBasisType() ==
LibUtilities::eModified_A;
// Fetch basis key for the current element type.
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Function call to kernel functions.
if (expPtr->GetShapeDimension() == 1)
{
auto basis0 = m_basis[basisKeys][0];
auto nm0 = expPtr->GetBasisNumModes(0);
auto nq0 = expPtr->GetNumPoints(0);
BwdTrans1DKernel(m_gridSize, m_blockSize, nm0, nq0, nElmts,
basis0, inptr, outptr);
}
else if (expPtr->GetShapeDimension() == 2)
{
auto basis0 = m_basis[basisKeys][0];
auto basis1 = m_basis[basisKeys][1];
auto nm0 = expPtr->GetBasisNumModes(0);
auto nm1 = expPtr->GetBasisNumModes(1);
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
BwdTrans2DKernel(m_gridSize, m_blockSize, shape, nm0, nm1, nq0,
nq1, nElmts, correct, basis0, basis1, inptr,
outptr);
}
else if (expPtr->GetShapeDimension() == 3)
{
auto basis0 = m_basis[basisKeys][0];
auto basis1 = m_basis[basisKeys][1];
auto basis2 = m_basis[basisKeys][2];
auto nm0 = expPtr->GetBasisNumModes(0);
auto nm1 = expPtr->GetBasisNumModes(1);
auto nm2 = expPtr->GetBasisNumModes(2);
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
auto nq2 = expPtr->GetNumPoints(2);
BwdTrans3DKernel(m_gridSize, m_blockSize, shape, nm0, nm1, nm2,
nq0, nq1, nq2, nElmts, correct, basis0, basis1,
basis2, inptr, outptr);
}
// Increment pointer and index for next element type.
inptr += nmTot * nElmts;
outptr += nqTot * nElmts;
expIdx += nElmts;
}
} }
static std::unique_ptr<Operator<TData>> instantiate( static std::unique_ptr<Operator<TData>> instantiate(
...@@ -30,6 +162,184 @@ public: ...@@ -30,6 +162,184 @@ public:
} }
static std::string className; static std::string className;
private:
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>> m_basis;
size_t m_blockSize = 32;
size_t m_gridSize;
}; };
template <typename TData>
void BwdTrans1DKernel(const size_t gridSize, const size_t blockSize,
const size_t nm0, const size_t nq0, const size_t nElmts,
const TData *basis0, const TData *in, TData *out)
{
BwdTransSegKernel<<<gridSize, blockSize>>>(nm0, nq0, nElmts, basis0, in,
out);
}
template <typename TData>
void BwdTrans1DKernel_QP(const size_t nm0, const size_t nq0,
const size_t nElmts, const TData *basis0,
const TData *in, TData *out)
{
BwdTransSegKernel_QP<<<nElmts, nq0>>>(nm0, nq0, basis0, in, out);
}
template <typename TData>
void BwdTrans2DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nq0, const size_t nq1,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1, const TData *in,
TData *out)
{
if (shapetype == LibUtilities::Quad)
{
BwdTransQuadKernel<<<gridSize, blockSize>>>(nm0, nm1, nq0, nq1, nElmts,
basis0, basis1, in, out);
}
else if (shapetype == LibUtilities::Tri)
{
size_t nmTot =
LibUtilities::StdTriData::getNumberOfCoefficients(nm0, nm1);
BwdTransTriKernel<<<gridSize, blockSize>>>(nm0, nm1, nmTot, nq0, nq1,
nElmts, correct, basis0,
basis1, in, out);
}
}
template <typename TData>
void BwdTrans2DKernel_QP(LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nq0, const size_t nq1,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1,
const TData *in, TData *out)
{
if (shapetype == LibUtilities::Quad)
{
TData *wsp = 0;
cudaMalloc((void **)&wsp, sizeof(TData) * nq0 * nm1);
BwdTransQuadKernel_QP<<<nElmts, dim3(nq0, nq1)>>>(
nm0, nm1, nq0, nq1, basis0, basis1, in, wsp, out);
cudaFree(wsp);
}
else if (shapetype == LibUtilities::Tri)
{
size_t nmTot =
LibUtilities::StdTriData::getNumberOfCoefficients(nm0, nm1);
TData *wsp = 0;
cudaMalloc((void **)&wsp, sizeof(TData) * nm0 * nq1);
BwdTransTriKernel_QP<<<nElmts, dim3(nq0, nq1)>>>(
nm0, nm1, nmTot, nq0, nq1, correct, basis0, basis1, in, wsp, out);
cudaFree(wsp);
}
}
template <typename TData>
void BwdTrans3DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nm2, const size_t nq0,
const size_t nq1, const size_t nq2, const size_t nElmts,
const bool correct, const TData *basis0,
const TData *basis1, const TData *basis2, const TData *in,
TData *out)
{
if (shapetype == LibUtilities::Hex)
{
BwdTransHexKernel<<<gridSize, blockSize>>>(nm0, nm1, nm2, nq0, nq1, nq2,
nElmts, basis0, basis1,
basis2, in, out);
}
else if (shapetype == LibUtilities::Tet)
{
size_t nmTot =
LibUtilities::StdTetData::getNumberOfCoefficients(nm0, nm1, nm2);
BwdTransTetKernel<<<gridSize, blockSize>>>(
nm0, nm1, nm2, nmTot, nq0, nq1, nq2, nElmts, correct, basis0,
basis1, basis2, in, out);
}
else if (shapetype == LibUtilities::Pyr)
{
size_t nmTot =
LibUtilities::StdPyrData::getNumberOfCoefficients(nm0, nm1, nm2);
BwdTransPyrKernel<<<gridSize, blockSize>>>(
nm0, nm1, nm2, nmTot, nq0, nq1, nq2, nElmts, correct, basis0,
basis1, basis2, in, out);
}
else if (shapetype == LibUtilities::Prism)
{
size_t nmTot =
LibUtilities::StdPrismData::getNumberOfCoefficients(nm0, nm1, nm2);
BwdTransPrismKernel<<<gridSize, blockSize>>>(
nm0, nm1, nm2, nmTot, nq0, nq1, nq2, nElmts, correct, basis0,
basis1, basis2, in, out);
}
}
template <typename TData>
void BwdTrans3DKernel_QP(LibUtilities::ShapeType shapetype, const size_t nm0,
const size_t nm1, const size_t nm2, const size_t nq0,
const size_t nq1, const size_t nq2,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1,
const TData *basis2, const TData *in, TData *out)
{
if (shapetype == LibUtilities::Hex)
{
TData *wsp0 = 0;
TData *wsp1 = 0;
cudaMalloc((void **)&wsp0, sizeof(TData) * nq0 * nm1 * nm2);
cudaMalloc((void **)&wsp1, sizeof(TData) * nm0 * nq1 * nq2);
BwdTransHexKernel_QP<<<nElmts, dim3(nq0, nq1, nq2)>>>(
nm0, nm1, nm2, nq0, nq1, nq2, basis0, basis1, basis2, in, wsp0,
wsp1, out);
cudaFree(wsp0);
cudaFree(wsp1);
}
if (shapetype == LibUtilities::Tet)
{
size_t nmTot =
LibUtilities::StdTetData::getNumberOfCoefficients(nm0, nm1, nm2);
TData *fpq = 0;
TData *fp = 0;
cudaMalloc((void **)&fpq,
sizeof(TData) * (2 * nm1 - nm0 + 1) * nm0 / 2 * nq2);
cudaMalloc((void **)&fp, sizeof(TData) * nm0 * nq1 * nq2);
BwdTransTetKernel_QP<<<nElmts, dim3(nq0, nq1, nq2)>>>(
nm0, nm1, nm2, nmTot, nq0, nq1, nq2, correct, basis0, basis1,
basis2, in, fpq, fp, out);
cudaFree(fpq);
cudaFree(fp);
}
if (shapetype == LibUtilities::Pyr)
{
size_t nmTot =
LibUtilities::StdPyrData::getNumberOfCoefficients(nm0, nm1, nm2);
TData *fpq = 0;
TData *fp = 0;
cudaMalloc((void **)&fpq, sizeof(TData) * nm0 * nm1 * nq2);
cudaMalloc((void **)&fp, sizeof(TData) * nm0 * nq1 * nq2);
BwdTransPyrKernel_QP<<<nElmts, dim3(nq0, nq1, nq2)>>>(
nm0, nm1, nm2, nmTot, nq0, nq1, nq2, correct, basis0, basis1,
basis2, in, fpq, fp, out);
cudaFree(fpq);
cudaFree(fp);
}
if (shapetype == LibUtilities::Prism)
{
size_t nmTot =
LibUtilities::StdPrismData::getNumberOfCoefficients(nm0, nm1, nm2);
TData *fpq = 0;
TData *fp = 0;
cudaMalloc((void **)&fpq, sizeof(TData) * nm0 * nm1 * nq2);
cudaMalloc((void **)&fp, sizeof(TData) * nm0 * nq1 * nq2);
BwdTransPrismKernel_QP<<<nElmts, dim3(nq0, nq1, nq2)>>>(
nm0, nm1, nm2, nmTot, nq0, nq1, nq2, correct, basis0, basis1,
basis2, in, fpq, fp, out);
cudaFree(fpq);
cudaFree(fp);
}
}
} // namespace Nektar::Operators::detail } // namespace Nektar::Operators::detail
This diff is collapsed.
...@@ -4,7 +4,7 @@ ...@@ -4,7 +4,7 @@
namespace Nektar::Operators::detail namespace Nektar::Operators::detail
{ {
// sum-factorisation implementation // standard matrix implementation
template <typename TData> template <typename TData>
class OperatorBwdTransImpl<TData, ImplStdMat> : public OperatorBwdTrans<TData> class OperatorBwdTransImpl<TData, ImplStdMat> : public OperatorBwdTrans<TData>
{ {
...@@ -17,29 +17,34 @@ public: ...@@ -17,29 +17,34 @@ public:
void apply(Field<TData, FieldState::Coeff> &in, void apply(Field<TData, FieldState::Coeff> &in,
Field<TData, FieldState::Phys> &out) override Field<TData, FieldState::Phys> &out) override
{ {
// Initialize pointers.
auto const *inptr = in.GetStorage().GetCPUPtr(); auto const *inptr = in.GetStorage().GetCPUPtr();
auto *outptr = out.GetStorage().GetCPUPtr(); auto *outptr = out.GetStorage().GetCPUPtr();
size_t exp_idx = 0; size_t expIdx = 0;
for (size_t block_idx = 0; block_idx < in.GetBlocks().size(); for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
++block_idx) ++block_idx)
{ {
auto const expPtr = this->m_expansionList->GetExp(exp_idx); // Determine shape and type of the element.
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = in.GetBlocks()[block_idx].num_elements;
auto nmTot = expPtr->GetNcoeffs();
auto nqTot = expPtr->GetTotPoints();
// Get BwdTrans matrix.
Nektar::StdRegions::StdMatrixKey key( Nektar::StdRegions::StdMatrixKey key(
StdRegions::eBwdTrans, expPtr->DetShapeType(), *expPtr); StdRegions::eBwdTrans, expPtr->DetShapeType(), *expPtr);
auto const matPtr = expPtr->GetStdMatrix(key); auto const matPtr = expPtr->GetStdMatrix(key);
auto const &block = in.GetBlocks()[block_idx]; // Perform matrix-matrix multiply.
Blas::Dgemm('N', 'N', nqTot, nElmts, nmTot, 1.0,
matPtr->GetRawPtr(), nqTot, inptr, nmTot, 0.0, outptr,
nqTot);
Blas::Dgemm('N', 'N', matPtr->GetRows(), block.num_elements, // Increment pointer and index for next element type.
matPtr->GetColumns(), 1.0, matPtr->GetRawPtr(), inptr += nmTot * nElmts;
matPtr->GetRows(), inptr, block.num_pts, 0.0, outptr, outptr += nqTot * nElmts;
expPtr->GetTotPoints()); expIdx += nElmts;
inptr += block.block_size;
outptr += expPtr->GetTotPoints() * block.num_elements;
exp_idx += block.num_elements;
} }
} }
......
#pragma once #pragma once
#include "Field.hpp"
#include <string> #include <string>
#include <LibUtilities/BasicUtils/NekFactory.hpp> #include <LibUtilities/BasicUtils/NekFactory.hpp>
#include <MultiRegions/ExpList.h> #include <MultiRegions/ExpList.h>
// #include <StdRegions/StdExpansion.h>
namespace Nektar::Operators namespace Nektar::Operators
{ {
......
#ifdef NEKTAR_USE_CUDA
#include "MemoryRegionCUDA.hpp"
#endif
#include <MultiRegions/ExpList.h>
namespace Nektar::Operators
{
template <typename TData>
using BasisMap =
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>>;
template <typename TData>
BasisMap<TData> GetBasisDataCUDA(
const MultiRegions::ExpListSharedPtr &expansionList)
{
// Initialize data map.
BasisMap<TData> basis;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(3,
LibUtilities::NullBasisKey);
// Loop over the elements of expansionList.
size_t nDim = expansionList->GetShapeDimension();
for (size_t i = 0; i < expansionList->GetNumElmts(); ++i)
{
auto const expPtr = expansionList->GetExp(i);
// Fetch basiskeys of current element.
for (size_t d = 0; d < nDim; d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Copy data to basis, if necessary.
if (basis.find(basisKeys) == basis.end())
{
basis[basisKeys] = std::vector<TData *>(nDim, 0);
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
{
auto ndata = expPtr->GetBasis(d)->GetBdata().size();
auto hostPtr = expPtr->GetBasis(d)->GetBdata().get();
auto &devicePtr = basis[basisKeys][d];
cudaMalloc((void **)&devicePtr, sizeof(TData) * ndata);
cudaMemcpy(devicePtr, hostPtr, sizeof(TData) * ndata,
cudaMemcpyHostToDevice);
}
}
}
return basis;
}
} // namespace Nektar::Operators
<?xml version="1.0" encoding="utf-8" ?>
<NEKTAR>
<EXPANSIONS>
<E COMPOSITE="C[0]" NUMMODES="4,5,5" FIELDS="u" BASISTYPE="Modified_A,Modified_B,Modified_C" NUMPOINTS="5,6,6" POINTSTYPE="GaussLobattoLegendre,GaussRadauMAlpha1Beta0,GaussRadauMAlpha2Beta0"/>
<E COMPOSITE="C[7]" NUMMODES="4,5,6" FIELDS="u" BASISTYPE="Modified_A,Modified_A,Modified_A" NUMPOINTS="5,6,7" POINTSTYPE="GaussLobattoLegendre,GaussLobattoLegendre,GaussLobattoLegendre"/>
<E COMPOSITE="C[8]" NUMMODES="4,5,6" FIELDS="u" BASISTYPE="Modified_A,Modified_A,Modified_B" NUMPOINTS="5,6,7" POINTSTYPE="GaussLobattoLegendre,GaussLobattoLegendre,GaussRadauMAlpha1Beta0"/>
<E COMPOSITE="C[9]" NUMMODES="4,5,5" FIELDS="u" BASISTYPE="Modified_A,Modified_A,ModifiedPyr_C" NUMPOINTS="5,6,6" POINTSTYPE="GaussLobattoLegendre,GaussLobattoLegendre,GaussRadauMAlpha2Beta0"/>
</EXPANSIONS>
<GEOMETRY DIM="3" SPACE="3">
<VERTEX COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx9lTlTFFEUhdt9A9cRcQM3BMUVwV2vOyq4gDSiAu6oiEtiomaWEQGRf2ICMquMrfIfQELiaNVLMCExMLN0+syrOfPOe8mr11+fc8+93dOTJMVV8/lPT5pOWnZM6j/+6vpSN2mzkvACny304+0/3ubGv9uc7Dzxqao3Tb8Z9qH/a8rmCn/o59F11EH9+YJDvyDan7OFIt+73D+Hn7Yo2r+zxdF8zpbQdeZVpH/T8CFNU1fqr5ryFU+F0vyWCj36Xyb06G95tL6zFZSf+crgdGZK+VeRnnlO6JF/NeUfnXqfz+enS/3XCH/o10TzOasV/pjP2mh+Z+tobqiA/tYLjnwbynIVKn4fG6P+zuqEHvnrSc/v36agv38+m8mf579F6NHfVqFHvm3R+s4aiPP7t73Mf6Zi/o2CI19TdkYunv8O0vP3Z6fg8G+O5nO2S9THfHZH6zvbQ5y/H3sFR759VL948t+X/VF/Zy1Cj/wHgv3796tVcORrI39+/w5G/Z0dEnrkO1ymL1TkP5LE19Fs5/8XrGPE+b7jdD/zE0KP+Z0kzt8JDsT8lNDD/3Qwl3+PzwSn4vlZURfrHHHu/7zgyHeBONdpFxz6i8F8Pv8l0RfWZeKcv4M45+sM+vs6V7Kdnz9+H1fFfMCvif7Br5M/8y7qF/eNff3d1D82bd08kGyB3xD5wXuEP+aQCn/wXuEPfjPo6/P1CQ79LaqI+UB/m/TM7wg9/PuFHnxAzAX+g2Iu4HeFHvs9ocd+n3Lz83sgOPQPg337+T8SHPrHxHm+Q4JD/yTo7+fzVHDszygP9z8sOM7Pg3P1/Y8IjvMLOnP/LwXH+VXQ3/f/WnDsfwFYCm9Q</VERTEX>
<EDGE COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx1mmWQFkcURRk0sNjiIcgii0MguC9OcHcL7iS4u7sEjbu7u7u7u7u7/nlnq+ZUwZ+ue+b2nf56emem35AnT/pfcpw2r9o80vnE0fmVAy9wnP4F5ad/IfnhJ0iTU1g5nK+INP0yxMkpKs74iqk/vLg0OSXk5/eUjDaveKb8/M5S8sFLKwdeRn7mpazy4eWkySkfbUEdryCO/0T5mPeK8sNPkianknK4TpWjLSReRZqcquKcJytari/Xu5o0460uP+ughvzwmuoPz5afdVNLfnhtaXLqiPO76oqz/urJB6+v88IbyM96bah8eCNpck5WDuu7cbQZ4k3EyTlFPnhTcc7fLFr+bvg7aS7N30kL+eEtxfn7aaUceGv1h7eRn7+3tvLD20mT0z5a1g/j6iDN9ekoPzk58sE7iTOuztLcF7ponPCu0twvuikH3l058B7K4f7SU374qdLk9NJ5WSe9pfH3iba4eF/5GVc/cc7fP9pM8QHi5A+Mlvsl/wZJc98cLD98iDj306HKgQ9Tf/hwcZ4zI6TxjxTnfj1KOfDR8nMfHyMffKxy4OPk5/kwXhr/BHGeE6cpBz5RmpxJ4ty3JkvjnxIt94GsaKdKs66nyc/zabr88BnqD58pP8+zWfLDZ0uTM0ec9TxXnOfiPPngp+u88DPk5zk6X/nwBdLkLIw2U8cXRZstvlicfkukyV8qP3xZtDzP+ftbLs1zfYX88JXiPO9XKQe+Wv3ha8T5u1krjX9dtLxHcP710rxPbJAfvlGc/E3ywTeLM94t0ryvbNX44dukeY/Zrhz4DuXAdyqH+8Euafy7lc970p5o64nvlSZnn/zw/fIzrgMaD8fPlOb+dFCa/EPR8h7HdTsszfvcEfnhR8V5zzumHPhZ6g8/W5z5OEca/7nivEeepxz4+fLzfnmBfPALlQO/SH7m+2Jp/JdEy/zT71Jx/JeJc7+5XP3hV0iTc2W0vC9zfa6S5r35avnh14jzPn2tcuDXqT/8enHm9QZp/DdGm6Xz3yTN8+Fm+cm5RT74reKM6zZp9gm3a5zwO6TZP9ypHPhdyoHfrRyu8z3S+O9VPs+1+6Tx3x9tdfEH5Of8D0abLf6QODkPR5uh449Ey3pmH/Wo/PDH1B/+eLSZ4k9Eyz6MfdeT0uy7npJm3/W0NHnPSFNnelb5/HtO+ezfnlc+dYUX1B//i9L4X1I++8CXpdkHviLN+V+VPyfa1+RnH/m6xoP/DWn8byqffeZb0uwz35bm/O/Iz3vxu/KzD35P48H/frRcL/axH0izj/1Qmvn+SH7eyz6Wn33vJ/LBP5VmXj9TDvvkz6WZ1y/E2T9/qXz4V9I50X6tHPbb30hzHb4Vz4r2O+XDv4+WeWPf/oM0+/YfpZmPn+Tn/fRn+Xnv+EWa+fhVfuoCv0kzH7+L89z7Q5rf/af81Bf+kmb+/hbn+fZPtKwP6hH/RsvvoR7xn8ZBPYIbV05I6hFJks4lJ2+S1uTkS9KanPxJWpNXIDj1DOodBcWp+xZK0r7cOnFw6h8cL6wcjheRn+dMRnDqItRNioqTVyxJ+zhv8eDUUTheQjkcLyk/48rU+Km/lErSmrpFaXFyyui81GvKJmlNv3LijKu85p/6ToUkrXPrXsFHyV9RObn1IPl5nlfSPFMnqpykNflVgo+Rv6pyOJ4lP+evpnUCr67rRb8awalLUYeqmaQ1dcJs+alz1NJ8cry2OHl1glPfmhBt3SSt8deTn/z6mk+ONxAnr6GuFzmNNJ/knBycuhrz2ljrluNN5M/dDwWnDsd1aKp1y/Fm8pPXXOelftciSWv20y3FyWml81Lva52kNf3aiDOutsGpCzKv7bQOOd5eft6LOwSnjsh16Kh1y/Ec+cnrpPmHd9Z80q9LcOqUrIeu4uwnumn9cLx7cOqarJ8e4vh7ar1x/FTND/5e+r34e+v6Uj/tk6Q1+/i+4sxTP11f6q39k7Sm3wBx5nWgxkN9dlCS1tR/BouTP0TjoZ47NElr+g0TZ1zDtR6o/45I0pr96Ejx3HWt9UO9eHSS1vQbI851G6v5xz9O80m/8cGpQ7MeJoiznz1N64fjE4NTt2b9TBLHP1nrjeNTND/4p+r34p+m9zG+k0wX5/8bzAjeXnxmcL7z0m9W8Azx2crne/occb5Hz9V54fP0Hgg/Xe+f8DOC892NfvN1Xr7/LgieKf9CnRf/InG+ey7WeOBLgvOdgn5LlcP3u2V6T6auvFyc72UrgncTX6nfC1+leYavDs7zkvOs0Xn5DrVW+fB1wXmfrxbt+uA9xDdo3uAbNT/wTcGpO3GezcGzxbdoPHxn2ap8+Db9Xuq728X57rBD8w/fKT918l3yU3/eLT98j8ZPnW+vOP59mmf4fuVT1z2gHPiZyqHOeVB++CFx6niHdV7qbEe0fuBHlQM/pusF/x9lNlD9</EDGE>
<FACE>
<T COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJyFmVm4jlUYhrcQoqJIEUqGRMKuZChzbaESGZvIkLYUTUKZsiOKohBKSlEhQoMGjdIg6WqejuqgDjrqoIOGk+c+2Pd1fdfeJ8/1vu/zPOtd61/ft/9/rZKSyn/Vgkcp5q9GsHoBr2YVvJrKwzs6WKvAj3rtKnQ1xSNfJ3hMgR/1uvK3rrZ4xwjrBY8Vj/xxBfljVa+t/PHB+sFa4lM/IdigQHeiePaj3kDYUH6N5NNQ8Unyg99A9YaqM9/GBfmTC3waq+78KYqbiE8dXVPlzW+qPPM4NdhMPPLNNY751E9Qnj5aFPi1UNxU+dOCpwfrKQ+eEWwpPtiqgFdP9ZbitQ62KfCj3rYKXSvxyJ8ZbKJ8W9XPCrYr0LUv4LVRvZ14HeR3tvgdxO+oPPx2qpPnczwn2Ek86vh3Fh/sIl4nIfXOQvosLfArVdxR+XOD5wXPVx68INhVfLCbeOcLqXcVou8e7CEf8j2V7yZ+T+XRXRi8KMi+7aE6/fSSH7re4qFrrTp6dMy/j/K9lEffTXFf9dtPOuoDgv0LdBeL109Ivb/wEvHK5HOJ+APlV6aYOrpLgzxPzKtM9UEaFx90g8VD10f1AeINEb+v+EPE76/8ZcHLg6XKg/Q7SHzwCvGISxWjHxpkHRmHvq4Uj3iYkPpVweFCdCPEA4epPlwx3y9Hym+k6qPkO0q8EcqPFo4Rf4zqVwfHCuFdIx44UvWxiq/VOPWVB/G7Tgh/rPLXS088Tkh+vPTjxSOmjxuCzRTjN0F1YvwnSjdevEniEfNenqz6jcoTo5uiPHF3xehvEpbLr1z1qfKfKt4U5W8Wlok/TfVblCdGd6vyxOWK0U8XMs4MIXV8bxPCm6b87cI7gncqpo7urgLezCp4M5Unpn/G4X15t2J0zHOWeMSzhdTvCc4RortXPHC26nMUo+c9OVdIfp5854lHjO984YLgQsXU6eu+At6iKniLlCdm/hXyqxDCp//7hcxzsZD6A8ElQnRLxQMXq75E8TLxxigPontQCH+p8g8JlwcrFFNfob5XiPew8sQVitE/ImQc1nOl6vgyz1XBZQX8R5VHz/wfU53/Q/iuVh4/9GvEp844azUOfugeV50Y3Trp1om3Xn7U0W8ILhQfvydUh4//k9KtE2+jfMx/SnVidJuk2yTe0/Kjjv4Z8RiH/bhZdXzRPSu+fZ+Tns8Z3y3K44d+q/jU6eN51RkP3xeUxw+fF8Wnjv82jYMfuu2qE6PbId0O8V6SH3X0O6Vnn6LbpTx8/F8Wnzrj7JYPfL4v7VGdvtDtlW6PeK8U+KF/Ncj87fea8ujp/3WNwzzx3ac6fHzfkG6feG8W+KF/Sz7weB7fVh0dvvvF36f6OxqH73Gsx7uqMx79vyfdfvHelx919B8U9MF6fqg6vszzQJD585zR90fK0y/zPyg+dfw/1vjw+D76iWL09PlpkN83rAfvi89UZ76syyHpDoj3ufyooz8c5PyP78n4faE664X/EekOiFc9Fzk1g3WCtYO1gnWDxwePU75B8ET5wGsYPEk+6BoHmwRPCTYKnhpsEWwePFn9ni6fJuqjpXyaBc8Itg22kd+ZwQ7B9sFW4tP32cq3V98d5cu6dQqeIx3rVhrsIt55wa7ygXdBsLt8ztU8ewZ7yI916xW8SH700Uc+PdVHX/mA/bRuA8S/WOtWFuwvPn0PlK5MfV8q3qDgEM17cPCy4BWa9+XBocFh8rsyODw4Qn5XBUcGR2sdRwXHBK/WuowNXiOfIcpfKx/mcZ2Q9RgXHC9k3SYEJwrHaR6Tg5OkYx5Tgv+UVI5vDN4U5D1XXbxyjcP9Mvqbg0ek4353msZBx73tdI0zlec49Rkan/tUeLfLnzz3lHdo/tPU953qv6V4d8mf+zf0M4Nc2N8avCV4d/Bb+cKbFayhvtDPCX5XUlnH/eA9iadLx73fXK0v68m91jzNj/s6eAuCt2m9uedaqP5niLdI/ZHnnq9C68fnwed2v+bPfQ68xfLvXVJZ/0DwF+m431qqzxkd91IPav583twTPKTxuc+At0L+5LmneSTxv4lna1+sDP6a+nDxVmmfcX6P/lHtf/YL++qx4G/yhbc6WE19oV+j9wc67hfW6vlFx/3C+sT3BtmvnHNv0Py4D4D3pPY3+5nz8Y3qf654T6k/8twTbEr8X2Keh/nBp4N/pM7zAO8Z7W/q9wU3B6srj/9zwT81PufnW/ScoeP8fKv2P+f29PG89ifn7/SxLchzyvPIufF29b9YvB16fqhzjrxT/aPjnHyX+uN8Hp/den54nnnu9wT/Yl7i7Q0uoY+SyvpX9Pyh4/z4Ve1PdPNS36f9vYznMvU3ND7ni/DelD95zkHf0vx4X3H+9bbWhzrvrf0anzznY+9q/z+s99p7wb9TXyve+8HlQc7f0H8Q/F06+vwweJTGpc+P1D/v043xO6j1p8750yfa37xvOYf6VP1zfgfvkPpbqXE+1/rwvuZ86LDWlzrv7S80PnnOkY7o/cC5Dj5f6vni/Aafr+TP/wPOQb7W+lDnd/w38t9fUpn3rdaf/yf8/v4uMe/nNeJ9HyxXnd/fP6g//t/wO/xHfT6rxftJ76fH2VfR/6z321rxfgnWUJ4+/weSxzHH</T>
<Q COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx1mGtsnnMYxt+nHTNaihWdjTqsbDbbqNHNMEwddjQzNptVDTEJc8wiIY1DIlmyOGQRhA/ikPFhIZEQXyYSMea0GRvDaHVrp9gwzOmD6yd5ftH3y5X7vq/7vq7/4e37PF1f+ffzdnBt8J3gu8EN4r1XKX/WBT/WnPeDH6pvo3TgfRT8IPiJfJAfJN1P5Qc95sHfJN31QdaH/82q4xcd/H6mfnjo4P9zzSffGGRdW+QbvU3ifyFd/OIHvS9V36I8fr9SP7xm6W3VfPKTgqzra/lGr1n8b6TPfPq5R53i0TcryL3q0pxO5en7VjqdynO+3dLpEo97tk3zupXnHm1XnbhHPoi3idcb3KgYPz3Kc392aB7xd8HNiqn3Bb+Xjz7xiRcHuT/09SqGx734QfhjcGdwq/LwdgWXBTnfneL/FOxQ3y7x4f0c7FQenQ7xuhQz75fg7mC38uCvwZXS2y2E91twu/LorxSvRzHzfg/uCe5QHvwjuEp6e4TwXgr2KY/+KvG4F38Guc9/BYuiXCfPuv4OVhXlOvlexdUFg8v43+9I4gHSxQeIXrX45PcK7h1syJyB4lMfmvq+iQdpDn1HireP5uwXrFF/rfg10qkLHqC8++DtX5R9HBg8KDi4UvZVo/owreNg6Q0O1mt+jXzWqx9/hyg+VPs4KvqHyRf1MakfnniIdOg7WbwG+R+qdeFjmHzWSqeR8+6nr0G8I5RnX8fK31FFOa5TnnlHy/8xwWODrZk7XPrUp6XOPh2n9dA3Xbwm+Tg+OEI+RsrnCOmwT6P66WsS7wT5AmfEH/s1Wjojlcf3ifLF/VqUeeM4H83h3NrFG6P11Ku/mfsoP3Xqh3eSfJ4SHB9cWCn7Hqt6m/ydWpTj04Itml8vny3qx98E9bEvb0Z3UuLT5Ytzul28ifJVp/6zgmfKV6P64Z2hPOt+TrqTi3LcrDzzztbcc3QPl2buueojvjV11j9FOufpnjG/tZ94itZ/vvLo3qb1XqA5rcrj+0LNvSg4Nfhw5o4Xn/ojqbPv0zRnOt9f5VvEn6GY85mpObOCFweXy99M1R+qlP3O1pxLgnOUbxF/jmL8Xqo5c4OXBZ+J/jjxqT+fOud2uebMC85Xvln8+Yo53ys0Z4H0n47+VPEXau4CrZv6lbqHazJvUfH/+IbW2ya8SveV+e39YJvWe7VwsfRfjz7rv0bYrj7Wf12Q51DeO6uC16Z+Q7Amef6PQN/1qd9YlPsGpl4r3k3BJeLXhcfz2y3JD1E/fTyv3pl4eGL+P8Dz9tLgXUGej5nXVCnzOoqy/gDp8txzb+LR6qeP57cHEk9IzPsp/u7ge6+YeZNDnxhcwd+3SpmPLs8VDybmvYX3ydnBZak/VpTjualzLryXPK59hD8v/Lbgk9qvQn08fzzLOSTmfY33qftSf6Eox9XapyXhv5j4fvHYF363Vye+WTr08bvzsvaP9+B7gpzrq/Qnf3eQ/YH3mvaB/Irw+d1Zkzz7wPvto0HO/63gE/LDeyK8tVof+afSx9/rdfJXpT6ePzYkXi0/7OMryf8DbeJGigAA</Q>
</FACE>
<ELEMENT>
<A COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJxd1edfj1EcxvGMZO89M1JRVpQtMysS2RlZGZGdRNmbUPbIXmUnUvaO0F/kyed64Po9eb9e97l+55z73Od8T0DA/79qWB1r2POaGIi1LC+DsDbWsf+p37rWXg/r23gNsCE2wsbWbxNsavlmNt/m9rwFtrT5t8LW1t4G9X5tbdx22B71fh2sv46W13p0wmDsjF1Q69QVu2GI5bWO3TEUwzActb49sKflI1DrGYm9sDf2QX2fvtZfP8vr+0VhfxyA0ajvGmPtA3EQav0H4xAcisNQ3324jTPC8tonsTgSR+Fo1P4Zg2OtfRxq/8TheJyAE1H7bhJOtrzmoX0Uj1NwKiag9us0m5/y6l/7OBGn4wxMQu3XmTgLZ1te+38OzsV5OB91LpJxgeUXos7LIsstxhTU+VmCS3GZ5YNxOa7AVFyJOk+rcDWusbzOZxquxXWYjjq363GD5TeizvUm3IxbcCvq3GdYf8pvQ9WBTNyOWbgDde53Wns25qDqyC7cjXtwL6rO7LNx9lte9ecAHsRDeBhVT47gUWs/hqpbx/EE5uJJVD07hactr3mo3uXZuPn2P9W3M9Z+Fs+h6uV5vIAX8RKqjl7GK3jV8qqvBXgNr+MNVL29ibcsfxtVj+9Y7i7eQ9Xj+/b8ARai6mwRPsRH+BhV559Y+1N8hqr/z7EYX2AJ6n54aeO8srzujVJ8jWVYjrpX3uBba3+Hukfe4wf8iJ9Q99Jn/GJ5zUP31Vcb9xt+R91jP6w/tVdgLP7EX1iJv1H3zR/8i1WW/wfhWnRd</A>
<P COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJxdlUlsjVEUx++LRCQkbCwkIiIkJCIiFkREYiFhQyJCQiIRYip9aKsofU8HqsNr6Tyjg+o8oRSlRWmrWlPRkthY2lhYmBb9/SX3vM0v/3Pu/5zz3ffd+wXd5O9AYJIH4SE4hfxh9DF0FHo6+ojxH4XHyY9Bwv/ryzeN+AkYHfD7TiU+w/n1Y9BBzae+cCb5uTBWfcjPQccY/zzicWaeWc6vL99s4ieh9icWxsEFWoeOR2tj4uEp4z+tOMvPoBeiE4z/rHzkJ/Q8pr5881UXnjN9Ne8i59dPQGs/E2EILia/TD4YJn8eJhn/cs0Bk4kvRacY/wriiWb+kKkv3xLWhaD2MxVegGvJX0SH0WnoVehLxp+u+ch/dX5drV9JfA1MghmmjvqvI78aJovkM2EW3Eh+C0yBEfIb0NnGv5l4qplHdbV+PflNej6YQ/4yvAK3kc/Vc6F/wzzi+cZfANNYp/tqO7ow4PuLtM/kv8Gtzp9HfeRPJ18c8Ofc4fy6mieDeAm6FJbBneT3wExYTr4CVhr/PtZlQd1vu9FXjX8v8YiZf5fz51Ef+bPJ/4XXiF+HVbAa5rCuRvczujbg+2/o/yOfZurV6XtB/qbeE/R358+h+vud789F15u+mld1o1iXBxuIN8Im2Kz7jnX5sIW4vk+txq/vRAGMmHptMJq87vFCM7/mUP2g8/1F6HZ0B+yEug9uoYvROo+6H28b/x1YQv4H1PdA95j6yF9KvEv3Hzrk/Lqap4z4XfQ92G3eR+1LOdT50vm5b/za9wozj+4LvbfqI38lce2n+j1AP4Q9OjfkH6Efw16zH306D+g/zq/3xPif6tyx7qeZQ/9jr/FXEX+m+8D586qu5qkmr/2vQfejn+scOT//gvgAHDTP1wBr4ZCp99L4653fR/P3m30YNP464tr/NvQw+hUc0fnXc5j8qN5j8q/NXL9M3SZ0p/P9eu436Hbn91fdLuKNUPvZg36Lfqd9cH7fFpN/r/eO/ABsNvOobjfxPuf7W9D6Ho3BD/AjHGJdK9T/+wmOG/8w6/T/6L6dgJ+Nf5R12r+w6f/F+EdY1wEDZu5x00f+f9N28hAA</P>
<R COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJxdlEdOQ0EQBT8552xyjiYHk3M2JmMwycbGZNiw4CCIKyBxESTEgiOxcL3FbzalUvdjPNMz3+8k/pKTEkyBqbCWehq+hafjGTDT5LNggP5sPAfmQo/jXm8bz8PzYYHJa70d+gvxIlgMB6iX4Lt4KV4Gy02+Au7Rr/1Xwiroddzr7ePavwdWm7zWO6C/RucN68zvrccP8Qa8ETaZfDM8MvkWqPPQPFvxoOPOt5nzUL4dHtOveXbAThil3oWf4JpnN+wxeS8M0d+L98F+GHbc653iA/ggHDJ5rXdGv+Y5DEfgH/VR/BzXPMegz+TH4QX92v8EnIS/jnu9S1z7n4LTJu8z+5/BZ6Hm+UZ9Do/g8/iCmafyi/DK5JegzuOV+rKZt/Ir5jyUX4Ux+tfwdbgBP6hr3tf4Ju7XPTZ5zTtOf0DfF3034LtZ/wbf1fvXuzZ5rX9L/4Hep94d/KKued/hQb0fvQuT17zv6Q/pfuu7AT+p67we8DPdV71rk9d5PdJ/ofsHNc8f6mH8CY/ofph5Kh+Fz/THND/dW/hNXef1gsc1D/P/rs15/QM7bzcM</R>
<H COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJxdz8diAQAQRVF+TRefr0ZE7wRREp2FYzOzOZu3uJNKvK7MCqussc4G0/wMuya/2OI3M2yHXYdd9thnlgMOOeI4dE+Y4zTsZpyH7gXzXIbdD1ehe80CN/zllrvQvWeRh7D743/oPvKDp7A78xK6ryzx/e+Ndz6YSIJPE3E3WQAA</H>
</ELEMENT>
<COMPOSITE>
<C ID="0"> A[0-65] </C>
<C ID="1"> F[211,217,222,227,232,236,240,245,249] </C>
<C ID="2"> F[4,0,17,13,86,82,93,89,126,122,139,135,212,228,241,266,269,302,305,334,337] </C>
<C ID="3"> F[133,129,146,142,157,153,164,160,177,173,190,186,250,246,242,336,339,346,348,356,358] </C>
<C ID="4"> F[184,180,197,193,106,102,113,109,55,51,68,64,251,238,224,350,353,318,321,284,288] </C>
<C ID="5"> F[62,58,75,71,35,31,42,38,10,7,23,20,225,220,215,286,290,272,276,258,262] </C>
<C ID="6"> F[263,270,277,282,289,294,299,306,311,316,322,326,332,338,344,349,354,359] </C>
<C ID="7"> H[66-75] </C>
<C ID="8"> R[76-111] </C>
<C ID="9"> P[112-180] </C>
</COMPOSITE>
<DOMAIN> C[0,7,8,9] </DOMAIN>
</GEOMETRY>
</NEKTAR>
...@@ -92,46 +92,52 @@ int main(int argc, char *argv[]) ...@@ -92,46 +92,52 @@ int main(int argc, char *argv[])
std::cout << "Initial shape:\n" << in << std::endl << std::endl; std::cout << "Initial shape:\n" << in << std::endl << std::endl;
// Test SIMD Implementation
#ifdef NEKTAR_ENABLE_SIMD_AVX2 #ifdef NEKTAR_ENABLE_SIMD_AVX2
// Test out field reshaping {
in.ReshapeStorage<4>(); // Test out field reshaping
std::cout << "Reshaped to 4:\n" << in << std::endl << std::endl; in.ReshapeStorage<4>();
std::cout << "Reshaped to 4:\n" << in << std::endl << std::endl;
// Test out SIMD instructions // Test out SIMD instructions
using vec_t = tinysimd::simd<double>; using vec_t = tinysimd::simd<double>;
// Reshape into vec_t::width, with the right memory alignment requirements // Reshape into vec_t::width, with the right memory alignment
in.ReshapeStorage<vec_t::width, vec_t::alignment>(); // requirements
std::cout << "SIMD In:\n" << in << std::endl << std::endl; in.ReshapeStorage<vec_t::width, vec_t::alignment>();
std::cout << "SIMD In:\n" << in << std::endl << std::endl;
in2.ReshapeStorage<vec_t::width, vec_t::alignment>(); in2.ReshapeStorage<vec_t::width, vec_t::alignment>();
std::cout << "SIMD Out (before):\n" << out << std::endl << std::endl; std::cout << "SIMD Out (before):\n" << out << std::endl << std::endl;
// Reinterpret casting from double to vector type allows for efficient // Reinterpret casting from double to vector type allows for efficient
// conversion to SIMD intrinsics // conversion to SIMD intrinsics
vec_t::vectorType *inptr = vec_t::vectorType *inptr =
reinterpret_cast<vec_t::vectorType *>(in.GetStorage().GetCPUPtr()); reinterpret_cast<vec_t::vectorType *>(in.GetStorage().GetCPUPtr());
vec_t::scalarType *in2ptr = in2.GetStorage().GetCPUPtr();
// Loop over each block in the field and square each element vec_t::scalarType *in2ptr = in2.GetStorage().GetCPUPtr();
for (auto const &block : blocks_coeff)
{ // Loop over each block in the field and square each element
const size_t numMetaBlocks = block.num_elements / vec_t::width; for (auto const &block : blocks_coeff)
for (size_t metaBlock = 0; metaBlock < numMetaBlocks * block.num_pts;
++metaBlock)
{ {
(vec_t(inptr[metaBlock]) * vec_t(inptr[metaBlock])).store(in2ptr); const size_t numMetaBlocks = block.num_elements / vec_t::width;
in2ptr += vec_t::width; for (size_t metaBlock = 0;
} metaBlock < numMetaBlocks * block.num_pts; ++metaBlock)
{
(vec_t(inptr[metaBlock]) * vec_t(inptr[metaBlock]))
.store(in2ptr);
in2ptr += vec_t::width;
}
inptr += numMetaBlocks * block.num_pts; inptr += numMetaBlocks * block.num_pts;
} }
// Back to non-interleaved for non-SIMD Operators // Back to non-interleaved for non-SIMD Operators
in.ReshapeStorage<1>(); in.ReshapeStorage<1>();
in2.ReshapeStorage<1>(); in2.ReshapeStorage<1>();
std::cout << "Out:\n" << in2 << std::endl; std::cout << "Out:\n" << in2 << std::endl;
}
#endif #endif
// Operators are instantiated using a Factory pattern. First lets check // Operators are instantiated using a Factory pattern. First lets check
...@@ -153,93 +159,146 @@ int main(int argc, char *argv[]) ...@@ -153,93 +159,146 @@ int main(int argc, char *argv[])
// Let's display the result // Let's display the result
std::cout << out << std::endl; std::cout << out << std::endl;
in = Field<double, FieldState::Coeff>::create(blocks_coeff); // Test BwdTrans Implementation
auto &inStorage = in.GetStorage(); {
std::fill(inStorage.GetCPUPtr(), inStorage.GetCPUPtr() + inStorage.size(), std::cout << "BwdTrans (StdMat) test starts." << std::endl;
0);
out = Field<double, FieldState::Phys>::create(blocks_phys);
auto &outStorage = out.GetStorage(); // Create two Fields with memory on the CPU.
std::fill(outStorage.GetCPUPtr(), auto inCoeff = Field<double, FieldState::Coeff>::create(blocks_coeff);
outStorage.GetCPUPtr() + outStorage.size(), 0); auto outPhys = Field<double, FieldState::Phys>::create(blocks_phys);
for (auto const &block : in.GetBlocks()) // Assign input values from the CPU.
{ auto *inptr = inCoeff.GetStorage().GetCPUPtr();
for (size_t el = 0; el < block.num_elements; ++el) for (auto const &block : inCoeff.GetBlocks())
{ {
in.GetStorage().GetCPUPtr()[el * block.num_pts] = 1; for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
*(inptr++) = coeff + 1;
}
}
} }
}
std::cout << in << std::endl;
BwdTrans<>::create(explist, "StdMat")->apply(in, out); std::cout << "Initial shape:\n" << inCoeff << std::endl;
std::cout << out << std::endl; // Perform the BwdTrans on the fields.
BwdTrans<>::create(explist, "StdMat")->apply(inCoeff, outPhys);
// Check output values.
std::cout << "Out:" << std::endl;
auto outptr = outPhys.GetStorage().GetCPUPtr();
for (auto const &block : out.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
// Test BwdTrans (CUDA) Implementation
#ifdef NEKTAR_USE_CUDA #ifdef NEKTAR_USE_CUDA
// Test CUDA MemoryRegion
// Create two Fields with memory on the GPU
in = Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(
blocks_coeff);
out =
Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(blocks_phys);
// Perform the BwdTrans on the fields using the CUDA implementation
// Since this is a CUDA operator, acting on CUDA fields, everything happens
// on the GPU.
BwdTrans<>::create(explist, "CUDA")->apply(in, out);
// Test the GPU-backed fields with a CPU operator
// This should show a debug warning due to the implicit conversion. The
// purpose of this is to allow us to transition the code to the new
// infrastructure and add CUDA operators, without the need to add ALL CUDA
// operators before we can test anything.
BwdTrans<>::create(explist, "MatFree")->apply(in, out);
// Create two CPU backed fields and use them with a GPU operator
// This call implicitly converts the fields to a GPU backend and warns the
// user about that fact
in = Field<double, FieldState::Coeff>::create(blocks);
out = Field<double, FieldState::Phys>::create(blocks);
BwdTrans<>::create(explist, "CUDA")->apply(in, out);
#endif
std::cout << "Inner Product of a function WRT the Basis" << std::endl;
// Create two Field objects with a MemoryRegionCPU backend by default
// for the inner product with respect to base
auto inPhys = Field<double, FieldState::Phys>::create(blocks_phys);
auto outCoeff = Field<double, FieldState::Coeff>::create(blocks_coeff);
double *y = inPhys.GetStorage().GetCPUPtr();
for (auto const &block : blocks_phys)
{ {
for (size_t el = 0; el < block.num_elements; ++el) std::cout << "BwdTrans (CUDA) test starts." << std::endl;
// Create two Fields with memory on the GPU.
auto inCoeff =
Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(
blocks_coeff);
auto outPhys =
Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(
blocks_phys);
// Assign input values from the CPU.
std::cout << "Initial shape: " << std::endl;
auto *inptr =
inCoeff.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : inCoeff.GetBlocks())
{ {
for (size_t phys = 0; phys < block.num_pts; ++phys) for (size_t el = 0; el < block.num_elements; ++el)
{ {
// Each element is the index of the quadrature points for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
// this is useful for testing reshapes {
*(y++) = phys; *(inptr++) = coeff + 1;
std::cout << coeff + 1 << " ";
}
}
}
std::cout << std::endl << std::endl;
// Perform the BwdTrans on the fields using the CUDA implementation
// Since this is a CUDA operator, acting on CUDA fields, everything
// happens on the GPU.
BwdTrans<>::create(explist, "CUDA")->apply(inCoeff, outPhys);
// Check output values.
std::cout << "Out:" << std::endl;
auto *outptr =
outPhys.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : outPhys.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr++) << ' ';
}
} }
std::cout << std::endl;
} }
std::cout << std::endl;
} }
#endif
// Test IProductWRTBase Implementation
{
std::cout << "IProductWRTBase (StdMat) test starts." << std::endl;
memset(outCoeff.GetStorage().GetCPUPtr(), 0, // Create two Field objects with a MemoryRegionCPU backend by default
outCoeff.GetStorage().size() * sizeof(double)); // for the inner product with respect to base
auto inPhys = Field<double, FieldState::Phys>::create(blocks_phys);
auto outCoeff = Field<double, FieldState::Coeff>::create(blocks_coeff);
std::cout << "Initial shape:\n" << inPhys << std::endl << std::endl; // Assign input values
auto *inptr = inPhys.GetStorage().GetCPUPtr();
for (auto const &block : inPhys.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
// Each element is the index of the quadrature points
// this is useful for testing reshapes
*(inptr++) = phys;
}
}
}
// IProductWRTBase std::cout << "Initial shape:\n" << inPhys << std::endl;
auto ipwrtb = IProductWRTBase<>::create(explist, "StdMat");
// ... and then apply it
ipwrtb->apply(inPhys, outCoeff); // out and in is inverted for the IP
// Let's display the result // IProductWRTBase
std::cout << "Out:\n" << outCoeff << std::endl; IProductWRTBase<>::create(explist, "StdMat")->apply(inPhys, outCoeff);
// Check output values.
std::cout << "Out:" << std::endl;
auto *outptr = outCoeff.GetStorage().GetCPUPtr();
for (auto const &block : outCoeff.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
std::cout << *(outptr++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
std::cout << "END" << std::endl; std::cout << "END" << std::endl;
return 0; return 0;
......
<?xml version="1.0" encoding="utf-8" ?>
<NEKTAR>
<GEOMETRY DIM="1" SPACE="1">
<VERTEX>
<V ID="0"> -1.0 0.0 0.0</V>
<V ID="1"> -0.8 0.0 0.0</V>
<V ID="2"> -0.6 0.0 0.0</V>
<V ID="3"> -0.4 0.0 0.0</V>
<V ID="4"> -0.2 0.0 0.0</V>
<V ID="5"> 0.0 0.0 0.0</V>
<V ID="6"> 0.2 0.0 0.0</V>
<V ID="7"> 0.4 0.0 0.0</V>
<V ID="8"> 0.6 0.0 0.0</V>
<V ID="9"> 0.8 0.0 0.0</V>
<V ID="10"> 1.0 0.0 0.0</V>
</VERTEX>
<ELEMENT>
<S ID="0"> 0 1 </S>
<S ID="1"> 1 2 </S>
<S ID="2"> 2 3 </S>
<S ID="3"> 3 4 </S>
<S ID="4"> 4 5 </S>
<S ID="5"> 5 6 </S>
<S ID="6"> 6 7 </S>
<S ID="7"> 7 8 </S>
<S ID="8"> 8 9 </S>
<S ID="9"> 9 10 </S>
</ELEMENT>
<COMPOSITE>
<C ID="0"> S[0-9] </C>
<C ID="1"> V[0] </C>
<C ID="2"> V[10] </C>
</COMPOSITE>
<DOMAIN> C[0] </DOMAIN>
</GEOMETRY>
<EXPANSIONS>
<E COMPOSITE="C[0]" FIELDS="u" TYPE="MODIFIED" NUMMODES="4"/>
</EXPANSIONS>
</NEKTAR>
<?xml version="1.0" encoding="utf-8" ?>
<NEKTAR>
<GEOMETRY DIM="2" SPACE="2">
<VERTEX COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx1j8sVgjAABIOIgIrK324o3RYoIaV4yi7guBfey2SySwjbxM/+G0K24+sPPyFfxXN83/yMvlOg71y4f0knJfeLV+hH8Rp98+tx0GHnDbn/847cadhX/4P7xZ/oed8Lufe1f3pTOvZ1r+d+8QF3e9+I3Psm5M7Mvu69uV/8C2/eNsUA</VERTEX>
<EDGE COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx1kEkSglAMBQEVZxxBHEARh/vf0I29oKvyN6l+FUI6STJ8aVCzoI6CyhurH54EnIuZN1U/fTMxfXMx8xbB90sx36+Ceet/5T7MLcTM3Yi5x1Y5/9uJ8d4rZ4+DGL+jcvYrxXhX2p+9T2L2rgOPs3J8LmI8rsrxvInxaJTj34rxu2t/7vIQc5dOjMdTOffqxXi8lHPHtxiPj3Lu+xXj9wOocAa/</EDGE>
<ELEMENT>
<Q COMPRESSED="B64Z-LittleEndian" BITSIZE="64">eJx10EcOwjAABdFQ0kkPvYTO/W/IZrxgJLx50kSyvxJFv2eGc1z86UuMMdH30FPMMNe9oRdY4krvVFhjo12xeoud7knUexy0N1Ufca29Yf8Gt7jT3lx9jwftLdSPeNLeUv2MF+0N/3HCK960t1K/40N7a/UnvrS3UX/jR3u/iOQFQAAA</Q>
</ELEMENT>
<COMPOSITE>
<C ID="0"> Q[0-11] </C>
<C ID="1"> E[0,13,22,31,11,21,30,39,3,6,9,12,32,34,36,38] </C>
<C ID="2"> Q[12-15] </C>
</COMPOSITE>
<DOMAIN> C[0,2] </DOMAIN>
</GEOMETRY>
<Metadata>
<Provenance>
<GitBranch></GitBranch>
<GitSHA1>88b1cb3be29cc9090d8a14af19c26decd88345db</GitSHA1>
<Hostname>kingscross</Hostname>
<NektarVersion>5.1.0</NektarVersion>
<Timestamp>27-Apr-2023 15:32:08</Timestamp>
</Provenance>
<NekMeshCommandLine>square.msh square.xml </NekMeshCommandLine>
</Metadata>
<EXPANSIONS>
<E COMPOSITE="C[0]" NUMMODES="4" TYPE="MODIFIED" FIELDS="u" />
<E COMPOSITE="C[2]" NUMMODES="3" TYPE="MODIFIED" FIELDS="u" />
</EXPANSIONS>
<CONDITIONS>
<SOLVERINFO>
<I PROPERTY="EQTYPE" VALUE="Helmholtz" />
</SOLVERINFO>
<VARIABLES>
<V ID="0">u</V>
</VARIABLES>
<FUNCTION NAME="Forcing">
<E VAR="u" VALUE="0" />
</FUNCTION>
</CONDITIONS>
</NEKTAR>
<?xml version="1.0" encoding="utf-8"?>
<NEKTAR xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="http://www.nektar.info/schema/nektar.xsd">
<GEOMETRY DIM="2" SPACE="2">
<VERTEX>
<V ID="0"> -1.0 -1.0 0.0 </V>
<V ID="1"> 0.0 -1.0 0.0 </V>
<V ID="2"> 1.0 -1.0 0.0 </V>
<V ID="3"> -1.0 0.0 0.0 </V>
<V ID="4"> 0.0 0.0 0.0 </V>
<V ID="5"> 1.0 0.0 0.0 </V>
<V ID="6"> -1.0 1.0 0.0 </V>
<V ID="7"> 0.0 1.0 0.0 </V>
<V ID="8"> 1.0 1.0 0.0 </V>
</VERTEX>
<EDGE>
<E ID="0"> 0 1 </E>
<E ID="1"> 1 2 </E>
<E ID="2"> 0 3 </E>
<E ID="3"> 1 4 </E>
<E ID="4"> 2 5 </E>
<E ID="5"> 3 4 </E>
<E ID="6"> 4 5 </E>
<E ID="7"> 6 3 </E>
<E ID="8"> 4 7 </E>
<E ID="9"> 5 8 </E>
<E ID="10"> 6 7 </E>
<E ID="11"> 7 8 </E>
<E ID="12"> 0 4 </E>
<E ID="13"> 1 5 </E>
</EDGE>
<ELEMENT>
<T ID="0"> 0 3 12 </T>
<T ID="1"> 2 12 5 </T>
<T ID="2"> 1 4 13 </T>
<T ID="3"> 3 13 6 </T>
<Q ID="4"> 5 8 10 7 </Q>
<Q ID="5"> 8 6 9 11 </Q>
</ELEMENT>
<COMPOSITE>
<C ID="0"> T[0-3] </C>
<C ID="1"> Q[4-5] </C>
<C ID="2"> E[2,7,4,9] </C>
<C ID="3"> E[0,1,10,11] </C>
</COMPOSITE>
<DOMAIN> C[0-1] </DOMAIN>
</GEOMETRY>
<EXPANSIONS>
<E COMPOSITE="C[0]" NUMMODES="6,7" FIELDS="u" BASISTYPE="Modified_A,Modified_B" NUMPOINTS="7,8" POINTSTYPE="GaussLobattoLegendre,GaussRadauMAlpha1Beta0"/>
<E COMPOSITE="C[1]" NUMMODES="6,7" FIELDS="u" BASISTYPE="Modified_A,Modified_A" NUMPOINTS="7,8" POINTSTYPE="GaussLobattoLegendre,GaussLobattoLegendre"/>
</EXPANSIONS>
</NEKTAR>
...@@ -13,6 +13,17 @@ target_include_directories(test_bwdtrans PRIVATE ${NEKTAR++_INCLUDE_DIRS}) ...@@ -13,6 +13,17 @@ target_include_directories(test_bwdtrans PRIVATE ${NEKTAR++_INCLUDE_DIRS})
target_compile_definitions(test_bwdtrans PRIVATE target_compile_definitions(test_bwdtrans PRIVATE
-DTEST_PATH="${CMAKE_SOURCE_DIR}") -DTEST_PATH="${CMAKE_SOURCE_DIR}")
IF (NEKTAR_USE_CUDA)
add_executable(test_bwdtranscuda test_bwdtranscuda.cpp)
target_link_libraries(test_bwdtranscuda PRIVATE Operators)
target_link_libraries(test_bwdtranscuda PRIVATE ${NEKTAR++_LIBRARIES})
target_link_libraries(test_bwdtranscuda PRIVATE Boost::unit_test_framework)
target_include_directories(test_bwdtranscuda PRIVATE "${CMAKE_SOURCE_DIR}")
target_include_directories(test_bwdtranscuda PRIVATE ${NEKTAR++_INCLUDE_DIRS})
target_compile_definitions(test_bwdtranscuda PRIVATE
-DTEST_PATH="${CMAKE_SOURCE_DIR}")
ENDIF()
add_executable(test_ipwrtbase test_ipwrtbase.cpp) add_executable(test_ipwrtbase test_ipwrtbase.cpp)
target_link_libraries(test_ipwrtbase PRIVATE Operators) target_link_libraries(test_ipwrtbase PRIVATE Operators)
target_link_libraries(test_ipwrtbase PRIVATE ${NEKTAR++_LIBRARIES}) target_link_libraries(test_ipwrtbase PRIVATE ${NEKTAR++_LIBRARIES})
...@@ -25,8 +36,12 @@ target_compile_definitions(test_ipwrtbase PRIVATE ...@@ -25,8 +36,12 @@ target_compile_definitions(test_ipwrtbase PRIVATE
add_test( add_test(
NAME BwdTrans NAME BwdTrans
COMMAND test_bwdtrans COMMAND test_bwdtrans
# Test executable is looking for input session file in the working WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
# directory )
add_test(
NAME BwdTransCUDA
COMMAND test_bwdtranscuda
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
) )
......
...@@ -8,6 +8,10 @@ ...@@ -8,6 +8,10 @@
#include <Operators/OperatorBwdTrans.hpp> #include <Operators/OperatorBwdTrans.hpp>
#include <Operators/OperatorIProductWRTBase.hpp> #include <Operators/OperatorIProductWRTBase.hpp>
#ifdef NEKTAR_USE_CUDA
#include "MemoryRegionCUDA.hpp"
#endif
using namespace Nektar::Operators; using namespace Nektar::Operators;
using namespace Nektar::LibUtilities; using namespace Nektar::LibUtilities;
using namespace Nektar; using namespace Nektar;
...@@ -25,23 +29,33 @@ using namespace Nektar; ...@@ -25,23 +29,33 @@ using namespace Nektar;
* https://www.boost.org/doc/libs/1_82_0/libs/test/doc/html/boost_test/tests_organization/fixtures/case.html * https://www.boost.org/doc/libs/1_82_0/libs/test/doc/html/boost_test/tests_organization/fixtures/case.html
*/ */
template <FieldState stateIn = FieldState::Coeff, template <typename TData, FieldState stateIn = FieldState::Coeff,
FieldState stateOut = FieldState::Phys> FieldState stateOut = FieldState::Phys>
class InitFields class InitFields
{ {
public: public:
Field<double, stateIn> *fixt_in; Field<TData, stateIn> *fixt_in = nullptr;
Field<double, stateOut> *fixt_out; Field<TData, stateOut> *fixt_out = nullptr;
Field<double, stateOut> *fixt_expected; Field<TData, stateOut> *fixt_expected = nullptr;
#ifdef NEKTAR_USE_CUDA
Field<TData, stateIn> *fixtcuda_in = nullptr;
Field<TData, stateOut> *fixtcuda_out = nullptr;
#endif
MultiRegions::ExpListSharedPtr fixt_explist{nullptr}; MultiRegions::ExpListSharedPtr fixt_explist{nullptr};
~InitFields() ~InitFields()
{ {
BOOST_TEST_MESSAGE("teardown fixture"); BOOST_TEST_MESSAGE("teardown fixture");
if (fixt_in) delete fixt_in;
if (fixt_out) delete fixt_out;
if (fixt_expected) delete fixt_expected;
#ifdef NEKTAR_USE_CUDA
if (fixtcuda_in) delete fixtcuda_in;
if (fixtcuda_out) delete fixtcuda_out;
#endif
} }
InitFields() InitFields() = default;
{
}
void Configure() void Configure()
{ {
...@@ -54,7 +68,7 @@ public: ...@@ -54,7 +68,7 @@ public:
// Session::Reader::CreateInstance. The first element stands for // Session::Reader::CreateInstance. The first element stands for
// the name of the executable which, in our case, doesn't matter. // the name of the executable which, in our case, doesn't matter.
int argc = 2; int argc = 2;
char *argv[] = {(char *)"exe_name", GetMeshName().data()}; char *argv[] = {(char *)"exe_name", meshName.data()};
session = LibUtilities::SessionReader::CreateInstance(argc, argv); session = LibUtilities::SessionReader::CreateInstance(argc, argv);
graph = SpatialDomains::MeshGraph::Read(session); graph = SpatialDomains::MeshGraph::Read(session);
...@@ -66,14 +80,23 @@ public: ...@@ -66,14 +80,23 @@ public:
auto blocks_out = GetBlockAttributes(stateOut, fixt_explist); auto blocks_out = GetBlockAttributes(stateOut, fixt_explist);
// Create two Field objects with a MemoryRegionCPU backend by default // Create two Field objects with a MemoryRegionCPU backend by default
auto f_in = Field<double, stateIn>::create(blocks_in); auto f_in = Field<TData, stateIn>::create(blocks_in);
auto f_out = Field<double, stateOut>::create(blocks_out); auto f_out = Field<TData, stateOut>::create(blocks_out);
auto f_expected = Field<double, stateOut>::create(blocks_out); auto f_expected = Field<TData, stateOut>::create(blocks_out);
fixt_in = new Field<double, stateIn>(std::move(f_in)); fixt_in = new Field<TData, stateIn>(std::move(f_in));
fixt_out = new Field<double, stateOut>(std::move(f_out)); fixt_out = new Field<TData, stateOut>(std::move(f_out));
fixt_expected = new Field<double, stateOut>(std::move(f_expected)); fixt_expected = new Field<TData, stateOut>(std::move(f_expected));
#ifdef NEKTAR_USE_CUDA
auto fcuda_in =
Field<TData, stateIn>::template create<MemoryRegionCUDA>(blocks_in);
auto fcuda_out =
Field<TData, stateOut>::template create<MemoryRegionCUDA>(
blocks_out);
fixtcuda_in = new Field<TData, stateIn>(std::move(fcuda_in));
fixtcuda_out = new Field<TData, stateOut>(std::move(fcuda_out));
#endif
} }
protected: protected:
virtual std::string GetMeshName() = 0; std::string meshName = "";
}; };
...@@ -18,27 +18,25 @@ using namespace Nektar::Operators; ...@@ -18,27 +18,25 @@ using namespace Nektar::Operators;
using namespace Nektar::LibUtilities; using namespace Nektar::LibUtilities;
using namespace Nektar; using namespace Nektar;
class Line : public InitFields<FieldState::Coeff, FieldState::Phys> class Line : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{ {
protected: public:
virtual std::string GetMeshName() override Line() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
{ meshName = "line.xml";
return "line.xml";
} }
}; };
class Square : public InitFields<FieldState::Coeff, FieldState::Phys> class Square : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{ {
protected: public:
virtual std::string GetMeshName() override Square() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
{ meshName = "square.xml";
return "square.xml";
} }
}; };
using init = InitFields<FieldState::Coeff, FieldState::Phys>;
BOOST_FIXTURE_TEST_CASE(bwdtrans, Line) BOOST_FIXTURE_TEST_CASE(bwdtrans, Line)
{ {
Configure(); Configure();
double *x = fixt_in->GetStorage().GetCPUPtr(); double *x = fixt_in->GetStorage().GetCPUPtr();
...@@ -74,10 +72,12 @@ BOOST_FIXTURE_TEST_CASE(bwdtrans, Line) ...@@ -74,10 +72,12 @@ BOOST_FIXTURE_TEST_CASE(bwdtrans, Line)
{ {
for (size_t coeff = 0; coeff < block.num_pts; ++coeff) for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{ {
if (coeff == 0) { if (coeff == 0)
{
*(x++) = 1.0; *(x++) = 1.0;
} }
else { else
{
*(x++) = 0.0; *(x++) = 0.0;
} }
} }
...@@ -85,6 +85,6 @@ BOOST_FIXTURE_TEST_CASE(bwdtrans, Line) ...@@ -85,6 +85,6 @@ BOOST_FIXTURE_TEST_CASE(bwdtrans, Line)
} }
BwdTrans<>::create(fixt_explist, "StdMat")->apply(*fixt_in, *fixt_out); BwdTrans<>::create(fixt_explist, "StdMat")->apply(*fixt_in, *fixt_out);
double TOL {0.01}; double TOL{0.01};
BOOST_TEST( fixt_out->compare(*fixt_expected, TOL)); BOOST_TEST(fixt_out->compare(*fixt_expected, TOL));
} }
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
#include <iostream>
#include <memory>
#include "Field.hpp"
#include "Operators/OperatorBwdTrans.hpp"
#include <LibUtilities/BasicUtils/SessionReader.h>
#include <MultiRegions/ExpList.h>
#include <SpatialDomains/MeshGraph.h>
#include "init_fields.hpp"
using namespace std;
using namespace Nektar::Operators;
using namespace Nektar::LibUtilities;
using namespace Nektar;
class Line : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Line() : InitFields<FieldState::Coeff, FieldState::Phys>() {
meshName = "line.xml";
}
};
class Square : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Square() : InitFields<FieldState::Coeff, FieldState::Phys>() {
meshName = "square.xml";
}
};
BOOST_FIXTURE_TEST_CASE(bwdtranscuda, Line)
{
Configure();
static double *x =
fixtcuda_in->template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : fixtcuda_in->GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
*(x++) = coeff + 1;
}
}
}
BwdTrans<>::create(fixt_explist, "CUDA")
->apply(*fixtcuda_in, *fixtcuda_out);
static double *y =
fixtcuda_out->template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
double TOL = 1e-12;
BOOST_CHECK_CLOSE(y[0], 1.000000000000000, TOL);
BOOST_CHECK_CLOSE(y[1], 0.808463389187877, TOL);
BOOST_CHECK_CLOSE(y[2], 1.993385866728399, TOL);
BOOST_CHECK_CLOSE(y[3], 1.312500000000000, TOL);
BOOST_CHECK_CLOSE(y[4], 2.321846776942122, TOL);
BOOST_CHECK_CLOSE(y[5], 4.082915537389534, TOL);
BOOST_CHECK_CLOSE(y[6], 2.000000000000000, TOL);
}
...@@ -18,21 +18,19 @@ using namespace Nektar::Operators; ...@@ -18,21 +18,19 @@ using namespace Nektar::Operators;
using namespace Nektar::LibUtilities; using namespace Nektar::LibUtilities;
using namespace Nektar; using namespace Nektar;
class Line : public InitFields<FieldState::Phys, FieldState::Coeff> class Line : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{ {
protected: public:
virtual std::string GetMeshName() override Line() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
{ meshName = "line.xml";
return "line.xml";
} }
}; };
class Square : public InitFields<FieldState::Phys, FieldState::Coeff> class Square : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{ {
protected: public:
virtual std::string GetMeshName() override Square() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
{ meshName = "square.xml";
return "square.xml";
} }
}; };
......