Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • nektar/redesign-prototypes
1 result
Show changes
Commits on Source (2)
Showing
with 1644 additions and 69 deletions
......@@ -26,12 +26,12 @@ message(STATUS "Found Nektar++: version ${NEKTAR++_VERSION}")
set(CMAKE_INSTALL_RPATH "${NEKTAR++_LIBRARY_DIRS}")
set(SRC Field.cpp Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operators/BwdTrans/BwdTransImpl.cpp Operators/OperatorIProductWRTBase.cpp Operators/IProductWRTBase/IProductWRTBaseImpl.cpp)
set(SRC Field.cpp Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operators/BwdTrans/BwdTransImpl.cpp Operators/OperatorIProductWRTBase.cpp Operators/IProductWRTBase/IProductWRTBaseImpl.cpp Operators/OperatorPhysDeriv.cpp Operators/PhysDeriv/PhysDerivImpl.cpp)
if (NEKTAR_USE_CUDA)
enable_language(CUDA)
add_definitions(-DNEKTAR_USE_CUDA)
set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu Operators/IProductWRTBase/IProductWRTBaseCUDA.cu)
set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu Operators/IProductWRTBase/IProductWRTBaseCUDA.cu Operators/PhysDeriv/PhysDerivCUDA.cu)
endif()
if (NEKTAR_USE_SIMD)
......
......@@ -99,38 +99,45 @@ public:
return *this;
}
/**
* @brief Compare this field to another field, with absolute
* tolerance tol.
* @return bool
*/
bool compare(Field<TType, TState> &rhs, double tol)
{
const std::vector<BlockAttributes>& rhs_blocks = rhs.GetBlocks();
TType *store = GetStorage().GetCPUPtr();
TType *rhs_store = rhs.GetStorage().GetCPUPtr();
if (rhs_blocks.size() != block_attributes.size()) return false;
size_t i {0};
for (size_t bl = 0; bl < block_attributes.size(); ++bl){
size_t num_elements = block_attributes[bl].num_elements;
size_t num_pts = block_attributes[bl].num_pts;
// Check that each block have the same structure
if (num_elements != rhs_blocks[bl].num_elements) return false;
if (num_pts != rhs_blocks[bl].num_pts) return false;
// Compare elements in the blocks
for (size_t el = 0 ; el < num_elements; ++el) {
for (size_t coeff = 0; coeff < num_pts; ++coeff) {
i = coeff + el * num_pts + bl * num_pts * num_elements;
if (std::abs(store[i] - rhs_store[i]) > tol) return false;
}
}
/**
* @brief Compare this field to another field, with absolute
* tolerance tol.
* @return bool
*/
bool compare(Field<TType, TState> &rhs, double tol)
{
const std::vector<BlockAttributes> &rhs_blocks = rhs.GetBlocks();
TType *store = GetStorage().GetCPUPtr();
TType *rhs_store = rhs.GetStorage().GetCPUPtr();
if (rhs_blocks.size() != block_attributes.size())
return false;
size_t i{0};
for (size_t bl = 0; bl < block_attributes.size(); ++bl)
{
size_t num_elements = block_attributes[bl].num_elements;
size_t num_pts = block_attributes[bl].num_pts;
// Check that each block have the same structure
if (num_elements != rhs_blocks[bl].num_elements)
return false;
if (num_pts != rhs_blocks[bl].num_pts)
return false;
// Compare elements in the blocks
for (size_t el = 0; el < num_elements; ++el)
{
for (size_t coeff = 0; coeff < num_pts; ++coeff)
{
i = coeff + el * num_pts + bl * num_pts * num_elements;
if (std::abs(store[i] - rhs_store[i]) > tol)
return false;
}
}
}
return true;
}
return true;
}
/**
* @brief Static templated creation method.
......
......@@ -61,13 +61,7 @@ public:
~OperatorBwdTransImpl()
{
for (auto &basis : m_basis)
{
for (size_t i = 0; i < basis.second.size(); i++)
{
cudaFree(basis.second[i]);
}
}
DeallocateDataCUDA<TData>(m_basis);
}
void apply(Field<TData, FieldState::Coeff> &in,
......
......@@ -114,6 +114,50 @@ protected:
return jac;
}
Array<OneD, Array<OneD, TData>> SetDerivativeFactor(size_t dfSize)
{
// Allocate memory for the derivative factor
size_t nDim = this->m_expansionList->GetShapeDimension();
size_t nCoord = this->m_expansionList->GetCoordim(0);
Array<OneD, Array<OneD, TData>> derivFac(nDim * nCoord);
for (size_t d = 0; d < nDim * nCoord; d++)
{
derivFac[d] = Array<OneD, TData>(dfSize);
}
// Initialise derivative factor.
size_t dfindex = 0;
size_t nTotElmts = this->m_expansionList->GetNumElmts();
for (size_t e = 0; e < nTotElmts; ++e)
{
auto expPtr = this->m_expansionList->GetExp(e);
auto &df = expPtr->GetMetricInfo()->GetDerivFactors(
expPtr->GetPointsKeys());
size_t nqTot = expPtr->GetTotPoints();
if (expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed)
{
for (size_t d = 0; d < nDim * nCoord; d++)
{
for (size_t i = 0; i < nqTot; ++i)
{
derivFac[d][dfindex + i] = df[d][i];
}
}
dfindex += nqTot;
}
else
{
for (size_t d = 0; d < nDim * nCoord; d++)
{
derivFac[d][dfindex] = df[d][0];
}
dfindex += 1;
}
}
return derivFac;
}
MultiRegions::ExpListSharedPtr m_expansionList;
};
......
......@@ -7,18 +7,15 @@ namespace Nektar::Operators
{
template <typename TData>
using BasisMap =
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>>;
template <typename TData>
using WeightMap =
using DataMap =
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>>;
template <typename TData>
BasisMap<TData> GetBasisDataCUDA(
DataMap<TData> GetBasisDataCUDA(
const MultiRegions::ExpListSharedPtr &expansionList)
{
// Initialize data map.
BasisMap<TData> basis;
DataMap<TData> basis;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(3,
......@@ -40,7 +37,7 @@ BasisMap<TData> GetBasisDataCUDA(
if (basis.find(basisKeys) == basis.end())
{
basis[basisKeys] = std::vector<TData *>(nDim, 0);
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
for (size_t d = 0; d < nDim; d++)
{
auto ndata = expPtr->GetBasis(d)->GetBdata().size();
auto hostPtr = expPtr->GetBasis(d)->GetBdata().get();
......@@ -55,11 +52,93 @@ BasisMap<TData> GetBasisDataCUDA(
}
template <typename TData>
WeightMap<TData> GetWeightDataCUDA(
DataMap<TData> GetPointDataCUDA(
const MultiRegions::ExpListSharedPtr &expansionList)
{
// Initialize data map.
WeightMap<TData> weight;
DataMap<TData> points;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(3,
LibUtilities::NullBasisKey);
// Loop over the elements of expansionList.
size_t nDim = expansionList->GetShapeDimension();
for (size_t i = 0; i < expansionList->GetNumElmts(); ++i)
{
auto const expPtr = expansionList->GetExp(i);
// Fetch basiskeys of current element.
for (size_t d = 0; d < nDim; d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Copy data to points, if necessary.
if (points.find(basisKeys) == points.end())
{
points[basisKeys] = std::vector<TData *>(nDim, 0);
for (size_t d = 0; d < nDim; d++)
{
auto ndata = expPtr->GetBasis(d)->GetZ().size();
auto hostPtr = expPtr->GetBasis(d)->GetZ().get();
auto &devicePtr = points[basisKeys][d];
cudaMalloc((void **)&devicePtr, sizeof(TData) * ndata);
cudaMemcpy(devicePtr, hostPtr, sizeof(TData) * ndata,
cudaMemcpyHostToDevice);
}
}
}
return points;
}
template <typename TData>
DataMap<TData> GetDerivativeDataCUDA(
const MultiRegions::ExpListSharedPtr &expansionList)
{
// Initialize data map.
DataMap<TData> derivative;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(3,
LibUtilities::NullBasisKey);
// Loop over the elements of expansionList.
size_t nDim = expansionList->GetShapeDimension();
for (size_t i = 0; i < expansionList->GetNumElmts(); ++i)
{
auto const expPtr = expansionList->GetExp(i);
// Fetch basiskeys of current element.
for (size_t d = 0; d < nDim; d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Copy data to derivative, if necessary.
if (derivative.find(basisKeys) == derivative.end())
{
derivative[basisKeys] = std::vector<TData *>(nDim, 0);
for (size_t d = 0; d < nDim; d++)
{
auto ndata = expPtr->GetBasis(d)->GetD()->GetPtr().size();
auto hostPtr = expPtr->GetBasis(d)->GetD()->GetPtr().get();
auto &devicePtr = derivative[basisKeys][d];
cudaMalloc((void **)&devicePtr, sizeof(TData) * ndata);
cudaMemcpy(devicePtr, hostPtr, sizeof(TData) * ndata,
cudaMemcpyHostToDevice);
}
}
}
return derivative;
}
template <typename TData>
DataMap<TData> GetWeightDataCUDA(
const MultiRegions::ExpListSharedPtr &expansionList)
{
// Initialize data map.
DataMap<TData> weight;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(3,
......@@ -81,7 +160,7 @@ WeightMap<TData> GetWeightDataCUDA(
if (weight.find(basisKeys) == weight.end())
{
weight[basisKeys] = std::vector<TData *>(nDim, 0);
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
for (size_t d = 0; d < nDim; d++)
{
auto ndata = expPtr->GetBasis(d)->GetW().size();
Array<OneD, TData> w(ndata);
......@@ -112,4 +191,17 @@ WeightMap<TData> GetWeightDataCUDA(
}
return weight;
}
template <typename TData>
void DeallocateDataCUDA(DataMap<TData> &dataMap)
{
for (auto &data : dataMap)
{
for (size_t i = 0; i < data.second.size(); i++)
{
cudaFree(data.second[i]);
}
}
}
} // namespace Nektar::Operators
#include "OperatorPhysDeriv.hpp"
namespace Nektar::Operators
{
template <> const std::string PhysDeriv<>::key = "PhysDeriv";
template <> const std::string PhysDeriv<>::default_impl = "StdMat";
} // namespace Nektar::Operators
#pragma once
#include "Field.hpp"
#include "Operator.hpp"
namespace Nektar::Operators
{
// PhysDeriv base class
// Defines the apply operator to enforce apply parameter types
template <typename TData> class OperatorPhysDeriv : public Operator<TData>
{
public:
virtual ~OperatorPhysDeriv() = default;
OperatorPhysDeriv(const MultiRegions::ExpListSharedPtr &expansionList)
: Operator<TData>(expansionList)
{
}
virtual void apply(Field<TData, FieldState::Phys> &in,
Field<TData, FieldState::Phys> &out0,
Field<TData, FieldState::Phys> &out1,
Field<TData, FieldState::Phys> &out2) = 0;
virtual void operator()(Field<TData, FieldState::Phys> &in,
Field<TData, FieldState::Phys> &out0,
Field<TData, FieldState::Phys> &out1,
Field<TData, FieldState::Phys> &out2)
{
apply(in, out0, out1, out2);
}
};
// Descriptor / traits class for PhysDeriv
template <typename TData = default_fp_type> struct PhysDeriv
{
using class_name = OperatorPhysDeriv<TData>;
using FieldIn = Field<TData, FieldState::Phys>;
using FieldOut = Field<TData, FieldState::Phys>;
static const std::string key;
static const std::string default_impl;
static std::shared_ptr<class_name> create(
const MultiRegions::ExpListSharedPtr &expansionList,
std::string pKey = "")
{
return Operator<TData>::template create<PhysDeriv>(expansionList, pKey);
}
};
namespace detail
{
// Template for PhysDeriv implementations
template <typename TData, typename Op> class OperatorPhysDerivImpl;
} // namespace detail
} // namespace Nektar::Operators
#include "PhysDerivCUDA.hpp"
namespace Nektar::Operators::detail
{
template <>
std::string OperatorPhysDerivImpl<double, ImplCUDA>::className =
GetOperatorFactory<double>().RegisterCreatorFunction(
"PhysDerivCUDA", OperatorPhysDerivImpl<double, ImplCUDA>::instantiate,
"...");
}
#include "MemoryRegionCUDA.hpp"
#include "Operators/OperatorHelper.cuh"
#include "Operators/OperatorPhysDeriv.hpp"
#include "Operators/PhysDeriv/PhysDerivCUDAKernels.cuh"
namespace Nektar::Operators::detail
{
template <typename TData, bool DEFORMED = false>
void PhysDeriv1DKernel(const size_t gridSize, const size_t blockSize,
const size_t nq0, const size_t nCoord,
const size_t nElmts, const TData *D0,
const size_t dfSize, TData *df, const TData *in,
TData *out0, TData *out1, TData *out2);
template <typename TData, bool DEFORMED = false>
void PhysDeriv2DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nq0,
const size_t nq1, const size_t nCoord,
const size_t nElmts, const TData *D0, const TData *D1,
const TData *Z0, const TData *Z1, const size_t dfSize,
TData *df, const TData *in, TData *out0, TData *out1,
TData *out2);
template <typename TData, bool DEFORMED = false>
void PhysDeriv3DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nq0,
const size_t nq1, const size_t nq2, const size_t nCoord,
const size_t nElmts, const TData *D0, const TData *D1,
const TData *D2, const TData *Z0, const TData *Z1,
const TData *Z2, const size_t dfSize, TData *df,
const TData *in, TData *out0, TData *out1, TData *out2);
// Matrix-free implementation
template <typename TData>
class OperatorPhysDerivImpl<TData, ImplCUDA> : public OperatorPhysDeriv<TData>
{
public:
OperatorPhysDerivImpl(const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorPhysDeriv<TData>(expansionList)
{
size_t nTotElmts = this->m_expansionList->GetNumElmts();
size_t nDim = this->m_expansionList->GetShapeDimension();
size_t nCoord = this->m_expansionList->GetCoordim(0);
// Initialise derivative factor.
m_dfSize = Operator<TData>::GetGeometricFactorSize();
auto derivFac = Operator<TData>::SetDerivativeFactor(m_dfSize);
cudaMalloc((void **)&m_derivFac,
sizeof(TData) * nDim * nCoord * m_dfSize);
for (size_t d = 0; d < nDim * nCoord; d++)
{
auto hostPtr = derivFac[d].get();
auto devicePtr = m_derivFac + d * m_dfSize;
cudaMemcpy(devicePtr, hostPtr, sizeof(TData) * m_dfSize,
cudaMemcpyHostToDevice);
}
// Initialize points.
m_Z = GetPointDataCUDA<TData>(expansionList);
// Initialize derivative matrix.
m_D = GetDerivativeDataCUDA<TData>(expansionList);
}
~OperatorPhysDerivImpl(void)
{
DeallocateDataCUDA<TData>(m_Z);
DeallocateDataCUDA<TData>(m_D);
cudaFree(m_derivFac);
}
void apply(Field<TData, FieldState::Phys> &in,
Field<TData, FieldState::Phys> &out0,
Field<TData, FieldState::Phys> &out1,
Field<TData, FieldState::Phys> &out2) override
{
// Initialize pointers.
auto const *inptr =
in.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
auto *outptr0 =
out0.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
auto *outptr1 =
out1.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
auto *outptr2 =
out2.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
auto dfptr = m_derivFac;
// Initialize index.
size_t expIdx = 0;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(
3, LibUtilities::NullBasisKey);
for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
++block_idx)
{
// Determine shape and type of the element.
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = in.GetBlocks()[block_idx].num_elements;
auto nqTot = expPtr->GetTotPoints();
auto nCoord = expPtr->GetCoordim();
auto shape = expPtr->DetShapeType();
auto deformed = expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed;
// Determine CUDA grid parameters.
m_gridSize = nElmts / m_blockSize;
m_gridSize += (nElmts % m_blockSize == 0) ? 0 : 1;
// Fetch basis key for the current element type.
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Function call to kernel functions.
if (expPtr->GetShapeDimension() == 1)
{
auto D0 = m_D[basisKeys][0];
auto nq0 = expPtr->GetNumPoints(0);
if (deformed)
{
PhysDeriv1DKernel<TData, true>(
m_gridSize, m_blockSize, nq0, nCoord, nElmts, D0,
m_dfSize, dfptr, inptr, outptr0, outptr1, outptr2);
}
else
{
PhysDeriv1DKernel<TData, false>(
m_gridSize, m_blockSize, nq0, nCoord, nElmts, D0,
m_dfSize, dfptr, inptr, outptr0, outptr1, outptr2);
}
}
else if (expPtr->GetShapeDimension() == 2)
{
auto D0 = m_D[basisKeys][0];
auto D1 = m_D[basisKeys][1];
auto Z0 = m_Z[basisKeys][0];
auto Z1 = m_Z[basisKeys][1];
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
if (deformed)
{
PhysDeriv2DKernel<TData, true>(
m_gridSize, m_blockSize, shape, nq0, nq1, nCoord,
nElmts, D0, D1, Z0, Z1, m_dfSize, dfptr, inptr, outptr0,
outptr1, outptr2);
}
else
{
PhysDeriv2DKernel<TData, false>(
m_gridSize, m_blockSize, shape, nq0, nq1, nCoord,
nElmts, D0, D1, Z0, Z1, m_dfSize, dfptr, inptr, outptr0,
outptr1, outptr2);
}
}
else if (expPtr->GetShapeDimension() == 3)
{
auto D0 = m_D[basisKeys][0];
auto D1 = m_D[basisKeys][1];
auto D2 = m_D[basisKeys][2];
auto Z0 = m_Z[basisKeys][0];
auto Z1 = m_Z[basisKeys][1];
auto Z2 = m_Z[basisKeys][2];
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
auto nq2 = expPtr->GetNumPoints(2);
if (deformed)
{
PhysDeriv3DKernel<TData, true>(
m_gridSize, m_blockSize, shape, nq0, nq1, nq2, nCoord,
nElmts, D0, D1, D2, Z0, Z1, Z2, m_dfSize, dfptr, inptr,
outptr0, outptr1, outptr2);
}
else
{
PhysDeriv3DKernel<TData, false>(
m_gridSize, m_blockSize, shape, nq0, nq1, nq2, nCoord,
nElmts, D0, D1, D2, Z0, Z1, Z2, m_dfSize, dfptr, inptr,
outptr0, outptr1, outptr2);
}
}
// Increment pointer and index for next element type.
dfptr += deformed ? nqTot * nElmts : nElmts;
outptr0 += nqTot * nElmts;
outptr1 += nqTot * nElmts;
outptr2 += nqTot * nElmts;
inptr += nqTot * nElmts;
expIdx += nElmts;
}
}
static std::unique_ptr<Operator<TData>> instantiate(
MultiRegions::ExpListSharedPtr expansionList)
{
return std::make_unique<OperatorPhysDerivImpl<TData, ImplCUDA>>(
expansionList);
}
static std::string className;
private:
TData *m_derivFac;
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>> m_D;
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>> m_Z;
size_t m_dfSize;
size_t m_blockSize = 32;
size_t m_gridSize;
};
template <typename TData, bool DEFORMED>
void PhysDeriv1DKernel(const size_t gridSize, const size_t blockSize,
const size_t nq0, const size_t nCoord,
const size_t nElmts, const TData *D0,
const size_t dfSize, TData *df, const TData *in,
TData *out0, TData *out1, TData *out2)
{
// Compute tensorial derivative.
PhysDerivTensor1DKernel<TData>
<<<gridSize, blockSize>>>(nq0, nElmts, D0, in, out0);
// Compute physical derivative.
PhysDerivSegKernel<TData, DEFORMED><<<gridSize, blockSize>>>(
nq0, nCoord, nElmts, dfSize, df, out0, out1, out2);
}
template <typename TData, bool DEFORMED>
void PhysDeriv2DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nq0,
const size_t nq1, const size_t nCoord,
const size_t nElmts, const TData *D0, const TData *D1,
const TData *Z0, const TData *Z1, const size_t dfSize,
TData *df, const TData *in, TData *out0, TData *out1,
TData *out2)
{
// Compute tensorial derivative.
PhysDerivTensor2DKernel<TData>
<<<gridSize, blockSize>>>(nq0, nq1, nElmts, D0, D1, in, out0, out1);
// Compute physical derivative.
if (shapetype == LibUtilities::Quad)
{
PhysDerivQuadKernel<TData, DEFORMED><<<gridSize, blockSize>>>(
nq0, nq1, nCoord, nElmts, dfSize, df, out0, out1, out2);
}
else if (shapetype == LibUtilities::Tri)
{
PhysDerivTriKernel<TData, DEFORMED><<<gridSize, blockSize>>>(
nq0, nq1, nCoord, nElmts, Z0, Z1, dfSize, df, out0, out1, out2);
}
}
template <typename TData, bool DEFORMED>
void PhysDeriv3DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nq0,
const size_t nq1, const size_t nq2, const size_t nCoord,
const size_t nElmts, const TData *D0, const TData *D1,
const TData *D2, const TData *Z0, const TData *Z1,
const TData *Z2, const size_t dfSize, TData *df,
const TData *in, TData *out0, TData *out1, TData *out2)
{
// Compute tensorial derivative.
PhysDerivTensor3DKernel<TData><<<gridSize, blockSize>>>(
nq0, nq1, nq2, nElmts, D0, D1, D2, in, out0, out1, out2);
// Compute physical derivative.
if (shapetype == LibUtilities::Hex)
{
PhysDerivHexKernel<TData, DEFORMED><<<gridSize, blockSize>>>(
nq0, nq1, nq2, nCoord, nElmts, dfSize, df, out0, out1, out2);
}
else if (shapetype == LibUtilities::Tet)
{
PhysDerivTetKernel<TData, DEFORMED>
<<<gridSize, blockSize>>>(nq0, nq1, nq2, nCoord, nElmts, Z0, Z1, Z2,
dfSize, df, out0, out1, out2);
}
else if (shapetype == LibUtilities::Prism)
{
PhysDerivPrismKernel<TData, DEFORMED>
<<<gridSize, blockSize>>>(nq0, nq1, nq2, nCoord, nElmts, Z0, Z1, Z2,
dfSize, df, out0, out1, out2);
}
else if (shapetype == LibUtilities::Pyr)
{
PhysDerivPyrKernel<TData, DEFORMED>
<<<gridSize, blockSize>>>(nq0, nq1, nq2, nCoord, nElmts, Z0, Z1, Z2,
dfSize, df, out0, out1, out2);
}
}
} // namespace Nektar::Operators::detail
namespace Nektar::Operators::detail
{
template <typename TData, bool DEFORMED>
__global__ void PhysDerivSegKernel(const size_t nq0, const size_t ncoord,
const size_t nelmt, const size_t dfSize,
TData *df, TData *inout0, TData *inout1,
TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
size_t ndf = ncoord;
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * e);
if (ncoord > 1)
{
inoutptr[1] = inout1 + (nq0 * e);
}
if (ncoord > 2)
{
inoutptr[2] = inout2 + (nq0 * e);
}
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * e : e;
}
// Compute derivative.
for (size_t j = 0; j < nq0; ++j)
{
size_t dfindex = DEFORMED ? j : 0;
for (size_t d = ndf; d > 0; d--)
{
inoutptr[d - 1][j] = inoutptr[0][j] * dfptr[d - 1][dfindex];
}
}
}
template <typename TData, bool DEFORMED>
__global__ void PhysDerivQuadKernel(const size_t nq0, const size_t nq1,
const size_t ncoord, const size_t nelmt,
const size_t dfSize, TData *df,
TData *inout0, TData *inout1, TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
auto ndf = 2 * ncoord;
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * nq1 * e);
inoutptr[1] = inout1 + (nq0 * nq1 * e);
if (ncoord > 2)
{
inoutptr[2] = inout2 + (nq0 * nq1 * e);
}
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * nq1 * e : e;
}
// Compute derivative.
for (size_t j = 0, cnt_ji = 0; j < nq1; ++j)
{
for (size_t i = 0; i < nq0; ++i, ++cnt_ji)
{
TData d0 = inoutptr[0][cnt_ji];
TData d1 = inoutptr[1][cnt_ji];
size_t dfindex = DEFORMED ? cnt_ji : 0;
for (size_t d = 0; d < ncoord; d++)
{
inoutptr[d][cnt_ji] =
d0 * dfptr[2 * d][dfindex] + d1 * dfptr[2 * d + 1][dfindex];
}
}
}
}
template <typename TData, bool DEFORMED>
__global__ void PhysDerivTriKernel(const size_t nq0, const size_t nq1,
const size_t ncoord, const size_t nelmt,
const TData *Z0, const TData *Z1,
const size_t dfSize, TData *df,
TData *inout0, TData *inout1, TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
auto ndf = 2 * ncoord;
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * nq1 * e);
inoutptr[1] = inout1 + (nq0 * nq1 * e);
if (ncoord > 2)
{
inoutptr[2] = inout2 + (nq0 * nq1 * e);
}
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * nq1 * e : e;
}
// Compute derivative.
for (int j = 0, cnt_ji = 0; j < nq1; ++j)
{
TData xfrm0 = 2.0 / (1.0 - Z1[j]);
for (int i = 0; i < nq0; ++i, ++cnt_ji)
{
TData d0 = inoutptr[0][cnt_ji];
TData d1 = inoutptr[1][cnt_ji];
// Moving from standard to collapsed coordinates.
TData xfrm1 = 0.5 * (1.0 + Z0[i]);
d0 *= xfrm0;
d1 += d0 * xfrm1;
// Multiply by derivative factors.
size_t dfindex = DEFORMED ? cnt_ji : 0;
for (size_t d = 0; d < ncoord; d++)
{
inoutptr[d][cnt_ji] =
d0 * dfptr[2 * d][dfindex] + d1 * dfptr[2 * d + 1][dfindex];
}
}
}
}
template <typename TData, bool DEFORMED>
__global__ void PhysDerivHexKernel(const size_t nq0, const size_t nq1,
const size_t nq2, const size_t ncoord,
const size_t nelmt, const size_t dfSize,
TData *df, TData *inout0, TData *inout1,
TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
size_t ndf = 3 * ncoord;
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * nq1 * nq2 * e);
inoutptr[1] = inout1 + (nq0 * nq1 * nq2 * e);
inoutptr[2] = inout2 + (nq0 * nq1 * nq2 * e);
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * nq1 * nq2 * e : e;
}
// Compute derivative.
for (size_t k = 0, cnt_ijk = 0; k < nq2; ++k)
{
for (size_t j = 0; j < nq1; ++j)
{
for (size_t i = 0; i < nq0; ++i, ++cnt_ijk)
{
TData tmp[] = {inoutptr[0][cnt_ijk], inoutptr[1][cnt_ijk],
inoutptr[2][cnt_ijk]};
// Multiply by derivative factors.
size_t dfindex = DEFORMED ? cnt_ijk : 0;
for (size_t d = 0; d < ncoord; ++d)
{
TData sum = 0.0;
for (size_t n = 0; n < ncoord; ++n)
{
sum += tmp[n] * dfptr[d * ncoord + n][dfindex];
}
inoutptr[d][cnt_ijk] = sum;
}
}
}
}
}
template <typename TData, bool DEFORMED>
__global__ void PhysDerivTetKernel(const size_t nq0, const size_t nq1,
const size_t nq2, const size_t ncoord,
const size_t nelmt, const TData *Z0,
const TData *Z1, const TData *Z2,
const size_t dfSize, TData *df,
TData *inout0, TData *inout1, TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
size_t ndf = 3 * ncoord;
// Allocate workspace memory.
TData *wsp0 = new TData[nq0 * nq1 * nq2];
TData *wsp1 = new TData[nq0 * nq1 * nq2];
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * nq1 * nq2 * e);
inoutptr[1] = inout1 + (nq0 * nq1 * nq2 * e);
inoutptr[2] = inout2 + (nq0 * nq1 * nq2 * e);
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * nq1 * nq2 * e : e;
}
// Moving from standard to collapsed coordinates.
for (int k = 0, eta0 = 0; k < nq2; ++k)
{
TData xfrm_eta2 = 2.0 / (1.0 - Z2[k]);
for (int j = 0; j < nq1; ++j)
{
TData xfrm_eta1 = 2.0 / (1.0 - Z1[j]);
TData xfrm = xfrm_eta1 * xfrm_eta2;
for (int i = 0; i < nq0; ++i, ++eta0)
{
inoutptr[0][eta0] *= xfrm;
wsp0[eta0] = inoutptr[0][eta0];
}
}
}
for (int k = 0, eta0 = 0; k < nq2; ++k)
{
TData xfrm_eta2 = 2.0 / (1.0 - Z2[k]);
for (int j = 0; j < nq1; ++j)
{
for (int i = 0; i < nq0; ++i, ++eta0)
{
TData xfrm_eta0 = 0.5 * (1.0 + Z0[i]);
wsp0[eta0] = xfrm_eta0 * wsp0[eta0];
wsp1[eta0] = xfrm_eta2 * inoutptr[1][eta0];
inoutptr[1][eta0] = wsp0[eta0] + wsp1[eta0];
}
}
}
for (int k = 0, eta0 = 0; k < nq2; ++k)
{
for (int j = 0; j < nq1; ++j)
{
TData xfrm_eta1 = 0.5 * (1.0 + Z1[j]);
for (int i = 0; i < nq0; ++i, ++eta0)
{
inoutptr[2][eta0] += wsp0[eta0] + wsp1[eta0] * xfrm_eta1;
}
}
}
// Compute derivative.
for (size_t k = 0, cnt_ijk = 0; k < nq2; ++k)
{
for (size_t j = 0; j < nq1; ++j)
{
for (size_t i = 0; i < nq0; ++i, ++cnt_ijk)
{
TData tmp[] = {inoutptr[0][cnt_ijk], inoutptr[1][cnt_ijk],
inoutptr[2][cnt_ijk]};
// Multiply by derivative factors.
size_t dfindex = DEFORMED ? cnt_ijk : 0;
for (size_t d = 0; d < ncoord; ++d)
{
TData sum = 0.0;
for (size_t n = 0; n < ncoord; ++n)
{
sum += tmp[n] * dfptr[d * ncoord + n][dfindex];
}
inoutptr[d][cnt_ijk] = sum;
}
}
}
}
}
template <typename TData, bool DEFORMED>
__global__ void PhysDerivPrismKernel(
const size_t nq0, const size_t nq1, const size_t nq2, const size_t ncoord,
const size_t nelmt, const TData *Z0, const TData *Z1, const TData *Z2,
const size_t dfSize, TData *df, TData *inout0, TData *inout1, TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
size_t ndf = 3 * ncoord;
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * nq1 * nq2 * e);
inoutptr[1] = inout1 + (nq0 * nq1 * nq2 * e);
inoutptr[2] = inout2 + (nq0 * nq1 * nq2 * e);
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * nq1 * nq2 * e : e;
}
// Compute derivative.
for (size_t k = 0, cnt_ijk = 0; k < nq2; ++k)
{
TData xfrm_eta2 = 2.0 / (1.0 - Z2[k]);
for (size_t j = 0; j < nq1; ++j)
{
for (size_t i = 0; i < nq0; ++i, ++cnt_ijk)
{
TData d0 = inoutptr[0][cnt_ijk];
TData d1 = inoutptr[1][cnt_ijk];
TData d2 = inoutptr[2][cnt_ijk];
// Chain-rule for eta_0 and eta_2.
TData xfrm_eta0 = 0.5 * (1.0 + Z0[i]);
d0 *= xfrm_eta2;
d2 += xfrm_eta0 * d0;
// Multiply by derivative factors.
size_t dfindex = DEFORMED ? cnt_ijk : 0;
inoutptr[0][cnt_ijk] = d0 * dfptr[0][dfindex] +
d1 * dfptr[1][dfindex] +
d2 * dfptr[2][dfindex];
inoutptr[1][cnt_ijk] = d0 * dfptr[3][dfindex] +
d1 * dfptr[4][dfindex] +
d2 * dfptr[5][dfindex];
inoutptr[2][cnt_ijk] = d0 * dfptr[6][dfindex] +
d1 * dfptr[7][dfindex] +
d2 * dfptr[8][dfindex];
}
}
}
}
template <typename TData, bool DEFORMED>
__global__ void PhysDerivPyrKernel(const size_t nq0, const size_t nq1,
const size_t nq2, const size_t ncoord,
const size_t nelmt, const TData *Z0,
const TData *Z1, const TData *Z2,
const size_t dfSize, TData *df,
TData *inout0, TData *inout1, TData *inout2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
size_t ndf = 3 * ncoord;
// Assign pointers.
TData **inoutptr = new TData *[ncoord];
inoutptr[0] = inout0 + (nq0 * nq1 * nq2 * e);
inoutptr[1] = inout1 + (nq0 * nq1 * nq2 * e);
inoutptr[2] = inout2 + (nq0 * nq1 * nq2 * e);
TData **dfptr = new TData *[ndf];
for (size_t d = 0; d < ndf; d++)
{
dfptr[d] = df + d * dfSize;
dfptr[d] += DEFORMED ? nq0 * nq1 * nq2 * e : e;
}
// Compute derivative.
for (size_t k = 0, cnt_ijk = 0; k < nq2; ++k)
{
TData xfrm_eta2 = 2.0 / (1.0 - Z2[k]);
for (size_t j = 0; j < nq1; ++j)
{
TData xfrm_eta1 = 0.5 * (1.0 + Z1[j]);
for (size_t i = 0; i < nq0; ++i, ++cnt_ijk)
{
TData d0 = inoutptr[0][cnt_ijk];
TData d1 = inoutptr[1][cnt_ijk];
TData d2 = inoutptr[2][cnt_ijk];
// Chain-rule for eta_0 and eta_2.
TData xfrm_eta0 = 0.5 * (1.0 + Z0[i]);
d0 *= xfrm_eta2;
d1 *= xfrm_eta2;
d2 += xfrm_eta0 * d0 + xfrm_eta1 * d1;
// Multiply by derivative factors.
size_t dfindex = DEFORMED ? cnt_ijk : 0;
inoutptr[0][cnt_ijk] = d0 * dfptr[0][dfindex] +
d1 * dfptr[1][dfindex] +
d2 * dfptr[2][dfindex];
inoutptr[1][cnt_ijk] = d0 * dfptr[3][dfindex] +
d1 * dfptr[4][dfindex] +
d2 * dfptr[5][dfindex];
inoutptr[2][cnt_ijk] = d0 * dfptr[6][dfindex] +
d1 * dfptr[7][dfindex] +
d2 * dfptr[8][dfindex];
}
}
}
}
template <typename TData>
__global__ void PhysDerivTensor1DKernel(const size_t nq0, const size_t nelmt,
const TData *D0, const TData *in,
TData *out0)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
// Assign pointers.
const TData *inptr = in + (nq0 * e);
TData *outptr0 = out0 + (nq0 * e);
// Direction 1
for (size_t i = 0; i < nq0; ++i)
{
TData sum = 0.0;
for (size_t k = 0; k < nq0; ++k)
{
sum += D0[k * nq0 + i] * inptr[k];
}
outptr0[i] = sum;
}
}
template <typename TData>
__global__ void PhysDerivTensor2DKernel(const size_t nq0, const size_t nq1,
const size_t nelmt, const TData *D0,
const TData *D1, const TData *in,
TData *out0, TData *out1)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
// Assign pointers.
const TData *inptr = in + (nq0 * nq1 * e);
TData *outptr0 = out0 + (nq0 * nq1 * e);
TData *outptr1 = out1 + (nq0 * nq1 * e);
// Direction 1
for (size_t i = 0; i < nq0; ++i)
{
for (size_t j = 0; j < nq1; ++j)
{
TData sum = 0.0;
for (size_t k = 0; k < nq0; ++k)
{
sum += D0[k * nq0 + i] * inptr[j * nq0 + k];
}
outptr0[j * nq0 + i] = sum;
}
}
// Direction 2
for (size_t i = 0; i < nq0; ++i)
{
for (size_t j = 0; j < nq1; ++j)
{
TData sum = 0.0;
for (size_t k = 0; k < nq1; ++k)
{
sum += D1[k * nq1 + j] * inptr[k * nq0 + i];
}
outptr1[j * nq0 + i] = sum;
}
}
}
template <typename TData>
__global__ void PhysDerivTensor3DKernel(const size_t nq0, const size_t nq1,
const size_t nq2, const size_t nelmt,
const TData *D0, const TData *D1,
const TData *D2, const TData *in,
TData *out0, TData *out1, TData *out2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
// Assign pointers.
const TData *inptr = in + (nq0 * nq1 * nq2 * e);
TData *outptr0 = out0 + (nq0 * nq1 * nq2 * e);
TData *outptr1 = out1 + (nq0 * nq1 * nq2 * e);
TData *outptr2 = out2 + (nq0 * nq1 * nq2 * e);
// Direction 1
for (size_t i = 0; i < nq0; ++i)
{
for (size_t j = 0; j < nq1 * nq2; ++j)
{
TData sum = 0.0;
for (size_t k = 0; k < nq0; ++k)
{
sum += D0[k * nq0 + i] * inptr[j * nq0 + k];
}
outptr0[j * nq0 + i] = sum;
}
}
// Direction 2
for (size_t block = 0; block < nq2; ++block)
{
size_t start = block * nq0 * nq1;
for (size_t i = 0; i < nq0; ++i)
{
for (size_t j = 0; j < nq1; ++j)
{
TData sum = 0.0;
for (size_t k = 0; k < nq1; ++k)
{
sum += D1[k * nq1 + j] * inptr[start + k * nq0 + i];
}
outptr1[start + j * nq0 + i] = sum;
}
}
}
// Direction 3
for (size_t i = 0; i < nq0 * nq1; ++i)
{
for (size_t j = 0; j < nq2; ++j)
{
TData sum = 0.0;
for (size_t k = 0; k < nq2; ++k)
{
sum += D2[k * nq2 + j] * inptr[k * nq0 * nq1 + i];
}
outptr2[j * nq0 * nq1 + i] = sum;
}
}
}
} // namespace Nektar::Operators::detail
#include "PhysDerivStdMat.hpp"
namespace Nektar::Operators::detail
{
// Add different PhysDeriv implementations to the factory.
template <>
std::string OperatorPhysDerivImpl<double, ImplStdMat>::className =
GetOperatorFactory<double>().RegisterCreatorFunction(
"PhysDerivStdMat",
OperatorPhysDerivImpl<double, ImplStdMat>::instantiate, "...");
} // namespace Nektar::Operators::detail
#include "Operators/OperatorPhysDeriv.hpp"
#include <StdRegions/StdExpansion.h>
namespace Nektar::Operators::detail
{
// standard matrix implementation
template <typename TData>
class OperatorPhysDerivImpl<TData, ImplStdMat> : public OperatorPhysDeriv<TData>
{
public:
OperatorPhysDerivImpl(const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorPhysDeriv<TData>(expansionList)
{
size_t nTotElmts = this->m_expansionList->GetNumElmts();
size_t nDim = this->m_expansionList->GetShapeDimension();
// Initialise derivative factor.
size_t dfSize = Operator<TData>::GetGeometricFactorSize();
m_derivFac = Operator<TData>::SetDerivativeFactor(dfSize);
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(
3, LibUtilities::NullBasisKey);
// Loop over the elements of expansionList.
for (size_t e = 0; e < nTotElmts; ++e)
{
auto const expPtr = this->m_expansionList->GetExp(e);
// Fetch basiskeys of current element.
for (size_t d = 0; d < nDim; d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Copy data to m_matPtr, if necessary.
if (m_matPtr.find(basisKeys) == m_matPtr.end())
{
size_t nqTot = expPtr->GetTotPoints();
auto &matPtr = m_matPtr[basisKeys];
matPtr = Array<OneD, Array<OneD, TData>>(nDim);
Array<OneD, NekDouble> tmp(nqTot), t;
for (size_t d = 0; d < nDim; ++d)
{
// Get deriv matrix.
matPtr[d] = Array<OneD, TData>(nqTot * nqTot);
for (int i = 0; i < nqTot; ++i)
{
Vmath::Zero(nqTot, tmp, 1);
tmp[i] = 1.0;
expPtr->GetStdExp()->PhysDeriv(
d, tmp, t = matPtr[d] + i * nqTot);
}
}
}
}
}
void apply(Field<TData, FieldState::Phys> &in,
Field<TData, FieldState::Phys> &out0,
Field<TData, FieldState::Phys> &out1,
Field<TData, FieldState::Phys> &out2) override
{
// Initialize pointers.
auto const *inptr = in.GetStorage().GetCPUPtr();
std::vector<TData *> outptr{out0.GetStorage().GetCPUPtr(),
out1.GetStorage().GetCPUPtr(),
out2.GetStorage().GetCPUPtr()};
// Initialize index.
size_t expIdx = 0;
size_t dfindex = 0;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(
3, LibUtilities::NullBasisKey);
for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
++block_idx)
{
// Determine shape and type of the element.
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = in.GetBlocks()[block_idx].num_elements;
auto nDim = expPtr->GetShapeDimension();
auto nCoord = expPtr->GetCoordim();
auto nqTot = expPtr->GetTotPoints();
auto ptsKeys = expPtr->GetPointsKeys();
auto deformed = expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed;
Array<OneD, Array<OneD, TData>> deriv(nDim);
// Fetch basis key for the current element type.
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Get derivative matrix.
auto &matPtr = m_matPtr[basisKeys];
for (size_t d = 0; d < nDim; ++d)
{
// Perform matrix-matrix multiply.
deriv[d] = Array<OneD, TData>(nqTot * nElmts);
Blas::Dgemm('N', 'N', nqTot, nElmts, nqTot, 1.0,
matPtr[d].get(), nqTot, inptr, nqTot, 0.0,
deriv[d].get(), nqTot);
}
if (deformed)
{
for (size_t i = 0; i < nCoord; i++)
{
Vmath::Vmul(nqTot * nElmts,
m_derivFac[i * nDim].get() + dfindex, 1,
deriv[0].get(), 1, outptr[i], 1);
for (size_t d = 1; d < nDim; d++)
{
Vmath::Vvtvp(nqTot * nElmts,
m_derivFac[i * nDim + d].get() + dfindex,
1, deriv[d].get(), 1, outptr[i], 1,
outptr[i], 1);
}
outptr[i] += nqTot * nElmts;
}
dfindex += nqTot * nElmts;
}
else
{
for (size_t i = 0; i < nCoord; i++)
{
for (size_t e = 0; e < nElmts; ++e)
{
Vmath::Smul(nqTot, m_derivFac[i * nDim][dfindex + e],
deriv[0].get() + e * nqTot, 1, outptr[i],
1);
for (size_t d = 1; d < nDim; d++)
{
Vmath::Svtvp(nqTot,
m_derivFac[i * nDim + d][dfindex + e],
deriv[d].get() + e * nqTot, 1,
outptr[i], 1, outptr[i], 1);
}
outptr[i] += nqTot;
}
}
dfindex += nElmts;
}
inptr += nqTot * nElmts;
expIdx += nElmts;
}
}
static std::unique_ptr<Operator<TData>> instantiate(
const MultiRegions::ExpListSharedPtr &expansionList)
{
return std::make_unique<OperatorPhysDerivImpl<TData, ImplStdMat>>(
expansionList);
}
static std::string className;
private:
Array<OneD, Array<OneD, TData>> m_derivFac;
std::map<std::vector<LibUtilities::BasisKey>,
Array<OneD, Array<OneD, TData>>>
m_matPtr;
};
} // namespace Nektar::Operators::detail
......@@ -15,6 +15,7 @@
#include "Field.hpp"
#include "Operators/OperatorBwdTrans.hpp"
#include "Operators/OperatorIProductWRTBase.hpp"
#include "Operators/OperatorPhysDeriv.hpp"
#include <LibUtilities/BasicUtils/SessionReader.h>
//#include <LibUtilities/SimdLib/tinysimd.hpp>
......@@ -188,7 +189,7 @@ int main(int argc, char *argv[])
// Check output values.
std::cout << "Out:" << std::endl;
auto outptr = outPhys.GetStorage().GetCPUPtr();
for (auto const &block : out.GetBlocks())
for (auto const &block : outPhys.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
......@@ -353,6 +354,180 @@ int main(int argc, char *argv[])
std::cout << std::endl;
}
#endif
// Test PhysDeriv Implementation
{
std::cout << "PhysDeriv (StdMat) test starts." << std::endl;
// Create two Field objects with a MemoryRegionCPU backend by default
// for the inner product with respect to base
auto inPhys = Field<double, FieldState::Phys>::create(blocks_phys);
auto outPhys0 = Field<double, FieldState::Phys>::create(blocks_phys);
auto outPhys1 = Field<double, FieldState::Phys>::create(blocks_phys);
auto outPhys2 = Field<double, FieldState::Phys>::create(blocks_phys);
// Assign input values
auto *inptr = inPhys.GetStorage().GetCPUPtr();
for (auto const &block : inPhys.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
// Each element is the index of the quadrature points
// this is useful for testing reshapes
*(inptr++) = phys;
}
}
}
std::cout << "Initial shape:\n" << inPhys << std::endl;
// PhysDeriv
PhysDeriv<>::create(explist, "StdMat")
->apply(inPhys, outPhys0, outPhys1, outPhys2);
// Check output values.
std::cout << "Out0:" << std::endl;
auto *outptr0 = outPhys0.GetStorage().GetCPUPtr();
for (auto const &block : outPhys0.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr0++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
// Check output values.
std::cout << "Out1:" << std::endl;
auto *outptr1 = outPhys1.GetStorage().GetCPUPtr();
for (auto const &block : outPhys1.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr1++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
// Check output values.
std::cout << "Out2:" << std::endl;
auto *outptr2 = outPhys2.GetStorage().GetCPUPtr();
for (auto const &block : outPhys2.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr2++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
// Test PhysDeriv (CUDA) Implementation
#ifdef NEKTAR_USE_CUDA
{
std::cout << "PhysDeriv (CUDA) test starts." << std::endl;
// Create two Field objects with a MemoryRegionCPU backend by default
// for the inner product with respect to base
auto inPhys = Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(
blocks_phys);
auto outPhys0 =
Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(
blocks_phys);
auto outPhys1 =
Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(
blocks_phys);
auto outPhys2 =
Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(
blocks_phys);
// Assign input values
std::cout << "Initial shape: " << std::endl;
auto *inptr =
inPhys.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : inPhys.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
// Each element is the index of the quadrature points
// this is useful for testing reshapes
*(inptr++) = phys;
std::cout << phys << " ";
}
}
}
std::cout << std::endl << std::endl;
// PhysDeriv
PhysDeriv<>::create(explist, "CUDA")
->apply(inPhys, outPhys0, outPhys1, outPhys2);
// Check output values.
std::cout << "Out0:" << std::endl;
auto *outptr0 =
outPhys0.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : outPhys0.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr0++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
// Check output values.
std::cout << "Out1:" << std::endl;
auto *outptr1 =
outPhys1.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : outPhys1.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr1++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
// Check output values.
std::cout << "Out2:" << std::endl;
auto *outptr2 =
outPhys2.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : outPhys2.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
std::cout << *(outptr2++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
#endif
std::cout << "END" << std::endl;
return 0;
......
......@@ -44,6 +44,26 @@ IF (NEKTAR_USE_CUDA)
-DTEST_PATH="${CMAKE_SOURCE_DIR}")
ENDIF()
add_executable(test_physderiv test_physderiv.cpp)
target_link_libraries(test_physderiv PRIVATE Operators)
target_link_libraries(test_physderiv PRIVATE ${NEKTAR++_LIBRARIES})
target_link_libraries(test_physderiv PRIVATE Boost::unit_test_framework)
target_include_directories(test_physderiv PRIVATE "${CMAKE_SOURCE_DIR}")
target_include_directories(test_physderiv PRIVATE ${NEKTAR++_INCLUDE_DIRS})
target_compile_definitions(test_physderiv PRIVATE
-DTEST_PATH="${CMAKE_SOURCE_DIR}")
IF (NEKTAR_USE_CUDA)
add_executable(test_physderivcuda test_physderivcuda.cpp)
target_link_libraries(test_physderivcuda PRIVATE Operators)
target_link_libraries(test_physderivcuda PRIVATE ${NEKTAR++_LIBRARIES})
target_link_libraries(test_physderivcuda PRIVATE Boost::unit_test_framework)
target_include_directories(test_physderivcuda PRIVATE "${CMAKE_SOURCE_DIR}")
target_include_directories(test_physderivcuda PRIVATE ${NEKTAR++_INCLUDE_DIRS})
target_compile_definitions(test_physderivcuda PRIVATE
-DTEST_PATH="${CMAKE_SOURCE_DIR}")
ENDIF()
add_test(
NAME BwdTrans
COMMAND test_bwdtrans
......@@ -71,3 +91,17 @@ IF (NEKTAR_USE_CUDA)
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
ENDIF()
add_test(
NAME PhysDeriv
COMMAND test_physderiv
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
IF (NEKTAR_USE_CUDA)
add_test(
NAME PhysDerivCUDA
COMMAND test_physderivcuda
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
ENDIF()
......@@ -34,24 +34,39 @@ template <typename TData, FieldState stateIn = FieldState::Coeff,
class InitFields
{
public:
Field<TData, stateIn> *fixt_in = nullptr;
Field<TData, stateOut> *fixt_out = nullptr;
Field<TData, stateOut> *fixt_expected = nullptr;
Field<TData, stateIn> *fixt_in = nullptr;
Field<TData, stateOut> *fixt_out = nullptr;
Field<TData, stateOut> *fixt_expected = nullptr;
#ifdef NEKTAR_USE_CUDA
Field<TData, stateIn> *fixtcuda_in = nullptr;
Field<TData, stateOut> *fixtcuda_out = nullptr;
Field<TData, stateIn> *fixtcuda_in = nullptr;
Field<TData, stateOut> *fixtcuda_out = nullptr;
#endif
MultiRegions::ExpListSharedPtr fixt_explist{nullptr};
~InitFields()
{
BOOST_TEST_MESSAGE("teardown fixture");
if (fixt_in) delete fixt_in;
if (fixt_out) delete fixt_out;
if (fixt_expected) delete fixt_expected;
if (fixt_in)
{
delete fixt_in;
}
if (fixt_out)
{
delete fixt_out;
}
if (fixt_expected)
{
delete fixt_expected;
}
#ifdef NEKTAR_USE_CUDA
if (fixtcuda_in) delete fixtcuda_in;
if (fixtcuda_out) delete fixtcuda_out;
if (fixtcuda_in)
{
delete fixtcuda_in;
}
if (fixtcuda_out)
{
delete fixtcuda_out;
}
#endif
}
......
......@@ -21,7 +21,8 @@ using namespace Nektar;
class Line : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Line() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
Line() : InitFields<double, FieldState::Coeff, FieldState::Phys>()
{
meshName = "line.xml";
}
};
......@@ -29,7 +30,8 @@ public:
class Square : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Square() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
Square() : InitFields<double, FieldState::Coeff, FieldState::Phys>()
{
meshName = "square.xml";
}
};
......
......@@ -21,7 +21,8 @@ using namespace Nektar;
class Line : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Line() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
Line() : InitFields<double, FieldState::Coeff, FieldState::Phys>()
{
meshName = "line.xml";
}
};
......@@ -29,7 +30,8 @@ public:
class Square : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Square() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
Square() : InitFields<double, FieldState::Coeff, FieldState::Phys>()
{
meshName = "square.xml";
}
};
......
......@@ -21,7 +21,8 @@ using namespace Nektar;
class Line : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{
public:
Line() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
Line() : InitFields<double, FieldState::Phys, FieldState::Coeff>()
{
meshName = "line.xml";
}
};
......@@ -29,7 +30,8 @@ public:
class Square : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{
public:
Square() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
Square() : InitFields<double, FieldState::Phys, FieldState::Coeff>()
{
meshName = "square.xml";
}
};
......
......@@ -21,7 +21,8 @@ using namespace Nektar;
class Line : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{
public:
Line() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
Line() : InitFields<double, FieldState::Phys, FieldState::Coeff>()
{
meshName = "line.xml";
}
};
......@@ -29,7 +30,8 @@ public:
class Square : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{
public:
Square() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
Square() : InitFields<double, FieldState::Phys, FieldState::Coeff>()
{
meshName = "square.xml";
}
};
......
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
#include <iostream>
#include <memory>
#include "Field.hpp"
#include "Operators/OperatorPhysDeriv.hpp"
#include <LibUtilities/BasicUtils/SessionReader.h>
#include <MultiRegions/ExpList.h>
#include <SpatialDomains/MeshGraph.h>
#include "init_fields.hpp"
using namespace std;
using namespace Nektar::Operators;
using namespace Nektar::LibUtilities;
using namespace Nektar;
class Line : public InitFields<double, FieldState::Phys, FieldState::Phys>
{
public:
Line() : InitFields<double, FieldState::Phys, FieldState::Phys>()
{
meshName = "line.xml";
}
};
class Square : public InitFields<double, FieldState::Phys, FieldState::Phys>
{
public:
Square() : InitFields<double, FieldState::Phys, FieldState::Phys>()
{
meshName = "square.xml";
}
};
BOOST_FIXTURE_TEST_CASE(physderiv, Line)
{
Configure();
double *inptr = fixt_in->GetStorage().GetCPUPtr();
double *exptr = fixt_expected->GetStorage().GetCPUPtr();
size_t order = 6, pts = 0;
Array<OneD, double> x(fixt_explist->GetTotPoints());
fixt_explist->GetCoords(x);
for (auto const &block : fixt_in->GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
double tmp1 = 0.0, tmp2 = 0.0;
for (size_t k = 0; k < order; k++)
{
tmp1 += std::pow(x[pts], k);
tmp2 += k * std::pow(x[pts], k - 1);
}
pts++;
*(inptr++) = tmp1;
*(exptr++) = tmp2;
}
}
}
PhysDeriv<>::create(fixt_explist, "StdMat")
->apply(*fixt_in, *fixt_out, *fixt_out, *fixt_out);
double TOL{1.0E-12};
BOOST_TEST(fixt_out->compare(*fixt_expected, TOL));
}