Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • nektar/redesign-prototypes
1 result
Show changes
Commits on Source (2)
......@@ -31,7 +31,7 @@ set(SRC Field.cpp Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operator
if (NEKTAR_USE_CUDA)
enable_language(CUDA)
add_definitions(-DNEKTAR_USE_CUDA)
set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu)
set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu Operators/IProductWRTBase/IProductWRTBaseCUDA.cu)
endif()
if (NEKTAR_USE_SIMD)
......
#include "IProductWRTBaseCUDA.hpp"
namespace Nektar::Operators::detail
{
template <>
std::string OperatorIProductWRTBaseImpl<double, ImplCUDA>::className =
GetOperatorFactory<double>().RegisterCreatorFunction(
"IProductWRTBaseCUDA",
OperatorIProductWRTBaseImpl<double, ImplCUDA>::instantiate, "...");
}
#include "MemoryRegionCUDA.hpp"
#include "Operators/IProductWRTBase/IProductWRTBaseCUDAKernels.cuh"
#include "Operators/OperatorHelper.cuh"
#include "Operators/OperatorIProductWRTBase.hpp"
namespace Nektar::Operators::detail
{
template <typename TData, bool APPEND = false, bool DEFORMED = false>
void IProductWRTBase1DKernel(const size_t gridSize, const size_t blockSize,
const size_t nm0, const size_t nq0,
const size_t nElmts, const TData *basis0,
const TData *w0, const TData *jac, const TData *in,
TData *out);
template <typename TData, bool APPEND = false, bool DEFORMED = false>
void IProductWRTBase2DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype,
const size_t nm0, const size_t nm1,
const size_t nq0, const size_t nq1,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1,
const TData *w0, const TData *w1, const TData *jac,
const TData *in, TData *out);
template <typename TData, bool APPEND = false, bool DEFORMED = false>
void IProductWRTBase3DKernel(
const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nm0, const size_t nm1,
const size_t nm2, const size_t nq0, const size_t nq1, const size_t nq2,
const size_t nElmts, const bool correct, const TData *basis0,
const TData *basis1, const TData *basis2, const TData *w0, const TData *w1,
const TData *w2, const TData *jac, const TData *in, TData *out);
// IProductWRTBase implementation
template <typename TData>
class OperatorIProductWRTBaseImpl<TData, ImplCUDA>
: public OperatorIProductWRTBase<TData>
{
public:
OperatorIProductWRTBaseImpl(
const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorIProductWRTBase<TData>(expansionList)
{
// Initialise jacobian.
size_t jacSize = Operator<TData>::GetGeometricFactorSize();
auto jac = Operator<TData>::SetJacobian(jacSize);
cudaMalloc((void **)&m_jac, sizeof(TData) * jacSize);
cudaMemcpy(m_jac, jac.get(), sizeof(TData) * jacSize,
cudaMemcpyHostToDevice);
// Initialize basiskey.
m_basis = GetBasisDataCUDA<TData>(expansionList);
// Initialize weight.
m_weight = GetWeightDataCUDA<TData>(expansionList);
}
~OperatorIProductWRTBaseImpl(void)
{
for (auto &basis : m_basis)
{
for (size_t i = 0; i < basis.second.size(); i++)
{
cudaFree(basis.second[i]);
}
}
for (auto &weight : m_weight)
{
for (size_t i = 0; i < weight.second.size(); i++)
{
cudaFree(weight.second[i]);
}
}
cudaFree(m_jac);
}
void apply(Field<TData, FieldState::Phys> &in,
Field<TData, FieldState::Coeff> &out) override
{
// Copy memory to GPU, if necessary and get raw pointers.
auto const *inptr =
in.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
auto *outptr = out.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
// Initialize index.
size_t expIdx = 0;
auto jacptr = m_jac;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(
3, LibUtilities::NullBasisKey);
// Loop over the blocks.
for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
++block_idx)
{
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = in.GetBlocks()[block_idx].num_elements;
auto nqTot = expPtr->GetTotPoints();
auto nmTot = expPtr->GetNcoeffs();
auto ptsKeys = expPtr->GetPointsKeys();
auto deformed = expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed;
// Deterime CUDA grid parameters.
m_gridSize = nElmts / m_blockSize;
m_gridSize += (nElmts % m_blockSize == 0) ? 0 : 1;
// Flag for collapsed coordinate correction.
bool correct = expPtr->GetBasis(0)->GetBasisType() ==
LibUtilities::eModified_A;
// Fetch basis key for the current element type.
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Function call to kernel functions.
if (expPtr->GetShapeDimension() == 1)
{
auto basis0 = m_basis[basisKeys][0];
auto w0 = m_weight[basisKeys][0];
auto nm0 = expPtr->GetBasisNumModes(0);
auto nq0 = expPtr->GetNumPoints(0);
if (deformed)
{
IProductWRTBase1DKernel<TData, false, true>(
m_gridSize, m_blockSize, nm0, nq0, nElmts, basis0, w0,
jacptr, inptr, outptr);
}
else
{
IProductWRTBase1DKernel<TData, false, false>(
m_gridSize, m_blockSize, nm0, nq0, nElmts, basis0, w0,
jacptr, inptr, outptr);
}
}
else if (expPtr->GetShapeDimension() == 2)
{
auto basis0 = m_basis[basisKeys][0];
auto basis1 = m_basis[basisKeys][1];
auto w0 = m_weight[basisKeys][0];
auto w1 = m_weight[basisKeys][1];
auto shape = expPtr->DetShapeType();
auto nm0 = expPtr->GetBasisNumModes(0);
auto nm1 = expPtr->GetBasisNumModes(1);
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
if (deformed)
{
IProductWRTBase2DKernel<TData, false, true>(
m_gridSize, m_blockSize, shape, nm0, nm1, nq0, nq1,
nElmts, correct, basis0, basis1, w0, w1, jacptr, inptr,
outptr);
}
else
{
IProductWRTBase2DKernel<TData, false, false>(
m_gridSize, m_blockSize, shape, nm0, nm1, nq0, nq1,
nElmts, correct, basis0, basis1, w0, w1, jacptr, inptr,
outptr);
}
}
else if (expPtr->GetShapeDimension() == 3)
{
auto basis0 = m_basis[basisKeys][0];
auto basis1 = m_basis[basisKeys][1];
auto basis2 = m_basis[basisKeys][2];
auto w0 = m_weight[basisKeys][0];
auto w1 = m_weight[basisKeys][1];
auto w2 = m_weight[basisKeys][2];
auto shape = expPtr->DetShapeType();
auto nm0 = expPtr->GetBasisNumModes(0);
auto nm1 = expPtr->GetBasisNumModes(1);
auto nm2 = expPtr->GetBasisNumModes(2);
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
auto nq2 = expPtr->GetNumPoints(2);
if (deformed)
{
IProductWRTBase3DKernel<TData, false, true>(
m_gridSize, m_blockSize, shape, nm0, nm1, nm2, nq0, nq1,
nq2, nElmts, correct, basis0, basis1, basis2, w0, w1,
w2, jacptr, inptr, outptr);
}
else
{
IProductWRTBase3DKernel<TData, false, false>(
m_gridSize, m_blockSize, shape, nm0, nm1, nm2, nq0, nq1,
nq2, nElmts, correct, basis0, basis1, basis2, w0, w1,
w2, jacptr, inptr, outptr);
}
}
// Increment pointer and index for next element type.
jacptr += deformed ? nqTot * nElmts : nElmts;
inptr += nqTot * nElmts;
outptr += nmTot * nElmts;
expIdx += nElmts;
}
}
static std::unique_ptr<Operator<TData>> instantiate(
MultiRegions::ExpListSharedPtr expansionList)
{
return std::make_unique<OperatorIProductWRTBaseImpl<TData, ImplCUDA>>(
expansionList);
}
static std::string className;
private:
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>> m_basis;
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>>
m_weight;
TData *m_jac;
size_t m_blockSize = 32;
size_t m_gridSize;
};
template <typename TData, bool APPEND, bool DEFORMED>
void IProductWRTBase1DKernel(const size_t gridSize, const size_t blockSize,
const size_t nm0, const size_t nq0,
const size_t nElmts, const TData *basis0,
const TData *w0, const TData *jac, const TData *in,
TData *out)
{
IProductWRTBaseSegKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nq0, nElmts, basis0, w0, jac, in, out);
}
template <typename TData, bool APPEND, bool DEFORMED>
void IProductWRTBase2DKernel(const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype,
const size_t nm0, const size_t nm1,
const size_t nq0, const size_t nq1,
const size_t nElmts, const bool correct,
const TData *basis0, const TData *basis1,
const TData *w0, const TData *w1, const TData *jac,
const TData *in, TData *out)
{
if (shapetype == LibUtilities::Quad)
{
IProductWRTBaseQuadKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nm1, nq0, nq1, nElmts, basis0,
basis1, w0, w1, jac, in, out);
}
else if (shapetype == LibUtilities::Tri)
{
size_t nmTot =
LibUtilities::StdTriData::getNumberOfCoefficients(nm0, nm1);
IProductWRTBaseTriKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nm1, nmTot, nq0, nq1, nElmts,
correct, basis0, basis1, w0, w1, jac, in,
out);
}
}
template <typename TData, bool APPEND, bool DEFORMED>
void IProductWRTBase3DKernel(
const size_t gridSize, const size_t blockSize,
LibUtilities::ShapeType shapetype, const size_t nm0, const size_t nm1,
const size_t nm2, const size_t nq0, const size_t nq1, const size_t nq2,
const size_t nElmts, const bool correct, const TData *basis0,
const TData *basis1, const TData *basis2, const TData *w0, const TData *w1,
const TData *w2, const TData *jac, const TData *in, TData *out)
{
if (shapetype == LibUtilities::Hex)
{
IProductWRTBaseHexKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nm1, nm2, nq0, nq1, nq2, nElmts,
basis0, basis1, basis2, w0, w1, w2, jac,
in, out);
}
else if (shapetype == LibUtilities::Tet)
{
size_t nmTot =
LibUtilities::StdTetData::getNumberOfCoefficients(nm0, nm1, nm2);
IProductWRTBaseTetKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nm1, nm2, nmTot, nq0, nq1, nq2,
nElmts, correct, basis0, basis1, basis2,
w0, w1, w2, jac, in, out);
}
else if (shapetype == LibUtilities::Pyr)
{
size_t nmTot =
LibUtilities::StdPyrData::getNumberOfCoefficients(nm0, nm1, nm2);
IProductWRTBasePyrKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nm1, nm2, nmTot, nq0, nq1, nq2,
nElmts, correct, basis0, basis1, basis2,
w0, w1, w2, jac, in, out);
}
else if (shapetype == LibUtilities::Prism)
{
size_t nmTot =
LibUtilities::StdPrismData::getNumberOfCoefficients(nm0, nm1, nm2);
IProductWRTBasePrismKernel<TData, false, APPEND, DEFORMED>
<<<gridSize, blockSize>>>(nm0, nm1, nm2, nmTot, nq0, nq1, nq2,
nElmts, correct, basis0, basis1, basis2,
w0, w1, w2, jac, in, out);
}
}
} // namespace Nektar::Operators::detail
This diff is collapsed.
......@@ -14,48 +14,9 @@ public:
const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorIProductWRTBase<TData>(std::move(expansionList))
{
size_t jacSize = 0;
size_t nTotElmts = this->m_expansionList->GetNumElmts();
// Calculate the jacobian array size
for (size_t e = 0; e < nTotElmts; ++e)
{
// Determine shape and type of the element
auto const expPtr = this->m_expansionList->GetExp(e);
if (expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed)
{
jacSize += expPtr->GetTotPoints();
}
else
{
jacSize++;
}
}
// Allocate memory for the jacobian
m_jac = {jacSize, 0.0};
// Initialise jacobian.
size_t index = 0;
for (size_t e = 0; e < nTotElmts; ++e)
{
auto expPtr = this->m_expansionList->GetExp(e);
auto &auxJac =
expPtr->GetMetricInfo()->GetJac(expPtr->GetPointsKeys());
if (expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed)
{
size_t nqe = expPtr->GetTotPoints();
for (size_t i = 0; i < nqe; ++i)
{
m_jac[index++] = auxJac[i];
}
}
else
{
m_jac[index++] = auxJac[0];
}
}
size_t jacSize = Operator<TData>::GetGeometricFactorSize();
m_jac = Operator<TData>::SetJacobian(jacSize);
}
void apply(Field<TData, FieldState::Phys> &in,
......@@ -80,7 +41,7 @@ public:
// This is the B^{T} matrix
auto const matPtr = expPtr->GetStdMatrix(key);
Array<OneD, NekDouble> wsp(nqTot * nElmts, 0.0);
Array<OneD, TData> wsp(nqTot * nElmts, 0.0);
if (expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed)
{
......@@ -123,7 +84,7 @@ public:
static std::string className;
private:
Array<OneD, NekDouble> m_jac;
Array<OneD, TData> m_jac;
};
} // namespace Nektar::Operators::detail
......@@ -60,6 +60,60 @@ public:
}
protected:
size_t GetGeometricFactorSize(void)
{
size_t gfSize = 0;
size_t nTotElmts = this->m_expansionList->GetNumElmts();
// Calculate the jacobian array size
for (size_t e = 0; e < nTotElmts; ++e)
{
// Determine shape and type of the element
auto const expPtr = this->m_expansionList->GetExp(e);
if (expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed)
{
gfSize += expPtr->GetTotPoints();
}
else
{
gfSize++;
}
}
return gfSize;
}
Array<OneD, TData> SetJacobian(size_t jacSize)
{
// Allocate memory for the jacobian
Array<OneD, TData> jac(jacSize, 0.0);
// Initialise jacobian.
size_t index = 0;
size_t nTotElmts = this->m_expansionList->GetNumElmts();
for (size_t e = 0; e < nTotElmts; ++e)
{
auto expPtr = this->m_expansionList->GetExp(e);
auto &auxJac =
expPtr->GetMetricInfo()->GetJac(expPtr->GetPointsKeys());
if (expPtr->GetMetricInfo()->GetGtype() ==
SpatialDomains::eDeformed)
{
size_t nqe = expPtr->GetTotPoints();
for (size_t i = 0; i < nqe; ++i)
{
jac[index++] = auxJac[i];
}
}
else
{
jac[index++] = auxJac[0];
}
}
return jac;
}
MultiRegions::ExpListSharedPtr m_expansionList;
};
......
......@@ -9,6 +9,9 @@ namespace Nektar::Operators
template <typename TData>
using BasisMap =
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>>;
template <typename TData>
using WeightMap =
std::map<std::vector<LibUtilities::BasisKey>, std::vector<TData *>>;
template <typename TData>
BasisMap<TData> GetBasisDataCUDA(
......@@ -51,4 +54,62 @@ BasisMap<TData> GetBasisDataCUDA(
return basis;
}
template <typename TData>
WeightMap<TData> GetWeightDataCUDA(
const MultiRegions::ExpListSharedPtr &expansionList)
{
// Initialize data map.
WeightMap<TData> weight;
// Initialize basiskey.
std::vector<LibUtilities::BasisKey> basisKeys(3,
LibUtilities::NullBasisKey);
// Loop over the elements of expansionList.
size_t nDim = expansionList->GetShapeDimension();
for (size_t i = 0; i < expansionList->GetNumElmts(); ++i)
{
auto const expPtr = expansionList->GetExp(i);
// Fetch basiskeys of current element.
for (size_t d = 0; d < nDim; d++)
{
basisKeys[d] = expPtr->GetBasis(d)->GetBasisKey();
}
// Copy data to weight, if necessary.
if (weight.find(basisKeys) == weight.end())
{
weight[basisKeys] = std::vector<TData *>(nDim, 0);
for (size_t d = 0; d < expPtr->GetShapeDimension(); d++)
{
auto ndata = expPtr->GetBasis(d)->GetW().size();
Array<OneD, TData> w(ndata);
if (expPtr->GetBasis(d)->GetPointsType() ==
LibUtilities::eGaussRadauMAlpha1Beta0)
{
Vmath::Smul(ndata, 0.5, expPtr->GetBasis(d)->GetW().get(),
1, w.get(), 1);
}
else if (expPtr->GetBasis(d)->GetPointsType() ==
LibUtilities::eGaussRadauMAlpha2Beta0)
{
Vmath::Smul(ndata, 0.25, expPtr->GetBasis(d)->GetW().get(),
1, w.get(), 1);
}
else
{
Vmath::Vcopy(ndata, expPtr->GetBasis(d)->GetW().get(), 1,
w.get(), 1);
}
auto hostPtr = w.get();
auto &devicePtr = weight[basisKeys][d];
cudaMalloc((void **)&devicePtr, sizeof(TData) * ndata);
cudaMemcpy(devicePtr, hostPtr, sizeof(TData) * ndata,
cudaMemcpyHostToDevice);
}
}
}
return weight;
}
} // namespace Nektar::Operators
......@@ -299,6 +299,60 @@ int main(int argc, char *argv[])
}
std::cout << std::endl;
}
// Test IProductWRTBase (CUDA) Implementation
#ifdef NEKTAR_USE_CUDA
{
std::cout << "IProductWRTBase (CUDA) test starts." << std::endl;
// Create two Fields with memory on the GPU
auto inPhys = Field<double, FieldState::Phys>::create<MemoryRegionCUDA>(
blocks_phys);
auto outCoeff =
Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(
blocks_coeff);
// Assign input values.
std::cout << "Initial shape: " << std::endl;
auto *inptr =
inPhys.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : inPhys.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
// Each element is the index of the quadrature points
// this is useful for testing reshapes
*(inptr++) = phys;
std::cout << phys << " ";
}
}
}
std::cout << std::endl << std::endl;
// Perform the IProductWRTBase on the fields using the CUDA
// implementation Since this is a CUDA operator, acting on CUDA fields,
// everything happens on the GPU.
IProductWRTBase<>::create(explist, "CUDA")->apply(inPhys, outCoeff);
// Check output values.
std::cout << "Out:" << std::endl;
auto *outptr =
outCoeff.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : outCoeff.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
std::cout << *(outptr++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
#endif
std::cout << "END" << std::endl;
return 0;
......
......@@ -33,20 +33,41 @@ target_include_directories(test_ipwrtbase PRIVATE ${NEKTAR++_INCLUDE_DIRS})
target_compile_definitions(test_ipwrtbase PRIVATE
-DTEST_PATH="${CMAKE_SOURCE_DIR}")
IF (NEKTAR_USE_CUDA)
add_executable(test_ipwrtbasecuda test_ipwrtbasecuda.cpp)
target_link_libraries(test_ipwrtbasecuda PRIVATE Operators)
target_link_libraries(test_ipwrtbasecuda PRIVATE ${NEKTAR++_LIBRARIES})
target_link_libraries(test_ipwrtbasecuda PRIVATE Boost::unit_test_framework)
target_include_directories(test_ipwrtbasecuda PRIVATE "${CMAKE_SOURCE_DIR}")
target_include_directories(test_ipwrtbasecuda PRIVATE ${NEKTAR++_INCLUDE_DIRS})
target_compile_definitions(test_ipwrtbasecuda PRIVATE
-DTEST_PATH="${CMAKE_SOURCE_DIR}")
ENDIF()
add_test(
NAME BwdTrans
COMMAND test_bwdtrans
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
add_test(
NAME BwdTransCUDA
COMMAND test_bwdtranscuda
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
IF (NEKTAR_USE_CUDA)
add_test(
NAME BwdTransCUDA
COMMAND test_bwdtranscuda
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
ENDIF()
add_test(
NAME IProductWRTBase
COMMAND test_ipwrtbase
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
IF (NEKTAR_USE_CUDA)
add_test(
NAME IProductWRTBaseCUDA
COMMAND test_ipwrtbasecuda
WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
)
ENDIF()
......@@ -21,7 +21,7 @@ using namespace Nektar;
class Line : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Line() : InitFields<FieldState::Coeff, FieldState::Phys>() {
Line() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
meshName = "line.xml";
}
};
......@@ -29,7 +29,7 @@ public:
class Square : public InitFields<double, FieldState::Coeff, FieldState::Phys>
{
public:
Square() : InitFields<FieldState::Coeff, FieldState::Phys>() {
Square() : InitFields<double, FieldState::Coeff, FieldState::Phys>() {
meshName = "square.xml";
}
};
......
#define BOOST_TEST_MODULE example
#include <boost/test/unit_test.hpp>
#include <iostream>
#include <memory>
#include "Field.hpp"
#include "Operators/OperatorIProductWRTBase.hpp"
#include <LibUtilities/BasicUtils/SessionReader.h>
#include <MultiRegions/ExpList.h>
#include <SpatialDomains/MeshGraph.h>
#include "init_fields.hpp"
using namespace std;
using namespace Nektar::Operators;
using namespace Nektar::LibUtilities;
using namespace Nektar;
class Line : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{
public:
Line() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
meshName = "line.xml";
}
};
class Square : public InitFields<double, FieldState::Phys, FieldState::Coeff>
{
public:
Square() : InitFields<double, FieldState::Phys, FieldState::Coeff>() {
meshName = "square.xml";
}
};
BOOST_FIXTURE_TEST_CASE(ipwrtbasecuda, Line)
{
Configure();
static double *x =
fixtcuda_in->template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : fixtcuda_in->GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t phys = 0; phys < block.num_pts; ++phys)
{
*(x++) = phys;
}
}
}
IProductWRTBase<>::create(fixt_explist, "CUDA")
->apply(*fixtcuda_in, *fixtcuda_out);
static double *y =
fixtcuda_out->template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
double TOL = 1e-12;
BOOST_CHECK_CLOSE(y[0], 1.097532178406115, TOL);
BOOST_CHECK_CLOSE(y[1], 1.902467821593885, TOL);
BOOST_CHECK_CLOSE(y[2], 0.500000000000000, TOL);
BOOST_CHECK_CLOSE(y[3], 0.150377322580158, TOL);
BOOST_TEST(std::abs(y[4] - 1.387778780781446e-17) < TOL);
BOOST_CHECK_CLOSE(y[5], 0.0074684742369482, TOL);
}