diff --git a/Operators/Identity/IdentityCUDA.hpp b/Operators/Identity/IdentityCUDA.hpp index 26d9ea59e226bb9848526729769484596d03fc7b..bb9dc75fa37038737d6feafd14fa8e3ae4638871 100644 --- a/Operators/Identity/IdentityCUDA.hpp +++ b/Operators/Identity/IdentityCUDA.hpp @@ -1,16 +1,11 @@ +#pragma once + #include "MemoryRegionCUDA.hpp" -#include "Operators/Identity/IdentityCUDAKernels.cuh" -#include "Operators/OperatorHelper.cuh" #include "Operators/OperatorIdentity.hpp" namespace Nektar::Operators::detail { -template <typename TData> -void IdentityKernel(const size_t gridSize, const size_t blockSize, - const size_t numPts, const size_t nElmts, const TData *in, - TData *out); - // Identity matrix implementation template <typename TData, FieldState TFieldState> class OperatorIdentityImpl<TData, TFieldState, ImplCUDA> @@ -25,39 +20,14 @@ public: void apply(Field<TData, TFieldState> &in, Field<TData, TFieldState> &out) override { - // Copy memory to GPU, if necessary and get raw pointers. + size_t N = in.template GetStorage<MemoryRegionCUDA>().size(); auto const *inptr = in.template GetStorage<MemoryRegionCUDA>().GetGPUPtr(); auto *outptr = out.template GetStorage<MemoryRegionCUDA>().GetGPUPtr(); - - // Initialise index - size_t expIdx = 0; - - // Loop over the blocks. - for (size_t block_idx = 0; block_idx < in.GetBlocks().size(); - ++block_idx) - { - // Determine shape and type of the element. - auto const expPtr = this->m_expansionList->GetExp(expIdx); - auto nElmts = in.GetBlocks()[block_idx].num_elements; - auto numPts = (TFieldState == FieldState::Coeff) - ? expPtr->GetNcoeffs() - : expPtr->GetTotPoints(); - - // Deterime CUDA grid parameters. - m_gridSize = nElmts / m_blockSize; - m_gridSize += (nElmts % m_blockSize == 0) ? 0 : 1; - - IdentityKernel(m_gridSize, m_blockSize, numPts, nElmts, inptr, - outptr); - - // Increment pointer and index for next element type. - inptr += numPts * nElmts; - outptr += numPts * nElmts; - expIdx += nElmts; - } + cudaMemcpy(outptr, inptr, sizeof(TData) * N, cudaMemcpyDeviceToDevice); } + // instantiation function for CreatorFunction in OperatorFactory static std::unique_ptr<Operator<TData>> instantiate( const MultiRegions::ExpListSharedPtr &expansionList) { @@ -65,19 +35,12 @@ public: OperatorIdentityImpl<TData, TFieldState, ImplCUDA>>(expansionList); } + // className - for OperatorFactory static std::string className; -private: +protected: size_t m_gridSize; size_t m_blockSize = 32; }; -template <typename TData> -void IdentityKernel(const size_t gridSize, const size_t blockSize, - const size_t numPts, const size_t nElmts, const TData *in, - TData *out) -{ - IdentityKernel<TData><<<gridSize, blockSize>>>(numPts, nElmts, in, out); -} - } // namespace Nektar::Operators::detail diff --git a/Operators/Identity/IdentityCUDAKernels.cuh b/Operators/Identity/IdentityCUDAKernels.cuh deleted file mode 100644 index 25ad440a2792a53a8f80334f3fd956869ae07a7c..0000000000000000000000000000000000000000 --- a/Operators/Identity/IdentityCUDAKernels.cuh +++ /dev/null @@ -1,23 +0,0 @@ -namespace Nektar::Operators::detail -{ -template <typename TData> -__global__ void IdentityKernel(const size_t numPts, const size_t nelmt, - const TData *in, TData *out) -{ - size_t e = blockDim.x * blockIdx.x + threadIdx.x; - - if (e >= nelmt) - { - return; - } - - const TData *inptr = in + e * numPts; - TData *outptr = out + e * numPts; - - for (size_t i = 0; i < numPts; ++i) - { - outptr[i] = inptr[i]; - } -} - -} // namespace Nektar::Operators::detail diff --git a/Operators/Identity/IdentityStdMat.hpp b/Operators/Identity/IdentityStdMat.hpp index 860a8cac6e052e38034477bc631694748e4c1ba9..8bc6d552b69862303e36b9b761c2c4eb83e2c601 100644 --- a/Operators/Identity/IdentityStdMat.hpp +++ b/Operators/Identity/IdentityStdMat.hpp @@ -5,6 +5,7 @@ namespace Nektar::Operators::detail { +// Identity matrix implementation template <typename TData, FieldState TFieldState> class OperatorIdentityImpl<TData, TFieldState, ImplStdMat> : public OperatorIdentity<TData, TFieldState> @@ -21,11 +22,7 @@ public: size_t N = in.GetStorage().size(); auto pIn = in.GetStorage().GetCPUPtr(); auto pOut = out.GetStorage().GetCPUPtr(); - - for (size_t i = 0; i < N; ++i) - { - *(pOut++) = *(pIn++); - } + std::copy(pIn, pIn + N, pOut); } // instantiation function for CreatorFunction in OperatorFactory