diff --git a/Operators/Identity/IdentityCUDA.hpp b/Operators/Identity/IdentityCUDA.hpp
index 26d9ea59e226bb9848526729769484596d03fc7b..bb9dc75fa37038737d6feafd14fa8e3ae4638871 100644
--- a/Operators/Identity/IdentityCUDA.hpp
+++ b/Operators/Identity/IdentityCUDA.hpp
@@ -1,16 +1,11 @@
+#pragma once
+
 #include "MemoryRegionCUDA.hpp"
-#include "Operators/Identity/IdentityCUDAKernels.cuh"
-#include "Operators/OperatorHelper.cuh"
 #include "Operators/OperatorIdentity.hpp"
 
 namespace Nektar::Operators::detail
 {
 
-template <typename TData>
-void IdentityKernel(const size_t gridSize, const size_t blockSize,
-                    const size_t numPts, const size_t nElmts, const TData *in,
-                    TData *out);
-
 // Identity matrix implementation
 template <typename TData, FieldState TFieldState>
 class OperatorIdentityImpl<TData, TFieldState, ImplCUDA>
@@ -25,39 +20,14 @@ public:
     void apply(Field<TData, TFieldState> &in,
                Field<TData, TFieldState> &out) override
     {
-        // Copy memory to GPU, if necessary and get raw pointers.
+        size_t N = in.template GetStorage<MemoryRegionCUDA>().size();
         auto const *inptr =
             in.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
         auto *outptr = out.template GetStorage<MemoryRegionCUDA>().GetGPUPtr();
-
-        // Initialise index
-        size_t expIdx = 0;
-
-        // Loop over the blocks.
-        for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
-             ++block_idx)
-        {
-            // Determine shape and type of the element.
-            auto const expPtr = this->m_expansionList->GetExp(expIdx);
-            auto nElmts       = in.GetBlocks()[block_idx].num_elements;
-            auto numPts       = (TFieldState == FieldState::Coeff)
-                                    ? expPtr->GetNcoeffs()
-                                    : expPtr->GetTotPoints();
-
-            // Deterime CUDA grid parameters.
-            m_gridSize = nElmts / m_blockSize;
-            m_gridSize += (nElmts % m_blockSize == 0) ? 0 : 1;
-
-            IdentityKernel(m_gridSize, m_blockSize, numPts, nElmts, inptr,
-                           outptr);
-
-            // Increment pointer and index for next element type.
-            inptr += numPts * nElmts;
-            outptr += numPts * nElmts;
-            expIdx += nElmts;
-        }
+        cudaMemcpy(outptr, inptr, sizeof(TData) * N, cudaMemcpyDeviceToDevice);
     }
 
+    // instantiation function for CreatorFunction in OperatorFactory
     static std::unique_ptr<Operator<TData>> instantiate(
         const MultiRegions::ExpListSharedPtr &expansionList)
     {
@@ -65,19 +35,12 @@ public:
             OperatorIdentityImpl<TData, TFieldState, ImplCUDA>>(expansionList);
     }
 
+    // className - for OperatorFactory
     static std::string className;
 
-private:
+protected:
     size_t m_gridSize;
     size_t m_blockSize = 32;
 };
 
-template <typename TData>
-void IdentityKernel(const size_t gridSize, const size_t blockSize,
-                    const size_t numPts, const size_t nElmts, const TData *in,
-                    TData *out)
-{
-    IdentityKernel<TData><<<gridSize, blockSize>>>(numPts, nElmts, in, out);
-}
-
 } // namespace Nektar::Operators::detail
diff --git a/Operators/Identity/IdentityCUDAKernels.cuh b/Operators/Identity/IdentityCUDAKernels.cuh
deleted file mode 100644
index 25ad440a2792a53a8f80334f3fd956869ae07a7c..0000000000000000000000000000000000000000
--- a/Operators/Identity/IdentityCUDAKernels.cuh
+++ /dev/null
@@ -1,23 +0,0 @@
-namespace Nektar::Operators::detail
-{
-template <typename TData>
-__global__ void IdentityKernel(const size_t numPts, const size_t nelmt,
-                               const TData *in, TData *out)
-{
-    size_t e = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (e >= nelmt)
-    {
-        return;
-    }
-
-    const TData *inptr = in + e * numPts;
-    TData *outptr      = out + e * numPts;
-
-    for (size_t i = 0; i < numPts; ++i)
-    {
-        outptr[i] = inptr[i];
-    }
-}
-
-} // namespace Nektar::Operators::detail
diff --git a/Operators/Identity/IdentityStdMat.hpp b/Operators/Identity/IdentityStdMat.hpp
index 860a8cac6e052e38034477bc631694748e4c1ba9..8bc6d552b69862303e36b9b761c2c4eb83e2c601 100644
--- a/Operators/Identity/IdentityStdMat.hpp
+++ b/Operators/Identity/IdentityStdMat.hpp
@@ -5,6 +5,7 @@
 namespace Nektar::Operators::detail
 {
 
+// Identity matrix implementation
 template <typename TData, FieldState TFieldState>
 class OperatorIdentityImpl<TData, TFieldState, ImplStdMat>
     : public OperatorIdentity<TData, TFieldState>
@@ -21,11 +22,7 @@ public:
         size_t N  = in.GetStorage().size();
         auto pIn  = in.GetStorage().GetCPUPtr();
         auto pOut = out.GetStorage().GetCPUPtr();
-
-        for (size_t i = 0; i < N; ++i)
-        {
-            *(pOut++) = *(pIn++);
-        }
+        std::copy(pIn, pIn + N, pOut);
     }
 
     // instantiation function for CreatorFunction in OperatorFactory