diff --git a/Field.hpp b/Field.hpp
index 613694aaf7ef40f0472db5e9e644e61d6ae29769..ea51fc989f60c52fca9696259b3352d0af17e8eb 100644
--- a/Field.hpp
+++ b/Field.hpp
@@ -222,8 +222,9 @@ public:
 
         size_t storage_size = std::accumulate(
             field.block_attributes.begin(), field.block_attributes.end(), 0,
-            [](size_t acc, const BlockAttributes &block)
-            { return acc + block.block_size; });
+            [](size_t acc, const BlockAttributes &block) {
+                return acc + block.block_size;
+            });
 
         // Create new TMemoryRegion and polymorphically store as MemoryRegionCPU
         field.m_storage = std::make_unique<TMemoryRegion<TData>>(
@@ -252,8 +253,9 @@ public:
 
         size_t storage_size = std::accumulate(
             field.block_attributes.begin(), field.block_attributes.end(), 0,
-            [](size_t acc, const BlockAttributes &block)
-            { return acc + block.block_size; });
+            [](size_t acc, const BlockAttributes &block) {
+                return acc + block.block_size;
+            });
 
         // Create new TMemoryRegion and polymorphically store as MemoryRegionCPU
         field.m_storage = std::make_unique<TMemoryRegion<TData>>(
diff --git a/MemoryRegionCPU.hpp b/MemoryRegionCPU.hpp
index b7d762dba4a13f4fdf02b93c0acc9c63ff7facb6..7dcd1c2b44d609ef617557a8065a67d2e375d846 100644
--- a/MemoryRegionCPU.hpp
+++ b/MemoryRegionCPU.hpp
@@ -13,8 +13,8 @@
 template <typename TData> class MemoryRegionCPU
 {
 public:
-    MemoryRegionCPU()                                      = delete;
-    MemoryRegionCPU(const MemoryRegionCPU &rhs)            = delete;
+    MemoryRegionCPU()                           = delete;
+    MemoryRegionCPU(const MemoryRegionCPU &rhs) = delete;
     MemoryRegionCPU &operator=(const MemoryRegionCPU &rhs) = delete;
 
     MemoryRegionCPU(MemoryRegionCPU &&rhs)
diff --git a/MemoryRegionCUDA.hpp b/MemoryRegionCUDA.hpp
index d77cfce56563b508331cf6bd6d795069096ab872..ce5469dd5791eb04b3f3d1319b106ed4237a6af0 100644
--- a/MemoryRegionCUDA.hpp
+++ b/MemoryRegionCUDA.hpp
@@ -42,10 +42,10 @@ public:
     void operator=(MemoryRegionCUDA &&rhs)
     {
         MemoryRegionCPU<TData>::operator=(std::move(rhs));
-        m_device     = rhs.m_device;
-        m_size       = rhs.m_size;
-        rhs.m_device = nullptr;
-        rhs.m_size   = 0;
+        m_device                        = rhs.m_device;
+        m_size                          = rhs.m_size;
+        rhs.m_device                    = nullptr;
+        rhs.m_size                      = 0;
     }
 
     virtual TData *GetCPUPtr() override
diff --git a/Operators/ConjGrad/ConjGradStdMat.hpp b/Operators/ConjGrad/ConjGradStdMat.hpp
index d99870f1d7a81143367d0c0ed316ae25bdd00f79..d3f9ac7ae62ef49a99ec64f39e2cb748c25cd2bf 100644
--- a/Operators/ConjGrad/ConjGradStdMat.hpp
+++ b/Operators/ConjGrad/ConjGradStdMat.hpp
@@ -41,6 +41,7 @@ public:
                                                m_maxIter, 5000);
         contfield->GetSession()->LoadParameter("IterativeSolverTolerance",
                                                m_tol, 1.0E-09);
+        m_rowComm    = contfield->GetSession()->GetComm()->GetRowComm();
         m_nloc       = contfield->GetLocalToGlobalMap()->GetNumLocalCoeffs();
         m_assmbScatr = AssmbScatr<TData>::create(this->m_expansionList);
         m_robBndCond = RobBndCond<TData>::create(this->m_expansionList);
@@ -51,6 +52,12 @@ public:
     void apply(Field<TData, FieldState::Coeff> &in,
                Field<TData, FieldState::Coeff> &out) override
     {
+        if (m_rowComm->GetSize() > 1 && m_rowComm->GetRank() == 0)
+        {
+            std::cout << "Solve ConjGrad with " << m_rowComm->GetSize()
+                      << " processors" << std::endl;
+        }
+
         // Store pointers to temporary fields
         Array<OneD, TData> inArr(m_nloc, 0.0);
         Array<OneD, TData> outArr(m_nloc, 0.0);
@@ -79,7 +86,7 @@ public:
         TData rho_new;
         TData mu;
         TData eps;
-        std::array<TData, 3> vExchange{0.0, 0.0, 0.0};
+        Array<OneD, TData> vExchange(3, 0.0);
 
         // Copy data from input field
         auto *inarrptr = inArr.data();
@@ -106,14 +113,14 @@ public:
         vExchange[2] = std::inner_product(p_wk, p_wk + m_nloc, p_r_A, 0.0);
 
         // Perform inner-product exchanges
-        // m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
+        m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
 
         eps = vExchange[2];
 
         // Calculate rhs magnitude
         m_assmbScatr->apply(m_r_A, m_wk);
         rhsMagnitude = std::inner_product(p_in, p_in + m_nloc, p_wk, 0.0);
-        // m_rowComm->AllReduce(rhsMagnitude, Nektar::LibUtilities::ReduceSum);
+        m_rowComm->AllReduce(rhsMagnitude, Nektar::LibUtilities::ReduceSum);
         rhsMagnitude = (rhsMagnitude > 1.0e-6) ? rhsMagnitude : 1.0;
 
         // If input residual is less than tolerance skip solve.
@@ -134,7 +141,7 @@ public:
         vExchange[0] = std::inner_product(p_r_A, p_r_A + m_nloc, p_w_A, 0.0);
         vExchange[1] = std::inner_product(p_s_A, p_s_A + m_nloc, p_w_A, 0.0);
 
-        // m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
+        m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
 
         rho             = vExchange[0];
         mu              = vExchange[1];
@@ -151,23 +158,27 @@ public:
 
             // Compute new search direction p_k
             std::transform(p_p_A, p_p_A + m_nloc, p_w_A, p_p_A,
-                           [&beta](const TData &pElem, const TData &wElem)
-                           { return beta * pElem + wElem; });
+                           [&beta](const TData &pElem, const TData &wElem) {
+                               return beta * pElem + wElem;
+                           });
 
             // Compute new search direction q_k
             std::transform(p_q_A, p_q_A + m_nloc, p_s_A, p_q_A,
-                           [&beta](const TData &qElem, const TData &sElem)
-                           { return beta * qElem + sElem; });
+                           [&beta](const TData &qElem, const TData &sElem) {
+                               return beta * qElem + sElem;
+                           });
 
             // Update solution x_{k+1}
             std::transform(p_p_A, p_p_A + m_nloc, p_out, p_out,
-                           [&alpha](const TData &pElem, const TData &xElem)
-                           { return alpha * pElem + xElem; });
+                           [&alpha](const TData &pElem, const TData &xElem) {
+                               return alpha * pElem + xElem;
+                           });
 
             // Update residual vector r_{k+1}
             std::transform(p_q_A, p_q_A + m_nloc, p_r_A, p_r_A,
-                           [&alpha](const TData &qElem, const TData &rElem)
-                           { return -alpha * qElem + rElem; });
+                           [&alpha](const TData &qElem, const TData &rElem) {
+                               return -alpha * qElem + rElem;
+                           });
 
             // Apply preconditioner
             this->m_precon->apply(m_r_A, m_w_A);
@@ -191,7 +202,7 @@ public:
             vExchange[2] = std::inner_product(p_wk, p_wk + m_nloc, p_r_A, 0.0);
 
             // Perform inner-product exchanges
-            // m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
+            m_rowComm->AllReduce(vExchange, Nektar::LibUtilities::ReduceSum);
 
             rho_new = vExchange[0];
             mu      = vExchange[1];
@@ -244,6 +255,7 @@ public:
     static std::string className;
 
 protected:
+    LibUtilities::CommSharedPtr m_rowComm;
     std::shared_ptr<OperatorAssmbScatr<TData>> m_assmbScatr;
     std::shared_ptr<OperatorRobBndCond<TData>> m_robBndCond;
     Field<TData, FieldState::Coeff> m_w_A;
diff --git a/Operators/FwdTrans/FwdTransStdMat.hpp b/Operators/FwdTrans/FwdTransStdMat.hpp
index 37a8a2bdf0e4708e81f78a6773d3d96f27e69d30..d15ccd6ef1cd3cc06d4f663b23bcfe2ba0b80b19 100644
--- a/Operators/FwdTrans/FwdTransStdMat.hpp
+++ b/Operators/FwdTrans/FwdTransStdMat.hpp
@@ -50,9 +50,9 @@ public:
         // Handle Dirichlet BCs
         m_DirBCOp->apply(out);
         m_MassOp->apply(out, m_tmp);
-        std::transform(rhsptr, rhsptr + nloc, tmpptr, rhsptr,
-                       [](const TData &rhs, const TData &dir)
-                       { return rhs - dir; });
+        std::transform(
+            rhsptr, rhsptr + nloc, tmpptr, rhsptr,
+            [](const TData &rhs, const TData &dir) { return rhs - dir; });
 
         // Handle Robin BCs
         m_RobBCOp->apply(out, m_rhs, true);
@@ -61,9 +61,9 @@ public:
         m_CGOp->apply(m_rhs, m_tmp);
 
         // Add Dirichlet BCs
-        std::transform(outptr, outptr + nloc, tmpptr, outptr,
-                       [](const TData &x, const TData &diff)
-                       { return x + diff; });
+        std::transform(
+            outptr, outptr + nloc, tmpptr, outptr,
+            [](const TData &x, const TData &diff) { return x + diff; });
     }
 
     void setPrecon(
diff --git a/Operators/HelmSolve/HelmSolveStdMat.hpp b/Operators/HelmSolve/HelmSolveStdMat.hpp
index 30e8fe5f233a708c6212c1866aa3e029eeb83500..1d15b3691c236fb384e4d195aa38c956bd6e79ab 100644
--- a/Operators/HelmSolve/HelmSolveStdMat.hpp
+++ b/Operators/HelmSolve/HelmSolveStdMat.hpp
@@ -57,9 +57,9 @@ public:
         // Handle Dirichlet BCs
         m_DirBCOp->apply(out);
         m_HelmOp->apply(out, m_tmp);
-        std::transform(rhsptr, rhsptr + nloc, tmpptr, rhsptr,
-                       [](const TData &rhs, const TData &dir)
-                       { return rhs - dir; });
+        std::transform(
+            rhsptr, rhsptr + nloc, tmpptr, rhsptr,
+            [](const TData &rhs, const TData &dir) { return rhs - dir; });
 
         // Handle Robin BCs
         m_RobBCOp->apply(out, m_rhs, true);
@@ -68,9 +68,9 @@ public:
         m_CGOp->apply(m_rhs, m_tmp);
 
         // Add Dirichlet BCs
-        std::transform(outptr, outptr + nloc, tmpptr, outptr,
-                       [](const TData &x, const TData &diff)
-                       { return x + diff; });
+        std::transform(
+            outptr, outptr + nloc, tmpptr, outptr,
+            [](const TData &x, const TData &diff) { return x + diff; });
     }
 
     void setLambda(const TData &lambda)
diff --git a/Operators/Matrix/MatrixCUDA.hpp b/Operators/Matrix/MatrixCUDA.hpp
index 0ab87111e827246be4dd9f7ac603b27b37f19ad1..eb4575d3b54a31db56da0a8258b8211e463b92e8 100644
--- a/Operators/Matrix/MatrixCUDA.hpp
+++ b/Operators/Matrix/MatrixCUDA.hpp
@@ -21,8 +21,9 @@ public:
         // expansionlist
         auto blocks = GetBlockAttributes(TFieldState, expansionList);
         m_size      = std::accumulate(blocks.begin(), blocks.end(), 0,
-                                      [](size_t acc, const BlockAttributes &block)
-                                      { return acc + block.block_size; });
+                                 [](size_t acc, const BlockAttributes &block) {
+                                     return acc + block.block_size;
+                                 });
         // create memory for square matrix of given size
         // allocate memory device
         cudaMalloc((void **)&m_matrix, sizeof(TData) * m_size * m_size);
diff --git a/Operators/Matrix/MatrixStdMat.hpp b/Operators/Matrix/MatrixStdMat.hpp
index fd045a139de3342725294bd4e18bcc86e6f44407..bf823137bc5fda04f69797191eb292d34036415f 100644
--- a/Operators/Matrix/MatrixStdMat.hpp
+++ b/Operators/Matrix/MatrixStdMat.hpp
@@ -1,7 +1,7 @@
 #pragma once
 
-#include <numeric>
 #include "Operators/OperatorMatrix.hpp"
+#include <numeric>
 
 namespace Nektar::Operators::detail
 {
@@ -18,8 +18,9 @@ public:
         // expansionlist
         auto blocks = GetBlockAttributes(TFieldState, expansionList);
         m_size      = std::accumulate(blocks.begin(), blocks.end(), 0,
-                                      [](size_t acc, const BlockAttributes &block)
-                                      { return acc + block.block_size; });
+                                 [](size_t acc, const BlockAttributes &block) {
+                                     return acc + block.block_size;
+                                 });
 
         // create memory for square matrix of given size
         m_matrix = std::vector<TData>(m_size * m_size);
diff --git a/Operators/NullPrecon/NullPreconCUDA.hpp b/Operators/NullPrecon/NullPreconCUDA.hpp
index a9561267251db02c89699c432eb1e21626c9cc3b..bb93c12dc53ca4a9341f9c98a4a62f9da2c53043 100644
--- a/Operators/NullPrecon/NullPreconCUDA.hpp
+++ b/Operators/NullPrecon/NullPreconCUDA.hpp
@@ -9,8 +9,7 @@ namespace Nektar::Operators::detail
 {
 
 template <typename TData>
-class OperatorNullPreconImpl<TData, ImplCUDA>
-    : public OperatorNullPrecon<TData>
+class OperatorNullPreconImpl<TData, ImplCUDA> : public OperatorNullPrecon<TData>
 {
 public:
     OperatorNullPreconImpl(const MultiRegions::ExpListSharedPtr &expansionList)
diff --git a/Operators/Operator.hpp b/Operators/Operator.hpp
index 2af377b0b3ba60290bd5624ab5dafce4234508d9..d03e3ca153f5ec9fe4e2147335562f2070afac0d 100644
--- a/Operators/Operator.hpp
+++ b/Operators/Operator.hpp
@@ -134,7 +134,7 @@ protected:
         {
             auto expPtr = this->m_expansionList->GetExp(e);
             auto &df    = expPtr->GetMetricInfo()->GetDerivFactors(
-                   expPtr->GetPointsKeys());
+                expPtr->GetPointsKeys());
             size_t nqTot = expPtr->GetTotPoints();
             if (expPtr->GetMetricInfo()->GetGtype() ==
                 SpatialDomains::eDeformed)
diff --git a/Operators/OperatorConjGrad.hpp b/Operators/OperatorConjGrad.hpp
index cd52fb51e6857d6adc38bce19d158db185a2192f..ad1bf8eeeef9cd9a13cefbb11f1fd7925cb9e94e 100644
--- a/Operators/OperatorConjGrad.hpp
+++ b/Operators/OperatorConjGrad.hpp
@@ -27,7 +27,6 @@ public:
         apply(in, out);
     }
 
-
     void setLHS(const std::shared_ptr<OperatorLinear<TData, FieldState::Coeff,
                                                      FieldState::Coeff>> &ptr)
     {
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index a37c18c314c20fe7740070e5321314bfd78cd464..eeeba79f1e9054d9fc141903720b781b9a728a95 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,3 +1,7 @@
+find_package(
+  MPI
+)
+
 find_package(
   Boost
   REQUIRED
@@ -541,11 +545,19 @@ IF (NEKTAR_USE_CUDA)
     )
 ENDIF()
 
-add_test(
-  NAME FwdTrans
-  COMMAND test_fwdtrans
-  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
-)
+IF (${MPI_FOUND})
+  add_test(
+    NAME FwdTrans
+    COMMAND mpirun -np 2 "${CMAKE_BINARY_DIR}/tests/test_fwdtrans"
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  )
+ELSE()
+    add_test(
+    NAME FwdTrans
+    COMMAND test_fwdtrans
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  )
+ENDIF()
 
 IF (NEKTAR_USE_CUDA)
     add_test(
@@ -569,11 +581,19 @@ IF (NEKTAR_USE_CUDA)
     )
 ENDIF()
 
-add_test(
-  NAME HelmSolve
-  COMMAND test_helmsolve
-  WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
-)
+IF (${MPI_FOUND})
+  add_test(
+    NAME HelmSolve
+    COMMAND mpirun -np 2 "${CMAKE_BINARY_DIR}/tests/test_helmsolve"
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  )
+ELSE()
+  add_test(
+    NAME HelmSolve
+    COMMAND test_helmsolve
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+  )
+ENDIF()
 
 IF (NEKTAR_USE_CUDA)
     add_test(
diff --git a/tests/init_diagpreconfields.hpp b/tests/init_diagpreconfields.hpp
index 913a6d551ec8d80f8e549b5354e90fb6fa0b777d..2be53bb59f05fedd9e16fed27a5a684bad3e1e10 100644
--- a/tests/init_diagpreconfields.hpp
+++ b/tests/init_diagpreconfields.hpp
@@ -104,7 +104,7 @@ class Helmholtz2D_Tri_Quad : public DiagPreconField
 public:
     Helmholtz2D_Tri_Quad()
     {
-        //meshName = "run/Helmholtz2D_P7_AllBCs.xml";
+        // meshName = "run/Helmholtz2D_P7_AllBCs.xml";
         meshName = "run/Helmholtz2D_varP.xml";
     }
 };
diff --git a/tests/init_fields.hpp b/tests/init_fields.hpp
index 97a752a98263b7dbecf0ec5f8d9516abb320dd6b..ea357ff9f6b388969abc4f76ecb52774a06c1b75 100644
--- a/tests/init_fields.hpp
+++ b/tests/init_fields.hpp
@@ -96,7 +96,6 @@ public:
                     session, graph, "u", true, false,
                     Collections::eNoCollection);
         }
-
         else if constexpr (std::is_same_v<TExpList, MultiRegions::DisContField>)
         {
             fixt_explist =
diff --git a/tests/init_fwdtransfields.hpp b/tests/init_fwdtransfields.hpp
index f723eea7b7a2d5ad74dbb45b850d46aa8f1dbc2d..ed8f7180212d15cb411cd5b4ca090cbd264f43a8 100644
--- a/tests/init_fwdtransfields.hpp
+++ b/tests/init_fwdtransfields.hpp
@@ -144,7 +144,7 @@ class Helmholtz2D_Tri_Quad : public FwdTransField
 public:
     Helmholtz2D_Tri_Quad()
     {
-        //meshName = "run/Helmholtz2D_P7_AllBCs.xml";
+        // meshName = "run/Helmholtz2D_P7_AllBCs.xml";
         meshName = "run/Helmholtz2D_varP.xml";
     }
 };
diff --git a/tests/init_helmsolvefields.hpp b/tests/init_helmsolvefields.hpp
index 53f5aec0a37ea4c4a69612f5c07fc28fe2e986b9..b6584565eb465e7e6ad0fb8fe971d2c4222aa401 100644
--- a/tests/init_helmsolvefields.hpp
+++ b/tests/init_helmsolvefields.hpp
@@ -148,7 +148,7 @@ class Helmholtz2D_Tri_Quad : public HelmSolveField
 public:
     Helmholtz2D_Tri_Quad()
     {
-        //meshName = "run/Helmholtz2D_P7_AllBCs.xml";
+        // meshName = "run/Helmholtz2D_P7_AllBCs.xml";
         meshName = "run/Helmholtz2D_varP.xml";
     }
 };
@@ -188,4 +188,3 @@ public:
         meshName = "run/Helmholtz3D_Tet_VarP.xml";
     }
 };
-
diff --git a/tests/init_nullpreconfields.hpp b/tests/init_nullpreconfields.hpp
index 80a6633daf9e7d957b1252e9c83081cbdfb0dac8..44ab2154700451c08e96fcdfe7fea820f5466d5f 100644
--- a/tests/init_nullpreconfields.hpp
+++ b/tests/init_nullpreconfields.hpp
@@ -137,4 +137,3 @@ public:
         meshName = "run/Helmholtz3D_Tet_VarP.xml";
     }
 };
-
diff --git a/tests/test_nullprecon.cpp b/tests/test_nullprecon.cpp
index 05b567d076562f821a3530296d9ef66a6d03c300..b814f7756b5b5fbbc9c4c84bc87ae958d774e605 100644
--- a/tests/test_nullprecon.cpp
+++ b/tests/test_nullprecon.cpp
@@ -11,7 +11,6 @@
 
 BOOST_AUTO_TEST_SUITE(TestNullPrecon)
 
-
 BOOST_FIXTURE_TEST_CASE(nullprecon_seg, Helmholtz1D_Seg)
 {
     Configure();