From 9e6578c99e8099b2cfdee05c61a91156e01be3e1 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Thu, 26 May 2016 12:29:19 +0100 Subject: [PATCH 01/34] Preallocate forcing term as a member variable in VCS --- .../VelocityCorrectionScheme.cpp | 46 ++++++++----------- .../VelocityCorrectionScheme.h | 1 + 2 files changed, 21 insertions(+), 26 deletions(-) diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp index 320860210..57da5b277 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp @@ -239,6 +239,7 @@ namespace Nektar // field below SetBoundaryConditions(m_time); + m_F = Array > (m_nConvectiveFields); for(int i = 0; i < m_nConvectiveFields; ++i) { m_fields[i]->LocalToGlobal(); @@ -246,6 +247,7 @@ namespace Nektar m_fields[i]->GlobalToLocal(); m_fields[i]->BwdTrans(m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); + m_F[i] = Array< OneD, NekDouble> (m_fields[0]->GetTotPoints(), 0.0); } } @@ -336,32 +338,23 @@ namespace Nektar const NekDouble time, const NekDouble aii_Dt) { - int i; - int phystot = m_fields[0]->GetTotPoints(); - - Array > F(m_nConvectiveFields); - for(i = 0; i < m_nConvectiveFields; ++i) - { - F[i] = Array (phystot); - } - // Enforcing boundary conditions on all fields SetBoundaryConditions(time); - + // Substep the pressure boundary condition if using substepping m_extrapolation->SubStepSetPressureBCs(inarray,aii_Dt,m_kinvis); - + // Set up forcing term for pressure Poisson equation - SetUpPressureForcing(inarray, F, aii_Dt); + SetUpPressureForcing(inarray, m_F, aii_Dt); // Solve Pressure System - SolvePressure (F[0]); + SolvePressure (m_F[0]); // Set up forcing term for Helmholtz problems - SetUpViscousForcing(inarray, F, aii_Dt); - + SetUpViscousForcing(inarray, m_F, aii_Dt); + // Solve velocity system - SolveViscous( F, outarray, aii_Dt); + SolveViscous( m_F, outarray, aii_Dt); } /** @@ -375,17 +368,18 @@ namespace Nektar int i; int physTot = m_fields[0]->GetTotPoints(); int nvel = m_velocity.num_elements(); - Array wk(physTot, 0.0); - - Vmath::Zero(physTot,Forcing[0],1); - - for(i = 0; i < nvel; ++i) + + m_fields[0]->PhysDeriv(MultiRegions::DirCartesianMap[0],fields[0], + Forcing[0]); + for(i = 1; i < nvel; ++i) { - m_fields[i]->PhysDeriv(MultiRegions::DirCartesianMap[i],fields[i], wk); - Vmath::Vadd(physTot,wk,1,Forcing[0],1,Forcing[0],1); + // Use Forcing[1] as storage since it is not needed for the pressure + m_fields[i]->PhysDeriv(MultiRegions::DirCartesianMap[i],fields[i], + Forcing[1]); + Vmath::Vadd(physTot,Forcing[1],1,Forcing[0],1,Forcing[0],1); } - Vmath::Smul(physTot,1.0/aii_Dt,Forcing[0],1,Forcing[0],1); + Vmath::Smul(physTot,1.0/aii_Dt,Forcing[0],1,Forcing[0],1); } /** @@ -401,7 +395,7 @@ namespace Nektar // Grad p m_pressure->BwdTrans(m_pressure->GetCoeffs(),m_pressure->UpdatePhys()); - + int nvel = m_velocity.num_elements(); if(nvel == 2) { @@ -412,7 +406,7 @@ namespace Nektar m_pressure->PhysDeriv(m_pressure->GetPhys(), Forcing[0], Forcing[1], Forcing[2]); } - + // Subtract inarray/(aii_dt) and divide by kinvis. Kinvis will // need to be updated for the convected fields. for(int i = 0; i < nvel; ++i) diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h index df90971a6..92aa92a06 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h @@ -160,6 +160,7 @@ namespace Nektar const NekDouble time); private: + Array > m_F; }; -- GitLab From bc39e93cde4240a601d11f787a54bc6329b4e16d Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Thu, 26 May 2016 12:46:27 +0100 Subject: [PATCH 02/34] Reserve memory for PointsKeyVector --- library/StdRegions/StdExpansion.cpp | 1 - library/StdRegions/StdExpansion.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/library/StdRegions/StdExpansion.cpp b/library/StdRegions/StdExpansion.cpp index 322946a5b..cd7e34e49 100644 --- a/library/StdRegions/StdExpansion.cpp +++ b/library/StdRegions/StdExpansion.cpp @@ -466,7 +466,6 @@ namespace Nektar returnval = MemoryManager::AllocateSharedPtr( m_ncoeffs,m_ncoeffs); - int cnt = 0; for(int i = 0; i < m_ncoeffs; ++i) { // Get mode at quadrature points diff --git a/library/StdRegions/StdExpansion.h b/library/StdRegions/StdExpansion.h index 9e12fc32a..9675719e7 100644 --- a/library/StdRegions/StdExpansion.h +++ b/library/StdRegions/StdExpansion.h @@ -1328,6 +1328,7 @@ namespace Nektar const LibUtilities::PointsKeyVector GetPointsKeys() const { LibUtilities::PointsKeyVector p; + p.reserve(m_base.num_elements()); for (int i = 0; i < m_base.num_elements(); ++i) { p.push_back(m_base[i]->GetPointsKey()); -- GitLab From 577937083e21b7f25eb0e2b6e75261ecb763e3a6 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Thu, 26 May 2016 13:13:50 +0100 Subject: [PATCH 03/34] Fix previous commit --- library/SolverUtils/DriverSteadyState.cpp | 5 ++++- solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp | 2 ++ .../EquationSystems/VelocityCorrectionScheme.h | 3 ++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/library/SolverUtils/DriverSteadyState.cpp b/library/SolverUtils/DriverSteadyState.cpp index 0f0048d03..312b3f2a5 100644 --- a/library/SolverUtils/DriverSteadyState.cpp +++ b/library/SolverUtils/DriverSteadyState.cpp @@ -85,7 +85,10 @@ void DriverSteadyState::v_Execute(ostream &out) // to find the steady state of a flow above the critical Reynolds // number. m_equ[m_nequ - 1]->PrintSummary(out); - m_equ[m_nequ - 1]->DoInitialise(); + for( int i = 0; i < m_nequ; ++i) + { + m_equ[i]->DoInitialise(); + } m_session->LoadParameter("IO_InfoSteps", m_infosteps, 1000); m_session->LoadParameter("IO_CheckSteps", m_checksteps, 100000); diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp b/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp index 2cb60f7bb..414705770 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp +++ b/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp @@ -157,6 +157,7 @@ namespace Nektar // Correct Dirichlet boundary conditions to account for mapping m_mapping->UpdateBCs(0.0); // + m_F = Array > (m_nConvectiveFields); for(int i = 0; i < m_nConvectiveFields; ++i) { m_fields[i]->LocalToGlobal(); @@ -164,6 +165,7 @@ namespace Nektar m_fields[i]->GlobalToLocal(); m_fields[i]->BwdTrans(m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); + m_F[i] = Array< OneD, NekDouble> (m_fields[0]->GetTotPoints(), 0.0); } // Initialise m_gradP diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h index 92aa92a06..8e65cb1e4 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h @@ -159,8 +159,9 @@ namespace Nektar Array > &outarray, const NekDouble time); - private: Array > m_F; + + private: }; -- GitLab From d5bbdc6bb516bc777ef8c40be974aa3bedc4c751 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Thu, 26 May 2016 13:18:24 +0100 Subject: [PATCH 04/34] Remove unsed vector and put another vector in wsp of GlobalLinSysStaticCond --- library/MultiRegions/GlobalLinSysStaticCond.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/library/MultiRegions/GlobalLinSysStaticCond.cpp b/library/MultiRegions/GlobalLinSysStaticCond.cpp index 2aee89452..4b35fe198 100644 --- a/library/MultiRegions/GlobalLinSysStaticCond.cpp +++ b/library/MultiRegions/GlobalLinSysStaticCond.cpp @@ -121,7 +121,7 @@ namespace Nektar int nIntDofs = pLocToGloMap->GetNumGlobalCoeffs() - nGlobBndDofs; - Array F = m_wsp + 2*nLocBndDofs; + Array F = m_wsp + 2*nLocBndDofs + nGlobHomBndDofs; Array tmp; if(nDirBndDofs && dirForcCalculated) { @@ -135,7 +135,6 @@ namespace Nektar NekVector F_HomBnd(nGlobHomBndDofs,tmp=F+nDirBndDofs, eWrapper); NekVector F_GlobBnd(nGlobBndDofs,F,eWrapper); - NekVector F_LocBnd(nLocBndDofs,0.0); NekVector F_Int(nIntDofs,tmp=F+nGlobBndDofs,eWrapper); NekVector V_GlobBnd(nGlobBndDofs,out,eWrapper); @@ -145,7 +144,8 @@ namespace Nektar NekVector V_Int(nIntDofs,tmp=out+nGlobBndDofs,eWrapper); NekVector V_LocBnd(nLocBndDofs,m_wsp,eWrapper); - NekVector V_GlobHomBndTmp(nGlobHomBndDofs,0.0); + NekVector V_GlobHomBndTmp( + nGlobHomBndDofs,tmp = m_wsp + 2*nLocBndDofs,eWrapper); // set up normalisation factor for right hand side on first SC level DNekScalBlkMatSharedPtr sc = v_PreSolve(scLevel, F_GlobBnd); @@ -269,7 +269,11 @@ namespace Nektar { int nLocalBnd = m_locToGloMap->GetNumLocalBndCoeffs(); int nGlobal = m_locToGloMap->GetNumGlobalCoeffs(); - m_wsp = Array(2*nLocalBnd + nGlobal, 0.0); + int nGlobBndDofs = pLocToGloMap->GetNumGlobalBndCoeffs(); + int nDirBndDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + int nGlobHomBndDofs = nGlobBndDofs - nDirBndDofs; + m_wsp = Array + (2*nLocalBnd + nGlobal + nGlobHomBndDofs, 0.0); if (pLocToGloMap->AtLastLevel()) { -- GitLab From 8acc3fdf45b8cdb6b8180b6ee466b6d160d1e10b Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Thu, 26 May 2016 14:01:45 +0100 Subject: [PATCH 05/34] Store (reference to) derivfactors in Expansion --- library/LocalRegions/Expansion.cpp | 4 +- library/LocalRegions/Expansion.h | 1 + library/LocalRegions/PrismExp.cpp | 150 +++++++++++++--------------- library/LocalRegions/PyrExp.cpp | 127 ++++++++++++------------ library/LocalRegions/QuadExp.cpp | 125 +++++++++++------------- library/LocalRegions/SegExp.cpp | 89 ++++++++--------- library/LocalRegions/TetExp.cpp | 152 ++++++++++++++--------------- library/LocalRegions/TriExp.cpp | 113 ++++++++++----------- 8 files changed, 352 insertions(+), 409 deletions(-) diff --git a/library/LocalRegions/Expansion.cpp b/library/LocalRegions/Expansion.cpp index 0b8fbb768..c4da37080 100644 --- a/library/LocalRegions/Expansion.cpp +++ b/library/LocalRegions/Expansion.cpp @@ -69,11 +69,13 @@ namespace Nektar << "(first vertex ID = " << m_geom->GetVid(0) << ")"; NEKERROR(ErrorUtil::ewarning, err.str()); } + m_df = m_metricinfo->GetDerivFactors(GetPointsKeys()); } Expansion::Expansion(const Expansion &pSrc) : m_geom(pSrc.m_geom), - m_metricinfo(pSrc.m_metricinfo) + m_metricinfo(pSrc.m_metricinfo), + m_df(pSrc.m_df) { } diff --git a/library/LocalRegions/Expansion.h b/library/LocalRegions/Expansion.h index 2b9493b98..15d8267ea 100644 --- a/library/LocalRegions/Expansion.h +++ b/library/LocalRegions/Expansion.h @@ -124,6 +124,7 @@ namespace Nektar protected: SpatialDomains::GeometrySharedPtr m_geom; SpatialDomains::GeomFactorsSharedPtr m_metricinfo; + Array m_df; MetricMap m_metrics; void ComputeLaplacianMetric(); diff --git a/library/LocalRegions/PrismExp.cpp b/library/LocalRegions/PrismExp.cpp index dd3c0dad9..c54e61959 100644 --- a/library/LocalRegions/PrismExp.cpp +++ b/library/LocalRegions/PrismExp.cpp @@ -141,8 +141,6 @@ namespace Nektar { int nqtot = GetTotPoints(); - Array df = - m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff0(nqtot); Array diff1(nqtot); Array diff2(nqtot); @@ -153,46 +151,46 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Vmul (nqtot,&df[0][0],1,&diff0[0],1,&out_d0[0],1); - Vmath::Vvtvp (nqtot,&df[1][0],1,&diff1[0],1,&out_d0[0],1,&out_d0[0],1); - Vmath::Vvtvp (nqtot,&df[2][0],1,&diff2[0],1,&out_d0[0],1,&out_d0[0],1); + Vmath::Vmul (nqtot,&m_df[0][0],1,&diff0[0],1,&out_d0[0],1); + Vmath::Vvtvp (nqtot,&m_df[1][0],1,&diff1[0],1,&out_d0[0],1,&out_d0[0],1); + Vmath::Vvtvp (nqtot,&m_df[2][0],1,&diff2[0],1,&out_d0[0],1,&out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Vmul (nqtot,&df[3][0],1,&diff0[0],1,&out_d1[0],1); - Vmath::Vvtvp (nqtot,&df[4][0],1,&diff1[0],1,&out_d1[0],1,&out_d1[0],1); - Vmath::Vvtvp (nqtot,&df[5][0],1,&diff2[0],1,&out_d1[0],1,&out_d1[0],1); + Vmath::Vmul (nqtot,&m_df[3][0],1,&diff0[0],1,&out_d1[0],1); + Vmath::Vvtvp (nqtot,&m_df[4][0],1,&diff1[0],1,&out_d1[0],1,&out_d1[0],1); + Vmath::Vvtvp (nqtot,&m_df[5][0],1,&diff2[0],1,&out_d1[0],1,&out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Vmul (nqtot,&df[6][0],1,&diff0[0],1,&out_d2[0],1); - Vmath::Vvtvp (nqtot,&df[7][0],1,&diff1[0],1,&out_d2[0],1,&out_d2[0],1); - Vmath::Vvtvp (nqtot,&df[8][0],1,&diff2[0],1,&out_d2[0],1,&out_d2[0],1); + Vmath::Vmul (nqtot,&m_df[6][0],1,&diff0[0],1,&out_d2[0],1); + Vmath::Vvtvp (nqtot,&m_df[7][0],1,&diff1[0],1,&out_d2[0],1,&out_d2[0],1); + Vmath::Vvtvp (nqtot,&m_df[8][0],1,&diff2[0],1,&out_d2[0],1,&out_d2[0],1); } } else // regular geometry { if(out_d0.num_elements()) { - Vmath::Smul (nqtot,df[0][0],&diff0[0],1,&out_d0[0],1); - Blas::Daxpy (nqtot,df[1][0],&diff1[0],1,&out_d0[0],1); - Blas::Daxpy (nqtot,df[2][0],&diff2[0],1,&out_d0[0],1); + Vmath::Smul (nqtot,m_df[0][0],&diff0[0],1,&out_d0[0],1); + Blas::Daxpy (nqtot,m_df[1][0],&diff1[0],1,&out_d0[0],1); + Blas::Daxpy (nqtot,m_df[2][0],&diff2[0],1,&out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Smul (nqtot,df[3][0],&diff0[0],1,&out_d1[0],1); - Blas::Daxpy (nqtot,df[4][0],&diff1[0],1,&out_d1[0],1); - Blas::Daxpy (nqtot,df[5][0],&diff2[0],1,&out_d1[0],1); + Vmath::Smul (nqtot,m_df[3][0],&diff0[0],1,&out_d1[0],1); + Blas::Daxpy (nqtot,m_df[4][0],&diff1[0],1,&out_d1[0],1); + Blas::Daxpy (nqtot,m_df[5][0],&diff2[0],1,&out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Smul (nqtot,df[6][0],&diff0[0],1,&out_d2[0],1); - Blas::Daxpy (nqtot,df[7][0],&diff1[0],1,&out_d2[0],1); - Blas::Daxpy (nqtot,df[8][0],&diff2[0],1,&out_d2[0],1); + Vmath::Smul (nqtot,m_df[6][0],&diff0[0],1,&out_d2[0],1); + Blas::Daxpy (nqtot,m_df[7][0],&diff1[0],1,&out_d2[0],1); + Blas::Daxpy (nqtot,m_df[8][0],&diff2[0],1,&out_d2[0],1); } } } @@ -375,22 +373,19 @@ namespace Nektar Array tmp6 (m_ncoeffs); Array wsp (order0*nquad2*(nquad1+order1)); - const Array& df = - m_metricinfo->GetDerivFactors(GetPointsKeys()); - MultiplyByQuadratureMetric(inarray, tmp1); if(m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { - Vmath::Vmul(nqtot,&df[3*dir][0], 1,tmp1.get(),1,tmp2.get(),1); - Vmath::Vmul(nqtot,&df[3*dir+1][0],1,tmp1.get(),1,tmp3.get(),1); - Vmath::Vmul(nqtot,&df[3*dir+2][0],1,tmp1.get(),1,tmp4.get(),1); + Vmath::Vmul(nqtot,&m_df[3*dir][0], 1,tmp1.get(),1,tmp2.get(),1); + Vmath::Vmul(nqtot,&m_df[3*dir+1][0],1,tmp1.get(),1,tmp3.get(),1); + Vmath::Vmul(nqtot,&m_df[3*dir+2][0],1,tmp1.get(),1,tmp4.get(),1); } else { - Vmath::Smul(nqtot, df[3*dir][0], tmp1.get(),1,tmp2.get(), 1); - Vmath::Smul(nqtot, df[3*dir+1][0],tmp1.get(),1,tmp3.get(), 1); - Vmath::Smul(nqtot, df[3*dir+2][0],tmp1.get(),1,tmp4.get(), 1); + Vmath::Smul(nqtot, m_df[3*dir][0], tmp1.get(),1,tmp2.get(), 1); + Vmath::Smul(nqtot, m_df[3*dir+1][0],tmp1.get(),1,tmp3.get(), 1); + Vmath::Smul(nqtot, m_df[3*dir+2][0],tmp1.get(),1,tmp4.get(), 1); } // set up geometric factor: (1+z0)/2 @@ -691,7 +686,6 @@ namespace Nektar GetGeom()->GetMetricInfo(); LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); SpatialDomains::GeomType type = geomFactors->GetGtype(); - const Array &df = geomFactors->GetDerivFactors(ptsKeys); const Array &jac = geomFactors->GetJac(ptsKeys); int nq0 = ptsKeys[0].GetNumPoints(); @@ -729,7 +723,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -df[3*i+2][0];; + normal[i][0] = -m_df[3*i+2][0];; } break; } @@ -737,7 +731,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -df[3*i+1][0]; + normal[i][0] = -m_df[3*i+1][0]; } break; } @@ -745,7 +739,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = df[3*i][0]+df[3*i+2][0]; + normal[i][0] = m_df[3*i][0]+m_df[3*i+2][0]; } break; } @@ -753,7 +747,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = df[3*i+1][0]; + normal[i][0] = m_df[3*i+1][0]; } break; } @@ -761,7 +755,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -df[3*i][0]; + normal[i][0] = -m_df[3*i][0]; } break; } @@ -815,9 +809,9 @@ namespace Nektar { for(j = 0; j < nq01; ++j) { - normals[j] = -df[2][j]*jac[j]; - normals[nqtot+j] = -df[5][j]*jac[j]; - normals[2*nqtot+j] = -df[8][j]*jac[j]; + normals[j] = -m_df[2][j]*jac[j]; + normals[nqtot+j] = -m_df[5][j]*jac[j]; + normals[2*nqtot+j] = -m_df[8][j]*jac[j]; faceJac[j] = jac[j]; } @@ -834,11 +828,11 @@ namespace Nektar { int tmp = j+nq01*k; normals[j+k*nq0] = - -df[1][tmp]*jac[tmp]; + -m_df[1][tmp]*jac[tmp]; normals[nqtot+j+k*nq0] = - -df[4][tmp]*jac[tmp]; + -m_df[4][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq0] = - -df[7][tmp]*jac[tmp]; + -m_df[7][tmp]*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -856,11 +850,11 @@ namespace Nektar { int tmp = nq0-1+nq0*j+nq01*k; normals[j+k*nq1] = - (df[0][tmp]+df[2][tmp])*jac[tmp]; + (m_df[0][tmp]+m_df[2][tmp])*jac[tmp]; normals[nqtot+j+k*nq1] = - (df[3][tmp]+df[5][tmp])*jac[tmp]; + (m_df[3][tmp]+m_df[5][tmp])*jac[tmp]; normals[2*nqtot+j+k*nq1] = - (df[6][tmp]+df[8][tmp])*jac[tmp]; + (m_df[6][tmp]+m_df[8][tmp])*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -878,11 +872,11 @@ namespace Nektar { int tmp = nq0*(nq1-1) + j + nq01*k; normals[j+k*nq0] = - df[1][tmp]*jac[tmp]; + m_df[1][tmp]*jac[tmp]; normals[nqtot+j+k*nq0] = - df[4][tmp]*jac[tmp]; + m_df[4][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq0] = - df[7][tmp]*jac[tmp]; + m_df[7][tmp]*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -900,11 +894,11 @@ namespace Nektar { int tmp = j*nq0+nq01*k; normals[j+k*nq1] = - -df[0][tmp]*jac[tmp]; + -m_df[0][tmp]*jac[tmp]; normals[nqtot+j+k*nq1] = - -df[3][tmp]*jac[tmp]; + -m_df[3][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq1] = - -df[6][tmp]*jac[tmp]; + -m_df[6][tmp]*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -1155,8 +1149,6 @@ namespace Nektar else { NekDouble jac = (m_metricinfo->GetJac(ptsKeys))[0]; - Array df = - m_metricinfo->GetDerivFactors(ptsKeys); int dir = 0; switch(mkey.GetMatrixType()) @@ -1191,9 +1183,9 @@ namespace Nektar DNekMatSharedPtr WeakDeriv = MemoryManager ::AllocateSharedPtr(rows,cols); - (*WeakDeriv) = df[3*dir ][0]*deriv0 - + df[3*dir+1][0]*deriv1 - + df[3*dir+2][0]*deriv2; + (*WeakDeriv) = m_df[3*dir ][0]*deriv0 + + m_df[3*dir+1][0]*deriv1 + + m_df[3*dir+2][0]*deriv2; returnval = MemoryManager ::AllocateSharedPtr(jac,WeakDeriv); @@ -1587,8 +1579,6 @@ namespace Nektar // wsp3 = du_dxi3 = D_xi3 * wsp0 = D_xi3 * u StdExpansion3D::PhysTensorDeriv(inarray,wsp1,wsp2,wsp3); - const Array& df = - m_metricinfo->GetDerivFactors(GetPointsKeys()); const Array& z0 = m_base[0]->GetZ(); const Array& z2 = m_base[2]->GetZ(); @@ -1610,67 +1600,67 @@ namespace Nektar if (m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { // wsp4 = d eta_1/d x_1 - Vmath::Vvtvvtp(nqtot, &df[0][0], 1, &h0[0], 1, &df[2][0], 1, &h1[0], 1, &wsp4[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[0][0], 1, &h0[0], 1, &m_df[2][0], 1, &h1[0], 1, &wsp4[0], 1); // wsp5 = d eta_2/d x_1 - Vmath::Vvtvvtp(nqtot, &df[3][0], 1, &h0[0], 1, &df[5][0], 1, &h1[0], 1, &wsp5[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[3][0], 1, &h0[0], 1, &m_df[5][0], 1, &h1[0], 1, &wsp5[0], 1); // wsp6 = d eta_3/d x_1d - Vmath::Vvtvvtp(nqtot, &df[6][0], 1, &h0[0], 1, &df[8][0], 1, &h1[0], 1, &wsp6[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[6][0], 1, &h0[0], 1, &m_df[8][0], 1, &h1[0], 1, &wsp6[0], 1); // g0 (overwrites h0) Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp6[0], 1, &wsp6[0], 1, &g0[0], 1, &g0[0], 1); // g3 (overwrites h1) - Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &wsp4[0], 1, &df[4][0], 1, &wsp5[0], 1, &g3[0], 1); - Vmath::Vvtvp (nqtot, &df[7][0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &wsp4[0], 1, &m_df[4][0], 1, &wsp5[0], 1, &g3[0], 1); + Vmath::Vvtvp (nqtot, &m_df[7][0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g4 - Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &wsp4[0], 1, &df[5][0], 1, &wsp5[0], 1, &g4[0], 1); - Vmath::Vvtvp (nqtot, &df[8][0], 1, &wsp6[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &wsp4[0], 1, &m_df[5][0], 1, &wsp5[0], 1, &g4[0], 1); + Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &wsp6[0], 1, &g4[0], 1, &g4[0], 1); // Overwrite wsp4/5/6 with g1/2/5 // g1 - Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &df[1][0], 1, &df[4][0], 1, &df[4][0], 1, &g1[0], 1); - Vmath::Vvtvp (nqtot, &df[7][0], 1, &df[7][0], 1, &g1[0], 1, &g1[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &m_df[1][0], 1, &m_df[4][0], 1, &m_df[4][0], 1, &g1[0], 1); + Vmath::Vvtvp (nqtot, &m_df[7][0], 1, &m_df[7][0], 1, &g1[0], 1, &g1[0], 1); // g2 - Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &df[2][0], 1, &df[5][0], 1, &df[5][0], 1, &g2[0], 1); - Vmath::Vvtvp (nqtot, &df[8][0], 1, &df[8][0], 1, &g2[0], 1, &g2[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &m_df[2][0], 1, &m_df[5][0], 1, &m_df[5][0], 1, &g2[0], 1); + Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &m_df[8][0], 1, &g2[0], 1, &g2[0], 1); // g5 - Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &df[2][0], 1, &df[4][0], 1, &df[5][0], 1, &g5[0], 1); - Vmath::Vvtvp (nqtot, &df[7][0], 1, &df[8][0], 1, &g5[0], 1, &g5[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &m_df[2][0], 1, &m_df[4][0], 1, &m_df[5][0], 1, &g5[0], 1); + Vmath::Vvtvp (nqtot, &m_df[7][0], 1, &m_df[8][0], 1, &g5[0], 1, &g5[0], 1); } else { // wsp4 = d eta_1/d x_1 - Vmath::Svtsvtp(nqtot, df[0][0], &h0[0], 1, df[2][0], &h1[0], 1, &wsp4[0], 1); + Vmath::Svtsvtp(nqtot, m_df[0][0], &h0[0], 1, m_df[2][0], &h1[0], 1, &wsp4[0], 1); // wsp5 = d eta_2/d x_1 - Vmath::Svtsvtp(nqtot, df[3][0], &h0[0], 1, df[5][0], &h1[0], 1, &wsp5[0], 1); + Vmath::Svtsvtp(nqtot, m_df[3][0], &h0[0], 1, m_df[5][0], &h1[0], 1, &wsp5[0], 1); // wsp6 = d eta_3/d x_1 - Vmath::Svtsvtp(nqtot, df[6][0], &h0[0], 1, df[8][0], &h1[0], 1, &wsp6[0], 1); + Vmath::Svtsvtp(nqtot, m_df[6][0], &h0[0], 1, m_df[8][0], &h1[0], 1, &wsp6[0], 1); // g0 (overwrites h0) Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp6[0], 1, &wsp6[0], 1, &g0[0], 1, &g0[0], 1); // g3 (overwrites h1) - Vmath::Svtsvtp(nqtot, df[1][0], &wsp4[0], 1, df[4][0], &wsp5[0], 1, &g3[0], 1); - Vmath::Svtvp (nqtot, df[7][0], &wsp6[0], 1, &g3[0], 1, &g3[0], 1); + Vmath::Svtsvtp(nqtot, m_df[1][0], &wsp4[0], 1, m_df[4][0], &wsp5[0], 1, &g3[0], 1); + Vmath::Svtvp (nqtot, m_df[7][0], &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g4 - Vmath::Svtsvtp(nqtot, df[2][0], &wsp4[0], 1, df[5][0], &wsp5[0], 1, &g4[0], 1); - Vmath::Svtvp (nqtot, df[8][0], &wsp6[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Svtsvtp(nqtot, m_df[2][0], &wsp4[0], 1, m_df[5][0], &wsp5[0], 1, &g4[0], 1); + Vmath::Svtvp (nqtot, m_df[8][0], &wsp6[0], 1, &g4[0], 1, &g4[0], 1); // Overwrite wsp4/5/6 with g1/2/5 // g1 - Vmath::Fill(nqtot, df[1][0]*df[1][0] + df[4][0]*df[4][0] + df[7][0]*df[7][0], &g1[0], 1); + Vmath::Fill(nqtot, m_df[1][0]*m_df[1][0] + m_df[4][0]*m_df[4][0] + m_df[7][0]*m_df[7][0], &g1[0], 1); // g2 - Vmath::Fill(nqtot, df[2][0]*df[2][0] + df[5][0]*df[5][0] + df[8][0]*df[8][0], &g2[0], 1); + Vmath::Fill(nqtot, m_df[2][0]*m_df[2][0] + m_df[5][0]*m_df[5][0] + m_df[8][0]*m_df[8][0], &g2[0], 1); // g5 - Vmath::Fill(nqtot, df[1][0]*df[2][0] + df[4][0]*df[5][0] + df[7][0]*df[8][0], &g5[0], 1); + Vmath::Fill(nqtot, m_df[1][0]*m_df[2][0] + m_df[4][0]*m_df[5][0] + m_df[7][0]*m_df[8][0], &g5[0], 1); } // Compute component derivatives into wsp7, 8, 9 (wsp7 overwrites // g0). diff --git a/library/LocalRegions/PyrExp.cpp b/library/LocalRegions/PyrExp.cpp index 1904f3577..a613fffd3 100644 --- a/library/LocalRegions/PyrExp.cpp +++ b/library/LocalRegions/PyrExp.cpp @@ -142,8 +142,6 @@ namespace Nektar int nquad0 = m_base[0]->GetNumPoints(); int nquad1 = m_base[1]->GetNumPoints(); int nquad2 = m_base[2]->GetNumPoints(); - Array gmat = - m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff0(nquad0*nquad1*nquad2); Array diff1(nquad0*nquad1*nquad2); Array diff2(nquad0*nquad1*nquad2); @@ -154,46 +152,46 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Vmul (nquad0*nquad1*nquad2,&gmat[0][0],1,&diff0[0],1, &out_d0[0], 1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[1][0],1,&diff1[0],1, &out_d0[0], 1,&out_d0[0],1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[2][0],1,&diff2[0],1, &out_d0[0], 1,&out_d0[0],1); + Vmath::Vmul (nquad0*nquad1*nquad2,&m_df[0][0],1,&diff0[0],1, &out_d0[0], 1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[1][0],1,&diff1[0],1, &out_d0[0], 1,&out_d0[0],1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[2][0],1,&diff2[0],1, &out_d0[0], 1,&out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Vmul (nquad0*nquad1*nquad2,&gmat[3][0],1,&diff0[0],1, &out_d1[0], 1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[4][0],1,&diff1[0],1, &out_d1[0], 1,&out_d1[0],1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[5][0],1,&diff2[0],1, &out_d1[0], 1,&out_d1[0],1); + Vmath::Vmul (nquad0*nquad1*nquad2,&m_df[3][0],1,&diff0[0],1, &out_d1[0], 1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[4][0],1,&diff1[0],1, &out_d1[0], 1,&out_d1[0],1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[5][0],1,&diff2[0],1, &out_d1[0], 1,&out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Vmul (nquad0*nquad1*nquad2,&gmat[6][0],1,&diff0[0],1, &out_d2[0], 1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[7][0],1,&diff1[0],1, &out_d2[0], 1, &out_d2[0],1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[8][0],1,&diff2[0],1, &out_d2[0], 1, &out_d2[0],1); + Vmath::Vmul (nquad0*nquad1*nquad2,&m_df[6][0],1,&diff0[0],1, &out_d2[0], 1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[7][0],1,&diff1[0],1, &out_d2[0], 1, &out_d2[0],1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[8][0],1,&diff2[0],1, &out_d2[0], 1, &out_d2[0],1); } } else // regular geometry { if(out_d0.num_elements()) { - Vmath::Smul (nquad0*nquad1*nquad2,gmat[0][0],&diff0[0],1, &out_d0[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,gmat[1][0],&diff1[0],1, &out_d0[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,gmat[2][0],&diff2[0],1, &out_d0[0], 1); + Vmath::Smul (nquad0*nquad1*nquad2,m_df[0][0],&diff0[0],1, &out_d0[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,m_df[1][0],&diff1[0],1, &out_d0[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,m_df[2][0],&diff2[0],1, &out_d0[0], 1); } if(out_d1.num_elements()) { - Vmath::Smul (nquad0*nquad1*nquad2,gmat[3][0],&diff0[0],1, &out_d1[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,gmat[4][0],&diff1[0],1, &out_d1[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,gmat[5][0],&diff2[0],1, &out_d1[0], 1); + Vmath::Smul (nquad0*nquad1*nquad2,m_df[3][0],&diff0[0],1, &out_d1[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,m_df[4][0],&diff1[0],1, &out_d1[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,m_df[5][0],&diff2[0],1, &out_d1[0], 1); } if(out_d2.num_elements()) { - Vmath::Smul (nquad0*nquad1*nquad2,gmat[6][0],&diff0[0],1, &out_d2[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,gmat[7][0],&diff1[0],1, &out_d2[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,gmat[8][0],&diff2[0],1, &out_d2[0], 1); + Vmath::Smul (nquad0*nquad1*nquad2,m_df[6][0],&diff0[0],1, &out_d2[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,m_df[7][0],&diff1[0],1, &out_d2[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,m_df[8][0],&diff2[0],1, &out_d2[0], 1); } } } @@ -468,7 +466,6 @@ namespace Nektar GetGeom()->GetMetricInfo(); LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); SpatialDomains::GeomType type = geomFactors->GetGtype(); - const Array &df = geomFactors->GetDerivFactors(ptsKeys); const Array &jac = geomFactors->GetJac(ptsKeys); LibUtilities::BasisKey tobasis0 = DetFaceBasisKey(face,0); @@ -499,7 +496,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -df[3*i+2][0]; + normal[i][0] = -m_df[3*i+2][0]; } break; } @@ -507,7 +504,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -df[3*i+1][0]; + normal[i][0] = -m_df[3*i+1][0]; } break; } @@ -515,7 +512,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = df[3*i][0]+df[3*i+2][0]; + normal[i][0] = m_df[3*i][0]+m_df[3*i+2][0]; } break; } @@ -523,7 +520,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = df[3*i+1][0]+df[3*i+2][0]; + normal[i][0] = m_df[3*i+1][0]+m_df[3*i+2][0]; } break; } @@ -531,7 +528,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -df[3*i][0]; + normal[i][0] = -m_df[3*i][0]; } break; } @@ -591,9 +588,9 @@ namespace Nektar { for(j = 0; j < nq01; ++j) { - normals[j] = -df[2][j]*jac[j]; - normals[nqtot+j] = -df[5][j]*jac[j]; - normals[2*nqtot+j] = -df[8][j]*jac[j]; + normals[j] = -m_df[2][j]*jac[j]; + normals[nqtot+j] = -m_df[5][j]*jac[j]; + normals[2*nqtot+j] = -m_df[8][j]*jac[j]; faceJac[j] = jac[j]; } @@ -610,11 +607,11 @@ namespace Nektar { int tmp = j+nq01*k; normals[j+k*nq0] = - -df[1][tmp]*jac[tmp]; + -m_df[1][tmp]*jac[tmp]; normals[nqtot+j+k*nq0] = - -df[4][tmp]*jac[tmp]; + -m_df[4][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq0] = - -df[7][tmp]*jac[tmp]; + -m_df[7][tmp]*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -632,11 +629,11 @@ namespace Nektar { int tmp = nq0-1+nq0*j+nq01*k; normals[j+k*nq1] = - (df[0][tmp]+df[2][tmp])*jac[tmp]; + (m_df[0][tmp]+m_df[2][tmp])*jac[tmp]; normals[nqtot+j+k*nq1] = - (df[3][tmp]+df[5][tmp])*jac[tmp]; + (m_df[3][tmp]+m_df[5][tmp])*jac[tmp]; normals[2*nqtot+j+k*nq1] = - (df[6][tmp]+df[8][tmp])*jac[tmp]; + (m_df[6][tmp]+m_df[8][tmp])*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -654,11 +651,11 @@ namespace Nektar { int tmp = nq0*(nq1-1) + j + nq01*k; normals[j+k*nq0] = - (df[1][tmp]+df[2][tmp])*jac[tmp]; + (m_df[1][tmp]+m_df[2][tmp])*jac[tmp]; normals[nqtot+j+k*nq0] = - (df[4][tmp]+df[5][tmp])*jac[tmp]; + (m_df[4][tmp]+m_df[5][tmp])*jac[tmp]; normals[2*nqtot+j+k*nq0] = - (df[7][tmp]+df[8][tmp])*jac[tmp]; + (m_df[7][tmp]+m_df[8][tmp])*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -676,11 +673,11 @@ namespace Nektar { int tmp = j*nq0+nq01*k; normals[j+k*nq1] = - -df[0][tmp]*jac[tmp]; + -m_df[0][tmp]*jac[tmp]; normals[nqtot+j+k*nq1] = - -df[3][tmp]*jac[tmp]; + -m_df[3][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq1] = - -df[6][tmp]*jac[tmp]; + -m_df[6][tmp]*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -1064,8 +1061,6 @@ namespace Nektar Array wsp5 (nqtot, alloc+ 7*nqtot); Array wsp6 (nqtot, alloc+ 8*nqtot); - const Array& df = - m_metricinfo->GetDerivFactors(GetPointsKeys()); const Array& z0 = m_base[0]->GetZ(); const Array& z1 = m_base[1]->GetZ(); const Array& z2 = m_base[2]->GetZ(); @@ -1094,22 +1089,22 @@ namespace Nektar if(m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { // f_{1k} - Vmath::Vvtvvtp(nqtot, &df[0][0], 1, &h0[0], 1, &df[2][0], 1, &h1[0], 1, &wsp1[0], 1); - Vmath::Vvtvvtp(nqtot, &df[3][0], 1, &h0[0], 1, &df[5][0], 1, &h1[0], 1, &wsp2[0], 1); - Vmath::Vvtvvtp(nqtot, &df[6][0], 1, &h0[0], 1, &df[8][0], 1, &h1[0], 1, &wsp3[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[0][0], 1, &h0[0], 1, &m_df[2][0], 1, &h1[0], 1, &wsp1[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[3][0], 1, &h0[0], 1, &m_df[5][0], 1, &h1[0], 1, &wsp2[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[6][0], 1, &h0[0], 1, &m_df[8][0], 1, &h1[0], 1, &wsp3[0], 1); // g0 Vmath::Vvtvvtp(nqtot, &wsp1[0], 1, &wsp1[0], 1, &wsp2[0], 1, &wsp2[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp3[0], 1, &g0[0], 1, &g0[0], 1); // g4 - Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &wsp1[0], 1, &df[5][0], 1, &wsp2[0], 1, &g4[0], 1); - Vmath::Vvtvp (nqtot, &df[8][0], 1, &wsp3[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &wsp1[0], 1, &m_df[5][0], 1, &wsp2[0], 1, &g4[0], 1); + Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &wsp3[0], 1, &g4[0], 1, &g4[0], 1); // f_{2k} - Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &h0[0], 1, &df[2][0], 1, &h2[0], 1, &wsp4[0], 1); - Vmath::Vvtvvtp(nqtot, &df[4][0], 1, &h0[0], 1, &df[5][0], 1, &h2[0], 1, &wsp5[0], 1); - Vmath::Vvtvvtp(nqtot, &df[7][0], 1, &h0[0], 1, &df[8][0], 1, &h2[0], 1, &wsp6[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &h0[0], 1, &m_df[2][0], 1, &h2[0], 1, &wsp4[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[4][0], 1, &h0[0], 1, &m_df[5][0], 1, &h2[0], 1, &wsp5[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[7][0], 1, &h0[0], 1, &m_df[8][0], 1, &h2[0], 1, &wsp6[0], 1); // g1 Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g1[0], 1); @@ -1120,32 +1115,32 @@ namespace Nektar Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g5 - Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &wsp4[0], 1, &df[5][0], 1, &wsp5[0], 1, &g5[0], 1); - Vmath::Vvtvp (nqtot, &df[8][0], 1, &wsp6[0], 1, &g5[0], 1, &g5[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &wsp4[0], 1, &m_df[5][0], 1, &wsp5[0], 1, &g5[0], 1); + Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &wsp6[0], 1, &g5[0], 1, &g5[0], 1); // g2 - Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &df[2][0], 1, &df[5][0], 1, &df[5][0], 1, &g2[0], 1); - Vmath::Vvtvp (nqtot, &df[8][0], 1, &df[8][0], 1, &g2[0], 1, &g2[0], 1); + Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &m_df[2][0], 1, &m_df[5][0], 1, &m_df[5][0], 1, &g2[0], 1); + Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &m_df[8][0], 1, &g2[0], 1, &g2[0], 1); } else { // f_{1k} - Vmath::Svtsvtp(nqtot, df[0][0], &h0[0], 1, df[2][0], &h1[0], 1, &wsp1[0], 1); - Vmath::Svtsvtp(nqtot, df[3][0], &h0[0], 1, df[5][0], &h1[0], 1, &wsp2[0], 1); - Vmath::Svtsvtp(nqtot, df[6][0], &h0[0], 1, df[8][0], &h1[0], 1, &wsp3[0], 1); + Vmath::Svtsvtp(nqtot, m_df[0][0], &h0[0], 1, m_df[2][0], &h1[0], 1, &wsp1[0], 1); + Vmath::Svtsvtp(nqtot, m_df[3][0], &h0[0], 1, m_df[5][0], &h1[0], 1, &wsp2[0], 1); + Vmath::Svtsvtp(nqtot, m_df[6][0], &h0[0], 1, m_df[8][0], &h1[0], 1, &wsp3[0], 1); // g0 Vmath::Vvtvvtp(nqtot, &wsp1[0], 1, &wsp1[0], 1, &wsp2[0], 1, &wsp2[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp3[0], 1, &g0[0], 1, &g0[0], 1); // g4 - Vmath::Svtsvtp(nqtot, df[2][0], &wsp1[0], 1, df[5][0], &wsp2[0], 1, &g4[0], 1); - Vmath::Svtvp (nqtot, df[8][0], &wsp3[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Svtsvtp(nqtot, m_df[2][0], &wsp1[0], 1, m_df[5][0], &wsp2[0], 1, &g4[0], 1); + Vmath::Svtvp (nqtot, m_df[8][0], &wsp3[0], 1, &g4[0], 1, &g4[0], 1); // f_{2k} - Vmath::Svtsvtp(nqtot, df[1][0], &h0[0], 1, df[2][0], &h2[0], 1, &wsp4[0], 1); - Vmath::Svtsvtp(nqtot, df[4][0], &h0[0], 1, df[5][0], &h2[0], 1, &wsp5[0], 1); - Vmath::Svtsvtp(nqtot, df[7][0], &h0[0], 1, df[8][0], &h2[0], 1, &wsp6[0], 1); + Vmath::Svtsvtp(nqtot, m_df[1][0], &h0[0], 1, m_df[2][0], &h2[0], 1, &wsp4[0], 1); + Vmath::Svtsvtp(nqtot, m_df[4][0], &h0[0], 1, m_df[5][0], &h2[0], 1, &wsp5[0], 1); + Vmath::Svtsvtp(nqtot, m_df[7][0], &h0[0], 1, m_df[8][0], &h2[0], 1, &wsp6[0], 1); // g1 Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g1[0], 1); @@ -1156,11 +1151,11 @@ namespace Nektar Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g5 - Vmath::Svtsvtp(nqtot, df[2][0], &wsp4[0], 1, df[5][0], &wsp5[0], 1, &g5[0], 1); - Vmath::Svtvp (nqtot, df[8][0], &wsp6[0], 1, &g5[0], 1, &g5[0], 1); + Vmath::Svtsvtp(nqtot, m_df[2][0], &wsp4[0], 1, m_df[5][0], &wsp5[0], 1, &g5[0], 1); + Vmath::Svtvp (nqtot, m_df[8][0], &wsp6[0], 1, &g5[0], 1, &g5[0], 1); // g2 - Vmath::Fill(nqtot, df[2][0]*df[2][0] + df[5][0]*df[5][0] + df[8][0]*df[8][0], &g2[0], 1); + Vmath::Fill(nqtot, m_df[2][0]*m_df[2][0] + m_df[5][0]*m_df[5][0] + m_df[8][0]*m_df[8][0], &g2[0], 1); } for (unsigned int i = 0; i < dim; ++i) diff --git a/library/LocalRegions/QuadExp.cpp b/library/LocalRegions/QuadExp.cpp index 2cadedcce..d1ff88526 100644 --- a/library/LocalRegions/QuadExp.cpp +++ b/library/LocalRegions/QuadExp.cpp @@ -116,7 +116,6 @@ namespace Nektar int nquad0 = m_base[0]->GetNumPoints(); int nquad1 = m_base[1]->GetNumPoints(); int nqtot = nquad0*nquad1; - const Array& df = m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff0(2*nqtot); Array diff1(diff0+nqtot); @@ -126,41 +125,41 @@ namespace Nektar { if (out_d0.num_elements()) { - Vmath::Vmul (nqtot, df[0], 1, diff0, 1, out_d0, 1); - Vmath::Vvtvp (nqtot, df[1], 1, diff1, 1, out_d0, 1, + Vmath::Vmul (nqtot, m_df[0], 1, diff0, 1, out_d0, 1); + Vmath::Vvtvp (nqtot, m_df[1], 1, diff1, 1, out_d0, 1, out_d0,1); } if(out_d1.num_elements()) { - Vmath::Vmul (nqtot,df[2],1,diff0,1, out_d1, 1); - Vmath::Vvtvp (nqtot,df[3],1,diff1,1, out_d1, 1, out_d1,1); + Vmath::Vmul (nqtot,m_df[2],1,diff0,1, out_d1, 1); + Vmath::Vvtvp (nqtot,m_df[3],1,diff1,1, out_d1, 1, out_d1,1); } if (out_d2.num_elements()) { - Vmath::Vmul (nqtot,df[4],1,diff0,1, out_d2, 1); - Vmath::Vvtvp (nqtot,df[5],1,diff1,1, out_d2, 1, out_d2,1); + Vmath::Vmul (nqtot,m_df[4],1,diff0,1, out_d2, 1); + Vmath::Vvtvp (nqtot,m_df[5],1,diff1,1, out_d2, 1, out_d2,1); } } else // regular geometry { if (out_d0.num_elements()) { - Vmath::Smul (nqtot, df[0][0], diff0, 1, out_d0, 1); - Blas::Daxpy (nqtot, df[1][0], diff1, 1, out_d0, 1); + Vmath::Smul (nqtot, m_df[0][0], diff0, 1, out_d0, 1); + Blas::Daxpy (nqtot, m_df[1][0], diff1, 1, out_d0, 1); } if (out_d1.num_elements()) { - Vmath::Smul (nqtot, df[2][0], diff0, 1, out_d1, 1); - Blas::Daxpy (nqtot, df[3][0], diff1, 1, out_d1, 1); + Vmath::Smul (nqtot, m_df[2][0], diff0, 1, out_d1, 1); + Blas::Daxpy (nqtot, m_df[3][0], diff1, 1, out_d1, 1); } if (out_d2.num_elements()) { - Vmath::Smul (nqtot, df[4][0], diff0, 1, out_d2, 1); - Blas::Daxpy (nqtot, df[5][0], diff1, 1, out_d2, 1); + Vmath::Smul (nqtot, m_df[4][0], diff0, 1, out_d2, 1); + Blas::Daxpy (nqtot, m_df[5][0], diff1, 1, out_d2, 1); } } } @@ -209,8 +208,6 @@ namespace Nektar int nquad1 = m_base[1]->GetNumPoints(); int nqtot = nquad0*nquad1; - const Array& df = m_metricinfo->GetDerivFactors(GetPointsKeys()); - Array diff0(2*nqtot); Array diff1(diff0+nqtot); @@ -227,7 +224,7 @@ namespace Nektar for (int k=0; k<(m_geom->GetCoordim()); ++k) { Vmath::Vvtvp(nqtot, - &df[2*k+i][0], 1, + &m_df[2*k+i][0], 1, &direction[k*nqtot], 1, &tangmat[i][0], 1, &tangmat[i][0], 1); @@ -491,8 +488,6 @@ namespace Nektar int nqtot = nquad0*nquad1; int nmodes0 = m_base[0]->GetNumModes(); - const Array& df = m_metricinfo->GetDerivFactors(GetPointsKeys()); - Array tmp1(2*nqtot+m_ncoeffs+nmodes0*nquad1); Array tmp2(tmp1 + nqtot); Array tmp3(tmp1 + 2*nqtot); @@ -501,21 +496,21 @@ namespace Nektar if (m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { Vmath::Vmul(nqtot, - &df[2*dir][0], 1, + &m_df[2*dir][0], 1, inarray.get(), 1, tmp1.get(), 1); Vmath::Vmul(nqtot, - &df[2*dir+1][0], 1, + &m_df[2*dir+1][0], 1, inarray.get(), 1, tmp2.get(),1); } else { Vmath::Smul(nqtot, - df[2*dir][0], inarray.get(), 1, + m_df[2*dir][0], inarray.get(), 1, tmp1.get(), 1); Vmath::Smul(nqtot, - df[2*dir+1][0], inarray.get(), 1, + m_df[2*dir+1][0], inarray.get(), 1, tmp2.get(), 1); } @@ -936,7 +931,6 @@ namespace Nektar LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); const Array& jac = m_metricinfo->GetJac(ptsKeys); - const Array& df = m_metricinfo->GetDerivFactors(ptsKeys); Array j (max(nquad0, nquad1), 0.0); Array g0(max(nquad0, nquad1), 0.0); @@ -955,9 +949,9 @@ namespace Nektar switch (edge) { case 0: - Vmath::Vcopy(nquad0, &(df[1][0]), + Vmath::Vcopy(nquad0, &(m_df[1][0]), 1, &(g1[0]), 1); - Vmath::Vcopy(nquad0, &(df[3][0]), + Vmath::Vcopy(nquad0, &(m_df[3][0]), 1, &(g3[0]), 1); Vmath::Vcopy(nquad0, &(jac[0]),1, &(j[0]), 1); @@ -969,11 +963,11 @@ namespace Nektar break; case 1: Vmath::Vcopy(nquad1, - &(df[0][0])+(nquad0-1), nquad0, + &(m_df[0][0])+(nquad0-1), nquad0, &(g0[0]), 1); Vmath::Vcopy(nquad1, - &(df[2][0])+(nquad0-1), nquad0, + &(m_df[2][0])+(nquad0-1), nquad0, &(g2[0]), 1); Vmath::Vcopy(nquad1, @@ -989,11 +983,11 @@ namespace Nektar case 2: Vmath::Vcopy(nquad0, - &(df[1][0])+(nquad0*nquad1-1), -1, + &(m_df[1][0])+(nquad0*nquad1-1), -1, &(g1[0]), 1); Vmath::Vcopy(nquad0, - &(df[3][0])+(nquad0*nquad1-1), -1, + &(m_df[3][0])+(nquad0*nquad1-1), -1, &(g3[0]), 1); Vmath::Vcopy(nquad0, @@ -1009,11 +1003,11 @@ namespace Nektar case 3: Vmath::Vcopy(nquad1, - &(df[0][0])+nquad0*(nquad1-1), + &(m_df[0][0])+nquad0*(nquad1-1), -nquad0,&(g0[0]), 1); Vmath::Vcopy(nquad1, - &(df[2][0])+nquad0*(nquad1-1), + &(m_df[2][0])+nquad0*(nquad1-1), -nquad0,&(g2[0]), 1); Vmath::Vcopy(nquad1, @@ -1047,9 +1041,9 @@ namespace Nektar switch (edge) { case 0: - Vmath::Vmul(nqtot,&(df[1][0]),1,&jac[0],1, + Vmath::Vmul(nqtot,&(m_df[1][0]),1,&jac[0],1, &(tmp_gmat1[0]),1); - Vmath::Vmul(nqtot,&(df[3][0]),1,&jac[0],1, + Vmath::Vmul(nqtot,&(m_df[3][0]),1,&jac[0],1, &(tmp_gmat3[0]),1); QuadExp::v_GetEdgeInterpVals( edge, tmp_gmat1, g1_edge); @@ -1065,11 +1059,11 @@ namespace Nektar case 1: Vmath::Vmul(nqtot, - &(df[0][0]), 1, + &(m_df[0][0]), 1, &jac[0], 1, &(tmp_gmat0[0]), 1); Vmath::Vmul(nqtot, - &(df[2][0]), 1, + &(m_df[2][0]), 1, &jac[0], 1, &(tmp_gmat2[0]), 1); @@ -1088,11 +1082,11 @@ namespace Nektar case 2: Vmath::Vmul(nqtot, - &(df[1][0]), 1, + &(m_df[1][0]), 1, &jac[0], 1, &(tmp_gmat1[0]), 1); Vmath::Vmul(nqtot, - &(df[3][0]), 1, + &(m_df[3][0]), 1, &jac[0], 1, &(tmp_gmat3[0]),1); QuadExp::v_GetEdgeInterpVals( @@ -1112,11 +1106,11 @@ namespace Nektar break; case 3: Vmath::Vmul(nqtot, - &(df[0][0]), 1, + &(m_df[0][0]), 1, &jac[0], 1, &(tmp_gmat0[0]), 1); Vmath::Vmul(nqtot, - &(df[2][0]),1, + &(m_df[2][0]),1, &jac[0], 1, &(tmp_gmat2[0]),1); QuadExp::v_GetEdgeInterpVals( @@ -1153,29 +1147,29 @@ namespace Nektar for (i = 0; i < nquad0; ++i) { - outarray[i] = jac[0]*sqrt(df[1][0]*df[1][0] + - df[3][0]*df[3][0]); + outarray[i] = jac[0]*sqrt(m_df[1][0]*m_df[1][0] + + m_df[3][0]*m_df[3][0]); } break; case 1: for (i = 0; i < nquad1; ++i) { - outarray[i] = jac[0]*sqrt(df[0][0]*df[0][0] + - df[2][0]*df[2][0]); + outarray[i] = jac[0]*sqrt(m_df[0][0]*m_df[0][0] + + m_df[2][0]*m_df[2][0]); } break; case 2: for (i = 0; i < nquad0; ++i) { - outarray[i] = jac[0]*sqrt(df[1][0]*df[1][0] + - df[3][0]*df[3][0]); + outarray[i] = jac[0]*sqrt(m_df[1][0]*m_df[1][0] + + m_df[3][0]*m_df[3][0]); } break; case 3: for (i = 0; i < nquad1; ++i) { - outarray[i] = jac[0]*sqrt(df[0][0]*df[0][0] + - df[2][0]*df[2][0]); + outarray[i] = jac[0]*sqrt(m_df[0][0]*m_df[0][0] + + m_df[2][0]*m_df[2][0]); } break; default: @@ -1193,7 +1187,6 @@ namespace Nektar GetGeom()->GetMetricInfo(); SpatialDomains::GeomType type = geomFactors->GetGtype(); LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); - const Array & df = geomFactors->GetDerivFactors(ptsKeys); const Array & jac = geomFactors->GetJac(ptsKeys); int nqe; if (edge == 0 || edge == 2) @@ -1225,25 +1218,25 @@ namespace Nektar case 0: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, -df[2*i+1][0], normal[i], 1); + Vmath::Fill(nqe, -m_df[2*i+1][0], normal[i], 1); } break; case 1: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, df[2*i][0], normal[i], 1); + Vmath::Fill(nqe, m_df[2*i][0], normal[i], 1); } break; case 2: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, df[2*i+1][0], normal[i], 1); + Vmath::Fill(nqe, m_df[2*i+1][0], normal[i], 1); } break; case 3: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, -df[2*i][0], normal[i], 1); + Vmath::Fill(nqe, -m_df[2*i][0], normal[i], 1); } break; default: @@ -1293,7 +1286,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad0+j] = - -df[2*i+1][j]*edgejac[j]; + -m_df[2*i+1][j]*edgejac[j]; } } from_key = ptsKeys[0]; @@ -1305,7 +1298,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad1+j] = - df[2*i][nquad0*j + nquad0-1] + m_df[2*i][nquad0*j + nquad0-1] *edgejac[j]; } } @@ -1318,7 +1311,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad0+j] = - (df[2*i+1][nquad0*(nquad1-1)+j]) + (m_df[2*i+1][nquad0*(nquad1-1)+j]) *edgejac[j]; } } @@ -1331,7 +1324,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad1+j] = - -df[2*i][nquad0*j]*edgejac[j]; + -m_df[2*i][nquad0*j]*edgejac[j]; } } from_key = ptsKeys[1]; @@ -1354,7 +1347,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(df[2*i+1][0]), 1, + &(m_df[2*i+1][0]), 1, &jac[0], 1, &(tmp_gmat[0]), 1); QuadExp::v_GetEdgeInterpVals( @@ -1370,7 +1363,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(df[2*i][0]), 1, + &(m_df[2*i][0]), 1, &jac[0], 1, &(tmp_gmat[0]), 1); QuadExp::v_GetEdgeInterpVals( @@ -1386,7 +1379,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(df[2*i+1][0]), 1, + &(m_df[2*i+1][0]), 1, &jac[0], 1, &(tmp_gmat[0]), 1); QuadExp::v_GetEdgeInterpVals( @@ -1402,7 +1395,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(df[2*i][0]), 1, + &(m_df[2*i][0]), 1, &jac[0], 1, &(tmp_gmat[0]) ,1); QuadExp::v_GetEdgeInterpVals( @@ -1679,8 +1672,6 @@ namespace Nektar else { NekDouble jac = (m_metricinfo->GetJac(ptsKeys))[0]; - Array df = - m_metricinfo->GetDerivFactors(ptsKeys); int dir = 0; switch(mkey.GetMatrixType()) @@ -1711,8 +1702,8 @@ namespace Nektar DNekMatSharedPtr WeakDeriv = MemoryManager:: AllocateSharedPtr(rows,cols); - (*WeakDeriv) = df[2*dir][0]*deriv0 + - df[2*dir+1][0]*deriv1; + (*WeakDeriv) = m_df[2*dir][0]*deriv0 + + m_df[2*dir+1][0]*deriv1; returnval = MemoryManager:: AllocateSharedPtr(jac,WeakDeriv); } @@ -1825,8 +1816,6 @@ namespace Nektar else { NekDouble jac = (m_metricinfo->GetJac(ptsKeys))[0]; - const Array& df = - m_metricinfo->GetDerivFactors(ptsKeys); int dir = 0; switch(mkey.GetMatrixType()) @@ -1859,8 +1848,8 @@ namespace Nektar DNekMatSharedPtr mat = MemoryManager:: AllocateSharedPtr(rows,cols); - (*mat) = df[2*dir][0]*stdiprod0 + - df[2*dir+1][0]*stdiprod1; + (*mat) = m_df[2*dir][0]*stdiprod0 + + m_df[2*dir+1][0]*stdiprod1; returnval = MemoryManager:: AllocateSharedPtr(jac,mat); diff --git a/library/LocalRegions/SegExp.cpp b/library/LocalRegions/SegExp.cpp index 6c5d58d56..1871977cf 100644 --- a/library/LocalRegions/SegExp.cpp +++ b/library/LocalRegions/SegExp.cpp @@ -169,8 +169,6 @@ namespace Nektar Array &out_d2) { int nquad0 = m_base[0]->GetNumPoints(); - Array gmat = - m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff(nquad0); //StdExpansion1D::PhysTensorDeriv(inarray,diff); @@ -179,19 +177,19 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Vmul(nquad0,&gmat[0][0],1,&diff[0],1, + Vmath::Vmul(nquad0,&m_df[0][0],1,&diff[0],1, &out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Vmul(nquad0,&gmat[1][0],1,&diff[0],1, + Vmath::Vmul(nquad0,&m_df[1][0],1,&diff[0],1, &out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Vmul(nquad0,&gmat[2][0],1,&diff[0],1, + Vmath::Vmul(nquad0,&m_df[2][0],1,&diff[0],1, &out_d2[0],1); } } @@ -199,19 +197,19 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Smul(nquad0, gmat[0][0], diff, 1, + Vmath::Smul(nquad0, m_df[0][0], diff, 1, out_d0, 1); } if(out_d1.num_elements()) { - Vmath::Smul(nquad0, gmat[1][0], diff, 1, + Vmath::Smul(nquad0, m_df[1][0], diff, 1, out_d1, 1); } if(out_d2.num_elements()) { - Vmath::Smul(nquad0, gmat[2][0], diff, 1, + Vmath::Smul(nquad0, m_df[2][0], diff, 1, out_d2, 1); } } @@ -270,8 +268,6 @@ namespace Nektar Array& out_dn) { int nquad0 = m_base[0]->GetNumPoints(); - Array gmat = - m_metricinfo->GetDerivFactors(GetPointsKeys()); int coordim = m_geom->GetCoordim(); Array out_dn_tmp(nquad0,0.0); switch(coordim) @@ -565,8 +561,6 @@ cout<<"deps/dx ="<GetGtype() == SpatialDomains::eDeformed) { - Vmath::Vmul(nquad,gmat[2],1,inarray,1,tmp1,1); + Vmath::Vmul(nquad,m_df[2],1,inarray,1,tmp1,1); } else { - Vmath::Smul(nquad, gmat[2][0], inarray, 1, tmp1, 1); + Vmath::Smul(nquad, m_df[2][0], inarray, 1, tmp1, 1); } } break; @@ -888,8 +882,7 @@ cout<<"deps/dx ="<DoInitialise(); + for( int i = 0; i < m_nequ; ++i) + { + m_equ[i]->DoInitialise(); + } // FwdTrans Initial conditions to be in Coefficient Space m_equ[m_nequ-1] ->TransPhysToCoeff(); diff --git a/library/SolverUtils/DriverModifiedArnoldi.cpp b/library/SolverUtils/DriverModifiedArnoldi.cpp index fb2f13814..bf0aa5895 100644 --- a/library/SolverUtils/DriverModifiedArnoldi.cpp +++ b/library/SolverUtils/DriverModifiedArnoldi.cpp @@ -87,7 +87,10 @@ void DriverModifiedArnoldi::v_InitObject(ostream &out) DriverArnoldi::ArnoldiSummary(out); - m_equ[m_nequ - 1]->DoInitialise(); + for( int i = 0; i < m_nequ; ++i) + { + m_equ[i]->DoInitialise(); + } //FwdTrans Initial conditions to be in Coefficient Space m_equ[m_nequ-1] ->TransPhysToCoeff(); diff --git a/library/SolverUtils/DriverSteadyState.cpp b/library/SolverUtils/DriverSteadyState.cpp index 312b3f2a5..21c532aa1 100644 --- a/library/SolverUtils/DriverSteadyState.cpp +++ b/library/SolverUtils/DriverSteadyState.cpp @@ -85,10 +85,6 @@ void DriverSteadyState::v_Execute(ostream &out) // to find the steady state of a flow above the critical Reynolds // number. m_equ[m_nequ - 1]->PrintSummary(out); - for( int i = 0; i < m_nequ; ++i) - { - m_equ[i]->DoInitialise(); - } m_session->LoadParameter("IO_InfoSteps", m_infosteps, 1000); m_session->LoadParameter("IO_CheckSteps", m_checksteps, 100000); -- GitLab From 8ff1e9bf6d93b01b06c8c1996606fa93df23a6d7 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Sat, 4 Jun 2016 13:49:48 +0100 Subject: [PATCH 09/34] Improve diagonal block multiply --- .../LinearAlgebra/BlockMatrix.cpp | 17 ++++++++++ .../LinearAlgebra/BlockMatrix.hpp | 3 ++ .../MatrixVectorMultiplication.cpp | 32 +++++++++---------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp b/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp index 37af170fb..b444f77eb 100644 --- a/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp @@ -355,6 +355,23 @@ namespace Nektar } } + template + void NekMatrix, BlockMatrixTag>::GetBlockSizes( + Array& rowSizes, + Array& colSizes) const + { + if( this->GetTransposeFlag() == 'T' ) + { + rowSizes = m_columnSizes; + colSizes = m_rowSizes; + } + else + { + rowSizes = m_rowSizes; + colSizes = m_columnSizes; + } + } + template typename NekMatrix, BlockMatrixTag>::iterator NekMatrix, BlockMatrixTag>::begin() { return iterator(*this, 0, 0); } diff --git a/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp b/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp index 34af20053..67ea865ba 100644 --- a/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp +++ b/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp @@ -175,6 +175,9 @@ namespace Nektar LIB_UTILITIES_EXPORT unsigned int GetNumberOfColumnsInBlockColumn(unsigned int blockCol) const; + LIB_UTILITIES_EXPORT void GetBlockSizes(Array& rowSizes, + Array& colSizes) const; + LIB_UTILITIES_EXPORT iterator begin(); LIB_UTILITIES_EXPORT iterator end(); LIB_UTILITIES_EXPORT const_iterator begin() const; diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index 97edbdcb2..a2d74f0f8 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -208,37 +208,34 @@ namespace Nektar double* result_ptr = result.GetRawPtr(); const double* rhs_ptr = rhs.GetRawPtr(); + Array rowSizes; + Array colSizes; + lhs.GetBlockSizes(rowSizes, colSizes); + std::fill(result.begin(), result.end(), 0.0); unsigned int curResultRow = 0; unsigned int curWrapperRow = 0; + unsigned int rowsInBlock, columnsInBlock; for(unsigned int blockRow = 0; blockRow < numberOfBlockRows; ++blockRow) { - - if( blockRow != 0 ) - { - curResultRow += lhs.GetNumberOfRowsInBlockRow(blockRow-1); - } - - unsigned int blockColumn = blockRow; - if( blockColumn != 0 ) + if ( blockRow == 0) { - curWrapperRow += lhs.GetNumberOfColumnsInBlockColumn(blockColumn-1); + rowsInBlock = rowSizes[blockRow] + 1; + columnsInBlock = colSizes[blockRow] + 1; } - - unsigned int rowsInBlock = lhs.GetNumberOfRowsInBlockRow(blockRow); - if( rowsInBlock == 0 ) + else { - continue; + rowsInBlock = rowSizes[blockRow] - rowSizes[blockRow-1]; + columnsInBlock = colSizes[blockRow] - colSizes[blockRow-1]; } - unsigned int columnsInBlock = lhs.GetNumberOfColumnsInBlockColumn(blockColumn); - if( columnsInBlock == 0 ) + if( rowsInBlock == 0 || columnsInBlock == 0) { continue; } - const LhsInnerMatrixType* block = lhs.GetBlockPtr(blockRow, blockColumn); + const LhsInnerMatrixType* block = lhs.GetBlockPtr(blockRow, blockRow); if( !block ) { continue; @@ -248,6 +245,9 @@ namespace Nektar const double* rhsWrapper = rhs_ptr + curWrapperRow; Multiply(resultWrapper, *block, rhsWrapper); //resultWrapper = (*block)*rhsWrapper; + + curResultRow += rowsInBlock; + curWrapperRow += columnsInBlock; } } -- GitLab From 36d3fb81cf1ece535d2c32d56122138d73c9be7d Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Sun, 5 Jun 2016 13:29:37 +0100 Subject: [PATCH 10/34] Small optimizations to NekMatrix --- library/LibUtilities/LinearAlgebra/BlockMatrix.cpp | 10 +--------- library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp | 8 ++++++++ library/LibUtilities/LinearAlgebra/StandardMatrix.cpp | 6 ------ library/LibUtilities/LinearAlgebra/StandardMatrix.hpp | 4 ---- 4 files changed, 9 insertions(+), 19 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp b/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp index b444f77eb..6c9003ec1 100644 --- a/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp @@ -161,16 +161,8 @@ namespace Nektar template unsigned int NekMatrix, BlockMatrixTag>::CalculateBlockIndex(unsigned int row, unsigned int column) const { - unsigned int blockRows = GetNumberOfBlockRows(); - unsigned int blockCols = GetNumberOfBlockColumns(); - - if( this->GetTransposeFlag() == 'T' ) - { - std::swap(blockRows, blockCols); - } - return BaseType::CalculateIndex(this->GetStorageType(), - row, column, blockRows, blockCols, this->GetTransposeFlag()); + row, column, m_numberOfBlockRows, m_numberOfBlockColumns, this->GetTransposeFlag()); } template diff --git a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp index 65a618e60..4155e3783 100644 --- a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp @@ -31,6 +31,7 @@ // /////////////////////////////////////////////////////////////////////////////// +#include #include #include @@ -95,6 +96,13 @@ namespace Nektar return m_scale*m_matrix->Scale(); } + template<> + typename NekMatrix, ScaledMatrixTag>::NumberType + NekMatrix, ScaledMatrixTag>::Scale() const + { + return m_scale; + } + template void NekMatrix, ScaledMatrixTag>::SetScale(const NumberType& value) { diff --git a/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp b/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp index b3ee87ebb..f5b29d892 100644 --- a/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp @@ -394,12 +394,6 @@ namespace Nektar template PointerWrapper NekMatrix::GetWrapperType() const { return m_wrapperType; } - template - char NekMatrix::GetTransposeFlag() const - { - return this->GetRawTransposeFlag(); - } - template boost::tuples::tuple NekMatrix::Advance(unsigned int curRow, unsigned int curColumn) const diff --git a/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp b/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp index 8a8972ea9..a04161499 100644 --- a/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp +++ b/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp @@ -508,10 +508,6 @@ namespace Nektar LIB_UTILITIES_EXPORT PointerWrapper GetWrapperType() const; - - LIB_UTILITIES_EXPORT char GetTransposeFlag() const ; - - LIB_UTILITIES_EXPORT boost::tuples::tuple Advance(unsigned int curRow, unsigned int curColumn) const; -- GitLab From 05ce571abed275cb16a10b4779e3becd7681af70 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Sun, 5 Jun 2016 13:32:04 +0100 Subject: [PATCH 11/34] Allow choosing frequency for checking Nan --- library/SolverUtils/UnsteadySystem.cpp | 23 ++++++++++++++--------- library/SolverUtils/UnsteadySystem.h | 2 ++ 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/library/SolverUtils/UnsteadySystem.cpp b/library/SolverUtils/UnsteadySystem.cpp index 71e35f544..7bd646c4f 100644 --- a/library/SolverUtils/UnsteadySystem.cpp +++ b/library/SolverUtils/UnsteadySystem.cpp @@ -89,6 +89,8 @@ namespace Nektar m_session->MatchSolverInfo("REACTIONADVANCEMENT", "Explicit", m_explicitReaction, true); + m_session->LoadParameter("CheckNanSteps", m_nanSteps, 1); + // For steady problems, we do not initialise the time integration if (m_session->DefinesSolverInfo("TIMEINTEGRATIONMETHOD")) { @@ -333,19 +335,22 @@ namespace Nektar } // search for NaN and quit if found - int nanFound = 0; - for (i = 0; i < nvariables; ++i) + if (m_nanSteps && !((step+1) % m_nanSteps) ) { - if (Vmath::Nnan(fields[i].num_elements(), fields[i], 1) > 0) + int nanFound = 0; + for (i = 0; i < nvariables; ++i) { - nanFound = 1; + if (Vmath::Nnan(fields[i].num_elements(), + fields[i], 1) > 0) + { + nanFound = 1; + } } + m_session->GetComm()->AllReduce(nanFound, + LibUtilities::ReduceMax); + ASSERTL0 (!nanFound, + "NaN found during time integration."); } - m_session->GetComm()->AllReduce(nanFound, - LibUtilities::ReduceMax); - ASSERTL0 (!nanFound, - "NaN found during time integration."); - // Update filters std::vector::iterator x; for (x = m_filters.begin(); x != m_filters.end(); ++x) diff --git a/library/SolverUtils/UnsteadySystem.h b/library/SolverUtils/UnsteadySystem.h index 78186fb69..82d2f0c29 100644 --- a/library/SolverUtils/UnsteadySystem.h +++ b/library/SolverUtils/UnsteadySystem.h @@ -61,6 +61,8 @@ namespace Nektar protected: /// Number of time steps between outputting status information. int m_infosteps; + + int m_nanSteps; /// Wrapper to the time integration scheme LibUtilities::TimeIntegrationWrapperSharedPtr m_intScheme; /// The time integration scheme operators to use. -- GitLab From 967f30c47656dbec6f9d4ef4c1a8d70550f21ddc Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Sun, 5 Jun 2016 13:33:41 +0100 Subject: [PATCH 12/34] Minor changes to StdTri PhysDeriv --- library/StdRegions/StdTriExp.cpp | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/library/StdRegions/StdTriExp.cpp b/library/StdRegions/StdTriExp.cpp index aaa3a6eca..54d225525 100644 --- a/library/StdRegions/StdTriExp.cpp +++ b/library/StdRegions/StdTriExp.cpp @@ -136,16 +136,14 @@ namespace Nektar int i; int nquad0 = m_base[0]->GetNumPoints(); int nquad1 = m_base[1]->GetNumPoints(); - Array wsp(nquad0*nquad1); + Array wsp(std::max(nquad0, nquad1)); const Array& z0 = m_base[0]->GetZ(); const Array& z1 = m_base[1]->GetZ(); // set up geometric factor: 2/(1-z1) - for (i = 0; i < nquad1; ++i) - { - wsp[i] = 2.0/(1-z1[i]); - } + Vmath::Sadd(nquad1, -1.0, z1, 1, wsp, 1); + Vmath::Sdiv(nquad1, -2.0, wsp, 1, wsp, 1); if (out_d0.num_elements() > 0) { @@ -160,10 +158,8 @@ namespace Nektar if (out_d1.num_elements() > 0) { // set up geometric factor: (1_z0)/(1-z1) - for (i = 0; i < nquad0; ++i) - { - wsp[i] = 0.5*(1+z0[i]); - } + Vmath::Sadd(nquad0, 1.0, z0, 1, wsp, 1); + Vmath::Smul(nquad0, 0.5, wsp, 1, wsp, 1); for (i = 0; i < nquad1; ++i) { @@ -183,10 +179,8 @@ namespace Nektar Blas::Dscal(nquad0,wsp[i],&diff0[0]+i*nquad0,1); } - for (i = 0; i < nquad0; ++i) - { - wsp[i] = 0.5*(1+z0[i]); - } + Vmath::Sadd(nquad0, 1.0, z0, 1, wsp, 1); + Vmath::Smul(nquad0, 0.5, wsp, 1, wsp, 1); for (i = 0; i < nquad1; ++i) { -- GitLab From 6b93ba2b622d1bc1b40f439492e184a5489f3d34 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Sun, 5 Jun 2016 15:36:05 +0100 Subject: [PATCH 13/34] Use direct calls to vector operations --- library/MultiRegions/GlobalLinSysStaticCond.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/library/MultiRegions/GlobalLinSysStaticCond.cpp b/library/MultiRegions/GlobalLinSysStaticCond.cpp index 4b35fe198..703d9aa80 100644 --- a/library/MultiRegions/GlobalLinSysStaticCond.cpp +++ b/library/MultiRegions/GlobalLinSysStaticCond.cpp @@ -172,12 +172,12 @@ namespace Nektar else { DNekScalBlkMat &BinvD = *m_BinvD; - V_LocBnd = BinvD*F_Int; + Multiply( V_LocBnd, BinvD, F_Int); } pLocToGloMap->AssembleBnd(V_LocBnd,V_GlobHomBndTmp, nDirBndDofs); - F_HomBnd = F_HomBnd - V_GlobHomBndTmp; + Subtract(F_HomBnd, F_HomBnd, V_GlobHomBndTmp); // Transform from original basis to low energy v_BasisTransform(F, nDirBndDofs); @@ -202,7 +202,7 @@ namespace Nektar Vmath::Vcopy(nGlobHomBndDofs, tmp.get()+nDirBndDofs, 1, V_GlobHomBndTmp.GetPtr().get(), 1); - F_HomBnd = F_HomBnd - V_GlobHomBndTmp; + Subtract( F_HomBnd, F_HomBnd, V_GlobHomBndTmp); } } @@ -210,7 +210,6 @@ namespace Nektar if(atLastLevel) { Array pert(nGlobBndDofs,0.0); - NekVector Pert(nGlobBndDofs,pert,eWrapper); // Solve for difference from initial solution given inout; SolveLinearSystem( @@ -251,8 +250,7 @@ namespace Nektar } F_Int = F_Int - C*V_LocBnd; } - - V_Int = invD*F_Int; + Multiply( V_Int, invD, F_Int); } } -- GitLab From 3044e57b5e819dd21ca69fe271bbafd4857e7671 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Sun, 5 Jun 2016 18:24:25 +0100 Subject: [PATCH 14/34] Add option to disable locks for threading --- CMakeLists.txt | 9 +++ .../LibUtilities/BasicUtils/MutexTypeDefs.h | 77 +++++++++++++++++++ .../LibUtilities/BasicUtils/NekFactory.hpp | 14 +--- .../LibUtilities/BasicUtils/NekManager.hpp | 10 +-- library/LibUtilities/BasicUtils/Thread.h | 9 +-- library/LibUtilities/BasicUtils/Vmath.cpp | 5 +- library/LibUtilities/CMakeLists.txt | 1 + .../Memory/ThreadSpecificPool.hpp | 8 +- 8 files changed, 103 insertions(+), 30 deletions(-) create mode 100644 library/LibUtilities/BasicUtils/MutexTypeDefs.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d26c76f1..b72e42d69 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,6 +141,15 @@ OPTION(NEKTAR_USE_MEMORY_POOLS "Use memory pools to accelerate memory allocation." ON) MARK_AS_ADVANCED(NEKTAR_USE_MEMORY_POOLS) +# Option to disable threads +OPTION(NEKTAR_USE_THREADS + "Enable functionality required for using threads." ON) +MARK_AS_ADVANCED(NEKTAR_USE_THREADS) + +IF (NEKTAR_USE_THREADS) + ADD_DEFINITIONS(-DNEKTAR_USING_THREADS) +ENDIF() + IF (MSVC) # Needed for M_PI to be visible in visual studio. ADD_DEFINITIONS(-D_USE_MATH_DEFINES) diff --git a/library/LibUtilities/BasicUtils/MutexTypeDefs.h b/library/LibUtilities/BasicUtils/MutexTypeDefs.h new file mode 100644 index 000000000..1b9277d79 --- /dev/null +++ b/library/LibUtilities/BasicUtils/MutexTypeDefs.h @@ -0,0 +1,77 @@ +/////////////////////////////////////////////////////////////////////////////// +// +// File: MutexTypeDefs.h +// +// For more information, please see: http://www.nektar.info +// +// The MIT License +// +// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), +// Department of Aeronautics, Imperial College London (UK), and Scientific +// Computing and Imaging Institute, University of Utah (USA). +// +// License for the specific language governing rights and limitations under +// Permission is hereby granted, free of charge, to any person obtaining a +// copy of this software and associated documentation files (the "Software"), +// to deal in the Software without restriction, including without limitation +// the rights to use, copy, modify, merge, publish, distribute, sublicense, +// and/or sell copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included +// in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +// DEALINGS IN THE SOFTWARE. +// +// Description: Type definitions of mutex objects. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef NEKTAR_LIB_UTILITIES_BASICUTILS_MUTEXTYPEDEFS_H +#define NEKTAR_LIB_UTILITIES_BASICUTILS_MUTEXTYPEDEFS_H + +#include +#include + +#include + +namespace Nektar +{ +namespace LibUtilities +{ +#ifdef NEKTAR_USING_THREADS + typedef boost::unique_lock WriteLock; + typedef boost::shared_lock ReadLock; + typedef boost::mutex::scoped_lock ScopedLock; + typedef boost::shared_mutex SharedMutex; + typedef boost::mutex Mutex; +#else + class Lock + { + public: + LIB_UTILITIES_EXPORT Lock(){} + + LIB_UTILITIES_EXPORT Lock(int i){} + + LIB_UTILITIES_EXPORT ~Lock(){} + + LIB_UTILITIES_EXPORT inline void lock(){} + + LIB_UTILITIES_EXPORT inline void unlock(){} + }; + typedef Lock WriteLock; + typedef Lock ReadLock; + typedef Lock ScopedLock; + typedef int SharedMutex; + typedef int Mutex; +#endif +} +} + +#endif //NEKTAR_LIB_UTILITIES_BASICUTILS_MUTEXTYPEDEFS_H diff --git a/library/LibUtilities/BasicUtils/NekFactory.hpp b/library/LibUtilities/BasicUtils/NekFactory.hpp index 0e6e018e9..92303eb67 100644 --- a/library/LibUtilities/BasicUtils/NekFactory.hpp +++ b/library/LibUtilities/BasicUtils/NekFactory.hpp @@ -43,8 +43,6 @@ #include #include #include - #include - #include #include @@ -53,6 +51,7 @@ #include #include + #include #ifndef MAX_PARAM #define MAX_PARAM 5 // default maximum number of parameters to support @@ -67,8 +66,6 @@ namespace Nektar // Generate parameter typenames with default type of 'none' #define FACTORY_print(z, n, data) BOOST_PP_CAT(data, n) = none - typedef boost::unique_lock WriteLock; - typedef boost::shared_lock ReadLock; /** * @class NekFactory @@ -325,7 +322,7 @@ namespace Nektar TMapFactory mMapFactory; - boost::shared_mutex m_mutex; + SharedMutex m_mutex; }; @@ -347,11 +344,6 @@ namespace Nektar #define n BOOST_PP_ITERATION() // Define macro for printing the non-required template parameters #define FACTORY_print(z, n, data) data - #include - #include - -typedef boost::unique_lock WriteLock; -typedef boost::shared_lock ReadLock; template < typename tKey, typename tBase BOOST_PP_COMMA_IF(n) @@ -500,7 +492,7 @@ typedef boost::shared_lock ReadLock; NekFactory& operator=(const NekFactory& rhs); TMapFactory mMapFactory; - boost::shared_mutex m_mutex; + SharedMutex m_mutex; }; #undef n diff --git a/library/LibUtilities/BasicUtils/NekManager.hpp b/library/LibUtilities/BasicUtils/NekManager.hpp index 1bfb6e71b..1e6f1818c 100644 --- a/library/LibUtilities/BasicUtils/NekManager.hpp +++ b/library/LibUtilities/BasicUtils/NekManager.hpp @@ -41,11 +41,10 @@ #include #include #include -#include -#include #include #include +#include namespace Nektar { @@ -53,9 +52,6 @@ namespace Nektar { using namespace std; - typedef boost::unique_lock WriteLock; - typedef boost::shared_lock ReadLock; - template struct defOpLessCreator { @@ -319,12 +315,12 @@ namespace Nektar static FlagContainerPool m_managementEnabledContainerPool; CreateFuncType m_globalCreateFunc; CreateFuncContainer m_keySpecificCreateFuncs; - static boost::shared_mutex m_mutex; + static SharedMutex m_mutex; }; template typename NekManager::ValueContainerPool NekManager::m_ValueContainerPool; template typename NekManager::FlagContainerPool NekManager::m_managementEnabledContainerPool; template - typename boost::shared_mutex NekManager::m_mutex; + SharedMutex NekManager::m_mutex; } } diff --git a/library/LibUtilities/BasicUtils/Thread.h b/library/LibUtilities/BasicUtils/Thread.h index 1e5b09172..d88480d7c 100644 --- a/library/LibUtilities/BasicUtils/Thread.h +++ b/library/LibUtilities/BasicUtils/Thread.h @@ -38,8 +38,6 @@ #include #include -#include -#include #include #include #include @@ -47,6 +45,8 @@ #include #include +#include + namespace Nektar { namespace Thread @@ -321,9 +321,6 @@ class ThreadManager : public boost::enable_shared_from_this } }; -typedef boost::unique_lock WriteLock; -typedef boost::shared_lock ReadLock; - /** * A class to manage multiple ThreadManagers. It also acts as a cut-out during * static initialisation, where code attempts to grab a ThreadManager before any @@ -345,7 +342,7 @@ class ThreadMaster { private: std::vector m_threadManagers; - boost::shared_mutex m_mutex; + LibUtilities::SharedMutex m_mutex; std::string m_threadingType; public: diff --git a/library/LibUtilities/BasicUtils/Vmath.cpp b/library/LibUtilities/BasicUtils/Vmath.cpp index 8565973da..6a8c6dd6c 100644 --- a/library/LibUtilities/BasicUtils/Vmath.cpp +++ b/library/LibUtilities/BasicUtils/Vmath.cpp @@ -36,6 +36,7 @@ #include #include #include +#include namespace Vmath { @@ -131,14 +132,14 @@ namespace Vmath #undef EPS #undef RNMX - static boost::mutex mutex; + static Nektar::LibUtilities::Mutex mutex; template LIB_UTILITIES_EXPORT Nektar::NekDouble ran2 (long* idum); /// \brief Fills a vector with white noise. template void FillWhiteNoise( int n, const T eps, T *x, const int incx, int outseed) { // Protect the static vars here and in ran2 - boost::mutex::scoped_lock l(mutex); + Nektar::LibUtilities::ScopedLock l(mutex); while( n-- ) { static int iset = 0; diff --git a/library/LibUtilities/CMakeLists.txt b/library/LibUtilities/CMakeLists.txt index 65f699fac..d2fe36fcb 100644 --- a/library/LibUtilities/CMakeLists.txt +++ b/library/LibUtilities/CMakeLists.txt @@ -18,6 +18,7 @@ SET(BasicUtilsHeaders ./BasicUtils/MeshEntities.hpp ./BasicUtils/MeshPartition.h ./BasicUtils/MeshPartitionMetis.h + ./BasicUtils/MutexTypeDefs.h ./BasicUtils/NekManager.hpp ./BasicUtils/NekFactory.hpp ./BasicUtils/NekPtr.hpp diff --git a/library/LibUtilities/Memory/ThreadSpecificPool.hpp b/library/LibUtilities/Memory/ThreadSpecificPool.hpp index 8f7240662..f815be531 100644 --- a/library/LibUtilities/Memory/ThreadSpecificPool.hpp +++ b/library/LibUtilities/Memory/ThreadSpecificPool.hpp @@ -40,12 +40,12 @@ #include #include -#include #include #include #include #include +#include #include @@ -100,7 +100,7 @@ namespace Nektar /// \throw std::bad_alloc if memory is exhausted. void* Allocate() { - boost::mutex::scoped_lock l(m_mutex); + LibUtilities::ScopedLock l(m_mutex); void* result = m_pool->malloc(); #if defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) @@ -116,7 +116,7 @@ namespace Nektar /// from this pool. Doing this will result in undefined behavior. void Deallocate(const void* p) { - boost::mutex::scoped_lock l(m_mutex); + LibUtilities::ScopedLock l(m_mutex); #if defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) // The idea here is to fill the returned memory with some known // pattern, then detect that pattern on the allocate. If the @@ -135,7 +135,7 @@ namespace Nektar //boost::thread_specific_ptr > m_pool; boost::pool<>* m_pool; size_t m_blockSize; - boost::mutex m_mutex; + LibUtilities::Mutex m_mutex; }; } -- GitLab From 1a778699bb30c6237c600604f7e883295cb1a36a Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Mon, 20 Jun 2016 17:05:31 +0100 Subject: [PATCH 15/34] Avoid unnecessary memset --- docs/tutorial | 2 +- .../LinearAlgebra/MatrixVectorMultiplication.cpp | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/docs/tutorial b/docs/tutorial index 244b5128d..a20044f23 160000 --- a/docs/tutorial +++ b/docs/tutorial @@ -1 +1 @@ -Subproject commit 244b5128da8a9c9baedf71230db81344c790b03b +Subproject commit a20044f232e15e3fe2754a90215f8646db241147 diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index a2d74f0f8..4efc95b21 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -212,8 +212,6 @@ namespace Nektar Array colSizes; lhs.GetBlockSizes(rowSizes, colSizes); - std::fill(result.begin(), result.end(), 0.0); - unsigned int curResultRow = 0; unsigned int curWrapperRow = 0; unsigned int rowsInBlock, columnsInBlock; @@ -249,6 +247,10 @@ namespace Nektar curResultRow += rowsInBlock; curWrapperRow += columnsInBlock; } + if (curResultRow < result.GetRows()) + { + std::fill(result.begin()+curResultRow, result.end(), 0.0); + } } void NekMultiplyLowerTriangularMatrix(NekDouble* result, -- GitLab From ade025bd9ec059622e40c9a5cf8d0001536d5178 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Tue, 21 Jun 2016 01:00:51 +0100 Subject: [PATCH 16/34] Add more functions for symmetric matrices --- library/LibUtilities/LinearAlgebra/Blas.hpp | 6 +-- library/LibUtilities/LinearAlgebra/Lapack.hpp | 12 +++++ .../LibUtilities/LinearAlgebra/MatrixFuncs.h | 45 +++++++++++++++++++ .../MatrixVectorMultiplication.cpp | 32 ++++++++++++- .../LinearAlgebra/StandardMatrix.cpp | 5 ++- 5 files changed, 95 insertions(+), 5 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/Blas.hpp b/library/LibUtilities/LinearAlgebra/Blas.hpp index 3d1cdd2e8..a23062499 100644 --- a/library/LibUtilities/LinearAlgebra/Blas.hpp +++ b/library/LibUtilities/LinearAlgebra/Blas.hpp @@ -79,7 +79,7 @@ namespace Blas void F77NAME(dtpmv) (const char& uplo, const char& trans, const char& diag, const int& n, const double* ap, double* x, const int& incx); - void F77NAME(dspmv) (const char& trans, const int& n, const double& alpha, + void F77NAME(dspmv) (const char& uplo, const int& n, const double& alpha, const double* a, const double* x, const int& incx, const double& beta, double* y, const int& incy); @@ -192,11 +192,11 @@ namespace Blas /// \brief BLAS level 2: Matrix vector multiply y = A \e x where A /// is symmetric packed - static inline void Dspmv (const char& trans, const int& n, const double& alpha, + static inline void Dspmv (const char& uplo, const int& n, const double& alpha, const double* a, const double* x, const int& incx, const double& beta, double* y, const int& incy) { - F77NAME(dspmv) (trans,n,alpha,a,x,incx,beta,y,incy); + F77NAME(dspmv) (uplo,n,alpha,a,x,incx,beta,y,incy); } static inline void Dsbmv (const char& uplo, const int& m, const int& k, diff --git a/library/LibUtilities/LinearAlgebra/Lapack.hpp b/library/LibUtilities/LinearAlgebra/Lapack.hpp index ea099f319..6b54adfd9 100644 --- a/library/LibUtilities/LinearAlgebra/Lapack.hpp +++ b/library/LibUtilities/LinearAlgebra/Lapack.hpp @@ -48,6 +48,10 @@ namespace Lapack const int& nrhs, const double* ap, const int *ipiv, double* b, const int& ldb, int& info); + void F77NAME(dsptri) (const char& uplo, const int& n, + const double* ap, + const int *ipiv, double* work, + int& info); void F77NAME(dtrtrs) (const char& uplo, const char& trans, const char& diag, const int& n, const int& nrhs, const double* a, const int& lda, double* b, const int& ldb, int& info); @@ -118,6 +122,14 @@ namespace Lapack F77NAME(dsptrs) (uplo,n,nrhs,ap,ipiv,b,ldb,info); } + /// \brief Invert a real symmetric matrix problem + static inline void Dsptri (const char& uplo, const int& n, + const double* ap, const int *ipiv, double* work, + int& info) + { + F77NAME(dsptri) (uplo,n,ap,ipiv,work,info); + } + /// \brief Cholesky factor a real Positive Definite packed-symmetric matrix. static inline void Dpptrf (const char& uplo, const int& n, double *ap, int& info) diff --git a/library/LibUtilities/LinearAlgebra/MatrixFuncs.h b/library/LibUtilities/LinearAlgebra/MatrixFuncs.h index 88824521f..fe829dd71 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixFuncs.h +++ b/library/LibUtilities/LinearAlgebra/MatrixFuncs.h @@ -202,6 +202,51 @@ namespace Nektar static unsigned int CalculateIndex(unsigned int curRow, unsigned int curColumn); + template + static void Invert(unsigned int rows, unsigned int columns, + Array& data) + { +#ifdef NEKTAR_USING_BLAS + ASSERTL0(rows==columns, "Only square matrices can be inverted."); + + int n = columns; + int info = 0; + Array ipivot(n); + Array work(n); + + Lapack::Dsptrf('U', n, data.get(), ipivot.get(), info); + + if( info < 0 ) + { + std::string message = "ERROR: The " + boost::lexical_cast(-info) + "th parameter had an illegal parameter for dsptrf"; + ASSERTL0(false, message.c_str()); + } + else if( info > 0 ) + { + std::string message = "ERROR: Element u_" + boost::lexical_cast(info) + boost::lexical_cast(info) + " is 0 from dsptrf"; + ASSERTL0(false, message.c_str()); + } + + Lapack::Dsptri('U', n, data.get(), ipivot.get(), + work.get(), info); + + if( info < 0 ) + { + std::string message = "ERROR: The " + boost::lexical_cast(-info) + "th parameter had an illegal parameter for dsptri"; + ASSERTL0(false, message.c_str()); + } + else if( info > 0 ) + { + std::string message = "ERROR: Element u_" + boost::lexical_cast(info) + boost::lexical_cast(info) + " is 0 from dsptri"; + ASSERTL0(false, message.c_str()); + } + +#else + // error Full matrix inversion not supported without blas. + BOOST_STATIC_ASSERT(sizeof(DataType) == 0); +#endif + } + static boost::tuples::tuple Advance(const unsigned int totalRows, const unsigned int totalColumns, const unsigned int curRow, const unsigned int curColumn); diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index 4efc95b21..a94fcf915 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -337,6 +337,36 @@ namespace Nektar } } + template + void NekMultiplySymmetricMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs, + typename boost::enable_if + < + CanGetRawPtr > + >::type* p=0) + { + const unsigned int* size = lhs.GetSize(); + + double alpha = lhs.Scale(); + const double* a = lhs.GetRawPtr(); + const double* x = rhs; + int incx = 1; + double beta = 0.0; + double* y = result; + int incy = 1; + + Blas::Dspmv('U', size[0], alpha, a, x, incx, beta, y, incy); + } + + template + void NekMultiplySymmetricMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs, + typename boost::enable_if + < + boost::mpl::not_ > > + >::type* p = 0) + { + NekMultiplyUnspecializedMatrixType(result, lhs, rhs); + } + template void NekMultiplyFullMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs, typename boost::enable_if @@ -391,7 +421,7 @@ namespace Nektar NekMultiplyLowerTriangularMatrix(result, lhs, rhs); break; case eSYMMETRIC: - NekMultiplyUnspecializedMatrixType(result, lhs, rhs); + NekMultiplySymmetricMatrix(result, lhs, rhs); break; case eBANDED: NekMultiplyBandedMatrix(result, lhs, rhs); diff --git a/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp b/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp index f5b29d892..27d590c1e 100644 --- a/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp @@ -749,9 +749,12 @@ namespace Nektar DiagonalMatrixFuncs::Invert(this->GetRows(), this->GetColumns(), this->GetData()); break; + case eSYMMETRIC: + SymmetricMatrixFuncs::Invert(this->GetRows(), this->GetColumns(), + this->GetData()); + break; case eUPPER_TRIANGULAR: case eLOWER_TRIANGULAR: - case eSYMMETRIC: case eBANDED: NEKERROR(ErrorUtil::efatal, "Unhandled matrix type"); break; -- GitLab From 478e7fe7ce10156ffce99d74584adce67dec2499 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Tue, 21 Jun 2016 01:04:46 +0100 Subject: [PATCH 17/34] Set interior matrix as symmetric --- library/LocalRegions/QuadExp.cpp | 24 ++++++++++++----- library/LocalRegions/SegExp.cpp | 19 ++++++++++--- library/LocalRegions/TriExp.cpp | 21 ++++++++++++--- .../MultiRegions/GlobalLinSysStaticCond.cpp | 27 +++++++------------ library/StdRegions/StdExpansion.cpp | 14 +++++++++- 5 files changed, 73 insertions(+), 32 deletions(-) diff --git a/library/LocalRegions/QuadExp.cpp b/library/LocalRegions/QuadExp.cpp index d1ff88526..9b03850c4 100644 --- a/library/LocalRegions/QuadExp.cpp +++ b/library/LocalRegions/QuadExp.cpp @@ -388,10 +388,11 @@ namespace Nektar rhs[i] = tmp1[ mapArray[i] ]; } - Blas::Dgemv('N', nInteriorDofs, nInteriorDofs, - matsys->Scale(), - &((matsys->GetOwnedMatrix())->GetPtr())[0], - nInteriorDofs,rhs.get(),1,0.0,result.get(),1); + Blas::Dspmv('U',nInteriorDofs, + matsys->Scale(), + &((matsys->GetOwnedMatrix())->GetPtr())[0], + rhs.get(),1,0.0, + result.get(),1); for (i = 0; i < nInteriorDofs; i++) { @@ -1985,8 +1986,19 @@ namespace Nektar AllocateSharedPtr(nbdry,nint); DNekMatSharedPtr C = MemoryManager:: AllocateSharedPtr(nint,nbdry); - DNekMatSharedPtr D = MemoryManager:: - AllocateSharedPtr(nint,nint); + DNekMatSharedPtr D; + if ( (mkey.GetMatrixType() == StdRegions::eMass) || + (mkey.GetMatrixType() == StdRegions::eLaplacian) || + (mkey.GetMatrixType() == StdRegions::eHelmholtz)) + { + D = MemoryManager:: + AllocateSharedPtr(nint,nint, eSYMMETRIC); + } + else + { + D = MemoryManager:: + AllocateSharedPtr(nint,nint, eFULL); + } Array bmap(nbdry); Array imap(nint); diff --git a/library/LocalRegions/SegExp.cpp b/library/LocalRegions/SegExp.cpp index 1871977cf..6b15c2961 100644 --- a/library/LocalRegions/SegExp.cpp +++ b/library/LocalRegions/SegExp.cpp @@ -461,10 +461,10 @@ cout<<"deps/dx ="< Date: Wed, 22 Jun 2016 11:39:25 +0100 Subject: [PATCH 21/34] Use collections for PhysDeriv in a single direction --- library/Collections/Collection.h | 19 + library/Collections/Operator.h | 10 + library/Collections/PhysDeriv.cpp | 385 ++++++++++++++++++ library/MultiRegions/ExpList.cpp | 22 +- .../VelocityCorrectionScheme.cpp | 5 +- 5 files changed, 429 insertions(+), 12 deletions(-) diff --git a/library/Collections/Collection.h b/library/Collections/Collection.h index 9f59a81c7..79d516205 100644 --- a/library/Collections/Collection.h +++ b/library/Collections/Collection.h @@ -79,6 +79,12 @@ class Collection Array &output1, Array &output2); + inline void ApplyOperator( + const OperatorType &op, + int dir, + const Array &inarray, + Array &output); + inline bool HasOperator(const OperatorType &op); protected: @@ -135,6 +141,19 @@ inline void Collection::ApplyOperator( (*m_ops[op])(inarray, output0, output1, output2, wsp); } +/** + * + */ +inline void Collection::ApplyOperator( + const OperatorType &op, + int dir, + const Array &inarray, + Array &output) +{ + Array wsp(m_ops[op]->GetWspSize()); + (*m_ops[op])(dir, inarray, output, wsp); +} + inline bool Collection::HasOperator(const OperatorType &op) { return (m_ops.find(op) != m_ops.end()); diff --git a/library/Collections/Operator.h b/library/Collections/Operator.h index 70a7f8d8d..d499653e7 100644 --- a/library/Collections/Operator.h +++ b/library/Collections/Operator.h @@ -127,6 +127,16 @@ class Operator Array &wsp = NullNekDouble1DArray) = 0; + COLLECTIONS_EXPORT virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp + = NullNekDouble1DArray) + { + ASSERTL0(false, "Not valid for this operator."); + } + COLLECTIONS_EXPORT virtual ~Operator(); /// Get the size of the required workspace diff --git a/library/Collections/PhysDeriv.cpp b/library/Collections/PhysDeriv.cpp index e107cd73d..da8eb7363 100644 --- a/library/Collections/PhysDeriv.cpp +++ b/library/Collections/PhysDeriv.cpp @@ -105,6 +105,43 @@ class PhysDeriv_StdMat : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // calculate local derivatives + for(int i = 0; i < m_dim; ++i) + { + Blas::Dgemm('N', 'N', m_derivMat[i]->GetRows(), m_numElmt, + m_derivMat[i]->GetColumns(), 1.0, + m_derivMat[i]->GetRawPtr(), + m_derivMat[i]->GetRows(), input.get(), nPhys, + 0.0, &Diff[i][0],nPhys); + } + + // calculate full derivative + Vmath::Zero(ntot,output,1); + for(int j = 0; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, + output, 1, + output, 1); + } + } + protected: Array m_derivMat; Array m_derivFac; @@ -240,6 +277,42 @@ class PhysDeriv_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // calculate local derivatives + for (int i = 0; i < m_numElmt; ++i) + { + m_stdExp->PhysDeriv(input + i*nPhys, + tmp0 = Diff[0] + i*nPhys, + tmp1 = Diff[1] + i*nPhys, + tmp2 = Diff[2] + i*nPhys); + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, + output, 1, + output, 1); + } + } + protected: Array m_derivFac; int m_dim; @@ -361,6 +434,23 @@ class PhysDeriv_NoCollection : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nPhys = m_expList[0]->GetTotPoints(); + Array tmp; + + // calculate local derivatives + for (int i = 0; i < m_numElmt; ++i) + { + m_expList[i]->PhysDeriv(dir, input + i*nPhys, + tmp = output + i*nPhys); + } + } + protected: vector m_expList; @@ -456,6 +546,29 @@ class PhysDeriv_SumFac_Seg : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nqcol = m_nquad0*m_numElmt; + + ASSERTL1(wsp.num_elements() == m_wspSize, + "Incorrect workspace size"); + ASSERTL1(input.num_elements() >= nqcol, + "Incorrect input size"); + + Array diff0(nqcol, wsp); + + Blas::Dgemm('N', 'N', m_nquad0, m_numElmt, + m_nquad0, 1.0, m_Deriv0, m_nquad0, + input.get(), m_nquad0, 0.0, + diff0.get(), m_nquad0); + + Vmath::Vmul(nqcol, m_derivFac[dir], 1, diff0, 1, output, 1); + } + protected: int m_coordim; const int m_nquad0; @@ -547,6 +660,42 @@ class PhysDeriv_SumFac_Quad : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nqtot = m_nquad0 * m_nquad1; + const int nqcol = nqtot*m_numElmt; + + ASSERTL1(wsp.num_elements() == m_wspSize, + "Incorrect workspace size"); + ASSERTL1(input.num_elements() >= nqcol, + "Incorrect input size"); + + Array diff0(nqcol, wsp ); + Array diff1(nqcol, wsp + nqcol); + + Blas::Dgemm('N', 'N', m_nquad0, m_nquad1*m_numElmt, + m_nquad0, 1.0, m_Deriv0, m_nquad0, + input.get(), m_nquad0, 0.0, + diff0.get(), m_nquad0); + + int cnt = 0; + for (int i = 0; i < m_numElmt; ++i, cnt += nqtot) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0, + input.get() + cnt, m_nquad0, + m_Deriv1, m_nquad1, 0.0, + diff1.get() + cnt, m_nquad0); + } + + Vmath::Vmul (nqcol, m_derivFac[2*dir] , 1, diff0, 1, output, 1); + Vmath::Vvtvp (nqcol, m_derivFac[2*dir+1], 1, diff1, 1, output, 1, + output, 1); + } + protected: int m_coordim; const int m_nquad0; @@ -651,6 +800,52 @@ class PhysDeriv_SumFac_Tri : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nqtot = m_nquad0 * m_nquad1; + const int nqcol = nqtot*m_numElmt; + + ASSERTL1(wsp.num_elements() == m_wspSize, + "Incorrect workspace size"); + ASSERTL1(input.num_elements() >= nqcol, + "Incorrect input size"); + + Array diff0(nqcol, wsp ); + Array diff1(nqcol, wsp + nqcol); + + // Tensor Product Derivative + Blas::Dgemm('N', 'N', m_nquad0, m_nquad1*m_numElmt, + m_nquad0, 1.0, m_Deriv0, m_nquad0, + input.get(), m_nquad0, 0.0, + diff0.get(), m_nquad0); + + int cnt = 0; + for (int i = 0; i < m_numElmt; ++i, cnt += nqtot) + { + // scale diff0 by geometric factor: 2/(1-z1) + Vmath::Vmul(nqtot,&m_fac1[0],1,diff0.get()+cnt,1, + diff0.get()+cnt,1); + + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0, + input.get() + cnt, m_nquad0, + m_Deriv1, m_nquad1, 0.0, + diff1.get() + cnt, m_nquad0); + + // add to diff1 by diff0 scaled by: (1_z0)/(1-z1) + Vmath::Vvtvp(nqtot,m_fac0.get(),1,diff0.get()+cnt,1, + diff1.get()+cnt,1,diff1.get()+cnt,1); + } + + + Vmath::Vmul (nqcol, m_derivFac[2*dir] , 1, diff0, 1, output, 1); + Vmath::Vvtvp (nqcol, m_derivFac[2*dir+1], 1, diff1, 1, output, 1, + output, 1); + } + protected: int m_coordim; const int m_nquad0; @@ -783,6 +978,54 @@ class PhysDeriv_SumFac_Hex : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + Blas::Dgemm('N','N', m_nquad0,m_nquad1*m_nquad2*m_numElmt, + m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0], + m_nquad0,0.0,&Diff[0][0],m_nquad0); + + for(int i = 0; i < m_numElmt; ++i) + { + for (int j = 0; j < m_nquad2; ++j) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, + 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0, m_Deriv1, m_nquad1, 0.0, + &Diff[1][i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0); + } + + Blas::Dgemm('N','T',m_nquad0*m_nquad1,m_nquad2,m_nquad2, + 1.0, &input[i*nPhys],m_nquad0*m_nquad1, + m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys], + m_nquad0*m_nquad1); + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, + output, 1, + output, 1); + } + } + protected: Array m_derivFac; int m_dim; @@ -927,6 +1170,88 @@ class PhysDeriv_SumFac_Tet : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // dEta0 + Blas::Dgemm('N','N', m_nquad0,m_nquad1*m_nquad2*m_numElmt, + m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0], + m_nquad0,0.0,&Diff[0][0],m_nquad0); + + // dEta2 + for(int i = 0; i < m_numElmt; ++i) + { + Blas::Dgemm('N','T',m_nquad0*m_nquad1,m_nquad2,m_nquad2, + 1.0, &input[i*nPhys],m_nquad0*m_nquad1, + m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys], + m_nquad0*m_nquad1); + } + + for(int i = 0; i < m_numElmt; ++i) + { + + // dEta1 + for (int j = 0; j < m_nquad2; ++j) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, + 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0, m_Deriv1, m_nquad1, 0.0, + &Diff[1][i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0); + } + + // dxi2 = (1 + eta_1)/(1 -eta_2)*dEta1 + dEta2 + Vmath::Vvtvp(nPhys, m_fac3.get(), 1, + Diff[1].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1); + + // dxi1 = 2/(1 - eta_2) dEta1 + Vmath::Vmul(nPhys, m_fac2.get(), 1, + Diff[1].get() + i*nPhys, 1, + Diff[1].get() + i*nPhys, 1); + + // dxi1 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi1 + Vmath::Vvtvp(nPhys, m_fac1.get(), 1, + Diff[0].get() + i*nPhys, 1, + Diff[1].get() + i*nPhys, 1, + Diff[1].get() + i*nPhys, 1); + + // dxi2 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi2 + Vmath::Vvtvp(nPhys, m_fac1.get(), 1, + Diff[0].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1); + + // dxi0 = 4.0/((1-eta_1)(1-eta_2)) dEta0 + Vmath::Vmul(nPhys, m_fac0.get(), 1, + Diff[0].get() + i*nPhys, 1, + Diff[0].get() + i*nPhys, 1); + + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, output, 1, output, 1); + } + } + protected: Array m_derivFac; int m_dim; @@ -1085,6 +1410,66 @@ class PhysDeriv_SumFac_Prism : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // dEta0 + Blas::Dgemm('N','N', m_nquad0,m_nquad1*m_nquad2*m_numElmt, + m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0], + m_nquad0,0.0,&Diff[0][0],m_nquad0); + + int cnt = 0; + for(int i = 0; i < m_numElmt; ++i) + { + + // dEta 1 + for (int j = 0; j < m_nquad2; ++j) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, + 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0, m_Deriv1, m_nquad1, 0.0, + &Diff[1][i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0); + } + + // dEta 2 + Blas::Dgemm('N','T',m_nquad0*m_nquad1,m_nquad2,m_nquad2, + 1.0, &input[i*nPhys],m_nquad0*m_nquad1, + m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys], + m_nquad0*m_nquad1); + + // dxi0 = 2/(1-eta_2) d Eta_0 + Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[0].get()+cnt,1, + Diff[0].get()+cnt,1); + + // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2; + Vmath::Vvtvp(nPhys,&m_fac1[0],1,Diff[0].get()+cnt,1, + Diff[2].get()+cnt,1,Diff[2].get()+cnt,1); + cnt += nPhys; + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, output, 1, output, 1); + } + } + protected: Array m_derivFac; int m_dim; diff --git a/library/MultiRegions/ExpList.cpp b/library/MultiRegions/ExpList.cpp index f61f32ea4..1177cd6bc 100644 --- a/library/MultiRegions/ExpList.cpp +++ b/library/MultiRegions/ExpList.cpp @@ -514,8 +514,18 @@ namespace Nektar const Array &inarray, Array &out_d) { - Direction edir = DirCartesianMap[dir]; - v_PhysDeriv(edir, inarray,out_d); + Array e_out_d; + int offset; + for (int i = 0; i < m_collections.size(); ++i) + { + offset = m_coll_phys_offset[i]; + e_out_d = out_d + offset; + + m_collections[i].ApplyOperator(Collections::ePhysDeriv, + dir, + inarray + offset, + e_out_d); + } } void ExpList::v_PhysDeriv(Direction edir, const Array &inarray, @@ -544,13 +554,7 @@ namespace Nektar { // convert enum into int int intdir= (int)edir; - Array e_out_d; - for(i= 0; i < (*m_exp).size(); ++i) - { - e_out_d = out_d + m_phys_offset[i]; - (*m_exp)[i]->PhysDeriv(intdir, inarray+m_phys_offset[i], e_out_d); - } - + v_PhysDeriv(intdir, inarray, out_d); } } diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp index 34b7bf097..86bddf4c3 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp @@ -367,13 +367,12 @@ namespace Nektar int physTot = m_fields[0]->GetTotPoints(); int nvel = m_velocity.num_elements(); - m_fields[0]->PhysDeriv(MultiRegions::DirCartesianMap[0],fields[0], + m_fields[0]->PhysDeriv(0,fields[0], Forcing[0]); for(i = 1; i < nvel; ++i) { // Use Forcing[1] as storage since it is not needed for the pressure - m_fields[i]->PhysDeriv(MultiRegions::DirCartesianMap[i],fields[i], - Forcing[1]); + m_fields[i]->PhysDeriv(i,fields[i],Forcing[1]); Vmath::Vadd(physTot,Forcing[1],1,Forcing[0],1,Forcing[0],1); } -- GitLab From 5924ace53c721fff62f2b1c713bd0bab16a67c9d Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Wed, 22 Jun 2016 12:24:07 +0100 Subject: [PATCH 22/34] Use new function for block matrix multiply --- .../MatrixOperationsDeclarations.hpp | 4 ++ .../MatrixVectorMultiplication.cpp | 55 +++++++++++++++++++ .../MultiRegions/GlobalLinSysStaticCond.cpp | 2 +- 3 files changed, 60 insertions(+), 1 deletion(-) diff --git a/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp b/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp index 5dd8c72ac..640278e4f 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp +++ b/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp @@ -75,6 +75,10 @@ namespace Nektar const NekMatrix& lhs, const NekVector& rhs); + void DiagonalBlockFullScalMatrixMultiply(NekVector& result, + const NekMatrix, ScaledMatrixTag>, BlockMatrixTag>& lhs, + const NekVector& rhs); + //////////////////////////////////////////////////////////////////////////////////// // Matrix-Constant Multiplication //////////////////////////////////////////////////////////////////////////////////// diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index a94fcf915..4f6844e8e 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -253,6 +253,61 @@ namespace Nektar } } + void DiagonalBlockFullScalMatrixMultiply(NekVector& result, + const NekMatrix, ScaledMatrixTag>, BlockMatrixTag>& lhs, + const NekVector& rhs) + { + unsigned int numberOfBlockRows = lhs.GetNumberOfBlockRows(); + double* result_ptr = result.GetRawPtr(); + const double* rhs_ptr = rhs.GetRawPtr(); + + Array rowSizes; + Array colSizes; + lhs.GetBlockSizes(rowSizes, colSizes); + + unsigned int curResultRow = 0; + unsigned int curWrapperRow = 0; + unsigned int rowsInBlock, columnsInBlock; + for(unsigned int blockRow = 0; blockRow < numberOfBlockRows; ++blockRow) + { + if ( blockRow == 0) + { + rowsInBlock = rowSizes[blockRow] + 1; + columnsInBlock = colSizes[blockRow] + 1; + } + else + { + rowsInBlock = rowSizes[blockRow] - rowSizes[blockRow-1]; + columnsInBlock = colSizes[blockRow] - colSizes[blockRow-1]; + } + + if( rowsInBlock == 0 || columnsInBlock == 0) + { + continue; + } + + const DNekScalMat* block = lhs.GetBlockPtr(blockRow, blockRow); + if( !block ) + { + continue; + } + + double* resultWrapper = result_ptr + curResultRow; + const double* rhsWrapper = rhs_ptr + curWrapperRow; + curResultRow += rowsInBlock; + curWrapperRow += columnsInBlock; + + // Multiply + Blas::Dgemv('N', rowsInBlock, columnsInBlock, block->Scale(), + block->GetRawPtr(), rowsInBlock, rhsWrapper, 1, + 0.0, resultWrapper, 1); + } + if (curResultRow < result.GetRows()) + { + std::fill(result.begin()+curResultRow, result.end(), 0.0); + } + } + void NekMultiplyLowerTriangularMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs) diff --git a/library/MultiRegions/GlobalLinSysStaticCond.cpp b/library/MultiRegions/GlobalLinSysStaticCond.cpp index 05b8e52b2..d588bbdbf 100644 --- a/library/MultiRegions/GlobalLinSysStaticCond.cpp +++ b/library/MultiRegions/GlobalLinSysStaticCond.cpp @@ -172,7 +172,7 @@ namespace Nektar else { DNekScalBlkMat &BinvD = *m_BinvD; - Multiply( V_LocBnd, BinvD, F_Int); + DiagonalBlockFullScalMatrixMultiply( V_LocBnd, BinvD, F_Int); } pLocToGloMap->AssembleBnd(V_LocBnd,V_GlobHomBndTmp, -- GitLab From 9da1d90edf752b736f4e1ec7ca82e47e3fa36ffe Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Wed, 22 Jun 2016 13:07:54 +0100 Subject: [PATCH 23/34] Fix previous commit --- library/MultiRegions/ExpList.cpp | 27 ++++++++++--------- .../VelocityCorrectionScheme.cpp | 2 +- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/library/MultiRegions/ExpList.cpp b/library/MultiRegions/ExpList.cpp index 1177cd6bc..ea4970e09 100644 --- a/library/MultiRegions/ExpList.cpp +++ b/library/MultiRegions/ExpList.cpp @@ -514,18 +514,8 @@ namespace Nektar const Array &inarray, Array &out_d) { - Array e_out_d; - int offset; - for (int i = 0; i < m_collections.size(); ++i) - { - offset = m_coll_phys_offset[i]; - e_out_d = out_d + offset; - - m_collections[i].ApplyOperator(Collections::ePhysDeriv, - dir, - inarray + offset, - e_out_d); - } + Direction edir = DirCartesianMap[dir]; + v_PhysDeriv(edir, inarray,out_d); } void ExpList::v_PhysDeriv(Direction edir, const Array &inarray, @@ -554,7 +544,18 @@ namespace Nektar { // convert enum into int int intdir= (int)edir; - v_PhysDeriv(intdir, inarray, out_d); + Array e_out_d; + int offset; + for (int i = 0; i < m_collections.size(); ++i) + { + offset = m_coll_phys_offset[i]; + e_out_d = out_d + offset; + + m_collections[i].ApplyOperator(Collections::ePhysDeriv, + intdir, + inarray + offset, + e_out_d); + } } } diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp index 86bddf4c3..fa01da610 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp @@ -372,7 +372,7 @@ namespace Nektar for(i = 1; i < nvel; ++i) { // Use Forcing[1] as storage since it is not needed for the pressure - m_fields[i]->PhysDeriv(i,fields[i],Forcing[1]); + m_fields[i]->PhysDeriv(MultiRegions::DirCartesianMap[i],fields[i],Forcing[1]); Vmath::Vadd(physTot,Forcing[1],1,Forcing[0],1,Forcing[0],1); } -- GitLab From 9a4cd637aa9fe4f0fae762968e0e1728a9f2ace1 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Wed, 22 Jun 2016 15:35:12 +0100 Subject: [PATCH 24/34] Avoid weird issue in block matrix multiply --- .../LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index 4f6844e8e..e8c333c40 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -298,7 +298,8 @@ namespace Nektar curWrapperRow += columnsInBlock; // Multiply - Blas::Dgemv('N', rowsInBlock, columnsInBlock, block->Scale(), + const unsigned int* size = block->GetSize(); + Blas::Dgemv('N', size[0], size[1], block->Scale(), block->GetRawPtr(), rowsInBlock, rhsWrapper, 1, 0.0, resultWrapper, 1); } @@ -433,7 +434,6 @@ namespace Nektar char t = lhs.GetTransposeFlag(); - double alpha = lhs.Scale(); const double* a = lhs.GetRawPtr(); int lda = size[0]; -- GitLab From 163c18c4eaafff1a1d2cc8fd2ba31ba0fc85fb62 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Wed, 22 Jun 2016 20:51:24 +0100 Subject: [PATCH 25/34] Revert "Simplify transpose flag" This reverts commit 4ab22bd72c75b60799e5b7c6568e7eb8f32e32e1. --- .../LibUtilities/LinearAlgebra/MatrixBase.cpp | 10 +++++++ .../LibUtilities/LinearAlgebra/MatrixBase.hpp | 6 ++--- .../LinearAlgebra/ScaledMatrix.cpp | 27 ++++++++++++++++--- .../LinearAlgebra/ScaledMatrix.hpp | 2 ++ 4 files changed, 37 insertions(+), 8 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/MatrixBase.cpp b/library/LibUtilities/LinearAlgebra/MatrixBase.cpp index 4a72615cf..133b982aa 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixBase.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixBase.cpp @@ -110,6 +110,12 @@ namespace Nektar v_Transpose(); } + template + char ConstMatrix::GetTransposeFlag() const + { + return v_GetTransposeFlag(); + } + template unsigned int ConstMatrix::CalculateIndex(MatrixStorage type, unsigned int row, unsigned int col, @@ -260,6 +266,10 @@ namespace Nektar template void ConstMatrix::v_Transpose() {} + template + char ConstMatrix::v_GetTransposeFlag() const { return m_transpose; } + + template Matrix::~Matrix() {} diff --git a/library/LibUtilities/LinearAlgebra/MatrixBase.hpp b/library/LibUtilities/LinearAlgebra/MatrixBase.hpp index 0684538d7..9fccede4e 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixBase.hpp +++ b/library/LibUtilities/LinearAlgebra/MatrixBase.hpp @@ -91,10 +91,7 @@ namespace Nektar LIB_UTILITIES_EXPORT void Transpose() ; - LIB_UTILITIES_EXPORT inline char GetTransposeFlag() const - { - return m_transpose; - } + LIB_UTILITIES_EXPORT char GetTransposeFlag() const; LIB_UTILITIES_EXPORT static unsigned int CalculateIndex(MatrixStorage type, unsigned int row, unsigned int col, @@ -129,6 +126,7 @@ namespace Nektar virtual typename boost::call_traits::value_type v_GetValue(unsigned int row, unsigned int column) const = 0; virtual unsigned int v_GetStorageSize() const = 0; LIB_UTILITIES_EXPORT virtual void v_Transpose(); + LIB_UTILITIES_EXPORT virtual char v_GetTransposeFlag() const; unsigned int m_size[2]; char m_transpose; MatrixStorage m_storageType; diff --git a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp index 81e9540a2..4155e3783 100644 --- a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp @@ -44,7 +44,6 @@ namespace Nektar m_scale(0) { this->SetStorageType(m_matrix->GetStorageType()); - this->SetTransposeFlag(m_matrix->GetTransposeFlag()); } template @@ -55,7 +54,6 @@ namespace Nektar m_scale(scale) { this->SetStorageType(m_matrix->GetStorageType()); - this->SetTransposeFlag(m_matrix->GetTransposeFlag()); } template @@ -65,7 +63,6 @@ namespace Nektar m_scale(rhs.m_scale) { this->SetStorageType(m_matrix->GetStorageType()); - this->SetTransposeFlag(m_matrix->GetTransposeFlag()); } @@ -77,7 +74,6 @@ namespace Nektar m_scale(scale) { this->SetStorageType(m_matrix->GetStorageType()); - this->SetTransposeFlag(m_matrix->GetTransposeFlag()); } template @@ -166,6 +162,29 @@ namespace Nektar return ThisType::GetStorageSize(); } + template + char NekMatrix, ScaledMatrixTag>::v_GetTransposeFlag() const + { + if( this->GetRawTransposeFlag() == 'N' ) + { + return m_matrix->GetTransposeFlag(); + } + else + { + if( m_matrix->GetTransposeFlag() == 'N' ) + { + return 'T'; + } + else + { + return 'N'; + } + } + } + + + + template NekMatrix Transpose(NekMatrix& rhs) diff --git a/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp b/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp index 6b7812b64..3b248b9ca 100644 --- a/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp +++ b/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp @@ -146,6 +146,8 @@ namespace Nektar v_GetValue(unsigned int row, unsigned int column) const; LIB_UTILITIES_EXPORT virtual unsigned int v_GetStorageSize() const ; + + LIB_UTILITIES_EXPORT virtual char v_GetTransposeFlag() const; boost::shared_ptr m_matrix; NumberType m_scale; -- GitLab From 0ba6d5cda639ebc8f8dbe01d9b1e4b391cdc6517 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Wed, 22 Jun 2016 20:51:43 +0100 Subject: [PATCH 26/34] Revert "Add option to disable locks for threading" This reverts commit 3044e57b5e819dd21ca69fe271bbafd4857e7671. --- CMakeLists.txt | 9 --- .../LibUtilities/BasicUtils/MutexTypeDefs.h | 77 ------------------- .../LibUtilities/BasicUtils/NekFactory.hpp | 14 +++- .../LibUtilities/BasicUtils/NekManager.hpp | 10 ++- library/LibUtilities/BasicUtils/Thread.h | 9 ++- library/LibUtilities/BasicUtils/Vmath.cpp | 5 +- library/LibUtilities/CMakeLists.txt | 1 - .../Memory/ThreadSpecificPool.hpp | 8 +- 8 files changed, 30 insertions(+), 103 deletions(-) delete mode 100644 library/LibUtilities/BasicUtils/MutexTypeDefs.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b72e42d69..6d26c76f1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,15 +141,6 @@ OPTION(NEKTAR_USE_MEMORY_POOLS "Use memory pools to accelerate memory allocation." ON) MARK_AS_ADVANCED(NEKTAR_USE_MEMORY_POOLS) -# Option to disable threads -OPTION(NEKTAR_USE_THREADS - "Enable functionality required for using threads." ON) -MARK_AS_ADVANCED(NEKTAR_USE_THREADS) - -IF (NEKTAR_USE_THREADS) - ADD_DEFINITIONS(-DNEKTAR_USING_THREADS) -ENDIF() - IF (MSVC) # Needed for M_PI to be visible in visual studio. ADD_DEFINITIONS(-D_USE_MATH_DEFINES) diff --git a/library/LibUtilities/BasicUtils/MutexTypeDefs.h b/library/LibUtilities/BasicUtils/MutexTypeDefs.h deleted file mode 100644 index 1b9277d79..000000000 --- a/library/LibUtilities/BasicUtils/MutexTypeDefs.h +++ /dev/null @@ -1,77 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// -// File: MutexTypeDefs.h -// -// For more information, please see: http://www.nektar.info -// -// The MIT License -// -// Copyright (c) 2006 Division of Applied Mathematics, Brown University (USA), -// Department of Aeronautics, Imperial College London (UK), and Scientific -// Computing and Imaging Institute, University of Utah (USA). -// -// License for the specific language governing rights and limitations under -// Permission is hereby granted, free of charge, to any person obtaining a -// copy of this software and associated documentation files (the "Software"), -// to deal in the Software without restriction, including without limitation -// the rights to use, copy, modify, merge, publish, distribute, sublicense, -// and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: -// -// The above copyright notice and this permission notice shall be included -// in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS -// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -// DEALINGS IN THE SOFTWARE. -// -// Description: Type definitions of mutex objects. -// -/////////////////////////////////////////////////////////////////////////////// - -#ifndef NEKTAR_LIB_UTILITIES_BASICUTILS_MUTEXTYPEDEFS_H -#define NEKTAR_LIB_UTILITIES_BASICUTILS_MUTEXTYPEDEFS_H - -#include -#include - -#include - -namespace Nektar -{ -namespace LibUtilities -{ -#ifdef NEKTAR_USING_THREADS - typedef boost::unique_lock WriteLock; - typedef boost::shared_lock ReadLock; - typedef boost::mutex::scoped_lock ScopedLock; - typedef boost::shared_mutex SharedMutex; - typedef boost::mutex Mutex; -#else - class Lock - { - public: - LIB_UTILITIES_EXPORT Lock(){} - - LIB_UTILITIES_EXPORT Lock(int i){} - - LIB_UTILITIES_EXPORT ~Lock(){} - - LIB_UTILITIES_EXPORT inline void lock(){} - - LIB_UTILITIES_EXPORT inline void unlock(){} - }; - typedef Lock WriteLock; - typedef Lock ReadLock; - typedef Lock ScopedLock; - typedef int SharedMutex; - typedef int Mutex; -#endif -} -} - -#endif //NEKTAR_LIB_UTILITIES_BASICUTILS_MUTEXTYPEDEFS_H diff --git a/library/LibUtilities/BasicUtils/NekFactory.hpp b/library/LibUtilities/BasicUtils/NekFactory.hpp index 92303eb67..0e6e018e9 100644 --- a/library/LibUtilities/BasicUtils/NekFactory.hpp +++ b/library/LibUtilities/BasicUtils/NekFactory.hpp @@ -43,6 +43,8 @@ #include #include #include + #include + #include #include @@ -51,7 +53,6 @@ #include #include - #include #ifndef MAX_PARAM #define MAX_PARAM 5 // default maximum number of parameters to support @@ -66,6 +67,8 @@ namespace Nektar // Generate parameter typenames with default type of 'none' #define FACTORY_print(z, n, data) BOOST_PP_CAT(data, n) = none + typedef boost::unique_lock WriteLock; + typedef boost::shared_lock ReadLock; /** * @class NekFactory @@ -322,7 +325,7 @@ namespace Nektar TMapFactory mMapFactory; - SharedMutex m_mutex; + boost::shared_mutex m_mutex; }; @@ -344,6 +347,11 @@ namespace Nektar #define n BOOST_PP_ITERATION() // Define macro for printing the non-required template parameters #define FACTORY_print(z, n, data) data + #include + #include + +typedef boost::unique_lock WriteLock; +typedef boost::shared_lock ReadLock; template < typename tKey, typename tBase BOOST_PP_COMMA_IF(n) @@ -492,7 +500,7 @@ namespace Nektar NekFactory& operator=(const NekFactory& rhs); TMapFactory mMapFactory; - SharedMutex m_mutex; + boost::shared_mutex m_mutex; }; #undef n diff --git a/library/LibUtilities/BasicUtils/NekManager.hpp b/library/LibUtilities/BasicUtils/NekManager.hpp index 1e6f1818c..1bfb6e71b 100644 --- a/library/LibUtilities/BasicUtils/NekManager.hpp +++ b/library/LibUtilities/BasicUtils/NekManager.hpp @@ -41,10 +41,11 @@ #include #include #include +#include +#include #include #include -#include namespace Nektar { @@ -52,6 +53,9 @@ namespace Nektar { using namespace std; + typedef boost::unique_lock WriteLock; + typedef boost::shared_lock ReadLock; + template struct defOpLessCreator { @@ -315,12 +319,12 @@ namespace Nektar static FlagContainerPool m_managementEnabledContainerPool; CreateFuncType m_globalCreateFunc; CreateFuncContainer m_keySpecificCreateFuncs; - static SharedMutex m_mutex; + static boost::shared_mutex m_mutex; }; template typename NekManager::ValueContainerPool NekManager::m_ValueContainerPool; template typename NekManager::FlagContainerPool NekManager::m_managementEnabledContainerPool; template - SharedMutex NekManager::m_mutex; + typename boost::shared_mutex NekManager::m_mutex; } } diff --git a/library/LibUtilities/BasicUtils/Thread.h b/library/LibUtilities/BasicUtils/Thread.h index d88480d7c..1e5b09172 100644 --- a/library/LibUtilities/BasicUtils/Thread.h +++ b/library/LibUtilities/BasicUtils/Thread.h @@ -38,6 +38,8 @@ #include #include +#include +#include #include #include #include @@ -45,8 +47,6 @@ #include #include -#include - namespace Nektar { namespace Thread @@ -321,6 +321,9 @@ class ThreadManager : public boost::enable_shared_from_this } }; +typedef boost::unique_lock WriteLock; +typedef boost::shared_lock ReadLock; + /** * A class to manage multiple ThreadManagers. It also acts as a cut-out during * static initialisation, where code attempts to grab a ThreadManager before any @@ -342,7 +345,7 @@ class ThreadMaster { private: std::vector m_threadManagers; - LibUtilities::SharedMutex m_mutex; + boost::shared_mutex m_mutex; std::string m_threadingType; public: diff --git a/library/LibUtilities/BasicUtils/Vmath.cpp b/library/LibUtilities/BasicUtils/Vmath.cpp index 6a8c6dd6c..8565973da 100644 --- a/library/LibUtilities/BasicUtils/Vmath.cpp +++ b/library/LibUtilities/BasicUtils/Vmath.cpp @@ -36,7 +36,6 @@ #include #include #include -#include namespace Vmath { @@ -132,14 +131,14 @@ namespace Vmath #undef EPS #undef RNMX - static Nektar::LibUtilities::Mutex mutex; + static boost::mutex mutex; template LIB_UTILITIES_EXPORT Nektar::NekDouble ran2 (long* idum); /// \brief Fills a vector with white noise. template void FillWhiteNoise( int n, const T eps, T *x, const int incx, int outseed) { // Protect the static vars here and in ran2 - Nektar::LibUtilities::ScopedLock l(mutex); + boost::mutex::scoped_lock l(mutex); while( n-- ) { static int iset = 0; diff --git a/library/LibUtilities/CMakeLists.txt b/library/LibUtilities/CMakeLists.txt index d2fe36fcb..65f699fac 100644 --- a/library/LibUtilities/CMakeLists.txt +++ b/library/LibUtilities/CMakeLists.txt @@ -18,7 +18,6 @@ SET(BasicUtilsHeaders ./BasicUtils/MeshEntities.hpp ./BasicUtils/MeshPartition.h ./BasicUtils/MeshPartitionMetis.h - ./BasicUtils/MutexTypeDefs.h ./BasicUtils/NekManager.hpp ./BasicUtils/NekFactory.hpp ./BasicUtils/NekPtr.hpp diff --git a/library/LibUtilities/Memory/ThreadSpecificPool.hpp b/library/LibUtilities/Memory/ThreadSpecificPool.hpp index f815be531..8f7240662 100644 --- a/library/LibUtilities/Memory/ThreadSpecificPool.hpp +++ b/library/LibUtilities/Memory/ThreadSpecificPool.hpp @@ -40,12 +40,12 @@ #include #include +#include #include #include #include #include -#include #include @@ -100,7 +100,7 @@ namespace Nektar /// \throw std::bad_alloc if memory is exhausted. void* Allocate() { - LibUtilities::ScopedLock l(m_mutex); + boost::mutex::scoped_lock l(m_mutex); void* result = m_pool->malloc(); #if defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) @@ -116,7 +116,7 @@ namespace Nektar /// from this pool. Doing this will result in undefined behavior. void Deallocate(const void* p) { - LibUtilities::ScopedLock l(m_mutex); + boost::mutex::scoped_lock l(m_mutex); #if defined(NEKTAR_DEBUG) || defined(NEKTAR_FULLDEBUG) // The idea here is to fill the returned memory with some known // pattern, then detect that pattern on the allocate. If the @@ -135,7 +135,7 @@ namespace Nektar //boost::thread_specific_ptr > m_pool; boost::pool<>* m_pool; size_t m_blockSize; - LibUtilities::Mutex m_mutex; + boost::mutex m_mutex; }; } -- GitLab From 5a39ae0b80e7f5b421007b5ae1d550df04387020 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Wed, 22 Jun 2016 20:51:47 +0100 Subject: [PATCH 27/34] Revert "Store (reference to) derivfactors in Expansion" This reverts commit 8acc3fdf45b8cdb6b8180b6ee466b6d160d1e10b. --- library/LocalRegions/Expansion.cpp | 4 +- library/LocalRegions/Expansion.h | 1 - library/LocalRegions/PrismExp.cpp | 150 +++++++++++++++------------- library/LocalRegions/PyrExp.cpp | 127 ++++++++++++------------ library/LocalRegions/QuadExp.cpp | 125 +++++++++++++----------- library/LocalRegions/SegExp.cpp | 89 +++++++++-------- library/LocalRegions/TetExp.cpp | 152 +++++++++++++++-------------- library/LocalRegions/TriExp.cpp | 113 +++++++++++---------- 8 files changed, 409 insertions(+), 352 deletions(-) diff --git a/library/LocalRegions/Expansion.cpp b/library/LocalRegions/Expansion.cpp index c4da37080..0b8fbb768 100644 --- a/library/LocalRegions/Expansion.cpp +++ b/library/LocalRegions/Expansion.cpp @@ -69,13 +69,11 @@ namespace Nektar << "(first vertex ID = " << m_geom->GetVid(0) << ")"; NEKERROR(ErrorUtil::ewarning, err.str()); } - m_df = m_metricinfo->GetDerivFactors(GetPointsKeys()); } Expansion::Expansion(const Expansion &pSrc) : m_geom(pSrc.m_geom), - m_metricinfo(pSrc.m_metricinfo), - m_df(pSrc.m_df) + m_metricinfo(pSrc.m_metricinfo) { } diff --git a/library/LocalRegions/Expansion.h b/library/LocalRegions/Expansion.h index 15d8267ea..2b9493b98 100644 --- a/library/LocalRegions/Expansion.h +++ b/library/LocalRegions/Expansion.h @@ -124,7 +124,6 @@ namespace Nektar protected: SpatialDomains::GeometrySharedPtr m_geom; SpatialDomains::GeomFactorsSharedPtr m_metricinfo; - Array m_df; MetricMap m_metrics; void ComputeLaplacianMetric(); diff --git a/library/LocalRegions/PrismExp.cpp b/library/LocalRegions/PrismExp.cpp index c54e61959..dd3c0dad9 100644 --- a/library/LocalRegions/PrismExp.cpp +++ b/library/LocalRegions/PrismExp.cpp @@ -141,6 +141,8 @@ namespace Nektar { int nqtot = GetTotPoints(); + Array df = + m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff0(nqtot); Array diff1(nqtot); Array diff2(nqtot); @@ -151,46 +153,46 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Vmul (nqtot,&m_df[0][0],1,&diff0[0],1,&out_d0[0],1); - Vmath::Vvtvp (nqtot,&m_df[1][0],1,&diff1[0],1,&out_d0[0],1,&out_d0[0],1); - Vmath::Vvtvp (nqtot,&m_df[2][0],1,&diff2[0],1,&out_d0[0],1,&out_d0[0],1); + Vmath::Vmul (nqtot,&df[0][0],1,&diff0[0],1,&out_d0[0],1); + Vmath::Vvtvp (nqtot,&df[1][0],1,&diff1[0],1,&out_d0[0],1,&out_d0[0],1); + Vmath::Vvtvp (nqtot,&df[2][0],1,&diff2[0],1,&out_d0[0],1,&out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Vmul (nqtot,&m_df[3][0],1,&diff0[0],1,&out_d1[0],1); - Vmath::Vvtvp (nqtot,&m_df[4][0],1,&diff1[0],1,&out_d1[0],1,&out_d1[0],1); - Vmath::Vvtvp (nqtot,&m_df[5][0],1,&diff2[0],1,&out_d1[0],1,&out_d1[0],1); + Vmath::Vmul (nqtot,&df[3][0],1,&diff0[0],1,&out_d1[0],1); + Vmath::Vvtvp (nqtot,&df[4][0],1,&diff1[0],1,&out_d1[0],1,&out_d1[0],1); + Vmath::Vvtvp (nqtot,&df[5][0],1,&diff2[0],1,&out_d1[0],1,&out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Vmul (nqtot,&m_df[6][0],1,&diff0[0],1,&out_d2[0],1); - Vmath::Vvtvp (nqtot,&m_df[7][0],1,&diff1[0],1,&out_d2[0],1,&out_d2[0],1); - Vmath::Vvtvp (nqtot,&m_df[8][0],1,&diff2[0],1,&out_d2[0],1,&out_d2[0],1); + Vmath::Vmul (nqtot,&df[6][0],1,&diff0[0],1,&out_d2[0],1); + Vmath::Vvtvp (nqtot,&df[7][0],1,&diff1[0],1,&out_d2[0],1,&out_d2[0],1); + Vmath::Vvtvp (nqtot,&df[8][0],1,&diff2[0],1,&out_d2[0],1,&out_d2[0],1); } } else // regular geometry { if(out_d0.num_elements()) { - Vmath::Smul (nqtot,m_df[0][0],&diff0[0],1,&out_d0[0],1); - Blas::Daxpy (nqtot,m_df[1][0],&diff1[0],1,&out_d0[0],1); - Blas::Daxpy (nqtot,m_df[2][0],&diff2[0],1,&out_d0[0],1); + Vmath::Smul (nqtot,df[0][0],&diff0[0],1,&out_d0[0],1); + Blas::Daxpy (nqtot,df[1][0],&diff1[0],1,&out_d0[0],1); + Blas::Daxpy (nqtot,df[2][0],&diff2[0],1,&out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Smul (nqtot,m_df[3][0],&diff0[0],1,&out_d1[0],1); - Blas::Daxpy (nqtot,m_df[4][0],&diff1[0],1,&out_d1[0],1); - Blas::Daxpy (nqtot,m_df[5][0],&diff2[0],1,&out_d1[0],1); + Vmath::Smul (nqtot,df[3][0],&diff0[0],1,&out_d1[0],1); + Blas::Daxpy (nqtot,df[4][0],&diff1[0],1,&out_d1[0],1); + Blas::Daxpy (nqtot,df[5][0],&diff2[0],1,&out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Smul (nqtot,m_df[6][0],&diff0[0],1,&out_d2[0],1); - Blas::Daxpy (nqtot,m_df[7][0],&diff1[0],1,&out_d2[0],1); - Blas::Daxpy (nqtot,m_df[8][0],&diff2[0],1,&out_d2[0],1); + Vmath::Smul (nqtot,df[6][0],&diff0[0],1,&out_d2[0],1); + Blas::Daxpy (nqtot,df[7][0],&diff1[0],1,&out_d2[0],1); + Blas::Daxpy (nqtot,df[8][0],&diff2[0],1,&out_d2[0],1); } } } @@ -373,19 +375,22 @@ namespace Nektar Array tmp6 (m_ncoeffs); Array wsp (order0*nquad2*(nquad1+order1)); + const Array& df = + m_metricinfo->GetDerivFactors(GetPointsKeys()); + MultiplyByQuadratureMetric(inarray, tmp1); if(m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { - Vmath::Vmul(nqtot,&m_df[3*dir][0], 1,tmp1.get(),1,tmp2.get(),1); - Vmath::Vmul(nqtot,&m_df[3*dir+1][0],1,tmp1.get(),1,tmp3.get(),1); - Vmath::Vmul(nqtot,&m_df[3*dir+2][0],1,tmp1.get(),1,tmp4.get(),1); + Vmath::Vmul(nqtot,&df[3*dir][0], 1,tmp1.get(),1,tmp2.get(),1); + Vmath::Vmul(nqtot,&df[3*dir+1][0],1,tmp1.get(),1,tmp3.get(),1); + Vmath::Vmul(nqtot,&df[3*dir+2][0],1,tmp1.get(),1,tmp4.get(),1); } else { - Vmath::Smul(nqtot, m_df[3*dir][0], tmp1.get(),1,tmp2.get(), 1); - Vmath::Smul(nqtot, m_df[3*dir+1][0],tmp1.get(),1,tmp3.get(), 1); - Vmath::Smul(nqtot, m_df[3*dir+2][0],tmp1.get(),1,tmp4.get(), 1); + Vmath::Smul(nqtot, df[3*dir][0], tmp1.get(),1,tmp2.get(), 1); + Vmath::Smul(nqtot, df[3*dir+1][0],tmp1.get(),1,tmp3.get(), 1); + Vmath::Smul(nqtot, df[3*dir+2][0],tmp1.get(),1,tmp4.get(), 1); } // set up geometric factor: (1+z0)/2 @@ -686,6 +691,7 @@ namespace Nektar GetGeom()->GetMetricInfo(); LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); SpatialDomains::GeomType type = geomFactors->GetGtype(); + const Array &df = geomFactors->GetDerivFactors(ptsKeys); const Array &jac = geomFactors->GetJac(ptsKeys); int nq0 = ptsKeys[0].GetNumPoints(); @@ -723,7 +729,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -m_df[3*i+2][0];; + normal[i][0] = -df[3*i+2][0];; } break; } @@ -731,7 +737,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -m_df[3*i+1][0]; + normal[i][0] = -df[3*i+1][0]; } break; } @@ -739,7 +745,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = m_df[3*i][0]+m_df[3*i+2][0]; + normal[i][0] = df[3*i][0]+df[3*i+2][0]; } break; } @@ -747,7 +753,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = m_df[3*i+1][0]; + normal[i][0] = df[3*i+1][0]; } break; } @@ -755,7 +761,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -m_df[3*i][0]; + normal[i][0] = -df[3*i][0]; } break; } @@ -809,9 +815,9 @@ namespace Nektar { for(j = 0; j < nq01; ++j) { - normals[j] = -m_df[2][j]*jac[j]; - normals[nqtot+j] = -m_df[5][j]*jac[j]; - normals[2*nqtot+j] = -m_df[8][j]*jac[j]; + normals[j] = -df[2][j]*jac[j]; + normals[nqtot+j] = -df[5][j]*jac[j]; + normals[2*nqtot+j] = -df[8][j]*jac[j]; faceJac[j] = jac[j]; } @@ -828,11 +834,11 @@ namespace Nektar { int tmp = j+nq01*k; normals[j+k*nq0] = - -m_df[1][tmp]*jac[tmp]; + -df[1][tmp]*jac[tmp]; normals[nqtot+j+k*nq0] = - -m_df[4][tmp]*jac[tmp]; + -df[4][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq0] = - -m_df[7][tmp]*jac[tmp]; + -df[7][tmp]*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -850,11 +856,11 @@ namespace Nektar { int tmp = nq0-1+nq0*j+nq01*k; normals[j+k*nq1] = - (m_df[0][tmp]+m_df[2][tmp])*jac[tmp]; + (df[0][tmp]+df[2][tmp])*jac[tmp]; normals[nqtot+j+k*nq1] = - (m_df[3][tmp]+m_df[5][tmp])*jac[tmp]; + (df[3][tmp]+df[5][tmp])*jac[tmp]; normals[2*nqtot+j+k*nq1] = - (m_df[6][tmp]+m_df[8][tmp])*jac[tmp]; + (df[6][tmp]+df[8][tmp])*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -872,11 +878,11 @@ namespace Nektar { int tmp = nq0*(nq1-1) + j + nq01*k; normals[j+k*nq0] = - m_df[1][tmp]*jac[tmp]; + df[1][tmp]*jac[tmp]; normals[nqtot+j+k*nq0] = - m_df[4][tmp]*jac[tmp]; + df[4][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq0] = - m_df[7][tmp]*jac[tmp]; + df[7][tmp]*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -894,11 +900,11 @@ namespace Nektar { int tmp = j*nq0+nq01*k; normals[j+k*nq1] = - -m_df[0][tmp]*jac[tmp]; + -df[0][tmp]*jac[tmp]; normals[nqtot+j+k*nq1] = - -m_df[3][tmp]*jac[tmp]; + -df[3][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq1] = - -m_df[6][tmp]*jac[tmp]; + -df[6][tmp]*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -1149,6 +1155,8 @@ namespace Nektar else { NekDouble jac = (m_metricinfo->GetJac(ptsKeys))[0]; + Array df = + m_metricinfo->GetDerivFactors(ptsKeys); int dir = 0; switch(mkey.GetMatrixType()) @@ -1183,9 +1191,9 @@ namespace Nektar DNekMatSharedPtr WeakDeriv = MemoryManager ::AllocateSharedPtr(rows,cols); - (*WeakDeriv) = m_df[3*dir ][0]*deriv0 - + m_df[3*dir+1][0]*deriv1 - + m_df[3*dir+2][0]*deriv2; + (*WeakDeriv) = df[3*dir ][0]*deriv0 + + df[3*dir+1][0]*deriv1 + + df[3*dir+2][0]*deriv2; returnval = MemoryManager ::AllocateSharedPtr(jac,WeakDeriv); @@ -1579,6 +1587,8 @@ namespace Nektar // wsp3 = du_dxi3 = D_xi3 * wsp0 = D_xi3 * u StdExpansion3D::PhysTensorDeriv(inarray,wsp1,wsp2,wsp3); + const Array& df = + m_metricinfo->GetDerivFactors(GetPointsKeys()); const Array& z0 = m_base[0]->GetZ(); const Array& z2 = m_base[2]->GetZ(); @@ -1600,67 +1610,67 @@ namespace Nektar if (m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { // wsp4 = d eta_1/d x_1 - Vmath::Vvtvvtp(nqtot, &m_df[0][0], 1, &h0[0], 1, &m_df[2][0], 1, &h1[0], 1, &wsp4[0], 1); + Vmath::Vvtvvtp(nqtot, &df[0][0], 1, &h0[0], 1, &df[2][0], 1, &h1[0], 1, &wsp4[0], 1); // wsp5 = d eta_2/d x_1 - Vmath::Vvtvvtp(nqtot, &m_df[3][0], 1, &h0[0], 1, &m_df[5][0], 1, &h1[0], 1, &wsp5[0], 1); + Vmath::Vvtvvtp(nqtot, &df[3][0], 1, &h0[0], 1, &df[5][0], 1, &h1[0], 1, &wsp5[0], 1); // wsp6 = d eta_3/d x_1d - Vmath::Vvtvvtp(nqtot, &m_df[6][0], 1, &h0[0], 1, &m_df[8][0], 1, &h1[0], 1, &wsp6[0], 1); + Vmath::Vvtvvtp(nqtot, &df[6][0], 1, &h0[0], 1, &df[8][0], 1, &h1[0], 1, &wsp6[0], 1); // g0 (overwrites h0) Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp6[0], 1, &wsp6[0], 1, &g0[0], 1, &g0[0], 1); // g3 (overwrites h1) - Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &wsp4[0], 1, &m_df[4][0], 1, &wsp5[0], 1, &g3[0], 1); - Vmath::Vvtvp (nqtot, &m_df[7][0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); + Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &wsp4[0], 1, &df[4][0], 1, &wsp5[0], 1, &g3[0], 1); + Vmath::Vvtvp (nqtot, &df[7][0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g4 - Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &wsp4[0], 1, &m_df[5][0], 1, &wsp5[0], 1, &g4[0], 1); - Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &wsp6[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &wsp4[0], 1, &df[5][0], 1, &wsp5[0], 1, &g4[0], 1); + Vmath::Vvtvp (nqtot, &df[8][0], 1, &wsp6[0], 1, &g4[0], 1, &g4[0], 1); // Overwrite wsp4/5/6 with g1/2/5 // g1 - Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &m_df[1][0], 1, &m_df[4][0], 1, &m_df[4][0], 1, &g1[0], 1); - Vmath::Vvtvp (nqtot, &m_df[7][0], 1, &m_df[7][0], 1, &g1[0], 1, &g1[0], 1); + Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &df[1][0], 1, &df[4][0], 1, &df[4][0], 1, &g1[0], 1); + Vmath::Vvtvp (nqtot, &df[7][0], 1, &df[7][0], 1, &g1[0], 1, &g1[0], 1); // g2 - Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &m_df[2][0], 1, &m_df[5][0], 1, &m_df[5][0], 1, &g2[0], 1); - Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &m_df[8][0], 1, &g2[0], 1, &g2[0], 1); + Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &df[2][0], 1, &df[5][0], 1, &df[5][0], 1, &g2[0], 1); + Vmath::Vvtvp (nqtot, &df[8][0], 1, &df[8][0], 1, &g2[0], 1, &g2[0], 1); // g5 - Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &m_df[2][0], 1, &m_df[4][0], 1, &m_df[5][0], 1, &g5[0], 1); - Vmath::Vvtvp (nqtot, &m_df[7][0], 1, &m_df[8][0], 1, &g5[0], 1, &g5[0], 1); + Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &df[2][0], 1, &df[4][0], 1, &df[5][0], 1, &g5[0], 1); + Vmath::Vvtvp (nqtot, &df[7][0], 1, &df[8][0], 1, &g5[0], 1, &g5[0], 1); } else { // wsp4 = d eta_1/d x_1 - Vmath::Svtsvtp(nqtot, m_df[0][0], &h0[0], 1, m_df[2][0], &h1[0], 1, &wsp4[0], 1); + Vmath::Svtsvtp(nqtot, df[0][0], &h0[0], 1, df[2][0], &h1[0], 1, &wsp4[0], 1); // wsp5 = d eta_2/d x_1 - Vmath::Svtsvtp(nqtot, m_df[3][0], &h0[0], 1, m_df[5][0], &h1[0], 1, &wsp5[0], 1); + Vmath::Svtsvtp(nqtot, df[3][0], &h0[0], 1, df[5][0], &h1[0], 1, &wsp5[0], 1); // wsp6 = d eta_3/d x_1 - Vmath::Svtsvtp(nqtot, m_df[6][0], &h0[0], 1, m_df[8][0], &h1[0], 1, &wsp6[0], 1); + Vmath::Svtsvtp(nqtot, df[6][0], &h0[0], 1, df[8][0], &h1[0], 1, &wsp6[0], 1); // g0 (overwrites h0) Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp6[0], 1, &wsp6[0], 1, &g0[0], 1, &g0[0], 1); // g3 (overwrites h1) - Vmath::Svtsvtp(nqtot, m_df[1][0], &wsp4[0], 1, m_df[4][0], &wsp5[0], 1, &g3[0], 1); - Vmath::Svtvp (nqtot, m_df[7][0], &wsp6[0], 1, &g3[0], 1, &g3[0], 1); + Vmath::Svtsvtp(nqtot, df[1][0], &wsp4[0], 1, df[4][0], &wsp5[0], 1, &g3[0], 1); + Vmath::Svtvp (nqtot, df[7][0], &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g4 - Vmath::Svtsvtp(nqtot, m_df[2][0], &wsp4[0], 1, m_df[5][0], &wsp5[0], 1, &g4[0], 1); - Vmath::Svtvp (nqtot, m_df[8][0], &wsp6[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Svtsvtp(nqtot, df[2][0], &wsp4[0], 1, df[5][0], &wsp5[0], 1, &g4[0], 1); + Vmath::Svtvp (nqtot, df[8][0], &wsp6[0], 1, &g4[0], 1, &g4[0], 1); // Overwrite wsp4/5/6 with g1/2/5 // g1 - Vmath::Fill(nqtot, m_df[1][0]*m_df[1][0] + m_df[4][0]*m_df[4][0] + m_df[7][0]*m_df[7][0], &g1[0], 1); + Vmath::Fill(nqtot, df[1][0]*df[1][0] + df[4][0]*df[4][0] + df[7][0]*df[7][0], &g1[0], 1); // g2 - Vmath::Fill(nqtot, m_df[2][0]*m_df[2][0] + m_df[5][0]*m_df[5][0] + m_df[8][0]*m_df[8][0], &g2[0], 1); + Vmath::Fill(nqtot, df[2][0]*df[2][0] + df[5][0]*df[5][0] + df[8][0]*df[8][0], &g2[0], 1); // g5 - Vmath::Fill(nqtot, m_df[1][0]*m_df[2][0] + m_df[4][0]*m_df[5][0] + m_df[7][0]*m_df[8][0], &g5[0], 1); + Vmath::Fill(nqtot, df[1][0]*df[2][0] + df[4][0]*df[5][0] + df[7][0]*df[8][0], &g5[0], 1); } // Compute component derivatives into wsp7, 8, 9 (wsp7 overwrites // g0). diff --git a/library/LocalRegions/PyrExp.cpp b/library/LocalRegions/PyrExp.cpp index a613fffd3..1904f3577 100644 --- a/library/LocalRegions/PyrExp.cpp +++ b/library/LocalRegions/PyrExp.cpp @@ -142,6 +142,8 @@ namespace Nektar int nquad0 = m_base[0]->GetNumPoints(); int nquad1 = m_base[1]->GetNumPoints(); int nquad2 = m_base[2]->GetNumPoints(); + Array gmat = + m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff0(nquad0*nquad1*nquad2); Array diff1(nquad0*nquad1*nquad2); Array diff2(nquad0*nquad1*nquad2); @@ -152,46 +154,46 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Vmul (nquad0*nquad1*nquad2,&m_df[0][0],1,&diff0[0],1, &out_d0[0], 1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[1][0],1,&diff1[0],1, &out_d0[0], 1,&out_d0[0],1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[2][0],1,&diff2[0],1, &out_d0[0], 1,&out_d0[0],1); + Vmath::Vmul (nquad0*nquad1*nquad2,&gmat[0][0],1,&diff0[0],1, &out_d0[0], 1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[1][0],1,&diff1[0],1, &out_d0[0], 1,&out_d0[0],1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[2][0],1,&diff2[0],1, &out_d0[0], 1,&out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Vmul (nquad0*nquad1*nquad2,&m_df[3][0],1,&diff0[0],1, &out_d1[0], 1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[4][0],1,&diff1[0],1, &out_d1[0], 1,&out_d1[0],1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[5][0],1,&diff2[0],1, &out_d1[0], 1,&out_d1[0],1); + Vmath::Vmul (nquad0*nquad1*nquad2,&gmat[3][0],1,&diff0[0],1, &out_d1[0], 1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[4][0],1,&diff1[0],1, &out_d1[0], 1,&out_d1[0],1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[5][0],1,&diff2[0],1, &out_d1[0], 1,&out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Vmul (nquad0*nquad1*nquad2,&m_df[6][0],1,&diff0[0],1, &out_d2[0], 1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[7][0],1,&diff1[0],1, &out_d2[0], 1, &out_d2[0],1); - Vmath::Vvtvp (nquad0*nquad1*nquad2,&m_df[8][0],1,&diff2[0],1, &out_d2[0], 1, &out_d2[0],1); + Vmath::Vmul (nquad0*nquad1*nquad2,&gmat[6][0],1,&diff0[0],1, &out_d2[0], 1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[7][0],1,&diff1[0],1, &out_d2[0], 1, &out_d2[0],1); + Vmath::Vvtvp (nquad0*nquad1*nquad2,&gmat[8][0],1,&diff2[0],1, &out_d2[0], 1, &out_d2[0],1); } } else // regular geometry { if(out_d0.num_elements()) { - Vmath::Smul (nquad0*nquad1*nquad2,m_df[0][0],&diff0[0],1, &out_d0[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,m_df[1][0],&diff1[0],1, &out_d0[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,m_df[2][0],&diff2[0],1, &out_d0[0], 1); + Vmath::Smul (nquad0*nquad1*nquad2,gmat[0][0],&diff0[0],1, &out_d0[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,gmat[1][0],&diff1[0],1, &out_d0[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,gmat[2][0],&diff2[0],1, &out_d0[0], 1); } if(out_d1.num_elements()) { - Vmath::Smul (nquad0*nquad1*nquad2,m_df[3][0],&diff0[0],1, &out_d1[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,m_df[4][0],&diff1[0],1, &out_d1[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,m_df[5][0],&diff2[0],1, &out_d1[0], 1); + Vmath::Smul (nquad0*nquad1*nquad2,gmat[3][0],&diff0[0],1, &out_d1[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,gmat[4][0],&diff1[0],1, &out_d1[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,gmat[5][0],&diff2[0],1, &out_d1[0], 1); } if(out_d2.num_elements()) { - Vmath::Smul (nquad0*nquad1*nquad2,m_df[6][0],&diff0[0],1, &out_d2[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,m_df[7][0],&diff1[0],1, &out_d2[0], 1); - Blas::Daxpy (nquad0*nquad1*nquad2,m_df[8][0],&diff2[0],1, &out_d2[0], 1); + Vmath::Smul (nquad0*nquad1*nquad2,gmat[6][0],&diff0[0],1, &out_d2[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,gmat[7][0],&diff1[0],1, &out_d2[0], 1); + Blas::Daxpy (nquad0*nquad1*nquad2,gmat[8][0],&diff2[0],1, &out_d2[0], 1); } } } @@ -466,6 +468,7 @@ namespace Nektar GetGeom()->GetMetricInfo(); LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); SpatialDomains::GeomType type = geomFactors->GetGtype(); + const Array &df = geomFactors->GetDerivFactors(ptsKeys); const Array &jac = geomFactors->GetJac(ptsKeys); LibUtilities::BasisKey tobasis0 = DetFaceBasisKey(face,0); @@ -496,7 +499,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -m_df[3*i+2][0]; + normal[i][0] = -df[3*i+2][0]; } break; } @@ -504,7 +507,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -m_df[3*i+1][0]; + normal[i][0] = -df[3*i+1][0]; } break; } @@ -512,7 +515,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = m_df[3*i][0]+m_df[3*i+2][0]; + normal[i][0] = df[3*i][0]+df[3*i+2][0]; } break; } @@ -520,7 +523,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = m_df[3*i+1][0]+m_df[3*i+2][0]; + normal[i][0] = df[3*i+1][0]+df[3*i+2][0]; } break; } @@ -528,7 +531,7 @@ namespace Nektar { for(i = 0; i < vCoordDim; ++i) { - normal[i][0] = -m_df[3*i][0]; + normal[i][0] = -df[3*i][0]; } break; } @@ -588,9 +591,9 @@ namespace Nektar { for(j = 0; j < nq01; ++j) { - normals[j] = -m_df[2][j]*jac[j]; - normals[nqtot+j] = -m_df[5][j]*jac[j]; - normals[2*nqtot+j] = -m_df[8][j]*jac[j]; + normals[j] = -df[2][j]*jac[j]; + normals[nqtot+j] = -df[5][j]*jac[j]; + normals[2*nqtot+j] = -df[8][j]*jac[j]; faceJac[j] = jac[j]; } @@ -607,11 +610,11 @@ namespace Nektar { int tmp = j+nq01*k; normals[j+k*nq0] = - -m_df[1][tmp]*jac[tmp]; + -df[1][tmp]*jac[tmp]; normals[nqtot+j+k*nq0] = - -m_df[4][tmp]*jac[tmp]; + -df[4][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq0] = - -m_df[7][tmp]*jac[tmp]; + -df[7][tmp]*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -629,11 +632,11 @@ namespace Nektar { int tmp = nq0-1+nq0*j+nq01*k; normals[j+k*nq1] = - (m_df[0][tmp]+m_df[2][tmp])*jac[tmp]; + (df[0][tmp]+df[2][tmp])*jac[tmp]; normals[nqtot+j+k*nq1] = - (m_df[3][tmp]+m_df[5][tmp])*jac[tmp]; + (df[3][tmp]+df[5][tmp])*jac[tmp]; normals[2*nqtot+j+k*nq1] = - (m_df[6][tmp]+m_df[8][tmp])*jac[tmp]; + (df[6][tmp]+df[8][tmp])*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -651,11 +654,11 @@ namespace Nektar { int tmp = nq0*(nq1-1) + j + nq01*k; normals[j+k*nq0] = - (m_df[1][tmp]+m_df[2][tmp])*jac[tmp]; + (df[1][tmp]+df[2][tmp])*jac[tmp]; normals[nqtot+j+k*nq0] = - (m_df[4][tmp]+m_df[5][tmp])*jac[tmp]; + (df[4][tmp]+df[5][tmp])*jac[tmp]; normals[2*nqtot+j+k*nq0] = - (m_df[7][tmp]+m_df[8][tmp])*jac[tmp]; + (df[7][tmp]+df[8][tmp])*jac[tmp]; faceJac[j+k*nq0] = jac[tmp]; } } @@ -673,11 +676,11 @@ namespace Nektar { int tmp = j*nq0+nq01*k; normals[j+k*nq1] = - -m_df[0][tmp]*jac[tmp]; + -df[0][tmp]*jac[tmp]; normals[nqtot+j+k*nq1] = - -m_df[3][tmp]*jac[tmp]; + -df[3][tmp]*jac[tmp]; normals[2*nqtot+j+k*nq1] = - -m_df[6][tmp]*jac[tmp]; + -df[6][tmp]*jac[tmp]; faceJac[j+k*nq1] = jac[tmp]; } } @@ -1061,6 +1064,8 @@ namespace Nektar Array wsp5 (nqtot, alloc+ 7*nqtot); Array wsp6 (nqtot, alloc+ 8*nqtot); + const Array& df = + m_metricinfo->GetDerivFactors(GetPointsKeys()); const Array& z0 = m_base[0]->GetZ(); const Array& z1 = m_base[1]->GetZ(); const Array& z2 = m_base[2]->GetZ(); @@ -1089,22 +1094,22 @@ namespace Nektar if(m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { // f_{1k} - Vmath::Vvtvvtp(nqtot, &m_df[0][0], 1, &h0[0], 1, &m_df[2][0], 1, &h1[0], 1, &wsp1[0], 1); - Vmath::Vvtvvtp(nqtot, &m_df[3][0], 1, &h0[0], 1, &m_df[5][0], 1, &h1[0], 1, &wsp2[0], 1); - Vmath::Vvtvvtp(nqtot, &m_df[6][0], 1, &h0[0], 1, &m_df[8][0], 1, &h1[0], 1, &wsp3[0], 1); + Vmath::Vvtvvtp(nqtot, &df[0][0], 1, &h0[0], 1, &df[2][0], 1, &h1[0], 1, &wsp1[0], 1); + Vmath::Vvtvvtp(nqtot, &df[3][0], 1, &h0[0], 1, &df[5][0], 1, &h1[0], 1, &wsp2[0], 1); + Vmath::Vvtvvtp(nqtot, &df[6][0], 1, &h0[0], 1, &df[8][0], 1, &h1[0], 1, &wsp3[0], 1); // g0 Vmath::Vvtvvtp(nqtot, &wsp1[0], 1, &wsp1[0], 1, &wsp2[0], 1, &wsp2[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp3[0], 1, &g0[0], 1, &g0[0], 1); // g4 - Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &wsp1[0], 1, &m_df[5][0], 1, &wsp2[0], 1, &g4[0], 1); - Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &wsp3[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &wsp1[0], 1, &df[5][0], 1, &wsp2[0], 1, &g4[0], 1); + Vmath::Vvtvp (nqtot, &df[8][0], 1, &wsp3[0], 1, &g4[0], 1, &g4[0], 1); // f_{2k} - Vmath::Vvtvvtp(nqtot, &m_df[1][0], 1, &h0[0], 1, &m_df[2][0], 1, &h2[0], 1, &wsp4[0], 1); - Vmath::Vvtvvtp(nqtot, &m_df[4][0], 1, &h0[0], 1, &m_df[5][0], 1, &h2[0], 1, &wsp5[0], 1); - Vmath::Vvtvvtp(nqtot, &m_df[7][0], 1, &h0[0], 1, &m_df[8][0], 1, &h2[0], 1, &wsp6[0], 1); + Vmath::Vvtvvtp(nqtot, &df[1][0], 1, &h0[0], 1, &df[2][0], 1, &h2[0], 1, &wsp4[0], 1); + Vmath::Vvtvvtp(nqtot, &df[4][0], 1, &h0[0], 1, &df[5][0], 1, &h2[0], 1, &wsp5[0], 1); + Vmath::Vvtvvtp(nqtot, &df[7][0], 1, &h0[0], 1, &df[8][0], 1, &h2[0], 1, &wsp6[0], 1); // g1 Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g1[0], 1); @@ -1115,32 +1120,32 @@ namespace Nektar Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g5 - Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &wsp4[0], 1, &m_df[5][0], 1, &wsp5[0], 1, &g5[0], 1); - Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &wsp6[0], 1, &g5[0], 1, &g5[0], 1); + Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &wsp4[0], 1, &df[5][0], 1, &wsp5[0], 1, &g5[0], 1); + Vmath::Vvtvp (nqtot, &df[8][0], 1, &wsp6[0], 1, &g5[0], 1, &g5[0], 1); // g2 - Vmath::Vvtvvtp(nqtot, &m_df[2][0], 1, &m_df[2][0], 1, &m_df[5][0], 1, &m_df[5][0], 1, &g2[0], 1); - Vmath::Vvtvp (nqtot, &m_df[8][0], 1, &m_df[8][0], 1, &g2[0], 1, &g2[0], 1); + Vmath::Vvtvvtp(nqtot, &df[2][0], 1, &df[2][0], 1, &df[5][0], 1, &df[5][0], 1, &g2[0], 1); + Vmath::Vvtvp (nqtot, &df[8][0], 1, &df[8][0], 1, &g2[0], 1, &g2[0], 1); } else { // f_{1k} - Vmath::Svtsvtp(nqtot, m_df[0][0], &h0[0], 1, m_df[2][0], &h1[0], 1, &wsp1[0], 1); - Vmath::Svtsvtp(nqtot, m_df[3][0], &h0[0], 1, m_df[5][0], &h1[0], 1, &wsp2[0], 1); - Vmath::Svtsvtp(nqtot, m_df[6][0], &h0[0], 1, m_df[8][0], &h1[0], 1, &wsp3[0], 1); + Vmath::Svtsvtp(nqtot, df[0][0], &h0[0], 1, df[2][0], &h1[0], 1, &wsp1[0], 1); + Vmath::Svtsvtp(nqtot, df[3][0], &h0[0], 1, df[5][0], &h1[0], 1, &wsp2[0], 1); + Vmath::Svtsvtp(nqtot, df[6][0], &h0[0], 1, df[8][0], &h1[0], 1, &wsp3[0], 1); // g0 Vmath::Vvtvvtp(nqtot, &wsp1[0], 1, &wsp1[0], 1, &wsp2[0], 1, &wsp2[0], 1, &g0[0], 1); Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp3[0], 1, &g0[0], 1, &g0[0], 1); // g4 - Vmath::Svtsvtp(nqtot, m_df[2][0], &wsp1[0], 1, m_df[5][0], &wsp2[0], 1, &g4[0], 1); - Vmath::Svtvp (nqtot, m_df[8][0], &wsp3[0], 1, &g4[0], 1, &g4[0], 1); + Vmath::Svtsvtp(nqtot, df[2][0], &wsp1[0], 1, df[5][0], &wsp2[0], 1, &g4[0], 1); + Vmath::Svtvp (nqtot, df[8][0], &wsp3[0], 1, &g4[0], 1, &g4[0], 1); // f_{2k} - Vmath::Svtsvtp(nqtot, m_df[1][0], &h0[0], 1, m_df[2][0], &h2[0], 1, &wsp4[0], 1); - Vmath::Svtsvtp(nqtot, m_df[4][0], &h0[0], 1, m_df[5][0], &h2[0], 1, &wsp5[0], 1); - Vmath::Svtsvtp(nqtot, m_df[7][0], &h0[0], 1, m_df[8][0], &h2[0], 1, &wsp6[0], 1); + Vmath::Svtsvtp(nqtot, df[1][0], &h0[0], 1, df[2][0], &h2[0], 1, &wsp4[0], 1); + Vmath::Svtsvtp(nqtot, df[4][0], &h0[0], 1, df[5][0], &h2[0], 1, &wsp5[0], 1); + Vmath::Svtsvtp(nqtot, df[7][0], &h0[0], 1, df[8][0], &h2[0], 1, &wsp6[0], 1); // g1 Vmath::Vvtvvtp(nqtot, &wsp4[0], 1, &wsp4[0], 1, &wsp5[0], 1, &wsp5[0], 1, &g1[0], 1); @@ -1151,11 +1156,11 @@ namespace Nektar Vmath::Vvtvp (nqtot, &wsp3[0], 1, &wsp6[0], 1, &g3[0], 1, &g3[0], 1); // g5 - Vmath::Svtsvtp(nqtot, m_df[2][0], &wsp4[0], 1, m_df[5][0], &wsp5[0], 1, &g5[0], 1); - Vmath::Svtvp (nqtot, m_df[8][0], &wsp6[0], 1, &g5[0], 1, &g5[0], 1); + Vmath::Svtsvtp(nqtot, df[2][0], &wsp4[0], 1, df[5][0], &wsp5[0], 1, &g5[0], 1); + Vmath::Svtvp (nqtot, df[8][0], &wsp6[0], 1, &g5[0], 1, &g5[0], 1); // g2 - Vmath::Fill(nqtot, m_df[2][0]*m_df[2][0] + m_df[5][0]*m_df[5][0] + m_df[8][0]*m_df[8][0], &g2[0], 1); + Vmath::Fill(nqtot, df[2][0]*df[2][0] + df[5][0]*df[5][0] + df[8][0]*df[8][0], &g2[0], 1); } for (unsigned int i = 0; i < dim; ++i) diff --git a/library/LocalRegions/QuadExp.cpp b/library/LocalRegions/QuadExp.cpp index 9b03850c4..4f21a8078 100644 --- a/library/LocalRegions/QuadExp.cpp +++ b/library/LocalRegions/QuadExp.cpp @@ -116,6 +116,7 @@ namespace Nektar int nquad0 = m_base[0]->GetNumPoints(); int nquad1 = m_base[1]->GetNumPoints(); int nqtot = nquad0*nquad1; + const Array& df = m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff0(2*nqtot); Array diff1(diff0+nqtot); @@ -125,41 +126,41 @@ namespace Nektar { if (out_d0.num_elements()) { - Vmath::Vmul (nqtot, m_df[0], 1, diff0, 1, out_d0, 1); - Vmath::Vvtvp (nqtot, m_df[1], 1, diff1, 1, out_d0, 1, + Vmath::Vmul (nqtot, df[0], 1, diff0, 1, out_d0, 1); + Vmath::Vvtvp (nqtot, df[1], 1, diff1, 1, out_d0, 1, out_d0,1); } if(out_d1.num_elements()) { - Vmath::Vmul (nqtot,m_df[2],1,diff0,1, out_d1, 1); - Vmath::Vvtvp (nqtot,m_df[3],1,diff1,1, out_d1, 1, out_d1,1); + Vmath::Vmul (nqtot,df[2],1,diff0,1, out_d1, 1); + Vmath::Vvtvp (nqtot,df[3],1,diff1,1, out_d1, 1, out_d1,1); } if (out_d2.num_elements()) { - Vmath::Vmul (nqtot,m_df[4],1,diff0,1, out_d2, 1); - Vmath::Vvtvp (nqtot,m_df[5],1,diff1,1, out_d2, 1, out_d2,1); + Vmath::Vmul (nqtot,df[4],1,diff0,1, out_d2, 1); + Vmath::Vvtvp (nqtot,df[5],1,diff1,1, out_d2, 1, out_d2,1); } } else // regular geometry { if (out_d0.num_elements()) { - Vmath::Smul (nqtot, m_df[0][0], diff0, 1, out_d0, 1); - Blas::Daxpy (nqtot, m_df[1][0], diff1, 1, out_d0, 1); + Vmath::Smul (nqtot, df[0][0], diff0, 1, out_d0, 1); + Blas::Daxpy (nqtot, df[1][0], diff1, 1, out_d0, 1); } if (out_d1.num_elements()) { - Vmath::Smul (nqtot, m_df[2][0], diff0, 1, out_d1, 1); - Blas::Daxpy (nqtot, m_df[3][0], diff1, 1, out_d1, 1); + Vmath::Smul (nqtot, df[2][0], diff0, 1, out_d1, 1); + Blas::Daxpy (nqtot, df[3][0], diff1, 1, out_d1, 1); } if (out_d2.num_elements()) { - Vmath::Smul (nqtot, m_df[4][0], diff0, 1, out_d2, 1); - Blas::Daxpy (nqtot, m_df[5][0], diff1, 1, out_d2, 1); + Vmath::Smul (nqtot, df[4][0], diff0, 1, out_d2, 1); + Blas::Daxpy (nqtot, df[5][0], diff1, 1, out_d2, 1); } } } @@ -208,6 +209,8 @@ namespace Nektar int nquad1 = m_base[1]->GetNumPoints(); int nqtot = nquad0*nquad1; + const Array& df = m_metricinfo->GetDerivFactors(GetPointsKeys()); + Array diff0(2*nqtot); Array diff1(diff0+nqtot); @@ -224,7 +227,7 @@ namespace Nektar for (int k=0; k<(m_geom->GetCoordim()); ++k) { Vmath::Vvtvp(nqtot, - &m_df[2*k+i][0], 1, + &df[2*k+i][0], 1, &direction[k*nqtot], 1, &tangmat[i][0], 1, &tangmat[i][0], 1); @@ -489,6 +492,8 @@ namespace Nektar int nqtot = nquad0*nquad1; int nmodes0 = m_base[0]->GetNumModes(); + const Array& df = m_metricinfo->GetDerivFactors(GetPointsKeys()); + Array tmp1(2*nqtot+m_ncoeffs+nmodes0*nquad1); Array tmp2(tmp1 + nqtot); Array tmp3(tmp1 + 2*nqtot); @@ -497,21 +502,21 @@ namespace Nektar if (m_metricinfo->GetGtype() == SpatialDomains::eDeformed) { Vmath::Vmul(nqtot, - &m_df[2*dir][0], 1, + &df[2*dir][0], 1, inarray.get(), 1, tmp1.get(), 1); Vmath::Vmul(nqtot, - &m_df[2*dir+1][0], 1, + &df[2*dir+1][0], 1, inarray.get(), 1, tmp2.get(),1); } else { Vmath::Smul(nqtot, - m_df[2*dir][0], inarray.get(), 1, + df[2*dir][0], inarray.get(), 1, tmp1.get(), 1); Vmath::Smul(nqtot, - m_df[2*dir+1][0], inarray.get(), 1, + df[2*dir+1][0], inarray.get(), 1, tmp2.get(), 1); } @@ -932,6 +937,7 @@ namespace Nektar LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); const Array& jac = m_metricinfo->GetJac(ptsKeys); + const Array& df = m_metricinfo->GetDerivFactors(ptsKeys); Array j (max(nquad0, nquad1), 0.0); Array g0(max(nquad0, nquad1), 0.0); @@ -950,9 +956,9 @@ namespace Nektar switch (edge) { case 0: - Vmath::Vcopy(nquad0, &(m_df[1][0]), + Vmath::Vcopy(nquad0, &(df[1][0]), 1, &(g1[0]), 1); - Vmath::Vcopy(nquad0, &(m_df[3][0]), + Vmath::Vcopy(nquad0, &(df[3][0]), 1, &(g3[0]), 1); Vmath::Vcopy(nquad0, &(jac[0]),1, &(j[0]), 1); @@ -964,11 +970,11 @@ namespace Nektar break; case 1: Vmath::Vcopy(nquad1, - &(m_df[0][0])+(nquad0-1), nquad0, + &(df[0][0])+(nquad0-1), nquad0, &(g0[0]), 1); Vmath::Vcopy(nquad1, - &(m_df[2][0])+(nquad0-1), nquad0, + &(df[2][0])+(nquad0-1), nquad0, &(g2[0]), 1); Vmath::Vcopy(nquad1, @@ -984,11 +990,11 @@ namespace Nektar case 2: Vmath::Vcopy(nquad0, - &(m_df[1][0])+(nquad0*nquad1-1), -1, + &(df[1][0])+(nquad0*nquad1-1), -1, &(g1[0]), 1); Vmath::Vcopy(nquad0, - &(m_df[3][0])+(nquad0*nquad1-1), -1, + &(df[3][0])+(nquad0*nquad1-1), -1, &(g3[0]), 1); Vmath::Vcopy(nquad0, @@ -1004,11 +1010,11 @@ namespace Nektar case 3: Vmath::Vcopy(nquad1, - &(m_df[0][0])+nquad0*(nquad1-1), + &(df[0][0])+nquad0*(nquad1-1), -nquad0,&(g0[0]), 1); Vmath::Vcopy(nquad1, - &(m_df[2][0])+nquad0*(nquad1-1), + &(df[2][0])+nquad0*(nquad1-1), -nquad0,&(g2[0]), 1); Vmath::Vcopy(nquad1, @@ -1042,9 +1048,9 @@ namespace Nektar switch (edge) { case 0: - Vmath::Vmul(nqtot,&(m_df[1][0]),1,&jac[0],1, + Vmath::Vmul(nqtot,&(df[1][0]),1,&jac[0],1, &(tmp_gmat1[0]),1); - Vmath::Vmul(nqtot,&(m_df[3][0]),1,&jac[0],1, + Vmath::Vmul(nqtot,&(df[3][0]),1,&jac[0],1, &(tmp_gmat3[0]),1); QuadExp::v_GetEdgeInterpVals( edge, tmp_gmat1, g1_edge); @@ -1060,11 +1066,11 @@ namespace Nektar case 1: Vmath::Vmul(nqtot, - &(m_df[0][0]), 1, + &(df[0][0]), 1, &jac[0], 1, &(tmp_gmat0[0]), 1); Vmath::Vmul(nqtot, - &(m_df[2][0]), 1, + &(df[2][0]), 1, &jac[0], 1, &(tmp_gmat2[0]), 1); @@ -1083,11 +1089,11 @@ namespace Nektar case 2: Vmath::Vmul(nqtot, - &(m_df[1][0]), 1, + &(df[1][0]), 1, &jac[0], 1, &(tmp_gmat1[0]), 1); Vmath::Vmul(nqtot, - &(m_df[3][0]), 1, + &(df[3][0]), 1, &jac[0], 1, &(tmp_gmat3[0]),1); QuadExp::v_GetEdgeInterpVals( @@ -1107,11 +1113,11 @@ namespace Nektar break; case 3: Vmath::Vmul(nqtot, - &(m_df[0][0]), 1, + &(df[0][0]), 1, &jac[0], 1, &(tmp_gmat0[0]), 1); Vmath::Vmul(nqtot, - &(m_df[2][0]),1, + &(df[2][0]),1, &jac[0], 1, &(tmp_gmat2[0]),1); QuadExp::v_GetEdgeInterpVals( @@ -1148,29 +1154,29 @@ namespace Nektar for (i = 0; i < nquad0; ++i) { - outarray[i] = jac[0]*sqrt(m_df[1][0]*m_df[1][0] + - m_df[3][0]*m_df[3][0]); + outarray[i] = jac[0]*sqrt(df[1][0]*df[1][0] + + df[3][0]*df[3][0]); } break; case 1: for (i = 0; i < nquad1; ++i) { - outarray[i] = jac[0]*sqrt(m_df[0][0]*m_df[0][0] + - m_df[2][0]*m_df[2][0]); + outarray[i] = jac[0]*sqrt(df[0][0]*df[0][0] + + df[2][0]*df[2][0]); } break; case 2: for (i = 0; i < nquad0; ++i) { - outarray[i] = jac[0]*sqrt(m_df[1][0]*m_df[1][0] + - m_df[3][0]*m_df[3][0]); + outarray[i] = jac[0]*sqrt(df[1][0]*df[1][0] + + df[3][0]*df[3][0]); } break; case 3: for (i = 0; i < nquad1; ++i) { - outarray[i] = jac[0]*sqrt(m_df[0][0]*m_df[0][0] + - m_df[2][0]*m_df[2][0]); + outarray[i] = jac[0]*sqrt(df[0][0]*df[0][0] + + df[2][0]*df[2][0]); } break; default: @@ -1188,6 +1194,7 @@ namespace Nektar GetGeom()->GetMetricInfo(); SpatialDomains::GeomType type = geomFactors->GetGtype(); LibUtilities::PointsKeyVector ptsKeys = GetPointsKeys(); + const Array & df = geomFactors->GetDerivFactors(ptsKeys); const Array & jac = geomFactors->GetJac(ptsKeys); int nqe; if (edge == 0 || edge == 2) @@ -1219,25 +1226,25 @@ namespace Nektar case 0: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, -m_df[2*i+1][0], normal[i], 1); + Vmath::Fill(nqe, -df[2*i+1][0], normal[i], 1); } break; case 1: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, m_df[2*i][0], normal[i], 1); + Vmath::Fill(nqe, df[2*i][0], normal[i], 1); } break; case 2: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, m_df[2*i+1][0], normal[i], 1); + Vmath::Fill(nqe, df[2*i+1][0], normal[i], 1); } break; case 3: for (i = 0; i < vCoordDim; ++i) { - Vmath::Fill(nqe, -m_df[2*i][0], normal[i], 1); + Vmath::Fill(nqe, -df[2*i][0], normal[i], 1); } break; default: @@ -1287,7 +1294,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad0+j] = - -m_df[2*i+1][j]*edgejac[j]; + -df[2*i+1][j]*edgejac[j]; } } from_key = ptsKeys[0]; @@ -1299,7 +1306,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad1+j] = - m_df[2*i][nquad0*j + nquad0-1] + df[2*i][nquad0*j + nquad0-1] *edgejac[j]; } } @@ -1312,7 +1319,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad0+j] = - (m_df[2*i+1][nquad0*(nquad1-1)+j]) + (df[2*i+1][nquad0*(nquad1-1)+j]) *edgejac[j]; } } @@ -1325,7 +1332,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { normals[i*nquad1+j] = - -m_df[2*i][nquad0*j]*edgejac[j]; + -df[2*i][nquad0*j]*edgejac[j]; } } from_key = ptsKeys[1]; @@ -1348,7 +1355,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(m_df[2*i+1][0]), 1, + &(df[2*i+1][0]), 1, &jac[0], 1, &(tmp_gmat[0]), 1); QuadExp::v_GetEdgeInterpVals( @@ -1364,7 +1371,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(m_df[2*i][0]), 1, + &(df[2*i][0]), 1, &jac[0], 1, &(tmp_gmat[0]), 1); QuadExp::v_GetEdgeInterpVals( @@ -1380,7 +1387,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(m_df[2*i+1][0]), 1, + &(df[2*i+1][0]), 1, &jac[0], 1, &(tmp_gmat[0]), 1); QuadExp::v_GetEdgeInterpVals( @@ -1396,7 +1403,7 @@ namespace Nektar for (i = 0; i < vCoordDim; ++i) { Vmath::Vmul(nqtot, - &(m_df[2*i][0]), 1, + &(df[2*i][0]), 1, &jac[0], 1, &(tmp_gmat[0]) ,1); QuadExp::v_GetEdgeInterpVals( @@ -1673,6 +1680,8 @@ namespace Nektar else { NekDouble jac = (m_metricinfo->GetJac(ptsKeys))[0]; + Array df = + m_metricinfo->GetDerivFactors(ptsKeys); int dir = 0; switch(mkey.GetMatrixType()) @@ -1703,8 +1712,8 @@ namespace Nektar DNekMatSharedPtr WeakDeriv = MemoryManager:: AllocateSharedPtr(rows,cols); - (*WeakDeriv) = m_df[2*dir][0]*deriv0 + - m_df[2*dir+1][0]*deriv1; + (*WeakDeriv) = df[2*dir][0]*deriv0 + + df[2*dir+1][0]*deriv1; returnval = MemoryManager:: AllocateSharedPtr(jac,WeakDeriv); } @@ -1817,6 +1826,8 @@ namespace Nektar else { NekDouble jac = (m_metricinfo->GetJac(ptsKeys))[0]; + const Array& df = + m_metricinfo->GetDerivFactors(ptsKeys); int dir = 0; switch(mkey.GetMatrixType()) @@ -1849,8 +1860,8 @@ namespace Nektar DNekMatSharedPtr mat = MemoryManager:: AllocateSharedPtr(rows,cols); - (*mat) = m_df[2*dir][0]*stdiprod0 + - m_df[2*dir+1][0]*stdiprod1; + (*mat) = df[2*dir][0]*stdiprod0 + + df[2*dir+1][0]*stdiprod1; returnval = MemoryManager:: AllocateSharedPtr(jac,mat); diff --git a/library/LocalRegions/SegExp.cpp b/library/LocalRegions/SegExp.cpp index 6b15c2961..fab77fb96 100644 --- a/library/LocalRegions/SegExp.cpp +++ b/library/LocalRegions/SegExp.cpp @@ -169,6 +169,8 @@ namespace Nektar Array &out_d2) { int nquad0 = m_base[0]->GetNumPoints(); + Array gmat = + m_metricinfo->GetDerivFactors(GetPointsKeys()); Array diff(nquad0); //StdExpansion1D::PhysTensorDeriv(inarray,diff); @@ -177,19 +179,19 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Vmul(nquad0,&m_df[0][0],1,&diff[0],1, + Vmath::Vmul(nquad0,&gmat[0][0],1,&diff[0],1, &out_d0[0],1); } if(out_d1.num_elements()) { - Vmath::Vmul(nquad0,&m_df[1][0],1,&diff[0],1, + Vmath::Vmul(nquad0,&gmat[1][0],1,&diff[0],1, &out_d1[0],1); } if(out_d2.num_elements()) { - Vmath::Vmul(nquad0,&m_df[2][0],1,&diff[0],1, + Vmath::Vmul(nquad0,&gmat[2][0],1,&diff[0],1, &out_d2[0],1); } } @@ -197,19 +199,19 @@ namespace Nektar { if(out_d0.num_elements()) { - Vmath::Smul(nquad0, m_df[0][0], diff, 1, + Vmath::Smul(nquad0, gmat[0][0], diff, 1, out_d0, 1); } if(out_d1.num_elements()) { - Vmath::Smul(nquad0, m_df[1][0], diff, 1, + Vmath::Smul(nquad0, gmat[1][0], diff, 1, out_d1, 1); } if(out_d2.num_elements()) { - Vmath::Smul(nquad0, m_df[2][0], diff, 1, + Vmath::Smul(nquad0, gmat[2][0], diff, 1, out_d2, 1); } } @@ -268,6 +270,8 @@ namespace Nektar Array& out_dn) { int nquad0 = m_base[0]->GetNumPoints(); + Array gmat = + m_metricinfo->GetDerivFactors(GetPointsKeys()); int coordim = m_geom->GetCoordim(); Array out_dn_tmp(nquad0,0.0); switch(coordim) @@ -561,6 +565,8 @@ cout<<"deps/dx ="<GetGtype() == SpatialDomains::eDeformed) { - Vmath::Vmul(nquad,m_df[2],1,inarray,1,tmp1,1); + Vmath::Vmul(nquad,gmat[2],1,inarray,1,tmp1,1); } else { - Vmath::Smul(nquad, m_df[2][0], inarray, 1, tmp1, 1); + Vmath::Smul(nquad, gmat[2][0], inarray, 1, tmp1, 1); } } break; @@ -882,7 +888,8 @@ cout<<"deps/dx ="< 2.51953e-08 9.56014e-09 - 1.10694e-08 + 1.10694e-08 9.47464e-08 5.59175e-08 - 2.93085e-07 + 2.93085e-07 -- GitLab From c72ec2733ad2c1ae2de8da3f00559cbd2c88b2f0 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Fri, 15 Jul 2016 12:06:22 +0100 Subject: [PATCH 30/34] Attempt to fix compilation on OS X --- library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp | 7 ------- 1 file changed, 7 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp index 23b654aa0..d8d9536b5 100644 --- a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp @@ -92,13 +92,6 @@ namespace Nektar return m_scale*m_matrix->Scale(); } - template<> - typename NekMatrix, ScaledMatrixTag>::NumberType - NekMatrix, ScaledMatrixTag>::Scale() const - { - return m_scale; - } - template void NekMatrix, ScaledMatrixTag>::SetScale(const NumberType& value) { -- GitLab From 8970a408ce4bea60d10a33f55c9e1e2e83443b09 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Mon, 18 Jul 2016 14:16:26 +0100 Subject: [PATCH 31/34] Fix block matrix vector multiplication --- .../MatrixVectorMultiplication.cpp | 35 +++++++++++++------ 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index e8c333c40..2ef20f3bc 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -214,9 +214,12 @@ namespace Nektar unsigned int curResultRow = 0; unsigned int curWrapperRow = 0; - unsigned int rowsInBlock, columnsInBlock; + unsigned int rowsInBlock = 0; + unsigned int columnsInBlock = 0; for(unsigned int blockRow = 0; blockRow < numberOfBlockRows; ++blockRow) { + curResultRow += rowsInBlock; + curWrapperRow += columnsInBlock; if ( blockRow == 0) { rowsInBlock = rowSizes[blockRow] + 1; @@ -228,8 +231,14 @@ namespace Nektar columnsInBlock = colSizes[blockRow] - colSizes[blockRow-1]; } - if( rowsInBlock == 0 || columnsInBlock == 0) + if( rowsInBlock == 0) + { + continue; + } + if( columnsInBlock == 0) { + std::fill(result.begin()+curResultRow, + result.begin()+curResultRow + rowsInBlock, 0.0); continue; } @@ -243,10 +252,8 @@ namespace Nektar const double* rhsWrapper = rhs_ptr + curWrapperRow; Multiply(resultWrapper, *block, rhsWrapper); //resultWrapper = (*block)*rhsWrapper; - - curResultRow += rowsInBlock; - curWrapperRow += columnsInBlock; } + curResultRow += rowsInBlock; if (curResultRow < result.GetRows()) { std::fill(result.begin()+curResultRow, result.end(), 0.0); @@ -267,9 +274,12 @@ namespace Nektar unsigned int curResultRow = 0; unsigned int curWrapperRow = 0; - unsigned int rowsInBlock, columnsInBlock; + unsigned int rowsInBlock = 0; + unsigned int columnsInBlock = 0; for(unsigned int blockRow = 0; blockRow < numberOfBlockRows; ++blockRow) { + curResultRow += rowsInBlock; + curWrapperRow += columnsInBlock; if ( blockRow == 0) { rowsInBlock = rowSizes[blockRow] + 1; @@ -281,10 +291,16 @@ namespace Nektar columnsInBlock = colSizes[blockRow] - colSizes[blockRow-1]; } - if( rowsInBlock == 0 || columnsInBlock == 0) + if( rowsInBlock == 0) { continue; } + if( columnsInBlock == 0) + { + std::fill(result.begin()+curResultRow, + result.begin()+curResultRow + rowsInBlock, 0.0); + continue; + } const DNekScalMat* block = lhs.GetBlockPtr(blockRow, blockRow); if( !block ) @@ -294,15 +310,14 @@ namespace Nektar double* resultWrapper = result_ptr + curResultRow; const double* rhsWrapper = rhs_ptr + curWrapperRow; - curResultRow += rowsInBlock; - curWrapperRow += columnsInBlock; // Multiply const unsigned int* size = block->GetSize(); Blas::Dgemv('N', size[0], size[1], block->Scale(), - block->GetRawPtr(), rowsInBlock, rhsWrapper, 1, + block->GetRawPtr(), size[0], rhsWrapper, 1, 0.0, resultWrapper, 1); } + curResultRow += rowsInBlock; if (curResultRow < result.GetRows()) { std::fill(result.begin()+curResultRow, result.end(), 0.0); -- GitLab From a2f16685966a5f8bc591a3c52b4a62bb74dcc9f7 Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Mon, 18 Jul 2016 14:17:07 +0100 Subject: [PATCH 32/34] Remove symmetric matrices from local regions --- library/LocalRegions/QuadExp.cpp | 24 ++++++------------------ library/LocalRegions/SegExp.cpp | 19 ++++--------------- library/LocalRegions/TriExp.cpp | 21 +++------------------ library/StdRegions/StdExpansion.cpp | 14 +------------- 4 files changed, 14 insertions(+), 64 deletions(-) diff --git a/library/LocalRegions/QuadExp.cpp b/library/LocalRegions/QuadExp.cpp index 4f21a8078..2cadedcce 100644 --- a/library/LocalRegions/QuadExp.cpp +++ b/library/LocalRegions/QuadExp.cpp @@ -391,11 +391,10 @@ namespace Nektar rhs[i] = tmp1[ mapArray[i] ]; } - Blas::Dspmv('U',nInteriorDofs, - matsys->Scale(), - &((matsys->GetOwnedMatrix())->GetPtr())[0], - rhs.get(),1,0.0, - result.get(),1); + Blas::Dgemv('N', nInteriorDofs, nInteriorDofs, + matsys->Scale(), + &((matsys->GetOwnedMatrix())->GetPtr())[0], + nInteriorDofs,rhs.get(),1,0.0,result.get(),1); for (i = 0; i < nInteriorDofs; i++) { @@ -1997,19 +1996,8 @@ namespace Nektar AllocateSharedPtr(nbdry,nint); DNekMatSharedPtr C = MemoryManager:: AllocateSharedPtr(nint,nbdry); - DNekMatSharedPtr D; - if ( (mkey.GetMatrixType() == StdRegions::eMass) || - (mkey.GetMatrixType() == StdRegions::eLaplacian) || - (mkey.GetMatrixType() == StdRegions::eHelmholtz)) - { - D = MemoryManager:: - AllocateSharedPtr(nint,nint, eSYMMETRIC); - } - else - { - D = MemoryManager:: - AllocateSharedPtr(nint,nint, eFULL); - } + DNekMatSharedPtr D = MemoryManager:: + AllocateSharedPtr(nint,nint); Array bmap(nbdry); Array imap(nint); diff --git a/library/LocalRegions/SegExp.cpp b/library/LocalRegions/SegExp.cpp index fab77fb96..6c5d58d56 100644 --- a/library/LocalRegions/SegExp.cpp +++ b/library/LocalRegions/SegExp.cpp @@ -465,10 +465,10 @@ cout<<"deps/dx ="< &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + private: BwdTrans_IterPerExp( vector pCollExp, @@ -229,6 +247,15 @@ class BwdTrans_NoCollection : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: vector m_expList; @@ -310,6 +337,15 @@ class BwdTrans_SumFac_Seg : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; @@ -406,6 +442,15 @@ class BwdTrans_SumFac_Quad : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -495,6 +540,15 @@ class BwdTrans_SumFac_Tri : public Operator &output[0], m_nquad0); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; @@ -596,6 +650,15 @@ class BwdTrans_SumFac_Hex : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -755,6 +818,15 @@ class BwdTrans_SumFac_Tet : public Operator } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -887,6 +959,15 @@ class BwdTrans_SumFac_Prism : public Operator 0.0, output.get(), m_nquad0); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; diff --git a/library/Collections/IProductWRTBase.cpp b/library/Collections/IProductWRTBase.cpp index 419b3493c..87e3e1877 100644 --- a/library/Collections/IProductWRTBase.cpp +++ b/library/Collections/IProductWRTBase.cpp @@ -81,6 +81,15 @@ class IProductWRTBase_StdMat : public Operator 0.0, output.get(), m_stdExp->GetNcoeffs()); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: DNekMatSharedPtr m_mat; Array m_jac; @@ -170,6 +179,15 @@ class IProductWRTBase_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: Array m_jac; @@ -270,6 +288,15 @@ class IProductWRTBase_NoCollection : public Operator } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: vector m_expList; @@ -364,6 +391,15 @@ class IProductWRTBase_SumFac_Seg : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nmodes0; @@ -422,6 +458,15 @@ class IProductWRTBase_SumFac_Quad : public Operator m_jac, input, output, wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -487,6 +532,15 @@ class IProductWRTBase_SumFac_Tri : public Operator output,wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -561,6 +615,15 @@ class IProductWRTBase_SumFac_Hex : public Operator m_jac,input,output,wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -639,6 +702,15 @@ class IProductWRTBase_SumFac_Tet : public Operator } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -722,6 +794,15 @@ class IProductWRTBase_SumFac_Prism : public Operator m_jac,input,output,wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; diff --git a/library/Collections/IProductWRTDerivBase.cpp b/library/Collections/IProductWRTDerivBase.cpp index 52139e56b..ee1557a11 100644 --- a/library/Collections/IProductWRTDerivBase.cpp +++ b/library/Collections/IProductWRTDerivBase.cpp @@ -124,6 +124,15 @@ class IProductWRTDerivBase_StdMat : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: Array m_iProdWRTStdDBase; Array m_derivFac; @@ -284,6 +293,15 @@ class IProductWRTDerivBase_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: Array m_derivFac; Array m_jac; @@ -403,6 +421,15 @@ class IProductWRTDerivBase_NoCollection : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: int m_dim; int m_coordim; @@ -498,6 +525,15 @@ class IProductWRTDerivBase_SumFac_Seg : public Operator &output[0], m_nmodes0); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nmodes0; @@ -591,6 +627,15 @@ class IProductWRTDerivBase_SumFac_Quad : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -730,6 +775,15 @@ class IProductWRTDerivBase_SumFac_Tri : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -890,6 +944,15 @@ class IProductWRTDerivBase_SumFac_Hex : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -1077,6 +1140,15 @@ class IProductWRTDerivBase_SumFac_Tet : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -1284,6 +1356,15 @@ class IProductWRTDerivBase_SumFac_Prism : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; diff --git a/library/Collections/Operator.h b/library/Collections/Operator.h index d499653e7..ff8e90bbe 100644 --- a/library/Collections/Operator.h +++ b/library/Collections/Operator.h @@ -132,10 +132,7 @@ class Operator const Array &input, Array &output, Array &wsp - = NullNekDouble1DArray) - { - ASSERTL0(false, "Not valid for this operator."); - } + = NullNekDouble1DArray) = 0; COLLECTIONS_EXPORT virtual ~Operator(); -- GitLab From 71b93b7085d4ddb8279b9369c6f278491ef1644d Mon Sep 17 00:00:00 2001 From: Douglas Serson Date: Mon, 18 Jul 2016 16:34:55 +0100 Subject: [PATCH 34/34] More changes for windows --- .../LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp b/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp index 640278e4f..1e7f67268 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp +++ b/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp @@ -75,7 +75,7 @@ namespace Nektar const NekMatrix& lhs, const NekVector& rhs); - void DiagonalBlockFullScalMatrixMultiply(NekVector& result, + LIB_UTILITIES_EXPORT void DiagonalBlockFullScalMatrixMultiply(NekVector& result, const NekMatrix, ScaledMatrixTag>, BlockMatrixTag>& lhs, const NekVector& rhs); -- GitLab