diff --git a/library/Collections/BwdTrans.cpp b/library/Collections/BwdTrans.cpp index f48e27811401f4877e9328528dda2e8d775075e1..fbc0683d8da6612b08420f4ba899c356019f18f6 100644 --- a/library/Collections/BwdTrans.cpp +++ b/library/Collections/BwdTrans.cpp @@ -75,6 +75,15 @@ class BwdTrans_StdMat : public Operator 0.0, output.get(), m_stdExp->GetTotPoints()); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: DNekMatSharedPtr m_mat; @@ -155,6 +164,15 @@ class BwdTrans_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + private: BwdTrans_IterPerExp( vector pCollExp, @@ -229,6 +247,15 @@ class BwdTrans_NoCollection : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: vector m_expList; @@ -310,6 +337,15 @@ class BwdTrans_SumFac_Seg : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; @@ -406,6 +442,15 @@ class BwdTrans_SumFac_Quad : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -495,6 +540,15 @@ class BwdTrans_SumFac_Tri : public Operator &output[0], m_nquad0); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; @@ -596,6 +650,15 @@ class BwdTrans_SumFac_Hex : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -755,6 +818,15 @@ class BwdTrans_SumFac_Tet : public Operator } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -887,6 +959,15 @@ class BwdTrans_SumFac_Prism : public Operator 0.0, output.get(), m_nquad0); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; diff --git a/library/Collections/Collection.h b/library/Collections/Collection.h index 9f59a81c7d543d9883e699d824cacbdbf8d61ece..79d516205e08a395c9781ec59ae68e5b93b6b1b2 100644 --- a/library/Collections/Collection.h +++ b/library/Collections/Collection.h @@ -79,6 +79,12 @@ class Collection Array &output1, Array &output2); + inline void ApplyOperator( + const OperatorType &op, + int dir, + const Array &inarray, + Array &output); + inline bool HasOperator(const OperatorType &op); protected: @@ -135,6 +141,19 @@ inline void Collection::ApplyOperator( (*m_ops[op])(inarray, output0, output1, output2, wsp); } +/** + * + */ +inline void Collection::ApplyOperator( + const OperatorType &op, + int dir, + const Array &inarray, + Array &output) +{ + Array wsp(m_ops[op]->GetWspSize()); + (*m_ops[op])(dir, inarray, output, wsp); +} + inline bool Collection::HasOperator(const OperatorType &op) { return (m_ops.find(op) != m_ops.end()); diff --git a/library/Collections/IProductWRTBase.cpp b/library/Collections/IProductWRTBase.cpp index 419b3493c126c1800232e67f676777d13053fcc5..87e3e18777b2d2e0b88e886113399446a0569a80 100644 --- a/library/Collections/IProductWRTBase.cpp +++ b/library/Collections/IProductWRTBase.cpp @@ -81,6 +81,15 @@ class IProductWRTBase_StdMat : public Operator 0.0, output.get(), m_stdExp->GetNcoeffs()); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: DNekMatSharedPtr m_mat; Array m_jac; @@ -170,6 +179,15 @@ class IProductWRTBase_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: Array m_jac; @@ -270,6 +288,15 @@ class IProductWRTBase_NoCollection : public Operator } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: vector m_expList; @@ -364,6 +391,15 @@ class IProductWRTBase_SumFac_Seg : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nmodes0; @@ -422,6 +458,15 @@ class IProductWRTBase_SumFac_Quad : public Operator m_jac, input, output, wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -487,6 +532,15 @@ class IProductWRTBase_SumFac_Tri : public Operator output,wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -561,6 +615,15 @@ class IProductWRTBase_SumFac_Hex : public Operator m_jac,input,output,wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -639,6 +702,15 @@ class IProductWRTBase_SumFac_Tet : public Operator } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -722,6 +794,15 @@ class IProductWRTBase_SumFac_Prism : public Operator m_jac,input,output,wsp); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; diff --git a/library/Collections/IProductWRTDerivBase.cpp b/library/Collections/IProductWRTDerivBase.cpp index 52139e56bac2865aca15be4871679a51f8091c32..ee1557a118ddf69502cf508c0f9bd9e2aac84db7 100644 --- a/library/Collections/IProductWRTDerivBase.cpp +++ b/library/Collections/IProductWRTDerivBase.cpp @@ -124,6 +124,15 @@ class IProductWRTDerivBase_StdMat : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: Array m_iProdWRTStdDBase; Array m_derivFac; @@ -284,6 +293,15 @@ class IProductWRTDerivBase_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: Array m_derivFac; Array m_jac; @@ -403,6 +421,15 @@ class IProductWRTDerivBase_NoCollection : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: int m_dim; int m_coordim; @@ -498,6 +525,15 @@ class IProductWRTDerivBase_SumFac_Seg : public Operator &output[0], m_nmodes0); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nmodes0; @@ -591,6 +627,15 @@ class IProductWRTDerivBase_SumFac_Quad : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -730,6 +775,15 @@ class IProductWRTDerivBase_SumFac_Tri : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -890,6 +944,15 @@ class IProductWRTDerivBase_SumFac_Hex : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -1077,6 +1140,15 @@ class IProductWRTDerivBase_SumFac_Tet : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; const int m_nquad1; @@ -1284,6 +1356,15 @@ class IProductWRTDerivBase_SumFac_Prism : public Operator Vmath::Vadd(m_numElmt*nmodes,tmp[0],1,output,1,output,1); } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + ASSERTL0(false, "Not valid for this operator."); + } + protected: const int m_nquad0; diff --git a/library/Collections/Operator.h b/library/Collections/Operator.h index 70a7f8d8dd43163f1a51a7b35ef297aa81eaaff6..ff8e90bbe9ae3f8703935f7b1ad9a9a4a649261f 100644 --- a/library/Collections/Operator.h +++ b/library/Collections/Operator.h @@ -127,6 +127,13 @@ class Operator Array &wsp = NullNekDouble1DArray) = 0; + COLLECTIONS_EXPORT virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp + = NullNekDouble1DArray) = 0; + COLLECTIONS_EXPORT virtual ~Operator(); /// Get the size of the required workspace diff --git a/library/Collections/PhysDeriv.cpp b/library/Collections/PhysDeriv.cpp index e107cd73d0efb677d4025d6e0ee5e2b7c0868826..da8eb73631f879b5a60e49782d11a83f97b6d9d1 100644 --- a/library/Collections/PhysDeriv.cpp +++ b/library/Collections/PhysDeriv.cpp @@ -105,6 +105,43 @@ class PhysDeriv_StdMat : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // calculate local derivatives + for(int i = 0; i < m_dim; ++i) + { + Blas::Dgemm('N', 'N', m_derivMat[i]->GetRows(), m_numElmt, + m_derivMat[i]->GetColumns(), 1.0, + m_derivMat[i]->GetRawPtr(), + m_derivMat[i]->GetRows(), input.get(), nPhys, + 0.0, &Diff[i][0],nPhys); + } + + // calculate full derivative + Vmath::Zero(ntot,output,1); + for(int j = 0; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, + output, 1, + output, 1); + } + } + protected: Array m_derivMat; Array m_derivFac; @@ -240,6 +277,42 @@ class PhysDeriv_IterPerExp : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // calculate local derivatives + for (int i = 0; i < m_numElmt; ++i) + { + m_stdExp->PhysDeriv(input + i*nPhys, + tmp0 = Diff[0] + i*nPhys, + tmp1 = Diff[1] + i*nPhys, + tmp2 = Diff[2] + i*nPhys); + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, + output, 1, + output, 1); + } + } + protected: Array m_derivFac; int m_dim; @@ -361,6 +434,23 @@ class PhysDeriv_NoCollection : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nPhys = m_expList[0]->GetTotPoints(); + Array tmp; + + // calculate local derivatives + for (int i = 0; i < m_numElmt; ++i) + { + m_expList[i]->PhysDeriv(dir, input + i*nPhys, + tmp = output + i*nPhys); + } + } + protected: vector m_expList; @@ -456,6 +546,29 @@ class PhysDeriv_SumFac_Seg : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nqcol = m_nquad0*m_numElmt; + + ASSERTL1(wsp.num_elements() == m_wspSize, + "Incorrect workspace size"); + ASSERTL1(input.num_elements() >= nqcol, + "Incorrect input size"); + + Array diff0(nqcol, wsp); + + Blas::Dgemm('N', 'N', m_nquad0, m_numElmt, + m_nquad0, 1.0, m_Deriv0, m_nquad0, + input.get(), m_nquad0, 0.0, + diff0.get(), m_nquad0); + + Vmath::Vmul(nqcol, m_derivFac[dir], 1, diff0, 1, output, 1); + } + protected: int m_coordim; const int m_nquad0; @@ -547,6 +660,42 @@ class PhysDeriv_SumFac_Quad : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nqtot = m_nquad0 * m_nquad1; + const int nqcol = nqtot*m_numElmt; + + ASSERTL1(wsp.num_elements() == m_wspSize, + "Incorrect workspace size"); + ASSERTL1(input.num_elements() >= nqcol, + "Incorrect input size"); + + Array diff0(nqcol, wsp ); + Array diff1(nqcol, wsp + nqcol); + + Blas::Dgemm('N', 'N', m_nquad0, m_nquad1*m_numElmt, + m_nquad0, 1.0, m_Deriv0, m_nquad0, + input.get(), m_nquad0, 0.0, + diff0.get(), m_nquad0); + + int cnt = 0; + for (int i = 0; i < m_numElmt; ++i, cnt += nqtot) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0, + input.get() + cnt, m_nquad0, + m_Deriv1, m_nquad1, 0.0, + diff1.get() + cnt, m_nquad0); + } + + Vmath::Vmul (nqcol, m_derivFac[2*dir] , 1, diff0, 1, output, 1); + Vmath::Vvtvp (nqcol, m_derivFac[2*dir+1], 1, diff1, 1, output, 1, + output, 1); + } + protected: int m_coordim; const int m_nquad0; @@ -651,6 +800,52 @@ class PhysDeriv_SumFac_Tri : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + const int nqtot = m_nquad0 * m_nquad1; + const int nqcol = nqtot*m_numElmt; + + ASSERTL1(wsp.num_elements() == m_wspSize, + "Incorrect workspace size"); + ASSERTL1(input.num_elements() >= nqcol, + "Incorrect input size"); + + Array diff0(nqcol, wsp ); + Array diff1(nqcol, wsp + nqcol); + + // Tensor Product Derivative + Blas::Dgemm('N', 'N', m_nquad0, m_nquad1*m_numElmt, + m_nquad0, 1.0, m_Deriv0, m_nquad0, + input.get(), m_nquad0, 0.0, + diff0.get(), m_nquad0); + + int cnt = 0; + for (int i = 0; i < m_numElmt; ++i, cnt += nqtot) + { + // scale diff0 by geometric factor: 2/(1-z1) + Vmath::Vmul(nqtot,&m_fac1[0],1,diff0.get()+cnt,1, + diff0.get()+cnt,1); + + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, 1.0, + input.get() + cnt, m_nquad0, + m_Deriv1, m_nquad1, 0.0, + diff1.get() + cnt, m_nquad0); + + // add to diff1 by diff0 scaled by: (1_z0)/(1-z1) + Vmath::Vvtvp(nqtot,m_fac0.get(),1,diff0.get()+cnt,1, + diff1.get()+cnt,1,diff1.get()+cnt,1); + } + + + Vmath::Vmul (nqcol, m_derivFac[2*dir] , 1, diff0, 1, output, 1); + Vmath::Vvtvp (nqcol, m_derivFac[2*dir+1], 1, diff1, 1, output, 1, + output, 1); + } + protected: int m_coordim; const int m_nquad0; @@ -783,6 +978,54 @@ class PhysDeriv_SumFac_Hex : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + Blas::Dgemm('N','N', m_nquad0,m_nquad1*m_nquad2*m_numElmt, + m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0], + m_nquad0,0.0,&Diff[0][0],m_nquad0); + + for(int i = 0; i < m_numElmt; ++i) + { + for (int j = 0; j < m_nquad2; ++j) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, + 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0, m_Deriv1, m_nquad1, 0.0, + &Diff[1][i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0); + } + + Blas::Dgemm('N','T',m_nquad0*m_nquad1,m_nquad2,m_nquad2, + 1.0, &input[i*nPhys],m_nquad0*m_nquad1, + m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys], + m_nquad0*m_nquad1); + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, + output, 1, + output, 1); + } + } + protected: Array m_derivFac; int m_dim; @@ -927,6 +1170,88 @@ class PhysDeriv_SumFac_Tet : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // dEta0 + Blas::Dgemm('N','N', m_nquad0,m_nquad1*m_nquad2*m_numElmt, + m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0], + m_nquad0,0.0,&Diff[0][0],m_nquad0); + + // dEta2 + for(int i = 0; i < m_numElmt; ++i) + { + Blas::Dgemm('N','T',m_nquad0*m_nquad1,m_nquad2,m_nquad2, + 1.0, &input[i*nPhys],m_nquad0*m_nquad1, + m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys], + m_nquad0*m_nquad1); + } + + for(int i = 0; i < m_numElmt; ++i) + { + + // dEta1 + for (int j = 0; j < m_nquad2; ++j) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, + 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0, m_Deriv1, m_nquad1, 0.0, + &Diff[1][i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0); + } + + // dxi2 = (1 + eta_1)/(1 -eta_2)*dEta1 + dEta2 + Vmath::Vvtvp(nPhys, m_fac3.get(), 1, + Diff[1].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1); + + // dxi1 = 2/(1 - eta_2) dEta1 + Vmath::Vmul(nPhys, m_fac2.get(), 1, + Diff[1].get() + i*nPhys, 1, + Diff[1].get() + i*nPhys, 1); + + // dxi1 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi1 + Vmath::Vvtvp(nPhys, m_fac1.get(), 1, + Diff[0].get() + i*nPhys, 1, + Diff[1].get() + i*nPhys, 1, + Diff[1].get() + i*nPhys, 1); + + // dxi2 = 2.0(1+eta_0)/((1-eta_1)(1-eta_2)) dEta0 + dxi2 + Vmath::Vvtvp(nPhys, m_fac1.get(), 1, + Diff[0].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1, + Diff[2].get() + i*nPhys, 1); + + // dxi0 = 4.0/((1-eta_1)(1-eta_2)) dEta0 + Vmath::Vmul(nPhys, m_fac0.get(), 1, + Diff[0].get() + i*nPhys, 1, + Diff[0].get() + i*nPhys, 1); + + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, output, 1, output, 1); + } + } + protected: Array m_derivFac; int m_dim; @@ -1085,6 +1410,66 @@ class PhysDeriv_SumFac_Prism : public Operator } } + virtual void operator()( + int dir, + const Array &input, + Array &output, + Array &wsp) + { + int nPhys = m_stdExp->GetTotPoints(); + int ntot = m_numElmt*nPhys; + Array tmp0,tmp1,tmp2; + Array > Diff(3); + + for(int i = 0; i < m_dim; ++i) + { + Diff[i] = wsp + i*ntot; + } + + // dEta0 + Blas::Dgemm('N','N', m_nquad0,m_nquad1*m_nquad2*m_numElmt, + m_nquad0,1.0, m_Deriv0,m_nquad0,&input[0], + m_nquad0,0.0,&Diff[0][0],m_nquad0); + + int cnt = 0; + for(int i = 0; i < m_numElmt; ++i) + { + + // dEta 1 + for (int j = 0; j < m_nquad2; ++j) + { + Blas::Dgemm('N', 'T', m_nquad0, m_nquad1, m_nquad1, + 1.0, &input[i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0, m_Deriv1, m_nquad1, 0.0, + &Diff[1][i*nPhys+j*m_nquad0*m_nquad1], + m_nquad0); + } + + // dEta 2 + Blas::Dgemm('N','T',m_nquad0*m_nquad1,m_nquad2,m_nquad2, + 1.0, &input[i*nPhys],m_nquad0*m_nquad1, + m_Deriv2,m_nquad2, 0.0,&Diff[2][i*nPhys], + m_nquad0*m_nquad1); + + // dxi0 = 2/(1-eta_2) d Eta_0 + Vmath::Vmul(nPhys,&m_fac0[0],1,Diff[0].get()+cnt,1, + Diff[0].get()+cnt,1); + + // dxi2 = (1+eta0)/(1-eta_2) d Eta_0 + d/dEta2; + Vmath::Vvtvp(nPhys,&m_fac1[0],1,Diff[0].get()+cnt,1, + Diff[2].get()+cnt,1,Diff[2].get()+cnt,1); + cnt += nPhys; + } + + // calculate full derivative + Vmath::Vmul(ntot,m_derivFac[dir*m_dim],1,Diff[0],1,output,1); + for(int j = 1; j < m_dim; ++j) + { + Vmath::Vvtvp (ntot, m_derivFac[dir*m_dim+j], 1, + Diff[j], 1, output, 1, output, 1); + } + } + protected: Array m_derivFac; int m_dim; diff --git a/library/LibUtilities/LinearAlgebra/Blas.hpp b/library/LibUtilities/LinearAlgebra/Blas.hpp index 3d1cdd2e8873ee243ff0640d7bea06a498b33ee5..a230624998e6a00e8726ee1d24f7ef621d02b92f 100644 --- a/library/LibUtilities/LinearAlgebra/Blas.hpp +++ b/library/LibUtilities/LinearAlgebra/Blas.hpp @@ -79,7 +79,7 @@ namespace Blas void F77NAME(dtpmv) (const char& uplo, const char& trans, const char& diag, const int& n, const double* ap, double* x, const int& incx); - void F77NAME(dspmv) (const char& trans, const int& n, const double& alpha, + void F77NAME(dspmv) (const char& uplo, const int& n, const double& alpha, const double* a, const double* x, const int& incx, const double& beta, double* y, const int& incy); @@ -192,11 +192,11 @@ namespace Blas /// \brief BLAS level 2: Matrix vector multiply y = A \e x where A /// is symmetric packed - static inline void Dspmv (const char& trans, const int& n, const double& alpha, + static inline void Dspmv (const char& uplo, const int& n, const double& alpha, const double* a, const double* x, const int& incx, const double& beta, double* y, const int& incy) { - F77NAME(dspmv) (trans,n,alpha,a,x,incx,beta,y,incy); + F77NAME(dspmv) (uplo,n,alpha,a,x,incx,beta,y,incy); } static inline void Dsbmv (const char& uplo, const int& m, const int& k, diff --git a/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp b/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp index ff218cd256e5129ec8f2da66e46330c9ffcf3564..242e98698163d1470051f9df889d37a5f228e7d8 100644 --- a/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/BlockMatrix.cpp @@ -40,14 +40,13 @@ namespace Nektar { template NekMatrix, BlockMatrixTag>::NekMatrix(MatrixStorage type) : - BaseType(0,0), + BaseType(0,0,type), m_data(), m_rowSizes(), m_columnSizes(), m_storageSize(), m_numberOfBlockRows(0), - m_numberOfBlockColumns(0), - m_storageType(type) + m_numberOfBlockColumns(0) { } @@ -55,14 +54,13 @@ namespace Nektar NekMatrix, BlockMatrixTag>::NekMatrix(unsigned int numberOfBlockRows, unsigned int numberOfBlockColumns, unsigned int rowsPerBlock, unsigned int columnsPerBlock, MatrixStorage type) : - BaseType(numberOfBlockRows*rowsPerBlock, numberOfBlockColumns*columnsPerBlock), + BaseType(numberOfBlockRows*rowsPerBlock, numberOfBlockColumns*columnsPerBlock,type), m_data(), m_rowSizes(numberOfBlockRows), m_columnSizes(numberOfBlockColumns), m_storageSize(0), m_numberOfBlockRows(numberOfBlockRows), - m_numberOfBlockColumns(numberOfBlockColumns), - m_storageType(type) + m_numberOfBlockColumns(numberOfBlockColumns) { m_storageSize = GetRequiredStorageSize(); m_data = Array >(m_storageSize, boost::shared_ptr()); @@ -82,14 +80,14 @@ namespace Nektar const unsigned int* rowsPerBlock, const unsigned int* columnsPerBlock, MatrixStorage type) : BaseType(std::accumulate(rowsPerBlock, rowsPerBlock + numberOfBlockRows, 0), - std::accumulate(columnsPerBlock, columnsPerBlock + numberOfBlockColumns, 0)), + std::accumulate(columnsPerBlock, columnsPerBlock + numberOfBlockColumns, 0), + type), m_data(), m_rowSizes(numberOfBlockRows), m_columnSizes(numberOfBlockColumns), m_storageSize(0), m_numberOfBlockRows(numberOfBlockRows), - m_numberOfBlockColumns(numberOfBlockColumns), - m_storageType(type) + m_numberOfBlockColumns(numberOfBlockColumns) { m_storageSize = GetRequiredStorageSize(); m_data = Array >(m_storageSize, boost::shared_ptr()); @@ -101,14 +99,14 @@ namespace Nektar const Array& rowsPerBlock, const Array& columnsPerBlock, MatrixStorage type) : BaseType(std::accumulate(rowsPerBlock.data(), rowsPerBlock.data() + numberOfBlockRows, 0), - std::accumulate(columnsPerBlock.data(), columnsPerBlock.data() + numberOfBlockColumns, 0)), + std::accumulate(columnsPerBlock.data(), columnsPerBlock.data() + numberOfBlockColumns, 0), + type), m_data(), m_rowSizes(numberOfBlockRows), m_columnSizes(numberOfBlockColumns), m_storageSize(0), m_numberOfBlockRows(numberOfBlockRows), - m_numberOfBlockColumns(numberOfBlockColumns), - m_storageType(type) + m_numberOfBlockColumns(numberOfBlockColumns) { m_storageSize = GetRequiredStorageSize(); m_data = Array >(m_storageSize, boost::shared_ptr()); @@ -120,14 +118,14 @@ namespace Nektar const Array& columnsPerBlock, MatrixStorage type) : BaseType(std::accumulate(rowsPerBlock.begin(), rowsPerBlock.end(), 0), - std::accumulate(columnsPerBlock.begin(), columnsPerBlock.end(), 0)), + std::accumulate(columnsPerBlock.begin(), columnsPerBlock.end(), 0), + type), m_data(), m_rowSizes(rowsPerBlock.num_elements()), m_columnSizes(columnsPerBlock.num_elements()), m_storageSize(0), m_numberOfBlockRows(rowsPerBlock.num_elements()), - m_numberOfBlockColumns(columnsPerBlock.num_elements()), - m_storageType(type) + m_numberOfBlockColumns(columnsPerBlock.num_elements()) { m_storageSize = GetRequiredStorageSize(); m_data = Array >(m_storageSize, boost::shared_ptr()); @@ -142,8 +140,7 @@ namespace Nektar m_columnSizes(rhs.m_columnSizes), m_storageSize(rhs.m_storageSize), m_numberOfBlockRows(rhs.m_numberOfBlockRows), - m_numberOfBlockColumns(rhs.m_numberOfBlockColumns), - m_storageType(rhs.m_storageType) + m_numberOfBlockColumns(rhs.m_numberOfBlockColumns) { for(unsigned int i = 0; i < rhs.m_data.num_elements(); ++i) { @@ -161,16 +158,8 @@ namespace Nektar template unsigned int NekMatrix, BlockMatrixTag>::CalculateBlockIndex(unsigned int row, unsigned int column) const { - unsigned int blockRows = GetNumberOfBlockRows(); - unsigned int blockCols = GetNumberOfBlockColumns(); - - if( this->GetTransposeFlag() == 'T' ) - { - std::swap(blockRows, blockCols); - } - return BaseType::CalculateIndex(this->GetStorageType(), - row, column, blockRows, blockCols, this->GetTransposeFlag()); + row, column, m_numberOfBlockRows, m_numberOfBlockColumns, this->GetTransposeFlag()); } template @@ -303,12 +292,6 @@ namespace Nektar return m_storageSize; } - template - MatrixStorage NekMatrix, BlockMatrixTag>::GetType() const - { - return m_storageType; - } - template unsigned int NekMatrix, BlockMatrixTag>::GetNumberOfBlockRows() const { @@ -361,6 +344,23 @@ namespace Nektar } } + template + void NekMatrix, BlockMatrixTag>::GetBlockSizes( + Array& rowSizes, + Array& colSizes) const + { + if( this->GetTransposeFlag() == 'T' ) + { + rowSizes = m_columnSizes; + colSizes = m_rowSizes; + } + else + { + rowSizes = m_rowSizes; + colSizes = m_columnSizes; + } + } + template typename NekMatrix, BlockMatrixTag>::iterator NekMatrix, BlockMatrixTag>::begin() { return iterator(*this, 0, 0); } @@ -438,12 +438,6 @@ namespace Nektar return this->GetStorageSize(); } - template - MatrixStorage NekMatrix, BlockMatrixTag>::v_GetStorageType() const - { - return this->GetType(); - } - template void NekMatrix, BlockMatrixTag>::v_Transpose() { diff --git a/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp b/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp index 217b909eeefc91ee586488a549d0966ecc48be1e..67ea865ba19112bec5e2d8280786a2511f0d24de 100644 --- a/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp +++ b/library/LibUtilities/LinearAlgebra/BlockMatrix.hpp @@ -167,8 +167,6 @@ namespace Nektar LIB_UTILITIES_EXPORT unsigned int GetStorageSize() const; - LIB_UTILITIES_EXPORT MatrixStorage GetType() const; - LIB_UTILITIES_EXPORT unsigned int GetNumberOfBlockRows() const; LIB_UTILITIES_EXPORT unsigned int GetNumberOfBlockColumns() const; @@ -177,6 +175,9 @@ namespace Nektar LIB_UTILITIES_EXPORT unsigned int GetNumberOfColumnsInBlockColumn(unsigned int blockCol) const; + LIB_UTILITIES_EXPORT void GetBlockSizes(Array& rowSizes, + Array& colSizes) const; + LIB_UTILITIES_EXPORT iterator begin(); LIB_UTILITIES_EXPORT iterator end(); LIB_UTILITIES_EXPORT const_iterator begin() const; @@ -197,8 +198,6 @@ namespace Nektar LIB_UTILITIES_EXPORT virtual unsigned int v_GetStorageSize() const; - LIB_UTILITIES_EXPORT virtual MatrixStorage v_GetStorageType() const; - LIB_UTILITIES_EXPORT virtual void v_Transpose(); Array > m_data; @@ -208,7 +207,6 @@ namespace Nektar unsigned int m_storageSize; unsigned int m_numberOfBlockRows; unsigned int m_numberOfBlockColumns; - MatrixStorage m_storageType; static NumberType m_zeroElement; }; diff --git a/library/LibUtilities/LinearAlgebra/Lapack.hpp b/library/LibUtilities/LinearAlgebra/Lapack.hpp index ea099f319e05d903ab875d92ecb2b46d7eb830cc..6b54adfd92b4c9450f5a99962fbecececc11aa55 100644 --- a/library/LibUtilities/LinearAlgebra/Lapack.hpp +++ b/library/LibUtilities/LinearAlgebra/Lapack.hpp @@ -48,6 +48,10 @@ namespace Lapack const int& nrhs, const double* ap, const int *ipiv, double* b, const int& ldb, int& info); + void F77NAME(dsptri) (const char& uplo, const int& n, + const double* ap, + const int *ipiv, double* work, + int& info); void F77NAME(dtrtrs) (const char& uplo, const char& trans, const char& diag, const int& n, const int& nrhs, const double* a, const int& lda, double* b, const int& ldb, int& info); @@ -118,6 +122,14 @@ namespace Lapack F77NAME(dsptrs) (uplo,n,nrhs,ap,ipiv,b,ldb,info); } + /// \brief Invert a real symmetric matrix problem + static inline void Dsptri (const char& uplo, const int& n, + const double* ap, const int *ipiv, double* work, + int& info) + { + F77NAME(dsptri) (uplo,n,ap,ipiv,work,info); + } + /// \brief Cholesky factor a real Positive Definite packed-symmetric matrix. static inline void Dpptrf (const char& uplo, const int& n, double *ap, int& info) diff --git a/library/LibUtilities/LinearAlgebra/MatrixBase.cpp b/library/LibUtilities/LinearAlgebra/MatrixBase.cpp index 7105f352e2f02eb98d502d17dcef11c1511cdd22..f1d479dafc1f46b05d069727d83a7fac1309e73c 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixBase.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixBase.cpp @@ -55,12 +55,6 @@ namespace Nektar return v_GetStorageSize(); } - template - MatrixStorage ConstMatrix::GetStorageType() const - { - return v_GetStorageType(); - } - template unsigned int ConstMatrix::GetRows() const { @@ -229,9 +223,11 @@ namespace Nektar } template - ConstMatrix::ConstMatrix(unsigned int rows, unsigned int columns) : + ConstMatrix::ConstMatrix(unsigned int rows, unsigned int columns, + MatrixStorage policy) : m_size(), - m_transpose('N') + m_transpose('N'), + m_storageType(policy) { m_size[0] = rows; m_size[1] = columns; @@ -240,7 +236,8 @@ namespace Nektar template ConstMatrix::ConstMatrix(const ConstMatrix& rhs) : m_size(), - m_transpose(rhs.m_transpose) + m_transpose(rhs.m_transpose), + m_storageType(rhs.m_storageType) { m_size[0] = rhs.m_size[0]; m_size[1] = rhs.m_size[1]; @@ -252,6 +249,7 @@ namespace Nektar m_size[0] = rhs.m_size[0]; m_size[1] = rhs.m_size[1]; m_transpose = rhs.m_transpose; + m_storageType = m_storageType; return *this; } @@ -269,9 +267,6 @@ namespace Nektar m_transpose = newValue; } - template - char ConstMatrix::GetRawTransposeFlag() const { return m_transpose; } - template void ConstMatrix::v_Transpose() {} @@ -295,8 +290,9 @@ namespace Nektar } template - Matrix::Matrix(unsigned int rows, unsigned int columns) : - ConstMatrix(rows, columns) + Matrix::Matrix(unsigned int rows, unsigned int columns, + MatrixStorage policy) : + ConstMatrix(rows, columns,policy) { } diff --git a/library/LibUtilities/LinearAlgebra/MatrixBase.hpp b/library/LibUtilities/LinearAlgebra/MatrixBase.hpp index 6023f4ec1d18c10c09aa87313df19ffa9c2fb507..2d063eb91b62a01c5e530aabea5271f764166e08 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixBase.hpp +++ b/library/LibUtilities/LinearAlgebra/MatrixBase.hpp @@ -64,8 +64,16 @@ namespace Nektar LIB_UTILITIES_EXPORT unsigned int GetStorageSize() const; - LIB_UTILITIES_EXPORT MatrixStorage GetStorageType() const ; - + LIB_UTILITIES_EXPORT inline MatrixStorage GetType() const + { + return m_storageType; + } + + LIB_UTILITIES_EXPORT inline MatrixStorage GetStorageType() const + { + return m_storageType; + } + LIB_UTILITIES_EXPORT unsigned int GetRows() const; LIB_UTILITIES_EXPORT unsigned int GetTransposedRows(char transpose) const ; @@ -92,7 +100,8 @@ namespace Nektar // All constructors are private to enforce the abstract nature of ConstMatrix without // resorting to pure virtual functions. - LIB_UTILITIES_EXPORT ConstMatrix(unsigned int rows, unsigned int columns); + LIB_UTILITIES_EXPORT ConstMatrix(unsigned int rows, unsigned int columns, + MatrixStorage policy = eFULL); LIB_UTILITIES_EXPORT ConstMatrix(const ConstMatrix& rhs); @@ -103,17 +112,20 @@ namespace Nektar LIB_UTILITIES_EXPORT void Resize(unsigned int rows, unsigned int columns); LIB_UTILITIES_EXPORT void SetTransposeFlag(char newValue); - LIB_UTILITIES_EXPORT char GetRawTransposeFlag() const; + LIB_UTILITIES_EXPORT inline char GetRawTransposeFlag() const + { + return m_transpose; + } private: virtual typename boost::call_traits::value_type v_GetValue(unsigned int row, unsigned int column) const = 0; virtual unsigned int v_GetStorageSize() const = 0; - virtual MatrixStorage v_GetStorageType() const = 0; LIB_UTILITIES_EXPORT virtual void v_Transpose(); LIB_UTILITIES_EXPORT virtual char v_GetTransposeFlag() const; unsigned int m_size[2]; char m_transpose; + MatrixStorage m_storageType; }; template @@ -127,7 +139,8 @@ namespace Nektar protected: // All constructors are private to enforce the abstract nature of ConstMatrix without // resorting to pure virtual functions. - LIB_UTILITIES_EXPORT Matrix(unsigned int rows, unsigned int columns); + LIB_UTILITIES_EXPORT Matrix(unsigned int rows, unsigned int columns, + MatrixStorage policy = eFULL); LIB_UTILITIES_EXPORT Matrix(const Matrix& rhs); diff --git a/library/LibUtilities/LinearAlgebra/MatrixFuncs.h b/library/LibUtilities/LinearAlgebra/MatrixFuncs.h index 88824521fb7d15f9f8e6d182b5c4a0c1af9e1851..fe829dd7185fd8171e03edaccc6d12693ea23842 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixFuncs.h +++ b/library/LibUtilities/LinearAlgebra/MatrixFuncs.h @@ -202,6 +202,51 @@ namespace Nektar static unsigned int CalculateIndex(unsigned int curRow, unsigned int curColumn); + template + static void Invert(unsigned int rows, unsigned int columns, + Array& data) + { +#ifdef NEKTAR_USING_BLAS + ASSERTL0(rows==columns, "Only square matrices can be inverted."); + + int n = columns; + int info = 0; + Array ipivot(n); + Array work(n); + + Lapack::Dsptrf('U', n, data.get(), ipivot.get(), info); + + if( info < 0 ) + { + std::string message = "ERROR: The " + boost::lexical_cast(-info) + "th parameter had an illegal parameter for dsptrf"; + ASSERTL0(false, message.c_str()); + } + else if( info > 0 ) + { + std::string message = "ERROR: Element u_" + boost::lexical_cast(info) + boost::lexical_cast(info) + " is 0 from dsptrf"; + ASSERTL0(false, message.c_str()); + } + + Lapack::Dsptri('U', n, data.get(), ipivot.get(), + work.get(), info); + + if( info < 0 ) + { + std::string message = "ERROR: The " + boost::lexical_cast(-info) + "th parameter had an illegal parameter for dsptri"; + ASSERTL0(false, message.c_str()); + } + else if( info > 0 ) + { + std::string message = "ERROR: Element u_" + boost::lexical_cast(info) + boost::lexical_cast(info) + " is 0 from dsptri"; + ASSERTL0(false, message.c_str()); + } + +#else + // error Full matrix inversion not supported without blas. + BOOST_STATIC_ASSERT(sizeof(DataType) == 0); +#endif + } + static boost::tuples::tuple Advance(const unsigned int totalRows, const unsigned int totalColumns, const unsigned int curRow, const unsigned int curColumn); diff --git a/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp b/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp index 5dd8c72acde4a6405b205096cb16410273351d99..1e7f672685e0aa0290c54a08ab45cfc1bd49ee18 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp +++ b/library/LibUtilities/LinearAlgebra/MatrixOperationsDeclarations.hpp @@ -75,6 +75,10 @@ namespace Nektar const NekMatrix& lhs, const NekVector& rhs); + LIB_UTILITIES_EXPORT void DiagonalBlockFullScalMatrixMultiply(NekVector& result, + const NekMatrix, ScaledMatrixTag>, BlockMatrixTag>& lhs, + const NekVector& rhs); + //////////////////////////////////////////////////////////////////////////////////// // Matrix-Constant Multiplication //////////////////////////////////////////////////////////////////////////////////// diff --git a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp index afde10e377ea44632ad665d3630133e9d7a5b23c..2ef20f3bc91aed0a9682191aed4bac4cf6978f45 100644 --- a/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp +++ b/library/LibUtilities/LinearAlgebra/MatrixVectorMultiplication.cpp @@ -208,37 +208,41 @@ namespace Nektar double* result_ptr = result.GetRawPtr(); const double* rhs_ptr = rhs.GetRawPtr(); - std::fill(result.begin(), result.end(), 0.0); - + Array rowSizes; + Array colSizes; + lhs.GetBlockSizes(rowSizes, colSizes); + unsigned int curResultRow = 0; unsigned int curWrapperRow = 0; + unsigned int rowsInBlock = 0; + unsigned int columnsInBlock = 0; for(unsigned int blockRow = 0; blockRow < numberOfBlockRows; ++blockRow) { - - if( blockRow != 0 ) + curResultRow += rowsInBlock; + curWrapperRow += columnsInBlock; + if ( blockRow == 0) { - curResultRow += lhs.GetNumberOfRowsInBlockRow(blockRow-1); + rowsInBlock = rowSizes[blockRow] + 1; + columnsInBlock = colSizes[blockRow] + 1; } - - unsigned int blockColumn = blockRow; - if( blockColumn != 0 ) + else { - curWrapperRow += lhs.GetNumberOfColumnsInBlockColumn(blockColumn-1); + rowsInBlock = rowSizes[blockRow] - rowSizes[blockRow-1]; + columnsInBlock = colSizes[blockRow] - colSizes[blockRow-1]; } - unsigned int rowsInBlock = lhs.GetNumberOfRowsInBlockRow(blockRow); - if( rowsInBlock == 0 ) + if( rowsInBlock == 0) { continue; } - - unsigned int columnsInBlock = lhs.GetNumberOfColumnsInBlockColumn(blockColumn); - if( columnsInBlock == 0 ) + if( columnsInBlock == 0) { + std::fill(result.begin()+curResultRow, + result.begin()+curResultRow + rowsInBlock, 0.0); continue; } - const LhsInnerMatrixType* block = lhs.GetBlockPtr(blockRow, blockColumn); + const LhsInnerMatrixType* block = lhs.GetBlockPtr(blockRow, blockRow); if( !block ) { continue; @@ -249,6 +253,75 @@ namespace Nektar Multiply(resultWrapper, *block, rhsWrapper); //resultWrapper = (*block)*rhsWrapper; } + curResultRow += rowsInBlock; + if (curResultRow < result.GetRows()) + { + std::fill(result.begin()+curResultRow, result.end(), 0.0); + } + } + + void DiagonalBlockFullScalMatrixMultiply(NekVector& result, + const NekMatrix, ScaledMatrixTag>, BlockMatrixTag>& lhs, + const NekVector& rhs) + { + unsigned int numberOfBlockRows = lhs.GetNumberOfBlockRows(); + double* result_ptr = result.GetRawPtr(); + const double* rhs_ptr = rhs.GetRawPtr(); + + Array rowSizes; + Array colSizes; + lhs.GetBlockSizes(rowSizes, colSizes); + + unsigned int curResultRow = 0; + unsigned int curWrapperRow = 0; + unsigned int rowsInBlock = 0; + unsigned int columnsInBlock = 0; + for(unsigned int blockRow = 0; blockRow < numberOfBlockRows; ++blockRow) + { + curResultRow += rowsInBlock; + curWrapperRow += columnsInBlock; + if ( blockRow == 0) + { + rowsInBlock = rowSizes[blockRow] + 1; + columnsInBlock = colSizes[blockRow] + 1; + } + else + { + rowsInBlock = rowSizes[blockRow] - rowSizes[blockRow-1]; + columnsInBlock = colSizes[blockRow] - colSizes[blockRow-1]; + } + + if( rowsInBlock == 0) + { + continue; + } + if( columnsInBlock == 0) + { + std::fill(result.begin()+curResultRow, + result.begin()+curResultRow + rowsInBlock, 0.0); + continue; + } + + const DNekScalMat* block = lhs.GetBlockPtr(blockRow, blockRow); + if( !block ) + { + continue; + } + + double* resultWrapper = result_ptr + curResultRow; + const double* rhsWrapper = rhs_ptr + curWrapperRow; + + // Multiply + const unsigned int* size = block->GetSize(); + Blas::Dgemv('N', size[0], size[1], block->Scale(), + block->GetRawPtr(), size[0], rhsWrapper, 1, + 0.0, resultWrapper, 1); + } + curResultRow += rowsInBlock; + if (curResultRow < result.GetRows()) + { + std::fill(result.begin()+curResultRow, result.end(), 0.0); + } } void NekMultiplyLowerTriangularMatrix(NekDouble* result, @@ -335,6 +408,36 @@ namespace Nektar } } + template + void NekMultiplySymmetricMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs, + typename boost::enable_if + < + CanGetRawPtr > + >::type* p=0) + { + const unsigned int* size = lhs.GetSize(); + + double alpha = lhs.Scale(); + const double* a = lhs.GetRawPtr(); + const double* x = rhs; + int incx = 1; + double beta = 0.0; + double* y = result; + int incy = 1; + + Blas::Dspmv('U', size[0], alpha, a, x, incx, beta, y, incy); + } + + template + void NekMultiplySymmetricMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs, + typename boost::enable_if + < + boost::mpl::not_ > > + >::type* p = 0) + { + NekMultiplyUnspecializedMatrixType(result, lhs, rhs); + } + template void NekMultiplyFullMatrix(NekDouble* result, const NekMatrix& lhs, const NekDouble* rhs, typename boost::enable_if @@ -342,25 +445,20 @@ namespace Nektar CanGetRawPtr > >::type* p=0) { - int m = lhs.GetRows(); - int n = lhs.GetColumns(); + const unsigned int* size = lhs.GetSize(); char t = lhs.GetTransposeFlag(); - if( t == 'T' ) - { - std::swap(m, n); - } double alpha = lhs.Scale(); const double* a = lhs.GetRawPtr(); - int lda = m; + int lda = size[0]; const double* x = rhs; int incx = 1; double beta = 0.0; double* y = result; int incy = 1; - Blas::Dgemv(t, m, n, alpha, a, lda, x, incx, beta, y, incy); + Blas::Dgemv(t, size[0], size[1], alpha, a, lda, x, incx, beta, y, incy); } template @@ -393,7 +491,7 @@ namespace Nektar NekMultiplyLowerTriangularMatrix(result, lhs, rhs); break; case eSYMMETRIC: - NekMultiplyUnspecializedMatrixType(result, lhs, rhs); + NekMultiplySymmetricMatrix(result, lhs, rhs); break; case eBANDED: NekMultiplyBandedMatrix(result, lhs, rhs); diff --git a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp index e8444da880a1446249709915808cde5e01859084..d8d9536b5f2cdf74a3c8b48565c232bd76c5373f 100644 --- a/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/ScaledMatrix.cpp @@ -31,6 +31,7 @@ // /////////////////////////////////////////////////////////////////////////////// +#include #include #include @@ -38,7 +39,7 @@ namespace Nektar { template NekMatrix, ScaledMatrixTag>::NekMatrix() : - NekMatrix, ScaledMatrixTag>::BaseType(0,0), + NekMatrix, ScaledMatrixTag>::BaseType(0,0,m_matrix->GetStorageType()), m_matrix(new typename NekMatrix, ScaledMatrixTag>::InnerType()), m_scale(0) { @@ -47,7 +48,7 @@ namespace Nektar template NekMatrix, ScaledMatrixTag>::NekMatrix(typename boost::call_traits::const_reference scale, boost::shared_ptr m) : - NekMatrix, ScaledMatrixTag>::BaseType(m->GetRows(), m->GetColumns()), + NekMatrix, ScaledMatrixTag>::BaseType(m->GetRows(), m->GetColumns(),m->GetStorageType()), m_matrix(m), m_scale(scale) { @@ -84,9 +85,6 @@ namespace Nektar return m_matrix->GetStorageSize(); } - template - MatrixStorage NekMatrix, ScaledMatrixTag>::GetType() const { return m_matrix->GetStorageType(); } - template typename NekMatrix, ScaledMatrixTag>::NumberType NekMatrix, ScaledMatrixTag>::Scale() const @@ -153,12 +151,6 @@ namespace Nektar return ThisType::GetStorageSize(); } - template - MatrixStorage NekMatrix, ScaledMatrixTag>::v_GetStorageType() const - { - return ThisType::GetType(); - } - template char NekMatrix, ScaledMatrixTag>::v_GetTransposeFlag() const { diff --git a/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp b/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp index 413e18d7f9f9492bb373eade789f1389b1d456d5..3b248b9cac7e0155f3cc86f48f62731bb1341512 100644 --- a/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp +++ b/library/LibUtilities/LinearAlgebra/ScaledMatrix.hpp @@ -121,8 +121,6 @@ namespace Nektar LIB_UTILITIES_EXPORT ConstGetValueType operator()(unsigned int row, unsigned int col) const; LIB_UTILITIES_EXPORT unsigned int GetStorageSize() const ; - - LIB_UTILITIES_EXPORT MatrixStorage GetType() const; LIB_UTILITIES_EXPORT NumberType Scale() const; LIB_UTILITIES_EXPORT void SetScale(const NumberType&); @@ -149,8 +147,6 @@ namespace Nektar LIB_UTILITIES_EXPORT virtual unsigned int v_GetStorageSize() const ; - LIB_UTILITIES_EXPORT virtual MatrixStorage v_GetStorageType() const; - LIB_UTILITIES_EXPORT virtual char v_GetTransposeFlag() const; boost::shared_ptr m_matrix; diff --git a/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp b/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp index 4444309fa3e8499ced8b83bde301a077702e58ec..2a490ef0ed39a4ea904425a9b395f9e4656e54b2 100644 --- a/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp +++ b/library/LibUtilities/LinearAlgebra/StandardMatrix.cpp @@ -41,7 +41,6 @@ namespace Nektar Matrix(0, 0), m_data(), m_wrapperType(eCopy), - m_storagePolicy(eFULL), m_numberOfSuperDiagonals(std::numeric_limits::max()), m_numberOfSubDiagonals(std::numeric_limits::max()), m_tempSpace() @@ -53,10 +52,9 @@ namespace Nektar NekMatrix::NekMatrix(unsigned int rows, unsigned int columns, MatrixStorage policy, unsigned int subDiagonals, unsigned int superDiagonals) : - Matrix(rows, columns), + Matrix(rows, columns, policy), m_data(), m_wrapperType(eCopy), - m_storagePolicy(policy), m_numberOfSuperDiagonals(superDiagonals), m_numberOfSubDiagonals(subDiagonals), m_tempSpace() @@ -70,10 +68,9 @@ namespace Nektar unsigned int subDiagonals, unsigned int superDiagonals, unsigned int capacity) : - Matrix(rows, columns), + Matrix(rows, columns, policy), m_data(), m_wrapperType(eCopy), - m_storagePolicy(policy), m_numberOfSuperDiagonals(superDiagonals), m_numberOfSubDiagonals(subDiagonals), m_tempSpace() @@ -88,10 +85,9 @@ namespace Nektar MatrixStorage policy, unsigned int subDiagonals, unsigned int superDiagonals) : - Matrix(rows, columns), + Matrix(rows, columns, policy), m_data(), m_wrapperType(eCopy), - m_storagePolicy(policy), m_numberOfSuperDiagonals(superDiagonals), m_numberOfSubDiagonals(subDiagonals), m_tempSpace() @@ -105,10 +101,9 @@ namespace Nektar MatrixStorage policy, unsigned int subDiagonals, unsigned int superDiagonals) : - Matrix(rows, columns), + Matrix(rows, columns, policy), m_data(), m_wrapperType(eCopy), - m_storagePolicy(policy), m_numberOfSuperDiagonals(superDiagonals), m_numberOfSubDiagonals(subDiagonals), m_tempSpace() @@ -123,10 +118,9 @@ namespace Nektar MatrixStorage policy, unsigned int subDiagonals, unsigned int superDiagonals) : - Matrix(rows, columns), + Matrix(rows, columns, policy), m_data(), m_wrapperType(eCopy), - m_storagePolicy(policy), m_numberOfSuperDiagonals(superDiagonals), m_numberOfSubDiagonals(subDiagonals), m_tempSpace() @@ -140,10 +134,9 @@ namespace Nektar MatrixStorage policy, unsigned int subDiagonals, unsigned int superDiagonals) : - Matrix(rows, columns), + Matrix(rows, columns, policy), m_data(), m_wrapperType(wrapperType), - m_storagePolicy(policy), m_numberOfSuperDiagonals(superDiagonals), m_numberOfSubDiagonals(subDiagonals), m_tempSpace() @@ -164,7 +157,6 @@ namespace Nektar Matrix(rhs), m_data(), m_wrapperType(rhs.m_wrapperType), - m_storagePolicy(rhs.m_storagePolicy), m_numberOfSuperDiagonals(rhs.m_numberOfSuperDiagonals), m_numberOfSubDiagonals(rhs.m_numberOfSubDiagonals), m_tempSpace() @@ -172,10 +164,6 @@ namespace Nektar PerformCopyConstruction(rhs); } - - template - MatrixStorage NekMatrix::GetType() const { return m_storagePolicy; } - template NekMatrix& NekMatrix::operator=(const NekMatrix& rhs) { @@ -185,7 +173,6 @@ namespace Nektar } Matrix::operator=(rhs); - m_storagePolicy = rhs.m_storagePolicy; m_numberOfSubDiagonals = rhs.m_numberOfSubDiagonals; m_numberOfSuperDiagonals = rhs.m_numberOfSuperDiagonals; @@ -398,12 +385,6 @@ namespace Nektar template PointerWrapper NekMatrix::GetWrapperType() const { return m_wrapperType; } - template - char NekMatrix::GetTransposeFlag() const - { - return this->GetRawTransposeFlag(); - } - template boost::tuples::tuple NekMatrix::Advance(unsigned int curRow, unsigned int curColumn) const @@ -418,7 +399,7 @@ namespace Nektar unsigned int numRows = this->GetTransposedRows(transpose); unsigned int numColumns = this->GetTransposedColumns(transpose); - switch(m_storagePolicy) + switch(this->GetStorageType()) { case eFULL: return FullMatrixFuncs::Advance( @@ -481,11 +462,10 @@ namespace Nektar BaseType(rhs), m_data(), m_wrapperType(wrapperType), - m_storagePolicy(rhs.m_storagePolicy), m_numberOfSuperDiagonals(rhs.m_numberOfSuperDiagonals), m_numberOfSubDiagonals(rhs.m_numberOfSubDiagonals) { - PerformCopyConstruction(rhs); + PerformCopyConstruction(rhs); } @@ -564,12 +544,6 @@ namespace Nektar return NekMatrix::GetStorageSize(); } - template - MatrixStorage NekMatrix::v_GetStorageType() const - { - return NekMatrix::GetType(); - } - template void NekMatrix::v_SetValue(unsigned int row, unsigned int column, typename boost::call_traits::const_reference d) { @@ -765,9 +739,12 @@ namespace Nektar DiagonalMatrixFuncs::Invert(this->GetRows(), this->GetColumns(), this->GetData()); break; + case eSYMMETRIC: + SymmetricMatrixFuncs::Invert(this->GetRows(), this->GetColumns(), + this->GetData()); + break; case eUPPER_TRIANGULAR: case eLOWER_TRIANGULAR: - case eSYMMETRIC: case eBANDED: NEKERROR(ErrorUtil::efatal, "Unhandled matrix type"); break; diff --git a/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp b/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp index e03098feb50e4d16b1128efa2bd2d8a244a4f0ae..0971e6986768d63c41d5cb7a7c49855da25e0dd5 100644 --- a/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp +++ b/library/LibUtilities/LinearAlgebra/StandardMatrix.hpp @@ -311,7 +311,6 @@ namespace Nektar NekMatrix(const expt::Node& rhs) : BaseType(0, 0), m_wrapperType(eCopy), - m_storagePolicy(eFULL), m_numberOfSuperDiagonals(std::numeric_limits::max()), m_numberOfSubDiagonals(std::numeric_limits::max()), m_tempSpace() @@ -331,9 +330,6 @@ namespace Nektar } #endif //NEKTAR_USE_EXPRESSION_TEMPLATES - - - LIB_UTILITIES_EXPORT MatrixStorage GetType() const; LIB_UTILITIES_EXPORT ThisType& operator=(const ThisType& rhs); @@ -341,7 +337,6 @@ namespace Nektar ThisType& operator=(const NekMatrix& rhs) { BaseType::operator=(rhs); - m_storagePolicy = rhs.GetType(); m_numberOfSubDiagonals = rhs.GetNumberOfSubDiagonals(); m_numberOfSuperDiagonals = rhs.GetNumberOfSuperDiagonals(); @@ -511,10 +506,6 @@ namespace Nektar LIB_UTILITIES_EXPORT PointerWrapper GetWrapperType() const; - - LIB_UTILITIES_EXPORT char GetTransposeFlag() const ; - - LIB_UTILITIES_EXPORT boost::tuples::tuple Advance(unsigned int curRow, unsigned int curColumn) const; @@ -585,14 +576,11 @@ namespace Nektar LIB_UTILITIES_EXPORT virtual unsigned int v_GetStorageSize() const ; - LIB_UTILITIES_EXPORT virtual MatrixStorage v_GetStorageType() const; - // We need to rethink class structure a little. This shouldn't be necessary. LIB_UTILITIES_EXPORT virtual void v_SetValue(unsigned int row, unsigned int column, typename boost::call_traits::const_reference d); Array m_data; PointerWrapper m_wrapperType; - MatrixStorage m_storagePolicy; // Only used by banded matrices. unsigned int m_numberOfSuperDiagonals; diff --git a/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp b/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp index 56c0087e5c627f3480436542196e84dbde454a7a..b3e5c8e1a92f893babefd0da79fabf67a679a560 100644 --- a/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp +++ b/library/MultiRegions/AssemblyMap/AssemblyMapCG.cpp @@ -1523,12 +1523,15 @@ namespace Nektar // needs to be set so that the coupled solver in // IncNavierStokesSolver can work. int nExtraDirichlet; + int mdswitch; + m_session->LoadParameter( + "MDSwitch", mdswitch, 10); int nGraphVerts = CreateGraph(locExp, bndCondExp, bndCondVec, checkIfSystemSingular, periodicVerts, periodicEdges, periodicFaces, graph, bottomUpGraph, extraDirVerts, extraDirEdges, firstNonDirGraphVertId, - nExtraDirichlet); + nExtraDirichlet, mdswitch); /* * Set up an array which contains the offset information of the diff --git a/library/MultiRegions/ExpList.cpp b/library/MultiRegions/ExpList.cpp index c5e01c0d4d36f6b842cb2b48ec59b605932b7c58..107d3773e8a790dbc8c76739d8af3a3db090ad2e 100644 --- a/library/MultiRegions/ExpList.cpp +++ b/library/MultiRegions/ExpList.cpp @@ -495,9 +495,10 @@ namespace Nektar Array e_out_d0; Array e_out_d1; Array e_out_d2; + int offset; for (int i = 0; i < m_collections.size(); ++i) { - int offset = m_coll_phys_offset[i]; + offset = m_coll_phys_offset[i]; e_out_d0 = out_d0 + offset; e_out_d1 = out_d1 + offset; e_out_d2 = out_d2 + offset; @@ -544,12 +545,17 @@ namespace Nektar // convert enum into int int intdir= (int)edir; Array e_out_d; - for(i= 0; i < (*m_exp).size(); ++i) + int offset; + for (int i = 0; i < m_collections.size(); ++i) { - e_out_d = out_d + m_phys_offset[i]; - (*m_exp)[i]->PhysDeriv(intdir, inarray+m_phys_offset[i], e_out_d); - } + offset = m_coll_phys_offset[i]; + e_out_d = out_d + offset; + m_collections[i].ApplyOperator(Collections::ePhysDeriv, + intdir, + inarray + offset, + e_out_d); + } } } diff --git a/library/MultiRegions/GlobalLinSysStaticCond.cpp b/library/MultiRegions/GlobalLinSysStaticCond.cpp index 2aee894522354a5a31256b4abb2c2539e7a46093..d588bbdbf99511b35a847201945675fd035d620a 100644 --- a/library/MultiRegions/GlobalLinSysStaticCond.cpp +++ b/library/MultiRegions/GlobalLinSysStaticCond.cpp @@ -121,7 +121,7 @@ namespace Nektar int nIntDofs = pLocToGloMap->GetNumGlobalCoeffs() - nGlobBndDofs; - Array F = m_wsp + 2*nLocBndDofs; + Array F = m_wsp + 2*nLocBndDofs + nGlobHomBndDofs; Array tmp; if(nDirBndDofs && dirForcCalculated) { @@ -135,7 +135,6 @@ namespace Nektar NekVector F_HomBnd(nGlobHomBndDofs,tmp=F+nDirBndDofs, eWrapper); NekVector F_GlobBnd(nGlobBndDofs,F,eWrapper); - NekVector F_LocBnd(nLocBndDofs,0.0); NekVector F_Int(nIntDofs,tmp=F+nGlobBndDofs,eWrapper); NekVector V_GlobBnd(nGlobBndDofs,out,eWrapper); @@ -145,7 +144,8 @@ namespace Nektar NekVector V_Int(nIntDofs,tmp=out+nGlobBndDofs,eWrapper); NekVector V_LocBnd(nLocBndDofs,m_wsp,eWrapper); - NekVector V_GlobHomBndTmp(nGlobHomBndDofs,0.0); + NekVector V_GlobHomBndTmp( + nGlobHomBndDofs,tmp = m_wsp + 2*nLocBndDofs,eWrapper); // set up normalisation factor for right hand side on first SC level DNekScalBlkMatSharedPtr sc = v_PreSolve(scLevel, F_GlobBnd); @@ -172,12 +172,12 @@ namespace Nektar else { DNekScalBlkMat &BinvD = *m_BinvD; - V_LocBnd = BinvD*F_Int; + DiagonalBlockFullScalMatrixMultiply( V_LocBnd, BinvD, F_Int); } pLocToGloMap->AssembleBnd(V_LocBnd,V_GlobHomBndTmp, nDirBndDofs); - F_HomBnd = F_HomBnd - V_GlobHomBndTmp; + Subtract(F_HomBnd, F_HomBnd, V_GlobHomBndTmp); // Transform from original basis to low energy v_BasisTransform(F, nDirBndDofs); @@ -202,7 +202,7 @@ namespace Nektar Vmath::Vcopy(nGlobHomBndDofs, tmp.get()+nDirBndDofs, 1, V_GlobHomBndTmp.GetPtr().get(), 1); - F_HomBnd = F_HomBnd - V_GlobHomBndTmp; + Subtract( F_HomBnd, F_HomBnd, V_GlobHomBndTmp); } } @@ -210,7 +210,6 @@ namespace Nektar if(atLastLevel) { Array pert(nGlobBndDofs,0.0); - NekVector Pert(nGlobBndDofs,pert,eWrapper); // Solve for difference from initial solution given inout; SolveLinearSystem( @@ -251,8 +250,7 @@ namespace Nektar } F_Int = F_Int - C*V_LocBnd; } - - V_Int = invD*F_Int; + Multiply( V_Int, invD, F_Int); } } @@ -269,7 +267,11 @@ namespace Nektar { int nLocalBnd = m_locToGloMap->GetNumLocalBndCoeffs(); int nGlobal = m_locToGloMap->GetNumGlobalCoeffs(); - m_wsp = Array(2*nLocalBnd + nGlobal, 0.0); + int nGlobBndDofs = pLocToGloMap->GetNumGlobalBndCoeffs(); + int nDirBndDofs = pLocToGloMap->GetNumGlobalDirBndCoeffs(); + int nGlobHomBndDofs = nGlobBndDofs - nDirBndDofs; + m_wsp = Array + (2*nLocalBnd + nGlobal + nGlobHomBndDofs, 0.0); if (pLocToGloMap->AtLastLevel()) { @@ -413,6 +415,15 @@ namespace Nektar int cntC = 0; int cntD = 0; + // Use symmetric storage for invD if possible + MatrixStorage storageTypeD = eFULL; + if ( (m_linSysKey.GetMatrixType() == StdRegions::eMass) || + (m_linSysKey.GetMatrixType() == StdRegions::eLaplacian) || + (m_linSysKey.GetMatrixType() == StdRegions::eHelmholtz)) + { + storageTypeD = eSYMMETRIC; + } + for(i = 0; i < nPatches; i++) { // Matrix A @@ -438,7 +449,8 @@ namespace Nektar substructuredMat[3][i] = MemoryManager ::AllocateSharedPtr(nIntDofsPerPatch[i], nIntDofsPerPatch[i], - tmparray, wType); + tmparray, wType, + storageTypeD); cntA += nBndDofsPerPatch[i] * nBndDofsPerPatch[i]; cntB += nBndDofsPerPatch[i] * nIntDofsPerPatch[i]; @@ -454,17 +466,12 @@ namespace Nektar Array patchId, dofId; Array isBndDof; Array sign; - NekDouble scale; for(n = cnt = 0; n < SchurCompl->GetNumberOfBlockRows(); ++n) { schurComplSubMat = SchurCompl->GetBlock(n,n); schurComplSubMatnRows = schurComplSubMat->GetRows(); - - scale = SchurCompl->GetBlock(n,n)->Scale(); - Array schurSubMat - = SchurCompl->GetBlock(n,n)->GetOwnedMatrix()->GetPtr(); - + patchId = pLocToGloMap->GetPatchMapFromPrevLevel() ->GetPatchId() + cnt; dofId = pLocToGloMap->GetPatchMapFromPrevLevel() @@ -484,13 +491,12 @@ namespace Nektar = substructuredMat[1][patchId[i]]->GetPtr(); Array subMat2 = substructuredMat[2][patchId[i]]->GetPtr(); - Array subMat3 - = substructuredMat[3][patchId[i]]->GetPtr(); + DNekMatSharedPtr subMat3 + = substructuredMat[3][patchId[i]]; int subMat0rows = substructuredMat[0][pId]->GetRows(); int subMat1rows = substructuredMat[1][pId]->GetRows(); int subMat2rows = substructuredMat[2][pId]->GetRows(); - int subMat3rows = substructuredMat[3][pId]->GetRows(); - + if(isBndDof[i]) { for(j = 0; j < schurComplSubMatnRows; ++j) @@ -501,16 +507,14 @@ namespace Nektar if(isBndDof[j]) { subMat0[dofId[i]+dofId[j]*subMat0rows] += - sign[i]*sign[j]*( - scale*schurSubMat[ - i+j*schurComplSubMatnRows]); + sign[i]*sign[j]* + (*schurComplSubMat)(i,j); } else { subMat1[dofId[i]+dofId[j]*subMat1rows] += - sign[i]*sign[j]*( - scale*schurSubMat[ - i+j*schurComplSubMatnRows]); + sign[i]*sign[j]* + (*schurComplSubMat)(i,j); } } } @@ -524,16 +528,26 @@ namespace Nektar if(isBndDof[j]) { subMat2[dofId[i]+dofId[j]*subMat2rows] += - sign[i]*sign[j]*( - scale*schurSubMat[ - i+j*schurComplSubMatnRows]); + sign[i]*sign[j]* + (*schurComplSubMat)(i,j); } else { - subMat3[dofId[i]+dofId[j]*subMat3rows] += - sign[i]*sign[j]*( - scale*schurSubMat[ - i+j*schurComplSubMatnRows]); + if (storageTypeD == eSYMMETRIC) + { + if (dofId[i] <= dofId[j]) + { + (*subMat3)(dofId[i],dofId[j]) += + sign[i]*sign[j]* + (*schurComplSubMat)(i,j); + } + } + else + { + (*subMat3)(dofId[i],dofId[j]) += + sign[i]*sign[j]* + (*schurComplSubMat)(i,j); + } } } } diff --git a/library/SolverUtils/DriverArpack.cpp b/library/SolverUtils/DriverArpack.cpp index 6c1329fea19990b2a812b18bd68111795c110352..001cc7012f441f143aca28d4482114afb8107684 100644 --- a/library/SolverUtils/DriverArpack.cpp +++ b/library/SolverUtils/DriverArpack.cpp @@ -101,7 +101,10 @@ void DriverArpack::v_InitObject(ostream &out) out << ArpackProblemTypeTrans[m_session->GetSolverInfoAsEnum("ArpackProblemType")] << endl; DriverArnoldi::ArnoldiSummary(out); - m_equ[m_nequ - 1]->DoInitialise(); + for( int i = 0; i < m_nequ; ++i) + { + m_equ[i]->DoInitialise(); + } // FwdTrans Initial conditions to be in Coefficient Space m_equ[m_nequ-1] ->TransPhysToCoeff(); diff --git a/library/SolverUtils/DriverModifiedArnoldi.cpp b/library/SolverUtils/DriverModifiedArnoldi.cpp index fb2f13814f6517585f5bd40ceb34d89f5cfbc7f6..bf0aa589540d6664e3440deba47e2fa063bccb67 100644 --- a/library/SolverUtils/DriverModifiedArnoldi.cpp +++ b/library/SolverUtils/DriverModifiedArnoldi.cpp @@ -87,7 +87,10 @@ void DriverModifiedArnoldi::v_InitObject(ostream &out) DriverArnoldi::ArnoldiSummary(out); - m_equ[m_nequ - 1]->DoInitialise(); + for( int i = 0; i < m_nequ; ++i) + { + m_equ[i]->DoInitialise(); + } //FwdTrans Initial conditions to be in Coefficient Space m_equ[m_nequ-1] ->TransPhysToCoeff(); diff --git a/library/SolverUtils/DriverSteadyState.cpp b/library/SolverUtils/DriverSteadyState.cpp index 0f0048d0336cd7cf879117175f17f638a82e1000..21c532aa1a7eaccfc6f98fd84854c42b81e9ced0 100644 --- a/library/SolverUtils/DriverSteadyState.cpp +++ b/library/SolverUtils/DriverSteadyState.cpp @@ -85,7 +85,6 @@ void DriverSteadyState::v_Execute(ostream &out) // to find the steady state of a flow above the critical Reynolds // number. m_equ[m_nequ - 1]->PrintSummary(out); - m_equ[m_nequ - 1]->DoInitialise(); m_session->LoadParameter("IO_InfoSteps", m_infosteps, 1000); m_session->LoadParameter("IO_CheckSteps", m_checksteps, 100000); diff --git a/library/SolverUtils/UnsteadySystem.cpp b/library/SolverUtils/UnsteadySystem.cpp index 48a0efa5fcb4c15a9e97380bf6c2a78b5458a1ce..b0c9d68c55d3b4b196972a73f332a99c0fd4d9dc 100644 --- a/library/SolverUtils/UnsteadySystem.cpp +++ b/library/SolverUtils/UnsteadySystem.cpp @@ -89,6 +89,8 @@ namespace Nektar m_session->MatchSolverInfo("REACTIONADVANCEMENT", "Explicit", m_explicitReaction, true); + m_session->LoadParameter("CheckNanSteps", m_nanSteps, 1); + // For steady problems, we do not initialise the time integration if (m_session->DefinesSolverInfo("TIMEINTEGRATIONMETHOD")) { @@ -317,9 +319,12 @@ namespace Nektar for (i = 0; i < nvariables; ++i) { m_fields[m_intVariables[i]]->SetPhys(fields[i]); - m_fields[m_intVariables[i]]->FwdTrans_IterPerExp( - fields[i], - m_fields[m_intVariables[i]]->UpdateCoeffs()); + if( v_RequireFwdTrans() ) + { + m_fields[m_intVariables[i]]->FwdTrans_IterPerExp( + fields[i], + m_fields[m_intVariables[i]]->UpdateCoeffs()); + } m_fields[m_intVariables[i]]->SetPhysState(false); } @@ -330,19 +335,22 @@ namespace Nektar } // search for NaN and quit if found - int nanFound = 0; - for (i = 0; i < nvariables; ++i) + if (m_nanSteps && !((step+1) % m_nanSteps) ) { - if (Vmath::Nnan(fields[i].num_elements(), fields[i], 1) > 0) + int nanFound = 0; + for (i = 0; i < nvariables; ++i) { - nanFound = 1; + if (Vmath::Nnan(fields[i].num_elements(), + fields[i], 1) > 0) + { + nanFound = 1; + } } + m_session->GetComm()->AllReduce(nanFound, + LibUtilities::ReduceMax); + ASSERTL0 (!nanFound, + "NaN found during time integration."); } - m_session->GetComm()->AllReduce(nanFound, - LibUtilities::ReduceMax); - ASSERTL0 (!nanFound, - "NaN found during time integration."); - // Update filters std::vector::iterator x; for (x = m_filters.begin(); x != m_filters.end(); ++x) diff --git a/library/SolverUtils/UnsteadySystem.h b/library/SolverUtils/UnsteadySystem.h index 8302c97695d1acb02507d7b3f4c07ab029616485..82d2f0c291d843d0705144076acc991e89f06aac 100644 --- a/library/SolverUtils/UnsteadySystem.h +++ b/library/SolverUtils/UnsteadySystem.h @@ -61,6 +61,8 @@ namespace Nektar protected: /// Number of time steps between outputting status information. int m_infosteps; + + int m_nanSteps; /// Wrapper to the time integration scheme LibUtilities::TimeIntegrationWrapperSharedPtr m_intScheme; /// The time integration scheme operators to use. @@ -135,6 +137,11 @@ namespace Nektar SOLVER_UTILS_EXPORT virtual bool v_PostIntegrate(int step); SOLVER_UTILS_EXPORT virtual bool v_SteadyStateCheck(int step); + SOLVER_UTILS_EXPORT virtual bool v_RequireFwdTrans() + { + return true; + } + SOLVER_UTILS_EXPORT void CheckForRestartTime(NekDouble &time); /// \brief Evaluate the SVV diffusion coefficient diff --git a/library/StdRegions/StdExpansion.cpp b/library/StdRegions/StdExpansion.cpp index 2bcd02c9774fdd4081ed84cd656c9078f9d734b9..ef6e0826d437999708f9ecd9cf1a6271d442323a 100644 --- a/library/StdRegions/StdExpansion.cpp +++ b/library/StdRegions/StdExpansion.cpp @@ -466,7 +466,6 @@ namespace Nektar returnval = MemoryManager::AllocateSharedPtr( m_ncoeffs,m_ncoeffs); - int cnt = 0; for(int i = 0; i < m_ncoeffs; ++i) { // Get mode at quadrature points diff --git a/library/StdRegions/StdExpansion.h b/library/StdRegions/StdExpansion.h index f23a6bb46d62a2e85b7fd6f87d991d5492849ab9..ce6d34c07649e024e67dacdd2b9340ea0dd1b34b 100644 --- a/library/StdRegions/StdExpansion.h +++ b/library/StdRegions/StdExpansion.h @@ -1335,6 +1335,7 @@ namespace Nektar const LibUtilities::PointsKeyVector GetPointsKeys() const { LibUtilities::PointsKeyVector p; + p.reserve(m_base.num_elements()); for (int i = 0; i < m_base.num_elements(); ++i) { p.push_back(m_base[i]->GetPointsKey()); diff --git a/library/StdRegions/StdTriExp.cpp b/library/StdRegions/StdTriExp.cpp index aaa3a6eca239868e2cc14ce28022a9e0324e9e57..54d2255250121ae8ffc7a706bc5b42ba6b9e38f4 100644 --- a/library/StdRegions/StdTriExp.cpp +++ b/library/StdRegions/StdTriExp.cpp @@ -136,16 +136,14 @@ namespace Nektar int i; int nquad0 = m_base[0]->GetNumPoints(); int nquad1 = m_base[1]->GetNumPoints(); - Array wsp(nquad0*nquad1); + Array wsp(std::max(nquad0, nquad1)); const Array& z0 = m_base[0]->GetZ(); const Array& z1 = m_base[1]->GetZ(); // set up geometric factor: 2/(1-z1) - for (i = 0; i < nquad1; ++i) - { - wsp[i] = 2.0/(1-z1[i]); - } + Vmath::Sadd(nquad1, -1.0, z1, 1, wsp, 1); + Vmath::Sdiv(nquad1, -2.0, wsp, 1, wsp, 1); if (out_d0.num_elements() > 0) { @@ -160,10 +158,8 @@ namespace Nektar if (out_d1.num_elements() > 0) { // set up geometric factor: (1_z0)/(1-z1) - for (i = 0; i < nquad0; ++i) - { - wsp[i] = 0.5*(1+z0[i]); - } + Vmath::Sadd(nquad0, 1.0, z0, 1, wsp, 1); + Vmath::Smul(nquad0, 0.5, wsp, 1, wsp, 1); for (i = 0; i < nquad1; ++i) { @@ -183,10 +179,8 @@ namespace Nektar Blas::Dscal(nquad0,wsp[i],&diff0[0]+i*nquad0,1); } - for (i = 0; i < nquad0; ++i) - { - wsp[i] = 0.5*(1+z0[i]); - } + Vmath::Sadd(nquad0, 1.0, z0, 1, wsp, 1); + Vmath::Smul(nquad0, 0.5, wsp, 1, wsp, 1); for (i = 0; i < nquad1; ++i) { diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp b/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp index 2cb60f7bbfa8a515ddceec3247a167b9f3f5db7a..41470577082de8bdaa1439eb81c985ccd763a13d 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp +++ b/solvers/IncNavierStokesSolver/EquationSystems/VCSMapping.cpp @@ -157,6 +157,7 @@ namespace Nektar // Correct Dirichlet boundary conditions to account for mapping m_mapping->UpdateBCs(0.0); // + m_F = Array > (m_nConvectiveFields); for(int i = 0; i < m_nConvectiveFields; ++i) { m_fields[i]->LocalToGlobal(); @@ -164,6 +165,7 @@ namespace Nektar m_fields[i]->GlobalToLocal(); m_fields[i]->BwdTrans(m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); + m_F[i] = Array< OneD, NekDouble> (m_fields[0]->GetTotPoints(), 0.0); } // Initialise m_gradP diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp index f4bffd84e0221e15e44bce363d2428e4d76e44f2..b387d1a54e607a11fc4fc7b8a72febf6083b4ce5 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.cpp @@ -250,6 +250,7 @@ namespace Nektar // field below SetBoundaryConditions(m_time); + m_F = Array > (m_nConvectiveFields); for(int i = 0; i < m_nConvectiveFields; ++i) { m_fields[i]->LocalToGlobal(); @@ -257,6 +258,7 @@ namespace Nektar m_fields[i]->GlobalToLocal(); m_fields[i]->BwdTrans(m_fields[i]->GetCoeffs(), m_fields[i]->UpdatePhys()); + m_F[i] = Array< OneD, NekDouble> (m_fields[0]->GetTotPoints(), 0.0); } } @@ -347,32 +349,23 @@ namespace Nektar const NekDouble time, const NekDouble aii_Dt) { - int i; - int phystot = m_fields[0]->GetTotPoints(); - - Array > F(m_nConvectiveFields); - for(i = 0; i < m_nConvectiveFields; ++i) - { - F[i] = Array (phystot, 0.0); - } - // Enforcing boundary conditions on all fields SetBoundaryConditions(time); - + // Substep the pressure boundary condition if using substepping m_extrapolation->SubStepSetPressureBCs(inarray,aii_Dt,m_kinvis); - + // Set up forcing term for pressure Poisson equation - SetUpPressureForcing(inarray, F, aii_Dt); + SetUpPressureForcing(inarray, m_F, aii_Dt); // Solve Pressure System - SolvePressure (F[0]); + SolvePressure (m_F[0]); // Set up forcing term for Helmholtz problems - SetUpViscousForcing(inarray, F, aii_Dt); - + SetUpViscousForcing(inarray, m_F, aii_Dt); + // Solve velocity system - SolveViscous( F, outarray, aii_Dt); + SolveViscous( m_F, outarray, aii_Dt); } /** @@ -386,17 +379,17 @@ namespace Nektar int i; int physTot = m_fields[0]->GetTotPoints(); int nvel = m_velocity.num_elements(); - Array wk(physTot, 0.0); - - Vmath::Zero(physTot,Forcing[0],1); - - for(i = 0; i < nvel; ++i) + + m_fields[0]->PhysDeriv(0,fields[0], + Forcing[0]); + for(i = 1; i < nvel; ++i) { - m_fields[i]->PhysDeriv(MultiRegions::DirCartesianMap[i],fields[i], wk); - Vmath::Vadd(physTot,wk,1,Forcing[0],1,Forcing[0],1); + // Use Forcing[1] as storage since it is not needed for the pressure + m_fields[i]->PhysDeriv(MultiRegions::DirCartesianMap[i],fields[i],Forcing[1]); + Vmath::Vadd(physTot,Forcing[1],1,Forcing[0],1,Forcing[0],1); } - Vmath::Smul(physTot,1.0/aii_Dt,Forcing[0],1,Forcing[0],1); + Vmath::Smul(physTot,1.0/aii_Dt,Forcing[0],1,Forcing[0],1); } /** @@ -412,7 +405,7 @@ namespace Nektar // Grad p m_pressure->BwdTrans(m_pressure->GetCoeffs(),m_pressure->UpdatePhys()); - + int nvel = m_velocity.num_elements(); if(nvel == 2) { @@ -423,7 +416,7 @@ namespace Nektar m_pressure->PhysDeriv(m_pressure->GetPhys(), Forcing[0], Forcing[1], Forcing[2]); } - + // Subtract inarray/(aii_dt) and divide by kinvis. Kinvis will // need to be updated for the convected fields. for(int i = 0; i < m_nConvectiveFields; ++i) diff --git a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h index 878729338520d1c6778c6a483ac9e468e004663c..05442555657d56688b5c5474f79b5723d903fbbc 100644 --- a/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h +++ b/solvers/IncNavierStokesSolver/EquationSystems/VelocityCorrectionScheme.h @@ -160,7 +160,14 @@ namespace Nektar const Array > &inarray, Array > &outarray, const NekDouble time); + + virtual bool v_RequireFwdTrans() + { + return false; + } + Array > m_F; + private: }; diff --git a/solvers/IncNavierStokesSolver/Tests/KovaFlow_m8_short_HOBC.tst b/solvers/IncNavierStokesSolver/Tests/KovaFlow_m8_short_HOBC.tst index 158d6fe7cb03ea4110ea3b6983aae6138cc1fc3e..90415ecb4c2511e9e12a999d9f61b153e464e1fc 100644 --- a/solvers/IncNavierStokesSolver/Tests/KovaFlow_m8_short_HOBC.tst +++ b/solvers/IncNavierStokesSolver/Tests/KovaFlow_m8_short_HOBC.tst @@ -10,12 +10,12 @@ 2.51953e-08 9.56014e-09 - 1.10694e-08 + 1.10694e-08 9.47464e-08 5.59175e-08 - 2.93085e-07 + 2.93085e-07