Commit 1cba82c9 authored by Alessandro Bolis's avatar Alessandro Bolis
Browse files

Fixed bug in trasposition routines (need further tests to confirm).

Added optianal data shuffling in Homogeneous Fwd and Bwd trasnform to
minimise the number of transpositions (for parallel efficiency).


git-svn-id: https://gforge.sci.utah.edu/svn/nektar/trunk@4024 305cdda6-5ce1-45b3-a98d-dfc68c8b3305
parent d6d1648f
......@@ -260,9 +260,12 @@ namespace Nektar
{
for(j = 0; j < m_num_points_per_proc[0];j++)
{
Vmath::Vcopy(copy_len,
&(inarray[i*num_pencil_per_proc+j*num_points_per_plane]),1,
&(outarray[i*num_pencil_per_proc*m_num_points_per_proc[0]+j*num_pencil_per_proc]),1);
if((i*num_pencil_per_proc+j*num_points_per_plane) < num_dofs)
{
Vmath::Vcopy(copy_len,
&(inarray[i*num_pencil_per_proc+j*num_points_per_plane]),1,
&(outarray[i*num_pencil_per_proc*m_num_points_per_proc[0]+j*num_pencil_per_proc]),1);
}
}
}
......@@ -373,9 +376,12 @@ namespace Nektar
{
for(j = 0; j < m_num_points_per_proc[0];j++)
{
Vmath::Vcopy(copy_len,
if((i*num_pencil_per_proc+j*num_points_per_plane) < num_dofs)
{
Vmath::Vcopy(copy_len,
&(tmp_outarray[i*num_pencil_per_proc*m_num_points_per_proc[0]+j*num_pencil_per_proc]),1,
&(outarray[i*num_pencil_per_proc+j*num_points_per_plane]),1);
}
}
}
}
......
......@@ -2292,16 +2292,20 @@ namespace Nektar
}
void ExpList::v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs)
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
ASSERTL0(false,
"This method is not defined or valid for this class type");
}
void ExpList::v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs)
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
ASSERTL0(false,
"This method is not defined or valid for this class type");
......
......@@ -320,12 +320,16 @@ namespace Nektar
// Homogeneous transforms
inline void HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void DealiasedProd(const Array<OneD, NekDouble> &inarray1,
const Array<OneD, NekDouble> &inarray2,
......@@ -1129,11 +1133,15 @@ namespace Nektar
virtual void v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
virtual void v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
virtual void v_DealiasedProd(const Array<OneD, NekDouble> &inarray1,
const Array<OneD, NekDouble> &inarray2,
......@@ -1592,20 +1600,24 @@ namespace Nektar
*
*/
inline void ExpList::HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs)
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
v_HomogeneousFwdTrans(inarray,outarray,UseContCoeffs);
v_HomogeneousFwdTrans(inarray,outarray,UseContCoeffs,Shuff,UnShuff);
}
/**
*
*/
inline void ExpList::HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs)
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
v_HomogeneousBwdTrans(inarray,outarray,UseContCoeffs);
v_HomogeneousBwdTrans(inarray,outarray,UseContCoeffs,Shuff,UnShuff);
}
/**
......
......@@ -105,16 +105,24 @@ namespace Nektar
{
}
void ExpListHomogeneous1D::v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs)
void ExpListHomogeneous1D::v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
// Forwards trans
Homogeneous1DTrans(inarray,outarray,true, UseContCoeffs);
Homogeneous1DTrans(inarray,outarray,true,UseContCoeffs,Shuff,UnShuff);
}
void ExpListHomogeneous1D::v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs)
void ExpListHomogeneous1D::v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
// Backwards trans
Homogeneous1DTrans(inarray,outarray,false, UseContCoeffs);
Homogeneous1DTrans(inarray,outarray,false,UseContCoeffs,Shuff,UnShuff);
}
/**
......@@ -204,7 +212,6 @@ namespace Nektar
//Moving the results in physical space for the output
HomogeneousBwdTrans(V1V2,outarray,UseContCoeffs);
}
}
......@@ -352,7 +359,12 @@ namespace Nektar
/**
* Homogeneous transform Bwd/Fwd (MVM and FFT)
*/
void ExpListHomogeneous1D::Homogeneous1DTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool IsForwards, bool UseContCoeffs)
void ExpListHomogeneous1D::Homogeneous1DTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool IsForwards,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
if(m_useFFT)
{
......@@ -360,11 +372,18 @@ namespace Nektar
int num_points_per_plane = num_dofs/m_planes.num_elements();
int num_dfts_per_proc = num_points_per_plane/m_comm->GetColumnComm()->GetSize() + (num_points_per_plane%m_comm->GetColumnComm()->GetSize() > 0);
Array<OneD, NekDouble> fft_in(num_dfts_per_proc*m_homogeneousBasis->GetNumPoints());
Array<OneD, NekDouble> fft_out(num_dfts_per_proc*m_homogeneousBasis->GetNumPoints());
Array<OneD, NekDouble> fft_in(num_dfts_per_proc*m_homogeneousBasis->GetNumPoints(),0.0);
Array<OneD, NekDouble> fft_out(num_dfts_per_proc*m_homogeneousBasis->GetNumPoints(),0.0);
m_transposition->Transpose(inarray,fft_in,false,LibUtilities::eXYtoZ);
if(Shuff)
{
m_transposition->Transpose(inarray,fft_in,false,LibUtilities::eXYtoZ);
}
else
{
fft_in = inarray;
}
if(IsForwards)
{
for(int i = 0 ; i < num_dfts_per_proc ; i++)
......@@ -380,7 +399,14 @@ namespace Nektar
}
}
m_transposition->Transpose(fft_out,outarray,false,LibUtilities::eZtoXY);
if(UnShuff)
{
m_transposition->Transpose(fft_out,outarray,false,LibUtilities::eZtoXY);
}
else
{
outarray = fft_out;
}
}
else
{
......@@ -412,11 +438,18 @@ namespace Nektar
int nrows = blkmat->GetRows();
int ncols = blkmat->GetColumns();
Array<OneD, NekDouble> sortedinarray(ncols);
Array<OneD, NekDouble> sortedoutarray(nrows);
Array<OneD, NekDouble> sortedinarray(ncols,0.0);
Array<OneD, NekDouble> sortedoutarray(nrows,0.0);
m_transposition->Transpose(inarray,sortedinarray,!IsForwards,LibUtilities::eXYtoZ);
if(Shuff)
{
m_transposition->Transpose(inarray,sortedinarray,!IsForwards,LibUtilities::eXYtoZ);
}
else
{
sortedinarray = inarray;
}
// Create NekVectors from the given data arrays
NekVector<NekDouble> in (ncols,sortedinarray,eWrapper);
NekVector<NekDouble> out(nrows,sortedoutarray,eWrapper);
......@@ -424,7 +457,15 @@ namespace Nektar
// Perform matrix-vector multiply.
out = (*blkmat)*in;
m_transposition->Transpose(sortedoutarray,outarray,IsForwards,LibUtilities::eZtoXY);
if(UnShuff)
{
m_transposition->Transpose(sortedoutarray,outarray,IsForwards,LibUtilities::eZtoXY);
}
else
{
outarray = sortedinarray;
}
}
}
......@@ -502,7 +543,6 @@ namespace Nektar
StdPoint);
loc_mat = StdPoint.GetStdMatrix(matkey);
}
else
{
......@@ -511,7 +551,6 @@ namespace Nektar
StdPoint);
loc_mat = StdPoint.GetStdMatrix(matkey);
}
}
//other cases
......@@ -526,7 +565,6 @@ namespace Nektar
StdSeg);
loc_mat = StdSeg.GetStdMatrix(matkey);
}
else
{
......@@ -535,7 +573,6 @@ namespace Nektar
StdSeg);
loc_mat = StdSeg.GetStdMatrix(matkey);
}
}
......@@ -738,13 +775,10 @@ namespace Nektar
ElmtID_to_ExpID[(*m_exp)[i]->GetGeom()->GetGlobalID()] = i;
}
Vmath::Vcopy(datalen,&fielddata[offset],1,&m_coeffs[0],1);
}
/**
* Write Tecplot Files Header
* @param outfile Output file name.
......@@ -891,8 +925,7 @@ namespace Nektar
m_transposition->Transpose(outarray,out_d2,false,LibUtilities::eZtoXY);
Vmath::Smul(nT_pts,2.0/m_lhom,out_d2,1,out_d2,1);
Vmath::Smul(nT_pts,2.0/m_lhom,out_d2,1,out_d2,1);
}
}
}
......
......@@ -88,11 +88,24 @@ namespace Nektar
/// Destructor.
MULTI_REGIONS_EXPORT virtual ~ExpListHomogeneous1D();
MULTI_REGIONS_EXPORT void Homogeneous1DTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool IsForwards, bool UseContCoeffs = false);
inline void HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs = false);
inline void HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs = false);
MULTI_REGIONS_EXPORT void Homogeneous1DTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool IsForwards,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void DealiasedProd(const Array<OneD, NekDouble> &inarray1,
const Array<OneD, NekDouble> &inarray2,
......@@ -199,12 +212,16 @@ namespace Nektar
std::string var);
virtual void v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
virtual void v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
virtual void v_DealiasedProd(const Array<OneD, NekDouble> &inarray1,
const Array<OneD, NekDouble> &inarray2,
......@@ -236,14 +253,22 @@ namespace Nektar
DNekMatSharedPtr MatBwdPAD;
};
inline void ExpListHomogeneous1D::HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs)
inline void ExpListHomogeneous1D::HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
v_HomogeneousFwdTrans(inarray,outarray,UseContCoeffs);
v_HomogeneousFwdTrans(inarray,outarray,UseContCoeffs,Shuff,UnShuff);
}
inline void ExpListHomogeneous1D::HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs)
inline void ExpListHomogeneous1D::HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
v_HomogeneousBwdTrans(inarray,outarray,UseContCoeffs);
v_HomogeneousBwdTrans(inarray,outarray,UseContCoeffs,Shuff,UnShuff);
}
inline void ExpListHomogeneous1D::DealiasedProd(const Array<OneD, NekDouble> &inarray1,
......
......@@ -130,16 +130,24 @@ namespace Nektar
{
}
void ExpListHomogeneous2D::v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs)
void ExpListHomogeneous2D::v_HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
// Forwards trans
Homogeneous2DTrans(inarray,outarray,true, UseContCoeffs);
Homogeneous2DTrans(inarray,outarray,true,UseContCoeffs,Shuff,UnShuff);
}
void ExpListHomogeneous2D::v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs)
void ExpListHomogeneous2D::v_HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs,
bool Shuff,
bool UnShuff)
{
// Backwards trans
Homogeneous2DTrans(inarray,outarray,false, UseContCoeffs);
Homogeneous2DTrans(inarray,outarray,false,UseContCoeffs,Shuff,UnShuff);
}
void ExpListHomogeneous2D::v_DealiasedProd(const Array<OneD, NekDouble> &inarray1,
......@@ -359,8 +367,12 @@ namespace Nektar
}
}
void ExpListHomogeneous2D::Homogeneous2DTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool IsForwards, bool UseContCoeffs)
{
void ExpListHomogeneous2D::Homogeneous2DTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool IsForwards,
bool UseContCoeffs,
bool Shuff,
bool UnShuff) {
if(m_useFFT)
{
......@@ -415,7 +427,6 @@ namespace Nektar
}
else
{
DNekBlkMatSharedPtr blkmatY;
DNekBlkMatSharedPtr blkmatZ;
......@@ -478,100 +489,6 @@ namespace Nektar
}
}
void ExpListHomogeneous2D::ShuffleIntoHomogeneous2DClosePacked(
const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseNumModes)
{
int i, pts_per_line;
int n = inarray.num_elements();
int packed_len;
int NumMod_y = m_homogeneousBasis_y->GetNumModes();
int NumMod_z = m_homogeneousBasis_z->GetNumModes();
pts_per_line = n/m_lines.num_elements();
if(UseNumModes)
{
packed_len = NumMod_y*NumMod_z;
}
else
{
packed_len = m_lines.num_elements();
}
ASSERTL1(&inarray[0] != &outarray[0],"Inarray and outarray cannot be the same");
for(i = 0; i < packed_len; ++i)
{
Vmath::Vcopy(pts_per_line,&(inarray[i*pts_per_line]),1,
&(outarray[i]),packed_len);
}
}
void ExpListHomogeneous2D::UnshuffleFromHomogeneous2DClosePacked(
const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseNumModes)
{
int i,pts_per_line;
int n = inarray.num_elements();
int packed_len;
int NumMod_y = m_homogeneousBasis_y->GetNumModes();
int NumMod_z = m_homogeneousBasis_z->GetNumModes();
// use length of inarray to determine data storage type
// (i.e.modal or physical).
pts_per_line = n/m_lines.num_elements();
if(UseNumModes)
{
packed_len = NumMod_y*NumMod_z;
}
else
{
packed_len = m_lines.num_elements();
}
ASSERTL1(&inarray[0] != &outarray[0],"Inarray and outarray cannot be the same");
for(i = 0; i < packed_len; ++i)
{
Vmath::Vcopy(pts_per_line,&(inarray[i]),packed_len,
&(outarray[i*pts_per_line]),1);
}
}
void ExpListHomogeneous2D::Transpose(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool YtoZ)
{
int n = m_lines.num_elements(); //number of Fourier points in the Fourier directions (x-z grid)
int s = inarray.num_elements(); //number of total points = n. of Fourier points * n. of points per line
int pts_per_line = s/n;
int packed_len = pts_per_line*m_nz;
if (YtoZ)
{
for(int i = 0; i < m_ny ; ++i)
{
Vmath::Vcopy(packed_len,&(inarray[i]),m_ny,&(outarray[i*packed_len]),1);
}
}
else
{
for(int i = 0; i < packed_len ; ++i)
{
Vmath::Vcopy(m_ny,&(inarray[i]),packed_len,&(outarray[i*m_ny]),1);
}
}
}
DNekBlkMatSharedPtr ExpListHomogeneous2D::GetHomogeneous2DBlockMatrix(Homogeneous2DMatType mattype, bool UseContCoeffs) const
{
Homo2DBlockMatrixMap::iterator matrixIter = m_homogeneous2DBlockMat->find(mattype);
......@@ -587,7 +504,6 @@ namespace Nektar
}
}
DNekBlkMatSharedPtr ExpListHomogeneous2D::GenHomogeneous2DBlockMatrix(Homogeneous2DMatType mattype, bool UseContCoeffs) const
{
int i;
......
......@@ -98,33 +98,32 @@ namespace Nektar
/// Destructor.
MULTI_REGIONS_EXPORT virtual ~ExpListHomogeneous2D();
MULTI_REGIONS_EXPORT void Homogeneous2DTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool IsForwards, bool UseContCoeffs = false);
inline void HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs = false);
inline void HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray, Array<OneD, NekDouble> &outarray, bool UseContCoeffs = false);
MULTI_REGIONS_EXPORT void Homogeneous2DTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool IsForwards,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void HomogeneousFwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void HomogeneousBwdTrans(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false,
bool Shuff = true,
bool UnShuff = true);
inline void DealiasedProd(const Array<OneD, NekDouble> &inarray1,
const Array<OneD, NekDouble> &inarray2,
Array<OneD, NekDouble> &outarray,
bool UseContCoeffs = false);
MULTI_REGIONS_EXPORT void ShuffleIntoHomogeneous2DClosePacked(
const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseNumModes = false);
MULTI_REGIONS_EXPORT void UnshuffleFromHomogeneous2DClosePacked(
const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool UseNumModes = false);
MULTI_REGIONS_EXPORT void SetPaddingBase(void);
MULTI_REGIONS_EXPORT void Transpose(const Array<OneD, const NekDouble> &inarray,
Array<OneD, NekDouble> &outarray,
bool XtoZ);
MULTI_REGIONS_EXPORT void PhysDeriv(const Array<OneD, const NekDouble> &inarray,