Skip to content
Snippets Groups Projects
Commit 9af28280 authored by BrianXia1996
Browse files

Add quad and seg kernels, just for a quick test

parent 1066cd37
No related branches found
No related tags found
2 merge requests: !22 "Basic implementation of Matrix-free BwdTrans and padding feature in Field", !3 "First naive implementation"
......@@ -94,28 +94,82 @@ public:
// outptr += nqBlocks;
// }
//------- version: SegExp--------
// //------- version: SegExp--------
// for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
// ++block_idx)
// {
// const auto basis0 = this->m_expansionList->GetExp(block_idx)->GetBasis(0);
// const auto nm0 = basis0->GetNumModes();
// const auto nq0 = basis0->GetNumPoints();
// auto const block = in.GetBlocks()[block_idx];
// int MynBlocks = block.num_elements / vec_t::width;
// const auto nqTot = nq0;
// const auto nqBlocks = nqTot * vec_t::width;
// int numTot = nm0;
// const auto nmBlocks = numTot * vec_t::width;
// // auto *inptr = &input[0];
// // auto *outptr = &output[0];
// // prepare the bdata
// std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, 1> bdata;
// for (int i = 0; i < 1; ++i)
// {
// auto bdataRAW = basis0->GetBdata();
// bdata[i].resize(bdataRAW.size());
// for (auto j = 0; j < bdataRAW.size(); ++j)
// {
// bdata[i][j] = bdataRAW[j];
// }
// }
// // Workspace for kernels - also checks preconditions
// // BwdTrans1DWorkspace<SHAPE_TYPE>(nm0, nq0);
// std::vector<vec_t, allocator<vec_t>> tmpIn(numTot), tmpOut(nqTot);
// for (int e = 0; e < MynBlocks; ++e)
// {
// // Load and transpose data
// load_interleave(inptr, numTot, tmpIn);
// matfree::BwdTransSegKernel(nm0, nq0, tmpIn, bdata[0],
// tmpOut);
// // de-interleave and store data
// deinterleave_store(tmpOut, nqTot, outptr);
// inptr += nmBlocks;
// outptr += nqBlocks;
// }
// }
// ------- version: QuadExp--------
for (size_t block_idx = 0; block_idx < in.GetBlocks().size();
++block_idx)
{
const auto basis0 = this->m_expansionList[block_idx]->GetBasis(0);
const auto nm0 = basis0->GetNumModes();
const auto nq0 = basis0->GetNumPoints();
const auto basis0 = this->m_expansionList->GetExp(block_idx)->GetBasis(0);
const auto basis1 = this->m_expansionList->GetExp(block_idx)->GetBasis(1);
const auto nm0 = basis0->GetNumModes(); const auto nm1 = basis1->GetNumModes();
const auto nq0 = basis0->GetNumPoints();const auto nq1 = basis1->GetNumPoints();
auto const block = in.GetBlocks()[block_idx];
int MynBlocks = block.num_elements / vec_t::width;
const auto nqTot = nq0;
const auto nqTot = nq0 * nq1;
const auto nqBlocks = nqTot * vec_t::width;
int numTot = nm0;
int numTot = nm0 * nm1;
const auto nmBlocks = numTot * vec_t::width;
// auto *inptr = &input[0];
// auto *outptr = &output[0];
const bool correct =
(basis0->GetBasisType() == LibUtilities::eModified_A);
// prepare the bdata
std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, 1> bdata;
for (int i = 0; i < 1; ++i)
std::array<std::vector<vec_t, tinysimd::allocator<vec_t>>, 2> bdata;
for (int i = 0; i < 2; ++i)
{
auto bdataRAW = basis0->GetBdata();
bdata[i].resize(bdataRAW.size());
......@@ -126,17 +180,19 @@ public:
}
// Workspace for kernels - also checks preconditions
// BwdTrans1DWorkspace<SHAPE_TYPE>(nm0, nq0);
// BwdTrans2DWorkspace<SHAPE_TYPE>(nm0, nm1, nq0, nq1, wsp0Size);
size_t wsp0Size = nm0 * nq0;
std::vector<vec_t, allocator<vec_t>> tmpIn(numTot), tmpOut(nqTot);
std::vector<vec_t, allocator<vec_t>> wsp0(wsp0Size), tmpIn(numTot), tmpOut(nqTot);
for (int e = 0; e < MynBlocks; ++e)
{
// Load and transpose data
load_interleave(inptr, numTot, tmpIn);
matfree::BwdTransSegKernel(nm0, nq0, tmpIn, bdata[0],
tmpOut);
matfree::BwdTransQuadKernel(nm0, nm1, nq0, nq1, tmpIn,
bdata[0], bdata[1],
wsp0, tmpOut);
// de-interleave and store data
deinterleave_store(tmpOut, nqTot, outptr);
......@@ -144,8 +200,6 @@ public:
inptr += nmBlocks;
outptr += nqBlocks;
}
}
std::cout << "Op bwd trans mat free\n";
}
......
......@@ -190,12 +190,19 @@ int main(int argc, char *argv[])
}
}
std::cout << in << std::endl;
std::cout <<"In:\n" << in << std::endl;
BwdTrans<>::create(explist, "StdMat")->apply(in, out);
std::cout << out << std::endl;
std::cout << "StdMat:\n" << out << std::endl;
// BwdTrans<>::create(explist, "SumFac")->apply(in, out);
// std::cout << "SumFac:\n" << out << std::endl;
BwdTrans<>::create(explist, "MatFree")->apply(in, out);
std::cout << "MatFree:\n" << out << std::endl;
#ifdef NEKTAR_USE_CUDA
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment