Skip to content
Snippets Groups Projects
Commit 2f79a27b authored by Chris Cantwell's avatar Chris Cantwell
Browse files

Merge branch 'helmholtz_sum_fac_cuda_kernels' into 'master'

Implement CUDA Helmholtz sum-factorization kernels

See merge request !20
parents 4360038b bca3414a
No related branches found
No related tags found
1 merge request!20Implement CUDA Helmholtz sum-factorization kernels
......@@ -26,8 +26,7 @@ message(STATUS "Found Nektar++: version ${NEKTAR++_VERSION}")
set(CMAKE_INSTALL_RPATH "${NEKTAR++_LIBRARY_DIRS}")
set(SRC Field.cpp Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operators/BwdTrans/BwdTransImpl.cpp Operators/OperatorIProductWRTBase.cpp Operators/IProductWRTBase/IProductWRTBaseImpl.cpp Operators/OperatorIProductWRTDerivBase.cpp Operators/IProductWRTDerivBase/IProductWRTDerivBaseImpl.cpp Operators/OperatorPhysDeriv.cpp Operators/PhysDeriv/PhysDerivImpl.cpp)
set(SRC Field.cpp Operators/Operator.cpp Operators/OperatorBwdTrans.cpp Operators/BwdTrans/BwdTransImpl.cpp Operators/OperatorIProductWRTBase.cpp Operators/IProductWRTBase/IProductWRTBaseImpl.cpp Operators/OperatorIProductWRTDerivBase.cpp Operators/IProductWRTDerivBase/IProductWRTDerivBaseImpl.cpp Operators/OperatorPhysDeriv.cpp Operators/PhysDeriv/PhysDerivImpl.cpp Operators/OperatorHelmholtz.cpp Operators/Helmholtz/HelmholtzImpl.cpp)
if (NEKTAR_USE_CUDA AND NEKTAR_USE_SIMD)
MESSAGE(FATAL_ERROR "Cannot use both SIMD and CUDA")
endif()
......@@ -35,7 +34,7 @@ endif()
if (NEKTAR_USE_CUDA)
enable_language(CUDA)
add_definitions(-DNEKTAR_USE_CUDA)
set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu Operators/IProductWRTBase/IProductWRTBaseCUDA.cu Operators/IProductWRTDerivBase/IProductWRTDerivBaseCUDA.cu Operators/PhysDeriv/PhysDerivCUDA.cu)
set(SRC ${SRC} MemoryRegionCUDA.cu Operators/BwdTrans/BwdTransCUDA.cu Operators/IProductWRTBase/IProductWRTBaseCUDA.cu Operators/IProductWRTDerivBase/IProductWRTDerivBaseCUDA.cu Operators/PhysDeriv/PhysDerivCUDA.cu Operators/Helmholtz/HelmholtzCUDA.cu)
endif()
if (NEKTAR_USE_SIMD)
......
#include "HelmholtzCUDA.hpp"
namespace Nektar::Operators::detail
{
template <>
std::string OperatorHelmholtzImpl<double, ImplCUDA>::className =
GetOperatorFactory<double>().RegisterCreatorFunction(
"HelmholtzCUDA", OperatorHelmholtzImpl<double, ImplCUDA>::instantiate,
"...");
}
#include "Operators/BwdTrans/BwdTransCUDA.hpp"
#include "Operators/Helmholtz/HelmholtzCUDAKernels.cuh"
#include "Operators/IProductWRTBase/IProductWRTBaseCUDA.hpp"
#include "Operators/IProductWRTDerivBase/IProductWRTDerivBaseCUDA.hpp"
#include "Operators/OperatorHelmholtz.hpp"
#include "Operators/PhysDeriv/PhysDerivCUDA.hpp"
namespace Nektar::Operators::detail
{
// standard matrix implementation
template <typename TData>
class OperatorHelmholtzImpl<TData, ImplCUDA> : public OperatorHelmholtz<TData>
{
public:
OperatorHelmholtzImpl(const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorHelmholtz<TData>(expansionList),
m_bwd(
Field<TData, FieldState::Phys>::template create<MemoryRegionCUDA>(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_deriv0(
Field<TData, FieldState::Phys>::template create<MemoryRegionCUDA>(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_deriv1(
Field<TData, FieldState::Phys>::template create<MemoryRegionCUDA>(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_deriv2(
Field<TData, FieldState::Phys>::template create<MemoryRegionCUDA>(
GetBlockAttributes(FieldState::Phys, expansionList)))
{
auto nCoord = this->m_expansionList->GetCoordim(0);
m_BwdTransOp = BwdTrans<>::create(this->m_expansionList, "CUDA");
m_PhysDerivOp = PhysDeriv<>::create(this->m_expansionList, "CUDA");
m_IProductWRTBaseOp =
IProductWRTBase<>::create(this->m_expansionList, "CUDA");
m_IProductWRTDerivBaseOp =
IProductWRTDerivBase<>::create(this->m_expansionList, "CUDA");
Array<OneD, TData> diffCoeff(nCoord * nCoord, 0.0);
for (size_t d = 0; d < nCoord; d++)
{
diffCoeff[d * nCoord + d] = 1.0; // Temporary solution
}
cudaMalloc((void **)&m_diffCoeff, sizeof(TData) * nCoord * nCoord);
cudaMemcpy(m_diffCoeff, diffCoeff.get(),
sizeof(TData) * nCoord * nCoord, cudaMemcpyHostToDevice);
}
void apply(Field<TData, FieldState::Coeff> &in,
Field<TData, FieldState::Coeff> &out) override
{
// Step 1: BwdTrans
m_BwdTransOp->apply(in, m_bwd);
// Step 2: PhysDeriv
m_PhysDerivOp->apply(m_bwd, m_deriv0, m_deriv1, m_deriv2);
// Step 3: Inner product for mass matrix operation
m_IProductWRTBaseOp->apply(m_bwd, out, m_lambda);
// Step 4: Multiply by diffusion coefficient
DiffusionCoeff(m_deriv0, m_deriv1, m_deriv2);
// Step 5: Inner product
m_IProductWRTDerivBaseOp->apply(m_deriv0, m_deriv1, m_deriv2, out,
true);
}
void DiffusionCoeff(Field<TData, FieldState::Phys> &deriv0,
Field<TData, FieldState::Phys> &deriv1,
Field<TData, FieldState::Phys> &deriv2)
{
// Initialize pointers.
std::vector<TData *> derivptr{deriv0.template GetStorage<MemoryRegionCUDA>().GetGPUPtr(),
deriv1.template GetStorage<MemoryRegionCUDA>().GetGPUPtr(),
deriv2.template GetStorage<MemoryRegionCUDA>().GetGPUPtr()};
// Initialize index.
size_t expIdx = 0;
for (size_t block_idx = 0; block_idx < deriv0.GetBlocks().size();
++block_idx)
{
// Determine shape and type of the element.
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = deriv0.GetBlocks()[block_idx].num_elements;
auto nCoord = expPtr->GetCoordim();
auto nqTot = expPtr->GetTotPoints();
// Determine CUDA grid parameters.
m_gridSize = nElmts / m_blockSize;
m_gridSize += (nElmts % m_blockSize == 0) ? 0 : 1;
// Multiply by diffusion coefficient.
if (nCoord == 1)
{
auto nq0 = expPtr->GetNumPoints(0);
DiffusionCoeff1DKernel<<<m_gridSize, m_blockSize>>>(
nq0, nElmts, m_diffCoeff, derivptr[0]);
}
else if (nCoord == 2)
{
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
DiffusionCoeff2DKernel<<<m_gridSize, m_blockSize>>>(
nq0, nq1, nElmts, m_diffCoeff, derivptr[0], derivptr[1]);
}
else
{
auto nq0 = expPtr->GetNumPoints(0);
auto nq1 = expPtr->GetNumPoints(1);
auto nq2 = expPtr->GetNumPoints(2);
DiffusionCoeff3DKernel<<<m_gridSize, m_blockSize>>>(
nq0, nq1, nq2, nElmts, m_diffCoeff, derivptr[0],
derivptr[1], derivptr[2]);
}
// Increment pointer and index for next element type.
for (size_t d = 0; d < nCoord; d++)
{
derivptr[d] += nqTot * nElmts;
}
expIdx += nElmts;
}
}
static std::unique_ptr<Operator<TData>> instantiate(
const MultiRegions::ExpListSharedPtr &expansionList)
{
return std::make_unique<OperatorHelmholtzImpl<TData, ImplCUDA>>(
expansionList);
}
void SetLambda(TData lambda)
{
m_lambda = lambda;
}
static std::string className;
private:
std::shared_ptr<OperatorBwdTrans<TData>> m_BwdTransOp;
std::shared_ptr<OperatorPhysDeriv<TData>> m_PhysDerivOp;
std::shared_ptr<OperatorIProductWRTBase<TData>> m_IProductWRTBaseOp;
std::shared_ptr<OperatorIProductWRTDerivBase<TData>>
m_IProductWRTDerivBaseOp;
Field<TData, FieldState::Phys> m_bwd;
Field<TData, FieldState::Phys> m_deriv0;
Field<TData, FieldState::Phys> m_deriv1;
Field<TData, FieldState::Phys> m_deriv2;
TData m_lambda = 1.0;
TData *m_diffCoeff;
size_t m_blockSize = 32;
size_t m_gridSize;
};
} // namespace Nektar::Operators::detail
namespace Nektar::Operators::detail
{
template <typename TData>
__global__ void DiffusionCoeff1DKernel(const size_t nq0, const size_t nelmt,
const TData *diffCoeff, TData *deriv0)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
// Assign pointers.
TData *derivptr = deriv0 + nq0 * e;
for (size_t i = 0; i < nq0; i++)
{
derivptr[i] *= diffCoeff[0];
}
}
template <typename TData>
__global__ void DiffusionCoeff2DKernel(const size_t nq0, const size_t nq1,
const size_t nelmt,
const TData *diffCoeff, TData *deriv0,
TData *deriv1)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
constexpr size_t ncoord = 2;
// Assign pointers.
TData **derivptr = new TData *[ncoord];
derivptr[0] = deriv0 + nq0 * nq1 * e;
derivptr[1] = deriv1 + nq0 * nq1 * e;
for (size_t j = 0, cnt = 0; j < nq1; j++)
{
for (size_t i = 0; i < nq0; i++)
{
TData deriv[2] = {derivptr[0][cnt], derivptr[1][cnt]};
for (size_t d = 0; d < ncoord; d++)
{
derivptr[d][cnt] = diffCoeff[d * ncoord + 0] * deriv[0] +
diffCoeff[d * ncoord + 1] * deriv[1];
}
cnt++;
}
}
}
template <typename TData>
__global__ void DiffusionCoeff3DKernel(const size_t nq0, const size_t nq1,
const size_t nq2, const size_t nelmt,
TData *diffCoeff, TData *deriv0,
TData *deriv1, TData *deriv2)
{
size_t e = blockDim.x * blockIdx.x + threadIdx.x;
if (e >= nelmt)
{
return;
}
constexpr size_t ncoord = 3;
// Assign pointers.
TData **derivptr = new TData *[ncoord];
derivptr[0] = deriv0 + nq0 * nq1 * nq2 * e;
derivptr[1] = deriv1 + nq0 * nq1 * nq2 * e;
derivptr[2] = deriv2 + nq0 * nq1 * nq2 * e;
for (size_t k = 0, cnt = 0; k < nq2; k++)
{
for (size_t j = 0; j < nq1; j++)
{
for (size_t i = 0; i < nq0; i++)
{
TData deriv[3] = {derivptr[0][cnt], derivptr[1][cnt],
derivptr[2][cnt]};
for (size_t d = 0; d < ncoord; d++)
{
derivptr[d][cnt] = diffCoeff[d * ncoord + 0] * deriv[0] +
diffCoeff[d * ncoord + 1] * deriv[1] +
diffCoeff[d * ncoord + 2] * deriv[2];
}
cnt++;
}
}
}
}
} // namespace Nektar::Operators::detail
#include "HelmholtzStdMat.hpp"
namespace Nektar::Operators::detail
{
// Add different Helmholtz implementations to the factory.
template <>
std::string OperatorHelmholtzImpl<double, ImplStdMat>::className =
GetOperatorFactory<double>().RegisterCreatorFunction(
"HelmholtzStdMat",
OperatorHelmholtzImpl<double, ImplStdMat>::instantiate, "...");
} // namespace Nektar::Operators::detail
#include "Operators/BwdTrans/BwdTransStdMat.hpp"
#include "Operators/IProductWRTBase/IProductWRTBaseStdMat.hpp"
#include "Operators/IProductWRTDerivBase/IProductWRTDerivBaseStdMat.hpp"
#include "Operators/OperatorHelmholtz.hpp"
#include "Operators/PhysDeriv/PhysDerivStdMat.hpp"
namespace Nektar::Operators::detail
{
// standard matrix implementation
template <typename TData>
class OperatorHelmholtzImpl<TData, ImplStdMat> : public OperatorHelmholtz<TData>
{
public:
OperatorHelmholtzImpl(const MultiRegions::ExpListSharedPtr &expansionList)
: OperatorHelmholtz<TData>(expansionList),
m_bwd(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_deriv0(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_deriv1(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_deriv2(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_derivcoeff0(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_derivcoeff1(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList))),
m_derivcoeff2(Field<TData, FieldState::Phys>::create(
GetBlockAttributes(FieldState::Phys, expansionList)))
{
auto nCoord = this->m_expansionList->GetCoordim(0);
m_BwdTransOp = BwdTrans<>::create(this->m_expansionList, "StdMat");
m_PhysDerivOp = PhysDeriv<>::create(this->m_expansionList, "StdMat");
m_IProductWRTBaseOp =
IProductWRTBase<>::create(this->m_expansionList, "StdMat");
m_IProductWRTDerivBaseOp =
IProductWRTDerivBase<>::create(this->m_expansionList, "StdMat");
m_diffCoeff = Array<OneD, TData>(nCoord * nCoord, 0.0);
for (size_t d = 0; d < nCoord; d++)
{
m_diffCoeff[d * nCoord + d] = 1.0; // temporary solution
}
}
void apply(Field<TData, FieldState::Coeff> &in,
Field<TData, FieldState::Coeff> &out) override
{
// Step 1: BwdTrans
m_BwdTransOp->apply(in, m_bwd);
// Step 2: PhysDeriv
m_PhysDerivOp->apply(m_bwd, m_deriv0, m_deriv1, m_deriv2);
// Step 3: Inner product for mass matrix operation
m_IProductWRTBaseOp->apply(m_bwd, out, m_lambda);
// Step 4: Multiply by diffusion coefficient
DiffusionCoeff(m_deriv0, m_deriv1, m_deriv2, m_derivcoeff0,
m_derivcoeff1, m_derivcoeff2);
// Step 5: Inner product
m_IProductWRTDerivBaseOp->apply(m_derivcoeff0, m_derivcoeff1,
m_derivcoeff2, out, true);
}
void DiffusionCoeff(Field<TData, FieldState::Phys> &deriv0,
Field<TData, FieldState::Phys> &deriv1,
Field<TData, FieldState::Phys> &deriv2,
Field<TData, FieldState::Phys> &derivcoeff0,
Field<TData, FieldState::Phys> &derivcoeff1,
Field<TData, FieldState::Phys> &derivcoeff2)
{
// Initialize pointers.
std::vector<TData *> derivptr{deriv0.GetStorage().GetCPUPtr(),
deriv1.GetStorage().GetCPUPtr(),
deriv2.GetStorage().GetCPUPtr()};
std::vector<TData *> derivcoeffptr{
derivcoeff0.GetStorage().GetCPUPtr(),
derivcoeff1.GetStorage().GetCPUPtr(),
derivcoeff2.GetStorage().GetCPUPtr()};
// Initialize index.
size_t expIdx = 0;
for (size_t block_idx = 0; block_idx < deriv0.GetBlocks().size();
++block_idx)
{
// Determine shape and type of the element.
auto const expPtr = this->m_expansionList->GetExp(expIdx);
auto nElmts = deriv0.GetBlocks()[block_idx].num_elements;
auto nCoord = expPtr->GetCoordim();
auto nqTot = expPtr->GetTotPoints();
// Multiply by diffusion coefficient.
for (size_t d = 0; d < nCoord; d++)
{
Vmath::Smul(nqTot * nElmts, m_diffCoeff[d * nCoord], derivptr[0], 1,
derivcoeffptr[d], 1);
for (size_t l = 1; l < nCoord; l++)
{
Vmath::Svtvp(nqTot * nElmts, m_diffCoeff[d * nCoord + l],
derivptr[l], 1, derivcoeffptr[d], 1,
derivcoeffptr[d], 1);
}
}
// Increment pointer and index for next element type.
for (size_t d = 0; d < nCoord; d++)
{
derivptr[d] += nqTot * nElmts;
derivcoeffptr[d] += nqTot * nElmts;
}
expIdx += nElmts;
}
}
static std::unique_ptr<Operator<TData>> instantiate(
const MultiRegions::ExpListSharedPtr &expansionList)
{
return std::make_unique<OperatorHelmholtzImpl<TData, ImplStdMat>>(
expansionList);
}
void SetLambda(TData lambda)
{
m_lambda = lambda;
}
static std::string className;
private:
std::shared_ptr<OperatorBwdTrans<TData>> m_BwdTransOp;
std::shared_ptr<OperatorPhysDeriv<TData>> m_PhysDerivOp;
std::shared_ptr<OperatorIProductWRTBase<TData>> m_IProductWRTBaseOp;
std::shared_ptr<OperatorIProductWRTDerivBase<TData>>
m_IProductWRTDerivBaseOp;
Field<TData, FieldState::Phys> m_bwd;
Field<TData, FieldState::Phys> m_deriv0;
Field<TData, FieldState::Phys> m_deriv1;
Field<TData, FieldState::Phys> m_deriv2;
Field<TData, FieldState::Phys> m_derivcoeff0;
Field<TData, FieldState::Phys> m_derivcoeff1;
Field<TData, FieldState::Phys> m_derivcoeff2;
TData m_lambda = 1.0;
Array<OneD, TData> m_diffCoeff;
};
} // namespace Nektar::Operators::detail
#include "OperatorHelmholtz.hpp"
namespace Nektar::Operators
{
template <> const std::string Helmholtz<>::key = "Helmholtz";
template <> const std::string Helmholtz<>::default_impl = "MatFree";
} // namespace Nektar::Operators
#pragma once
#include "Field.hpp"
#include "Operator.hpp"
namespace Nektar::Operators
{
// Helmholtz base class
// Defines the apply operator to enforce apply parameter types
template <typename TData> class OperatorHelmholtz : public Operator<TData>
{
public:
virtual ~OperatorHelmholtz() = default;
OperatorHelmholtz(const MultiRegions::ExpListSharedPtr &expansionList)
: Operator<TData>(expansionList)
{
}
virtual void apply(Field<TData, FieldState::Coeff> &in,
Field<TData, FieldState::Coeff> &out) = 0;
virtual void operator()(Field<TData, FieldState::Coeff> &in,
Field<TData, FieldState::Coeff> &out)
{
apply(in, out);
}
};
// Descriptor / traits class for Helmholtz
template <typename TData = default_fp_type> struct Helmholtz
{
using class_name = OperatorHelmholtz<TData>;
using FieldIn = Field<TData, FieldState::Coeff>;
using FieldOut = Field<TData, FieldState::Coeff>;
static const std::string key;
static const std::string default_impl;
static std::shared_ptr<class_name> create(
const MultiRegions::ExpListSharedPtr &expansionList,
std::string pKey = "")
{
return Operator<TData>::template create<Helmholtz>(expansionList, pKey);
}
};
namespace detail
{
// Template for Helmholtz implementations
template <typename TData, typename Op> class OperatorHelmholtzImpl;
} // namespace detail
} // namespace Nektar::Operators
......@@ -14,6 +14,7 @@
#include "Field.hpp"
#include "Operators/OperatorBwdTrans.hpp"
#include "Operators/OperatorHelmholtz.hpp"
#include "Operators/OperatorIProductWRTBase.hpp"
#include "Operators/OperatorIProductWRTDerivBase.hpp"
#include "Operators/OperatorPhysDeriv.hpp"
......@@ -586,6 +587,101 @@ int main(int argc, char *argv[])
std::cout << std::endl;
}
#endif
// Test Helmholtz Implementation
{
std::cout << "Helmholtz (StdMat) test starts." << std::endl;
// Create two Fields with memory on the CPU.
auto inCoeff = Field<double, FieldState::Coeff>::create(blocks_coeff);
auto outCoeff = Field<double, FieldState::Coeff>::create(blocks_coeff);
// Assign input values from the CPU.
auto *inptr = inCoeff.GetStorage().GetCPUPtr();
for (auto const &block : inCoeff.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
*(inptr++) = coeff + 1;
}
}
}
std::cout << "Initial shape:\n" << inCoeff << std::endl;
// Perform the Helmholtz on the fields.
Helmholtz<>::create(explist, "StdMat")->apply(inCoeff, outCoeff);
// Check output values.
std::cout << "Out:" << std::endl;
auto outptr = outCoeff.GetStorage().GetCPUPtr();
for (auto const &block : outCoeff.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
std::cout << *(outptr++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
// Test Helmholtz (CUDA) Implementation
#ifdef NEKTAR_USE_CUDA
{
std::cout << "Helmholtz (CUDA) test starts." << std::endl;
// Create two Fields with memory on the GPU.
auto inCoeff =
Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(
blocks_coeff);
auto outCoeff =
Field<double, FieldState::Coeff>::create<MemoryRegionCUDA>(
blocks_coeff);
// Assign input values from the CPU.
std::cout << "Initial shape: " << std::endl;
auto *inptr =
inCoeff.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : inCoeff.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
*(inptr++) = coeff + 1;
std::cout << coeff + 1 << " ";
}
}
}
std::cout << std::endl << std::endl;
// Perform the Helmholtz on the fields using the CUDA implementation
// Since this is a CUDA operator, acting on CUDA fields, everything
// happens on the GPU.
Helmholtz<>::create(explist, "CUDA")->apply(inCoeff, outCoeff);
// Check output values.
std::cout << "Out:" << std::endl;
auto *outptr =
outCoeff.template GetStorage<MemoryRegionCUDA>().GetCPUPtr();
for (auto const &block : outCoeff.GetBlocks())
{
for (size_t el = 0; el < block.num_elements; ++el)
{
for (size_t coeff = 0; coeff < block.num_pts; ++coeff)
{
std::cout << *(outptr++) << ' ';
}
}
std::cout << std::endl;
}
std::cout << std::endl;
}
#endif
std::cout << "END" << std::endl;
return 0;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment