From 36074f570b5de8b91af1390569ca0e1e45684358 Mon Sep 17 00:00:00 2001
From: Yu Pan
Date: Wed, 8 May 2019 19:08:42 +0100
Subject: [PATCH 1/7] Add governing equations

.../solvers/compressiblesolver.tex  35 +++++++++++++++++++
1 file changed, 35 insertions(+)
diff git a/docs/developerguide/solvers/compressiblesolver.tex b/docs/developerguide/solvers/compressiblesolver.tex
index 29f799d84..7cd0de20b 100644
 a/docs/developerguide/solvers/compressiblesolver.tex
+++ b/docs/developerguide/solvers/compressiblesolver.tex
@@ 3,6 +3,41 @@
In this chapter, we walk the reader through our 2D and 3D compressible NavierStokes Solver (CompressibleFlowSolver).
\section{Fundamental Theories of CompressibleFlowSolver}
+The governing equation systems includes continuity equation, momentum equations and energy equation. Write in conservative form
+\begin{equation}
+ \frac{\partial U}{\partial t}+
+ \sum\limits_{i=1}^{d}{\frac{\partial F_{i}(U)}{\partial x_{i}}}
+ =\sum\limits_{i=1}^{d}{\frac{\partial G_{i}(U,\nabla U)}{\partial x_{i}}}
+\end{equation}
+where $U=[\rho,\rho u_{1} \hdots \rho u_{d},E]^{T}$
+
+Invisid flux F in i direction is
+\begin{equation}
+ F_{i}=
+\begin{bmatrix}
+ \rho u_{i}\\
+ \rho u_{1}u_{i}+\delta_{1,i}P\\
+ \vdots\\
+ \rho u_{d}u_{i}+\delta_{d,i}P\\
+ u_{i}(E+P)
+\end{bmatrix}
+\end{equation}
+where total energy $E=\rho (c_{v}T+\textbf{u}^{2}/2)$, pressure $P=(\gamma1)(E\rho \textbf{u}^{2}/2)$
+
+Viscous flux G in i direction is
+\begin{equation}
+ G_{i}=
+\begin{bmatrix}
+ 0\\
+ \tau_{i1}\\
+ \vdots\\
+ \tau_{id}\\
+ \sum\limits_{j=1}^{d}{u_{j}\tau_{ij}}q_{i}
+\end{bmatrix}
+\end{equation}
+where viscous term $\tau_{ij}=\mu(\frac{\partial u_{i}}{\partial x_{j}}+\frac{\partial u_{j}}{\partial x_{i}}\frac{2}{3}\nabla \cdot\textbf{u}\delta_{ij})$ and heat flux $\textbf{q}=\kappa \nabla T$
+
+
\clearpage
\section{Data Structure of CompressibleFlowSolver}
\begin{figure}

GitLab
From 90733aa85c266629b268cc9427c546be7da2f215 Mon Sep 17 00:00:00 2001
From: Yu Pan
Date: Wed, 8 May 2019 19:46:20 +0100
Subject: [PATCH 2/7] Add weak formulation

.../developerguide/solvers/compressiblesolver.tex  13 ++++++++++++
1 file changed, 12 insertions(+), 1 deletion()
diff git a/docs/developerguide/solvers/compressiblesolver.tex b/docs/developerguide/solvers/compressiblesolver.tex
index 7cd0de20b..8833f8e90 100644
 a/docs/developerguide/solvers/compressiblesolver.tex
+++ b/docs/developerguide/solvers/compressiblesolver.tex
@@ 4,7 +4,7 @@
In this chapter, we walk the reader through our 2D and 3D compressible NavierStokes Solver (CompressibleFlowSolver).
\section{Fundamental Theories of CompressibleFlowSolver}
The governing equation systems includes continuity equation, momentum equations and energy equation. Write in conservative form
\begin{equation}
+\begin{equation}\label{eq1}
\frac{\partial U}{\partial t}+
\sum\limits_{i=1}^{d}{\frac{\partial F_{i}(U)}{\partial x_{i}}}
=\sum\limits_{i=1}^{d}{\frac{\partial G_{i}(U,\nabla U)}{\partial x_{i}}}
@@ 37,7 +37,18 @@ Viscous flux G in i direction is
\end{equation}
where viscous term $\tau_{ij}=\mu(\frac{\partial u_{i}}{\partial x_{j}}+\frac{\partial u_{j}}{\partial x_{i}}\frac{2}{3}\nabla \cdot\textbf{u}\delta_{ij})$ and heat flux $\textbf{q}=\kappa \nabla T$
+In Nektar++ current compressible flow solver, DG spatial discretization is in default. Mutiply a test function $\phi$, integrate inside an element $K\in \Omega_{h}$ and sum over all the elements.
+\begin{equation}
+ \sum\limits_{K\in \Omega_{h}}{\int_{K}{\frac{\partial U}{\partial t}\phi dx}}+I_{inv}=I_{vis}
+\end{equation}
+where
+
+$$I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{F}_{i}(U)n_{i}\phi}}dS}
+\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{F_{i}(U)\frac{\partial \phi}{\partial x_{i}}dx}}}$$
+$$I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{G}_{i}(U,\nabla U)n_{i}\phi}}dS}
+\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{G_{i}(U,\nabla U)\frac{\partial \phi}{\partial x_{i}}dx}}}$$
+Various numerical advection flux $\widetilde{F}$ and diffsion flux $\widetilde{G}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver.
\clearpage
\section{Data Structure of CompressibleFlowSolver}
\begin{figure}

GitLab
From a5c0e7946792e8f929804128487132a978c6c4e5 Mon Sep 17 00:00:00 2001
From: Yu Pan
Date: Thu, 9 May 2019 09:36:15 +0100
Subject: [PATCH 3/7] Complete the theoretical part

.../solvers/compressiblesolver.tex  54 ++++++++++++++
1 file changed, 41 insertions(+), 13 deletions()
diff git a/docs/developerguide/solvers/compressiblesolver.tex b/docs/developerguide/solvers/compressiblesolver.tex
index 8833f8e90..5baed3021 100644
 a/docs/developerguide/solvers/compressiblesolver.tex
+++ b/docs/developerguide/solvers/compressiblesolver.tex
@@ 3,15 +3,15 @@
In this chapter, we walk the reader through our 2D and 3D compressible NavierStokes Solver (CompressibleFlowSolver).
\section{Fundamental Theories of CompressibleFlowSolver}
The governing equation systems includes continuity equation, momentum equations and energy equation. Write in conservative form
+The governing equation systems include continuity equation, momentum equations and energy equation. Write in conservative form
\begin{equation}\label{eq1}
\frac{\partial U}{\partial t}+
 \sum\limits_{i=1}^{d}{\frac{\partial F_{i}(U)}{\partial x_{i}}}
 =\sum\limits_{i=1}^{d}{\frac{\partial G_{i}(U,\nabla U)}{\partial x_{i}}}
+ \frac{\partial F_{i}(U)}{\partial x_{i}}
+ =\frac{\partial G_{i}(U,\nabla U)}{\partial x_{i}}
\end{equation}
where $U=[\rho,\rho u_{1} \hdots \rho u_{d},E]^{T}$
Invisid flux F in i direction is
+Inviscid flux F in i direction is
\begin{equation}
F_{i}=
\begin{bmatrix}
@@ 22,7 +22,7 @@ Invisid flux F in i direction is
u_{i}(E+P)
\end{bmatrix}
\end{equation}
where total energy $E=\rho (c_{v}T+\textbf{u}^{2}/2)$, pressure $P=(\gamma1)(E\rho \textbf{u}^{2}/2)$
+where total energy $E=\rho (c_{v}T+u_{i}u_{i}/2)$, pressure $P=(\gamma1)(E\rho u_{i}u_{i}/2)$
Viscous flux G in i direction is
\begin{equation}
@@ 37,21 +37,49 @@ Viscous flux G in i direction is
\end{equation}
where viscous term $\tau_{ij}=\mu(\frac{\partial u_{i}}{\partial x_{j}}+\frac{\partial u_{j}}{\partial x_{i}}\frac{2}{3}\nabla \cdot\textbf{u}\delta_{ij})$ and heat flux $\textbf{q}=\kappa \nabla T$
In Nektar++ current compressible flow solver, DG spatial discretization is in default. Mutiply a test function $\phi$, integrate inside an element $K\in \Omega_{h}$ and sum over all the elements.
+In Nektar++ current compressible flow solver, DG spatial discretization is in default. Multiply a test function $\phi$, integrate inside an element $K\in \Omega_{h}$ and sum over all the elements.
\begin{equation}
\sum\limits_{K\in \Omega_{h}}{\int_{K}{\frac{\partial U}{\partial t}\phi dx}}+I_{inv}=I_{vis}
\end{equation}
where
+\begin{equation}\label{eq2}
+I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{F}_{i}(U)n_{i}\phi}}dS}
+\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{F_{i}(U)\frac{\partial \phi}{\partial x_{i}}dx}}}
+\end{equation}
+\begin{equation}\label{eq3}
+I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{G}_{i}(U,\nabla U)n_{i}\phi}}dS}
+\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{G_{i}(U,\nabla U)\frac{\partial \phi}{\partial x_{i}}dx}}}
+\end{equation}
$$I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{F}_{i}(U)n_{i}\phi}}dS}
\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{F_{i}(U)\frac{\partial \phi}{\partial x_{i}}dx}}}$$
$$I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{G}_{i}(U,\nabla U)n_{i}\phi}}dS}
\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{G_{i}(U,\nabla U)\frac{\partial \phi}{\partial x_{i}}dx}}}$$

Various numerical advection flux $\widetilde{F}$ and diffsion flux $\widetilde{G}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver.
+Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separated and transfered through member variables as the sketch \ref{fig1} shows.
+Various advection flux $\widetilde{F}$ and diffsion flux $\widetilde{G}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver. And each specific flux such as LDG diffusion flux (LDGNS) inherits from its general flux type (Diffison).
+
+Usually inside one specific flux type, the codes includes the following simliar process to caculate the flux. Take AdvectionWeakDG.cpp as an example.
+\begin {table}[!h]
+\caption {AdvectionWeakDG.cpp} \label{table1}
+\begin{center}
+\scalebox{0.9}[1.]{
+\begin{tabular}{  c  c}
+ \hline
+Variable/Function name & Physical meaning \\
+ \hline
+ m$\_$fluxVector & Advection volume flux: $F_{i}$\\
+ \hline
+ numflux & Advection numerical flux at trace: $\widetilde{F}_{i}$\\
+ \hline
+ IProductWRTDerivBase & Volume flux integration: $\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{F_{i}(U)\frac{\partial \phi}{\partial x_{i}}dx}}}$ \\
+ \hline
+ AddTraceIntegral & Add Surface flux into volume flux interation: $+\sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{F}_{i}(U)n_{i}\phi}}dS}$\\
+ \hline
+ MultiplyByElmImvMass& Multiply the inverse of mass matrix to get the flux coefficients\\
+ \hline
+\end{tabular}
+}
+\end{center}
+\end{table}
\clearpage
\section{Data Structure of CompressibleFlowSolver}
 \begin{figure}
+ \begin{figure}\label{fig1}
\caption{CompressibleFlowSolver DataStructure}
\centering
\begin{turn}{90}

GitLab
From 715b1fc3ee37581e83e9c2dcde871379ab73262d Mon Sep 17 00:00:00 2001
From: Yu Pan
Date: Thu, 9 May 2019 09:53:59 +0100
Subject: [PATCH 4/7] do nothing

docs/developerguide/solvers/compressiblesolver.tex  2 +
1 file changed, 1 insertion(+), 1 deletion()
diff git a/docs/developerguide/solvers/compressiblesolver.tex b/docs/developerguide/solvers/compressiblesolver.tex
index 5baed3021..840a40bd5 100644
 a/docs/developerguide/solvers/compressiblesolver.tex
+++ b/docs/developerguide/solvers/compressiblesolver.tex
@@ 51,7 +51,7 @@ I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\
\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{G_{i}(U,\nabla U)\frac{\partial \phi}{\partial x_{i}}dx}}}
\end{equation}
Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separated and transfered through member variables as the sketch \ref{fig1} shows.
+Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separated and transferred through member variables as the sketch \ref{fig1} shows.
Various advection flux $\widetilde{F}$ and diffsion flux $\widetilde{G}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver. And each specific flux such as LDG diffusion flux (LDGNS) inherits from its general flux type (Diffison).
Usually inside one specific flux type, the codes includes the following simliar process to caculate the flux. Take AdvectionWeakDG.cpp as an example.

GitLab
From ed51fbd5aee62a28357159b0a1625bd1664afff4 Mon Sep 17 00:00:00 2001
From: Yu Pan
Date: Tue, 28 May 2019 20:50:46 +0100
Subject: [PATCH 5/7] Add advection and diffusion components separately

docs/developerguide/developersguide.tex  1 +
.../solvers/compressiblesolver.tex  200 ++++++++++++++
2 files changed, 165 insertions(+), 36 deletions()
diff git a/docs/developerguide/developersguide.tex b/docs/developerguide/developersguide.tex
index edf10b042..2804fde01 100644
 a/docs/developerguide/developersguide.tex
+++ b/docs/developerguide/developersguide.tex
@@ 38,6 +38,7 @@
[thick,>,>=stealth]
\tikzstyle{arrow1}=
[thick,dashed,>,>=stealth]
+\usepackage{float}
\usepackage{standalone}
\usepackage{rotating}
diff git a/docs/developerguide/solvers/compressiblesolver.tex b/docs/developerguide/solvers/compressiblesolver.tex
index 840a40bd5..58ea7f6a4 100644
 a/docs/developerguide/solvers/compressiblesolver.tex
+++ b/docs/developerguide/solvers/compressiblesolver.tex
@@ 3,59 +3,101 @@
In this chapter, we walk the reader through our 2D and 3D compressible NavierStokes Solver (CompressibleFlowSolver).
\section{Fundamental Theories of CompressibleFlowSolver}
The governing equation systems include continuity equation, momentum equations and energy equation. Write in conservative form
+The governing equation systems include continuity equation, momentum equations and energy equation. Written in conservative form
\begin{equation}\label{eq1}
 \frac{\partial U}{\partial t}+
 \frac{\partial F_{i}(U)}{\partial x_{i}}
 =\frac{\partial G_{i}(U,\nabla U)}{\partial x_{i}}
+ \frac{d \textbf{U}}{d t}+
+ \frac{\partial \textbf{F}_{i}(U)}{\partial x_{i}}+
+ \frac{\partial \textbf{G}_{i}(U,\nabla U)}{\partial x_{i}}
+ =0
\end{equation}
where $U=[\rho,\rho u_{1} \hdots \rho u_{d},E]^{T}$
+where $\textbf{U}=[\rho,\rho u_{1} \hdots \rho u_{d},E]^{T}$ is the vector of conservative variables.
Inviscid flux F in i direction is
+And the inviscid flux $\textbf{F}$ in i direction is
\begin{equation}
 F_{i}=
+ \textbf{F}_{i}=
\begin{bmatrix}
\rho u_{i}\\
 \rho u_{1}u_{i}+\delta_{1,i}P\\
+ \rho u_{1}u_{i}+\delta_{1,i}p\\
\vdots\\
 \rho u_{d}u_{i}+\delta_{d,i}P\\
 u_{i}(E+P)
+ \rho u_{d}u_{i}+\delta_{d,i}p\\
+ u_{i}(E+p)
\end{bmatrix}
\end{equation}
where total energy $E=\rho (c_{v}T+u_{i}u_{i}/2)$, pressure $P=(\gamma1)(E\rho u_{i}u_{i}/2)$
+where total energy $E=\rho (c_{v}T+u_{i}u_{i}/2)$, pressure $p=(\gamma1)(E\rho u_{i}u_{i}/2)$
Viscous flux G in i direction is
\begin{equation}
 G_{i}=
+The viscous flux $\textbf{G}$ in i direction is
+\begin{equation}\label{eq7}
+ \textbf{G}_{i}=
\begin{bmatrix}
0\\
 \tau_{i1}\\
+ \tau_{i1}\\
\vdots\\
 \tau_{id}\\
 \sum\limits_{j=1}^{d}{u_{j}\tau_{ij}}q_{i}
+ \tau_{id}\\
+ q_{i}\sum\limits_{j=1}^{d}{u_{j}\tau_{ij}}
\end{bmatrix}
\end{equation}
where viscous term $\tau_{ij}=\mu(\frac{\partial u_{i}}{\partial x_{j}}+\frac{\partial u_{j}}{\partial x_{i}}\frac{2}{3}\nabla \cdot\textbf{u}\delta_{ij})$ and heat flux $\textbf{q}=\kappa \nabla T$
+where the viscous tensor $\tau_{ij}=\mu(\frac{\partial u_{i}}{\partial x_{j}}+\frac{\partial u_{j}}{\partial x_{i}}\frac{2}{3}\frac{\partial u_{i}}{\partial x_{i}}\delta_{ij})$ and heat flux $q_{i}=\kappa \frac{\partial T_{i}}{\partial x_{i}}$
In Nektar++ current compressible flow solver, DG spatial discretization is in default. Multiply a test function $\phi$, integrate inside an element $K\in \Omega_{h}$ and sum over all the elements.
\begin{equation}
 \sum\limits_{K\in \Omega_{h}}{\int_{K}{\frac{\partial U}{\partial t}\phi dx}}+I_{inv}=I_{vis}
+In Nektar++, the compressible flow solver spatial discretization is dicountinous Galerkin. Multiply a test function $\phi$, integrate inside an element $K\in \Omega_{h}$, where $\Omega_{h}$ is the total computational region.
+\begin{equation}\label{eq4}
+\int_{K}{\frac{d \textbf{U}^{e}}{d t}\phi^{e} dx}+I_{inv}+I_{vis}=0
\end{equation}
where
\begin{equation}\label{eq2}
I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{F}_{i}(U)n_{i}\phi}}dS}
\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{F_{i}(U)\frac{\partial \phi}{\partial x_{i}}dx}}}
+I_{inv}={\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{\textbf{F}^{e}_{i}}(U)n_{i}\phi^{e}}}dS}
+{\int_{K}{\sum\limits_{i=1}^{d}{\textbf{F}^{e}_{i}(U)\frac{\partial \phi^{e}}{\partial x_{i}}dx}}}
\end{equation}
\begin{equation}\label{eq3}
I_{inv}= \sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{G}_{i}(U,\nabla U)n_{i}\phi}}dS}
\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{G_{i}(U,\nabla U)\frac{\partial \phi}{\partial x_{i}}dx}}}
+I_{inv}= {\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{\textbf{G}}^{e}_{i}(U,\nabla U)n_{i}\phi^{e}}}dS}
+{\int_{K}{\sum\limits_{i=1}^{d}{\textbf{G}^{e}_{i}(U,\nabla U)\frac{\partial \phi^{e}}{\partial x_{i}}dx}}}
+\end{equation}
+
+Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separated and transferred through member variables as the sketch \ref{fig5} shows.
+Various advection flux $\widetilde{\textbf{F}}$ and diffusion flux $\widetilde{\textbf{G}}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver. And each specific type of flux such as LDG (DiffusionLDGNS) and Interior Penalty (DiffusionIP) diffusion fluxes inherit from their general flux class (Diffison).
+
+\paragraph{Advection flux $\textbf{F}_{i}$: }
+the following section takes AdvectionWeakDG.cpp as an example to demonstrate how the advection flux is calculated in Nektar++. Some matrices index can refer to Spencer's Book.
+
+Separate the advection flux from Equation \eqref{eq4}.
+\begin{equation}
+\int_{K}{\frac{d \textbf{U}^{e}}{d t}\phi^{e} dx}+I_{inv}=0
\end{equation}
Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separated and transferred through member variables as the sketch \ref{fig1} shows.
Various advection flux $\widetilde{F}$ and diffsion flux $\widetilde{G}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver. And each specific flux such as LDG diffusion flux (LDGNS) inherits from its general flux type (Diffison).
+Transform Equation \eqref{eq4} into elemental matrix form.
+\begin{equation}\label{eq5}
+(B^{e})^{T}W^{e}B^{e}\frac{d \hat{\textbf{U}}^{e}}{d t}
+\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{F}}^{e}_{i}+
+b^{e}
+=0
+\end{equation}
+where the integration of elemental trace flux $b^{e}=\sum\limits_{i=1}^{d}{B^{e}\widetilde{\textbf{F}}^{e}_{i}}n_{i}$ and the Riemann flux $\widetilde{\textbf{F}_{i}}=\textbf{F}^{e}_{i}(U^{+},U^{})$.
Usually inside one specific flux type, the codes includes the following simliar process to caculate the flux. Take AdvectionWeakDG.cpp as an example.
\begin {table}[!h]
+Other matrix index explanations are as Table \ref{table2}
+\begin {table}[H]
+\caption {Item explanation} \label{table2}
+\begin{center}
+\scalebox{0.9}[1.]{
+\begin{tabular}{ cccc}
+ \hline
+$\phi^{e}$ & $\frac{\partial \phi^{e}}{\partial x_{i}}$& Diagonal weight matrix & Coefficients\\
+ \hline
+$B^{e}$ &$(D^{e}_{x_{i}} B^{e})^{T}$& $W^{e}$& $\hat{U}^{e}$,$\hat{F}^{e}$\\
+ \hline
+\end{tabular}
+}
+\end{center}
+\end{table}
+
+Write Equation \eqref{eq5} in semidiscretization form.
+\begin{equation}\label{eq8}
+\frac{d \hat{\textbf{U}}^{e}}{d t}
+=((B^{e})^{T}W^{e}B^{e})^{1}(\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{F}}^{e}_{i}
+b^{e})
+\end{equation}
+where mass matrix $M=(B^{e})^{T}W^{e}B^{e}$.
+
+Table \ref{table1} lists some functions that the process to calculate the advection term.
+\begin {table}[h]
\caption {AdvectionWeakDG.cpp} \label{table1}
\begin{center}
\scalebox{0.9}[1.]{
@@ 63,24 +105,110 @@ Usually inside one specific flux type, the codes includes the following simliar
\hline
Variable/Function name & Physical meaning \\
\hline
 m$\_$fluxVector & Advection volume flux: $F_{i}$\\
+ fluxVector & Advection volume flux: $\textbf{F}_{i}$\\
\hline
 numflux & Advection numerical flux at trace: $\widetilde{F}_{i}$\\
+ numflux & Advection (Riemann) numerical flux at trace: $\widetilde{\textbf{F}}_{i}$\\
\hline
 IProductWRTDerivBase & Volume flux integration: $\sum\limits_{K\in \Omega_{h}}{\int_{K}{\sum\limits_{i=1}^{d}{F_{i}(U)\frac{\partial \phi}{\partial x_{i}}dx}}}$ \\
+ IProductWRTDerivBase & Volume flux integration: $\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{F}}^{e}_{i}$ \\
\hline
 AddTraceIntegral & Add Surface flux into volume flux interation: $+\sum\limits_{K\in \Omega_{h}}{\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{F}_{i}(U)n_{i}\phi}}dS}$\\
+ AddTraceIntegral & Add surface flux into volume flux interation: $+b^{e}$\\
\hline
 MultiplyByElmImvMass& Multiply the inverse of mass matrix to get the flux coefficients\\
+ MultiplyByElmImvMass& Multiply the inverse of mass matrix to get the flux coefficients $ \hat{\textbf{U}}$\\
\hline
\end{tabular}
}
\end{center}
\end{table}
+
+
+\paragraph{Diffusion flux $\textbf{G}_{i}$:}
+the following section takes DiffusionLDGNS.cpp as an example to demonstrate how the diffusion flux is calculated in Nektar++. Some matrices index can refer to Spencer's Book.
+
+Separate the diffusion flux from Equation \eqref{eq4}.
+\begin{equation}\label{eq6}
+\int_{K}{\frac{d \textbf{U}^{e}}{d t}\phi^{e} dx}+I_{vis}=0
+\end{equation}
+
+As the resolution of diffusion flux is to solve a $2^{nd}$ PDE, it needs two equations to resolve the problem. LDG separates the resolution into two $1^{st}$ PDE.
+
+From Equation \eqref{eq7}, diffusion derivatives $\textbf{G}_{i}(P,\nabla P)$ so that can reduce the computation using primitive variables $\textbf{P}=[u_{1}\hdots u_{d}, T]^{T}$. Therefore, the first step is to calculate the derivatives $\textbf{Q}_{i}=\frac{\partial P}{\partial x_{i}}$
+\begin{equation}\label{eq9}
+\int_{K}{\textbf{Q}^{e}_{i} \phi^{e} dx}=
+{\int_{\partial K}{\widetilde{\textbf{P}}^{e}n_{i}\phi^{e}}dS}
+\int_{K}{\textbf{P}^{e}\frac{\partial \phi^{e}}{\partial x_{i}}dx}
+\end{equation}
+
+Transform Equation \eqref{eq9} into elemental matrix form.
+\begin{equation}\label{eq10}
+\hat{\textbf{Q}}_{i}^{e}
+=((B^{e})^{T}W^{e}B^{e})^{1}
+((D^{e}_{x_{i}} B^{e})^{T}W^{e}B^{e}\hat{\textbf{P}}^{e}
+b_{1,i}^{e})
+\end{equation}
+where the integration of elemental trace flux $b_{1,i}^{e}=B^{e}\widetilde{\textbf{P}}n_{i}$ and the LDG numerical flux $\widetilde{\textbf{P}}=\textbf{P}^{e}_{i}(P^{+},P^{})$.
+
+When we get the diffusion derivatives $\textbf{G}_{i}$, the process to solve Equation \eqref{eq6} is simliar to the process of Equation \eqref{eq8}.
+\begin{equation}\label{eq8}
+\frac{d \hat{\textbf{U}}^{e}}{d t}
+=((B^{e})^{T}W^{e}B^{e})^{1}(\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{G}}^{e}_{i}
+b_{2}^{e})
+\end{equation}
+where the integration of elemental trace flux $b_{2}^{e}=\sum\limits_{i=1}^{d}{B^{e}\widetilde{\textbf{G}}^{e}_{i}}n_{i}$ and the LDG numerical flux $\widetilde{\textbf{G}_{i}}=\textbf{G}^{e}_{i}(G^{+},G^{})$.
+
+LDG numerical fluxes in Equations \eqref{eq9} and \eqref{eq8} satisfy
+
+\begin{equation}
+\tilde{\textbf{P}}=\{\textbf{P}\}+C_{12}\cdot [\textbf{P} n]
+\end{equation}
+where $C_{12}$ is a coefficient vector.
+\begin{equation}
+\tilde{\textbf{G}}=\{\textbf{G}\}C_{12} [\textbf{G} \cdot n]
+\end{equation}
+where $[\quad]$ represents the jump while $\{\quad \}$ represents the average. In Nektar++, $C_{12}=\frac{1}{2}$, $C_{11}$=0
+
+
+Table \ref{table3} lists some functions that the process to calculate the diffusion term.
+\begin {table}[h]
+\caption {DiffusionLDGNS.cpp} \label{table3}
+\begin{center}
+\scalebox{0.9}[1.]{
+\begin{tabular}{  c  c}
+\hline
+Variable/Function name & Physical meaning \\
+\hline
+v\_NumericalFluxO1 & Numerical flux $\tilde{\textbf{P}}^{e}$ \\
+\hline
+First IProductWRTDerivBase & Volume flux $(D^{e}_{x_{i}} B^{e})^{T}W^{e}B^{e}\hat{\textbf{P}}^{e}$\\
+\hline
+First AddTraceIntegral& Add numerical flux integration $+b_{1,i}^{e}$ \\
+\hline
+MultiplyByElmImvMass& Multiply the inverse of mass matrix to get the flux coefficients $\textbf{Q}$\\
+\hline
+m\_fluxVectorNS & Diffusion volume flux: $\textbf{G}_{i}$\\
+\hline
+v\_NumericalFluxO2 & Diffusion numerical flux : $\widetilde{\textbf{G}}_{i}$\\
+ \hline
+Second IProductWRTDerivBase & Volume flux integration: $\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{G}}^{e}_{i}$ \\
+ \hline
+Second AddTraceIntegral & Add surface flux into volume flux interation: $+b_{2}^{e}$\\
+\hline
+MultiplyByElmImvMass& Multiply the inverse of mass matrix to get the flux coefficients $\hat{\textbf{U}}$\\
+\hline
+\end{tabular}
+}
+\end{center}
+\end{table}
+
+
+
+
+
+
+
\clearpage
\section{Data Structure of CompressibleFlowSolver}
 \begin{figure}\label{fig1}
 \caption{CompressibleFlowSolver DataStructure}
+ \begin{figure}
+ \caption{CompressibleFlowSolver DataStructure}\label{fig5}
\centering
\begin{turn}{90}
\includestandalone[width=1.2\linewidth]{DataStructure}

GitLab
From 6eff57e4f44c73d5e394ed46271fdd37f11c4afd Mon Sep 17 00:00:00 2001
From: Yu Pan
Date: Wed, 5 Jun 2019 14:54:20 +0100
Subject: [PATCH 6/7] Modify according to Spencer's suggestions

.../developerguide/solvers/DataStructure.tex  2 +
.../solvers/compressiblesolver.tex  58 +++++++++
2 files changed, 30 insertions(+), 30 deletions()
diff git a/docs/developerguide/solvers/DataStructure.tex b/docs/developerguide/solvers/DataStructure.tex
index 8dc4faf41..640268d1f 100644
 a/docs/developerguide/solvers/DataStructure.tex
+++ b/docs/developerguide/solvers/DataStructure.tex
@@ 304,7 +304,7 @@ yshift=1cm]
\draw [arrow](55,20)(60,20);
\node (text15) at (48,20) {Inheritance};
\draw [arrow1](55,25)(60,25);
\node (text14) at (48,25) {Date
+\node (text14) at (48,25) {Data
Footprint };
\node (text13) at (48,30) {Factory};
\node (text14)
diff git a/docs/developerguide/solvers/compressiblesolver.tex b/docs/developerguide/solvers/compressiblesolver.tex
index 58ea7f6a4..1bad508f9 100644
 a/docs/developerguide/solvers/compressiblesolver.tex
+++ b/docs/developerguide/solvers/compressiblesolver.tex
@@ 2,7 +2,7 @@
\chapter{CompressibleFlowSolver: Solving the Compressible NavierStokes Equations}
In this chapter, we walk the reader through our 2D and 3D compressible NavierStokes Solver (CompressibleFlowSolver).
\section{Fundamental Theories of CompressibleFlowSolver}
+\section{Fundamental Theory of CompressibleFlowSolver}
The governing equation systems include continuity equation, momentum equations and energy equation. Written in conservative form
\begin{equation}\label{eq1}
\frac{d \textbf{U}}{d t}+
@@ 12,7 +12,7 @@ The governing equation systems include continuity equation, momentum equations a
\end{equation}
where $\textbf{U}=[\rho,\rho u_{1} \hdots \rho u_{d},E]^{T}$ is the vector of conservative variables.
And the inviscid flux $\textbf{F}$ in i direction is
+And the inviscid flux $\textbf{F}$ in $i^{th}$ direction is
\begin{equation}
\textbf{F}_{i}=
\begin{bmatrix}
@@ 25,7 +25,7 @@ And the inviscid flux $\textbf{F}$ in i direction is
\end{equation}
where total energy $E=\rho (c_{v}T+u_{i}u_{i}/2)$, pressure $p=(\gamma1)(E\rho u_{i}u_{i}/2)$
The viscous flux $\textbf{G}$ in i direction is
+The viscous flux $\textbf{G}$ in $i^{th}$ direction is
\begin{equation}\label{eq7}
\textbf{G}_{i}=
\begin{bmatrix}
@@ 38,7 +38,7 @@ The viscous flux $\textbf{G}$ in i direction is
\end{equation}
where the viscous tensor $\tau_{ij}=\mu(\frac{\partial u_{i}}{\partial x_{j}}+\frac{\partial u_{j}}{\partial x_{i}}\frac{2}{3}\frac{\partial u_{i}}{\partial x_{i}}\delta_{ij})$ and heat flux $q_{i}=\kappa \frac{\partial T_{i}}{\partial x_{i}}$
In Nektar++, the compressible flow solver spatial discretization is dicountinous Galerkin. Multiply a test function $\phi$, integrate inside an element $K\in \Omega_{h}$, where $\Omega_{h}$ is the total computational region.
+In Nektar++, after the spatial discretization utilizing dicountinous Galerkin formulation, multiplied by a test function $\phi$, integrating inside an element $K\in \Omega_{h}$, we obtain Eequation \eqref{eq4}, , where $\Omega_{h}$ is the total computational region
\begin{equation}\label{eq4}
\int_{K}{\frac{d \textbf{U}^{e}}{d t}\phi^{e} dx}+I_{inv}+I_{vis}=0
\end{equation}
@@ 48,31 +48,31 @@ I_{inv}={\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{\textbf{F}^{e}_{i}}(
{\int_{K}{\sum\limits_{i=1}^{d}{\textbf{F}^{e}_{i}(U)\frac{\partial \phi^{e}}{\partial x_{i}}dx}}}
\end{equation}
\begin{equation}\label{eq3}
I_{inv}= {\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{\textbf{G}}^{e}_{i}(U,\nabla U)n_{i}\phi^{e}}}dS}
+I_{vis}= {\int_{\partial K}{\sum\limits_{i=1}^{d}{\widetilde{\textbf{G}}^{e}_{i}(U,\nabla U)n_{i}\phi^{e}}}dS}
{\int_{K}{\sum\limits_{i=1}^{d}{\textbf{G}^{e}_{i}(U,\nabla U)\frac{\partial \phi^{e}}{\partial x_{i}}dx}}}
\end{equation}
+are the inviscid and viscus fluxes.
Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separated and transferred through member variables as the sketch \ref{fig5} shows.
+Different flux terms such as Equation \eqref{eq2} and \eqref{eq3} and source terms are treated separately and transferred through member variables as shown in the diagram \ref{fig5}.
Various advection flux $\widetilde{\textbf{F}}$ and diffusion flux $\widetilde{\textbf{G}}(U,\nabla U)$ are supported in Nektar++ CompressibleSolver. And each specific type of flux such as LDG (DiffusionLDGNS) and Interior Penalty (DiffusionIP) diffusion fluxes inherit from their general flux class (Diffison).
\paragraph{Advection flux $\textbf{F}_{i}$: }
the following section takes AdvectionWeakDG.cpp as an example to demonstrate how the advection flux is calculated in Nektar++. Some matrices index can refer to Spencer's Book.
+\section{Advection flux $\textbf{F}_{i}$}
+The following section takes AdvectionWeakDG.cpp as an example to demonstrate how the advection flux is calculated in Nektar++, where we have adopted the matrix notation from \cite{KaSh05} and a summary is proivded in Table \ref{table2}
Separate the advection flux from Equation \eqref{eq4}.
\begin{equation}
\int_{K}{\frac{d \textbf{U}^{e}}{d t}\phi^{e} dx}+I_{inv}=0
\end{equation}
Transform Equation \eqref{eq4} into elemental matrix form.
+Transforming Equation \eqref{eq4} into elemental matrix form, we obtain
\begin{equation}\label{eq5}
(B^{e})^{T}W^{e}B^{e}\frac{d \hat{\textbf{U}}^{e}}{d t}
\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{F}}^{e}_{i}+
b^{e}
=0
\end{equation}
where the integration of elemental trace flux $b^{e}=\sum\limits_{i=1}^{d}{B^{e}\widetilde{\textbf{F}}^{e}_{i}}n_{i}$ and the Riemann flux $\widetilde{\textbf{F}_{i}}=\textbf{F}^{e}_{i}(U^{+},U^{})$.
+where the integration of elemental trace flux is denoted by $b^{e}=\sum\limits_{i=1}^{d}{B^{e}\widetilde{\textbf{F}}^{e}_{i}}n_{i}$ and this further involves the evaluation of a Riemann flux $\widetilde{\textbf{F}_{i}}=\textbf{F}^{e}_{i}(U^{+},U^{})$.
Other matrix index explanations are as Table \ref{table2}
\begin {table}[H]
\caption {Item explanation} \label{table2}
\begin{center}
@@ 88,17 +88,17 @@ $B^{e}$ &$(D^{e}_{x_{i}} B^{e})^{T}$& $W^{e}$& $\hat{U}^{e}$,$\hat{F}^{e}$\\
\end{center}
\end{table}
Write Equation \eqref{eq5} in semidiscretization form.
+Now writing Equation \eqref{eq5} in semidiscretization form.
\begin{equation}\label{eq8}
\frac{d \hat{\textbf{U}}^{e}}{d t}
=((B^{e})^{T}W^{e}B^{e})^{1}(\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{F}}^{e}_{i}
b^{e})
\end{equation}
where mass matrix $M=(B^{e})^{T}W^{e}B^{e}$.
+where we note the elemental mass matrix is mass matrix $M^{e}=(B^{e})^{T}W^{e}B^{e}$.
Table \ref{table1} lists some functions that the process to calculate the advection term.
+Table \ref{table1} lists some functions that the process to calculate the advection term and provides a mapping from the Nektar++ compressible flow solver name to the mathematical operations they perofrm
\begin {table}[h]
\caption {AdvectionWeakDG.cpp} \label{table1}
+\caption {Table of variable and function mapping used in the compressible flow solver to their mathemaitcal operations} \label{table1}
\begin{center}
\scalebox{0.9}[1.]{
\begin{tabular}{  c  c}
@@ 121,8 +121,8 @@ Variable/Function name & Physical meaning \\
\end{table}
\paragraph{Diffusion flux $\textbf{G}_{i}$:}
the following section takes DiffusionLDGNS.cpp as an example to demonstrate how the diffusion flux is calculated in Nektar++. Some matrices index can refer to Spencer's Book.
+\section{Diffusion flux $\textbf{G}_{i}$}
+The following section takes DiffusionLDGNS.cpp as an example to demonstrate how the diffusion flux is calculated in Nektar++.
Separate the diffusion flux from Equation \eqref{eq4}.
\begin{equation}\label{eq6}
@@ 131,21 +131,21 @@ Separate the diffusion flux from Equation \eqref{eq4}.
As the resolution of diffusion flux is to solve a $2^{nd}$ PDE, it needs two equations to resolve the problem. LDG separates the resolution into two $1^{st}$ PDE.
From Equation \eqref{eq7}, diffusion derivatives $\textbf{G}_{i}(P,\nabla P)$ so that can reduce the computation using primitive variables $\textbf{P}=[u_{1}\hdots u_{d}, T]^{T}$. Therefore, the first step is to calculate the derivatives $\textbf{Q}_{i}=\frac{\partial P}{\partial x_{i}}$
+From Equation \eqref{eq7}, diffusion derivatives $\textbf{G}_{i}(V,\nabla V)$ so that can reduce the computation using primitive variables $\textbf{V}=[u_{1}\hdots u_{d}, T]^{T}$. Therefore, the first step is to calculate the derivatives $\textbf{Q}_{i}=\frac{\partial V}{\partial x_{i}}$
\begin{equation}\label{eq9}
\int_{K}{\textbf{Q}^{e}_{i} \phi^{e} dx}=
{\int_{\partial K}{\widetilde{\textbf{P}}^{e}n_{i}\phi^{e}}dS}
\int_{K}{\textbf{P}^{e}\frac{\partial \phi^{e}}{\partial x_{i}}dx}
+{\int_{\partial K}{\widetilde{\textbf{V}}^{e}n_{i}\phi^{e}}dS}
+\int_{K}{\textbf{V}^{e}\frac{\partial \phi^{e}}{\partial x_{i}}dx}
\end{equation}
Transform Equation \eqref{eq9} into elemental matrix form.
+Transforming Equation \eqref{eq9} into elemental matrix form, we obtain
\begin{equation}\label{eq10}
\hat{\textbf{Q}}_{i}^{e}
=((B^{e})^{T}W^{e}B^{e})^{1}
((D^{e}_{x_{i}} B^{e})^{T}W^{e}B^{e}\hat{\textbf{P}}^{e}
+((D^{e}_{x_{i}} B^{e})^{T}W^{e}B^{e}\hat{\textbf{V}}^{e}
b_{1,i}^{e})
\end{equation}
where the integration of elemental trace flux $b_{1,i}^{e}=B^{e}\widetilde{\textbf{P}}n_{i}$ and the LDG numerical flux $\widetilde{\textbf{P}}=\textbf{P}^{e}_{i}(P^{+},P^{})$.
+where the integration of elemental trace flux is denoted by $b_{1,i}^{e}=B^{e}\widetilde{\textbf{V}}n_{i}$, which involves the LDG numerical flux $\widetilde{\textbf{V}}=\textbf{V}^{e}_{i}(P^{+},P^{})$.
When we get the diffusion derivatives $\textbf{G}_{i}$, the process to solve Equation \eqref{eq6} is simliar to the process of Equation \eqref{eq8}.
\begin{equation}\label{eq8}
@@ 153,12 +153,12 @@ When we get the diffusion derivatives $\textbf{G}_{i}$, the process to solve Equ
=((B^{e})^{T}W^{e}B^{e})^{1}(\sum\limits_{i=1}^{d}{(D^{e}_{x_{i}} B^{e})^{T}}W^{e}B^{e}\hat{\textbf{G}}^{e}_{i}
b_{2}^{e})
\end{equation}
where the integration of elemental trace flux $b_{2}^{e}=\sum\limits_{i=1}^{d}{B^{e}\widetilde{\textbf{G}}^{e}_{i}}n_{i}$ and the LDG numerical flux $\widetilde{\textbf{G}_{i}}=\textbf{G}^{e}_{i}(G^{+},G^{})$.
+where the integration of elemental trace flux is denoted by $b_{2}^{e}=\sum\limits_{i=1}^{d}{B^{e}\widetilde{\textbf{G}}^{e}_{i}}n_{i}$, which involves the LDG numerical flux $\widetilde{\textbf{G}_{i}}=\textbf{G}^{e}_{i}(G^{+},G^{})$.
LDG numerical fluxes in Equations \eqref{eq9} and \eqref{eq8} satisfy
\begin{equation}
\tilde{\textbf{P}}=\{\textbf{P}\}+C_{12}\cdot [\textbf{P} n]
+\tilde{\textbf{V}}=\{\textbf{V}\}+C_{12}\cdot [\textbf{V} n]
\end{equation}
where $C_{12}$ is a coefficient vector.
\begin{equation}
@@ 167,18 +167,18 @@ where $C_{12}$ is a coefficient vector.
where $[\quad]$ represents the jump while $\{\quad \}$ represents the average. In Nektar++, $C_{12}=\frac{1}{2}$, $C_{11}$=0
Table \ref{table3} lists some functions that the process to calculate the diffusion term.
+Table \ref{table3} lists some functions that the process to calculate the diffusion term and provides a mapping from the Nektar++ compressible flow solver name to the mathematical operations they perofrm.
\begin {table}[h]
\caption {DiffusionLDGNS.cpp} \label{table3}
+\caption {Table of variable and function mapping used in the compressible flow solver to their mathemaitcal operations} \label{table3}
\begin{center}
\scalebox{0.9}[1.]{
\begin{tabular}{  c  c}
\hline
Variable/Function name & Physical meaning \\
\hline
v\_NumericalFluxO1 & Numerical flux $\tilde{\textbf{P}}^{e}$ \\
+v\_NumericalFluxO1 & Numerical flux $\tilde{\textbf{V}}^{e}$ \\
\hline
First IProductWRTDerivBase & Volume flux $(D^{e}_{x_{i}} B^{e})^{T}W^{e}B^{e}\hat{\textbf{P}}^{e}$\\
+First IProductWRTDerivBase & Volume flux $(D^{e}_{x_{i}} B^{e})^{T}W^{e}B^{e}\hat{\textbf{V}}^{e}$\\
\hline
First AddTraceIntegral& Add numerical flux integration $+b_{1,i}^{e}$ \\
\hline

GitLab
From 96f6d3b0463625d2e370cddf497ac5f841ef223c Mon Sep 17 00:00:00 2001
From: Mike Kirby
Date: Tue, 14 Jul 2020 12:34:14 0600
Subject: [PATCH 7/7] Mike updates (corrections from Songzhe)

.../StdRegions/stdregdatastructures.tex  272 ++++++++++++++
1 file changed, 223 insertions(+), 49 deletions()
diff git a/docs/developerguide/library/StdRegions/stdregdatastructures.tex b/docs/developerguide/library/StdRegions/stdregdatastructures.tex
index f5b8ac647..db5022890 100644
 a/docs/developerguide/library/StdRegions/stdregdatastructures.tex
+++ b/docs/developerguide/library/StdRegions/stdregdatastructures.tex
@@ 153,76 +153,93 @@ There are public methods but no public data members within StdExpansion\$D.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{General Layout of the Basis Functions in Memory}
\subsection{General Layout}
+In this section, we explain how the modes for the modified $C^0$ basis are ordered in (linear) memory for each of the element types.
+Basis functions are stored in a 1D array indexed by both mode and quadrature point. The fastest index runs over quadrature points while the
+slower index runs over modes.
+This was done to match the memory access pattern of the inner product which is the most frequently computed kernel for most solvers.
+To begin, we remind the reader of the definitions of the 1D basis functions.
Basis functions are stored in a 1D array indexed by both mode and quadrature point. The fast index runs over quadrature points while the slow index runs over modes. This was done to match the memory access pattern of the inner product, which is the most frequently computed kernel for most solvers.
+Bases are built from the tensor products of three basic 1D bases:\\
Bases are built from the tensor product of three different equation types (subsequently called Type 1, Type II and Type III respectively):
+\noindent {Type I Basis:}
\begin{equation}
\phi_{p}(z) =
\begin{cases}
 \frac{1  z}{2} & p = 0 \\
 \frac{1 + z}{2} & p = 1 \\
 \left(\frac{1z}{2}\right)\left(\frac{1+z}{2}\right) P_{p1}^{1,1}(z) & 2 \leq p < P
+ 1 & \text{if p = 0, q = 0 for triangle} \\
+ 1 & \text{else if p = 0, q = 1, or p = 0, q = 0, r = 1 for tet} \\
+ \frac{1  z}{2} & \text{else if p = 0} \\
+ \frac{1 + z}{2} & \text{else if p = 1} \\
+ \left(\frac{1z}{2}\right)\left(\frac{1+z}{2}\right) P_{p2}^{1,1}(z) & \text{otherwise}
\end{cases}
\end{equation}
+\noindent {Type II Basis:}
+
\[
\phi_{pq}(z) = \left\{
 \begin{array}{lll}
 \phi_{q}(z) & p = 0 & 0 \leq q < P \\
 \left(\frac{1  z}{2}\right)^p & 1 \leq p < P & q = 0 \\
 \left(\frac{1z}{2}\right)^p \left(\frac{1+z}{2}\right) P_{q1}^{2p1,1}(z) & 1 \leq p < P, & 1 \leq q < P  p
+ \begin{array}{ll}
+ 1 & \text{if p = 0, q = 0, r = 1 for tet}\\
+ \phi_{q}(z) & \text{else if p = 0} \\
+ \left(\frac{1  z}{2}\right)^p & \text{else if p $\geq 1$, q = 0} \\
+ \left(\frac{1z}{2}\right)^p \left(\frac{1+z}{2}\right) P_{q1}^{2p1,1}(z) & \text{otherwise}
\end{array}\right.
\]
+\noindent {Type III Basis:}
+
\[
\phi_{pqr}(z) = \left\{
 \begin{array}{llll}
 \phi_{qr} & p = 0 & 0 \leq q < P & 0 < r < P  q \\
 \left(\frac{1z}{2}\right)^{p+q} & 1 \leq p < P, & 0 \leq q < P  p, & r = 0 \\
 \left(\frac{1  z}{2}\right)^{p + q} \left(\frac{1+z}{2}\right) P_{r1}^{2p + 2q  1, r}(z) & 1 \leq p < P, & 0 \leq q < P  p, & 1 \leq r < P  p  qr.
+ \begin{array}{ll}
+ \phi_{qr}(z) & \text{if p = 0} \\
+ \phi_{pr}(z) & \text{else if q = 0} \\
+ \left(\frac{1z}{2}\right)^{p+q} & \text{else if 1 $\leq$ p, 1 $\leq q$ , r = 0} \\
+ \left(\frac{1  z}{2}\right)^{p + q} \left(\frac{1+z}{2}\right) P_{r1}^{2p + 2q  1, 1}(z) & \text{otherwise}
\end{array}\right.
\]
Here, $P$ is the polynomial order of the basis and $P^{\alpha,\beta}_{p}$ are the $p^{\text{th}}$ order jacobi polynomial.
+Here, $P$ is the polynomial order of the basis and $P^{\alpha,\beta}_{p}$ are the $p^{\text{th}}$ order Jacobi polynomial.
\subsubsection{A Note Concerning Adjustments For $C_0$ Continuity}

Before going further it is worth reviewing the spatial shape of each node. The term $\frac{1 + z}{2}$ is an increasing function which is equal to zero at $z = 1$ and equal to one at $z = 1$. Similarly, $\frac{1  z}{2}$ is a decreasing function which is equal to one at $z = 1$ and equal to zero at $z = +1$. These two functions are thus nonzero at one of each of the boundaries. If we need to maintain $C_0$ continuity with surrounding elements (as we do in the continuous Galerkin method), then these local modes must be assembled together with the correct local modes in adjacent elements to create a continuous, global mode. For instance $\frac{1 + z}{2}$ in the left element would be continuous with $\frac{1  z}{2}$ in the right element. The union of these two modes under assembly form a single ``hat'' function. By contrast, functions of the form
+Before going further, it is worth reviewing the spatial shape of each mode. The term $\frac{1 + z}{2}$ is an
+increasing function which is equal to zero at $z = 1$ and equal to one at $z = +1$. Similarly, $\frac{1  z}{2}$ is a decreasing function which is equal to one at $z = 1$ and equal to zero at $z = +1$. These two functions are thus nonzero at one of each of the boundaries.
+If we need to maintain $C_0$ continuity with surrounding elements (as we do in the continuous Galerkin (cG) method),
+then these local modes must be assembled together with the correct local modes in adjacent elements to create a continuous, global mode.
+For instance $\frac{1 + z}{2}$ in the left element would be continuous with $\frac{1  z}{2}$ in the right element. The combination of these
+two modes under assembly form a single ``hat'' function. By contrast, functions of the form
\[
 \frac{1  z}{2} \frac{1 + z}{2}
+ \left(\frac{1  z}{2} \right)\left(\frac{1 + z}{2}\right)
\]
are zero at both end points $z = \pm 1$. As a result, they are trivially continuous with any other function which is also equal to zero on the boundary. These ``bubble'' functions may be treated entirely locally and thus are used to construct the interior modes of a basis. Only bases with $p > 1$ have interior modes.
All of this holds separately in one dimension. Higher dimensional bases are constructed via the tensor product of 1D basis functions. As a result, we end up with a greater number of possibilities in terms of continuity. When the tensor product is taken between two bubble functions from different bases, the result is still a bubble function. When the tensor product is taken between a hat function and a bubble function we get ``edge'' modes (in both 2D and 3D). These are nonzero along one edge of the standard domain and zero along the remaining edges. When the tensor product is taken between two hat functions, they form a ``vertex'' mode which reaches its maximum at one of the vertices and is nonzero on two edges. The 3D bases are constructed similarly.

Based upon this convention, the 1D basis set consists of vertex modes and bubble modes. The 2D basis function set consists of vertex modes, edge modes and bubble modes. The 3D basis set contains vertex modes, edge modes, face modes and bubble modes.
+All of this holds separately in one dimension. Higher dimensional bases are constructed via the tensor product of 1D basis functions. As a result, we end up with a greater number of possibilities in terms of continuity. When the tensor product is taken between two bubble functions from different bases, the result is still a bubble function. When the tensor product is taken between a hat function and a bubble function we get ``edge'' modes. These are nonzero along one edge of the standard domain and zero along the remaining edges. When the tensor product is taken between two hat functions, they form a ``vertex'' mode which reaches its maximum at one of the vertices and is nonzero on two edges. The 3D bases are constructed in a similar manner.
\subsection{2D Geometries}
\subsubsection{Quadrilateral Element Memory Organization}
Quads have the simplest memory organization. The quadrilateral basis is composed of the tensor product of two Type I functions $\phi_p(\xi_{0,i}) \phi_q(\xi_{1,j})$. This function would then be indexed as
+We now present the memory layout for both triangle and quadrilateral elements.
+
+\subsubsection{Quadrilateral Memory Organization}
+Quads have the simplest memory organization. The Quadrilateral basis is composed of the tensor product of two Type I functions $\phi_p(\xi_{0,i}) \phi_q(\xi_{1,j})$. This function would then be indexed as
\begin{lstlisting}
basis0[p*nq0 + i] * basis1[q*nq1 + j]
\end{lstlisting}
where nq is the number of quadrature points for the $b^{\text{th}}$ basis. Unlike certain mode orderings (e.g. Karniadakis and Sherwin \cite{KaSh05}), the two hat functions are accessed as the first and second modes in memory with interior modes placed afterward. Thus,
+where nq$<$b$>$ is the number of quadrature points for the $b^{\text{th}}$ basis. Unlike certain mode orderings (e.g. Karniadakis and Sherwin), the two hat functions are accessed as the first and second modes in memory with interior modes placed afterward. Thus,
\begin{lstlisting}
basis[i], basis[nq + i]
\end{lstlisting}
correspond to $\frac{1  z}{2}$ and $\frac{1 + z}{2}$ respectively.
\subsubsection{Triangle Element Memory Organization}
Due to the use of collapsed coordinates, triangular element bases are formed via the tensor product of one basis function of Type I, and one of Type II, i.e. $\phi_p(\eta_{0,i} \phi_pq(\eta{1,j}))$. Since $\phi_p$ is also a Type I function, its memory ordering is identical to that used for quads. The second function is complicated by the mixing of $\xi_0$ and $\xi_1$ in the construction of $\eta_1$.
+\subsubsection{Triangle Memory Organization}
+Due to the use of collapsed coordinates (i.e. Duffy transformations), triangle bases are formed via the tensor product of one basis function of Type I, and one of Type II, i.e. $\phi_p(\eta_{0,i}) \phi_{pq}(\eta_{1,j})$. Since $\phi_p$ is also a Type I function, its memory ordering is identical to that used for quads. The second function is complicated by the mixing of $\xi_0$ and $\xi_1$ in the construction of $\eta_1$.
In particular, this means that the basis function has two modal indices, $p$ and $q$. While $p$ can run all the way to $P$, The number of $q$ modes depends on the value of the $p$ index q index such that $0 \leq q < P  p$. Thus, for $p = 0$, the q index can run all the way up to $P$. When p = 1, it runs up to $P  1$ and so on. Memory is laid out in the same way starting with $p=0$. To access all values in order, we write
+In particular, this means that the basis function has two modal indices, $p$ and $q$. While $p$ can run all the way to $P$, The number of $q$ modes depends on the value of the $p$ index q index such that $0 \leq q < P  p$. Thus, for $p = 0$, the q index can run all the way up to $P$. When p = 1, it runs up to $P  1$ and so on. Memory is laid out in the same way starting with $p=0$. To access all values in order, we write\\
\begin{lstlisting}
mode = 0
for p in P:
for q in P  p:
 out[mode*nq + q] = basis0[p*nq]*basis1[mode*nq + q]
+ out[mode*nq + q] =
+ basis0[p*nq]*basis1[mode*nq + q]
mode += Pp
\end{lstlisting}
Notice the use of the extra ``mode'' variable. Since the maximum value of $q$ changes with $p$, basis1 is not simply a linearized matrix and instead has a triangular structure which necessitates keeping track of our current memory location.
@@ 235,25 +252,28 @@ represents the top right vertex in the standard basis. However, when we move to
\subsection{3D Geometries}
\subsubsection{Hexahedral Element Memory Organization}
The hexahedral element does not differ much from the quadrilateral as it is the simply the product of three Type I functions.
+We now present the memory layout for hexahedral, prismatic, pyramidal and tetrahedral elements.
+
+\subsubsection{Hexahedron Memory Organization}
+The hexahedron does not differ much from the quadrilateral as it is the simply the product of three Type I functions.
\[
 \Phi_{pqr} = \phi_p(\xi_0) \phi_q(\xi_1) \phi_r(\xi_2).
+ \Phi_{pqr} = \phi_p(\xi_0) \phi_q(\xi_1) \phi_r(\xi_2)
\]
\subsubsection{Prismatic Element Memory Organization}
+\subsubsection{Prism Memory Organization}
Cross sections of a triangular prism yield either a quad or a triangle based chosen direction. The basis, therefore, looks like a combination of the two different 2D geometries.
\[
 \Phi_{pqr} = \phi_p(\eta_0)\phi_q(\xi_1)\phi_{pr}(\eta_1).
+ \Phi_{pqr} = \phi_p(\eta_0)\phi_q(\xi_1)\phi_{pr}(\eta_1)
\]
Taking $\phi_p \phi_{pr}$ on its own produces a triangular face while taking $\phi_p \phi_q$ on its own produces a quadrilateral face. When the three basis functions are combined into a single array (as in the inner product kernel), modes are accessed in the order p,q,r with r being the fastest index and p the slowest. The access pattern for the prism thus looks like
+Taking $\phi_p \phi_{pr}$ on its own produces a triangular face while taking $\phi_p \phi_q$ on its own produces a quadrilateral face. When the three basis functions are combined into a single array (as in the inner product kernel), modes are accessed in the order p,q,r with r being the fastest index and p the slowest. The access pattern for the prism thus looks like\\
\begin{lstlisting}
mode_pqr = 0
mode_pr = 0
for p in P:
for q in Q:
for r in P  p:
 out[mode_pqr*nq + r] = basis0[p*nq]*basis1[q*nq]*basis2[mode_pr + r]
+ out[mode_pqr*nq + r] =
+ basis0[p*nq]*basis1[q*nq]*basis2[mode_pr + r]
mode_pqr += P  p
mode_pr += P  p
\end{lstlisting}
@@ 263,40 +283,194 @@ As with the triangle, we have to deal with complications due to collapsed coordi
\Phi_{0q1} += \phi_1 \phi_q \phi_{10}.
\]
\subsubsection{Tetrahedral Element Memory Organization}
The tetrahedral element is the most complicated of the element constructions. It cannot simply be formed as the composition of multiple triangles since $\eta_2$ is constructed by mixing three coordinate directions. We thus need to introduce our first Type III function.
+\subsubsection{Tetrahedron Memory Organization}
+The tetrahedron is the most complicated of the element types. It cannot simply be formed as the composition of multiple triangles since $\eta_2$ is constructed by mixing three coordinate directions. We thus need to introduce our first Type III function.
\[
\Phi_{pqr}(\eta_0, \eta_1, \eta_2) = \phi_{p}(\eta_0) \phi_{pq} (\eta_1) \phi_{pqr}(\eta_2).
\]
The $r$ index is constrained by both $p$ and $q$ indices. It runs from $P  p  q$ to $1$ in a similar manner to the Type II function. Our typical access pattern is thus
+The $r$ index is constrained by both $p$ and $q$ indices. It runs from $P  p  q$ to $1$ in a similar manner to the Type II function. Our typical access pattern is thus \\
\begin{lstlisting}
mode_pqr = 0
mode_pq = 0
for p in P:
for q in P  p:
 for r in P  p  r:
 out[mode_pqr*nq + r] = basis0[p*nq]*basis1[mode_pq + q]*basis2[mode_pqr + r]
 mode_pqr += (P  p  r)
+ for r in P  p  q:
+ out[mode_pqr*nq + r] =
+ basis0[p*nq]*basis1[mode_pq + q]*basis2[mode_pqr + r]
+ mode_pqr += (P  p  q)
mode_pq += (P  p)
\end{lstlisting}
The tetrahedral element also has to add a correction due to collapsed coordinates. Similar to the prism, the correction must be applied to an entire edge indexed by $r$
+
+The tetrahedron also has to add a correction due to collapsed coordinates. Similar to the prism, the correction must be applied to an entire edge indexed by $r$
\[
\Phi_{01r} += \phi_1 \phi_1 \phi_{11r}.
\]
\subsubsection{Pyramidic Element Memory Organization}
Like the tetrahedral element, a pyramid contains a collapsed coordinate direction which mixes three standard coordinates from the standard region. Unlike the tetrahedra, the collapse only occurs along one axis. Thus it is constructed from two Type I functions and one Type III function
+\subsubsection{Pyramid Memory Organization}
+
+Like the tetrahedron, a pyramid contains a collapsed coordinate direction which mixes three standard coordinates from the standard region. Unlike the tetrahedron, the collapse only occurs along one axis. Thus it is constructed from two Type I functions and one Type III function: \\
\[
\Phi_{pqr} = \phi_p(\eta_1) \phi_q(\eta_2) \phi_{pqr}(\eta_3).
\]
The product $\phi_p \phi_q$ looks like the a quad construction which reflects the quad which serves as the base of the pyramid. A typical memory access looks like
+
+The product $\phi_p \phi_q$ looks like the a quad construction which reflects the quad which serves as the base of the pyramid. A typical memory access looks like \\
+
\begin{lstlisting}
mode_pqr = 0
for p in P:
 for q in P  p:
 for r in P  p  r:
 out[mode_pqr*nq + r] = basis0[p*nq]*basis1[q*nq]*basis2[mode_pqr*nq + r]
 mode_pqr += (P  p  r)
+ for q in Q:
+ for r in P  p  q:
+ out[mode_pqr*nq + r] =
+ basis0[p*nq]*basis1[q*nq]*basis2[mode_pqr*nq + r]
+ mode_pqr += (P  p  q)
\end{lstlisting}
+%
+%\subsection{General Layout}
+%
+%Basis functions are stored in a 1D array indexed by both mode and quadrature point. The fast index runs over quadrature points while the slow index runs over modes. This was done to match the memory access pattern of the inner product, which is the most frequently computed kernel for most solvers.
+%
+%Bases are built from the tensor product of three different equation types (subsequently called Type 1, Type II and Type III respectively):
+%\begin{equation}
+% \phi_{p}(z) =
+% \begin{cases}
+% \frac{1  z}{2} & p = 0 \\
+% \frac{1 + z}{2} & p = 1 \\
+% \left(\frac{1z}{2}\right)\left(\frac{1+z}{2}\right) P_{p1}^{1,1}(z) & 2 \leq p < P
+% \end{cases}
+%\end{equation}
+%
+%\[
+% \phi_{pq}(z) = \left\{
+% \begin{array}{lll}
+% \phi_{q}(z) & p = 0 & 0 \leq q < P \\
+% \left(\frac{1  z}{2}\right)^p & 1 \leq p < P & q = 0 \\
+% \left(\frac{1z}{2}\right)^p \left(\frac{1+z}{2}\right) P_{q1}^{2p1,1}(z) & 1 \leq p < P, & 1 \leq q < P  p
+% \end{array}\right.
+%\]
+%
+%\[
+% \phi_{pqr}(z) = \left\{
+% \begin{array}{llll}
+% \phi_{qr} & p = 0 & 0 \leq q < P & 0 < r < P  q \\
+% \left(\frac{1z}{2}\right)^{p+q} & 1 \leq p < P, & 0 \leq q < P  p, & r = 0 \\
+% \left(\frac{1  z}{2}\right)^{p + q} \left(\frac{1+z}{2}\right) P_{r1}^{2p + 2q  1, r}(z) & 1 \leq p < P, & 0 \leq q < P  p, & 1 \leq r < P  p  qr.
+% \end{array}\right.
+%\]
+%
+%Here, $P$ is the polynomial order of the basis and $P^{\alpha,\beta}_{p}$ are the $p^{\text{th}}$ order jacobi polynomial.
+%
+%\subsubsection{A Note Concerning Adjustments For $C_0$ Continuity}
+%
+%
+%Before going further it is worth reviewing the spatial shape of each node. The term $\frac{1 + z}{2}$ is an increasing function which is equal to zero at $z = 1$ and equal to one at $z = 1$. Similarly, $\frac{1  z}{2}$ is a decreasing function which is equal to one at $z = 1$ and equal to zero at $z = +1$. These two functions are thus nonzero at one of each of the boundaries. If we need to maintain $C_0$ continuity with surrounding elements (as we do in the continuous Galerkin method), then these local modes must be assembled together with the correct local modes in adjacent elements to create a continuous, global mode. For instance $\frac{1 + z}{2}$ in the left element would be continuous with $\frac{1  z}{2}$ in the right element. The union of these two modes under assembly form a single ``hat'' function. By contrast, functions of the form
+%\[
+% \frac{1  z}{2} \frac{1 + z}{2}
+%\]
+%are zero at both end points $z = \pm 1$. As a result, they are trivially continuous with any other function which is also equal to zero on the boundary. These ``bubble'' functions may be treated entirely locally and thus are used to construct the interior modes of a basis. Only bases with $p > 1$ have interior modes.
+%
+%All of this holds separately in one dimension. Higher dimensional bases are constructed via the tensor product of 1D basis functions. As a result, we end up with a greater number of possibilities in terms of continuity. When the tensor product is taken between two bubble functions from different bases, the result is still a bubble function. When the tensor product is taken between a hat function and a bubble function we get ``edge'' modes (in both 2D and 3D). These are nonzero along one edge of the standard domain and zero along the remaining edges. When the tensor product is taken between two hat functions, they form a ``vertex'' mode which reaches its maximum at one of the vertices and is nonzero on two edges. The 3D bases are constructed similarly.
+%
+%Based upon this convention, the 1D basis set consists of vertex modes and bubble modes. The 2D basis function set consists of vertex modes, edge modes and bubble modes. The 3D basis set contains vertex modes, edge modes, face modes and bubble modes.
+%
+%\subsection{2D Geometries}
+%
+%\subsubsection{Quadrilateral Element Memory Organization}
+%Quads have the simplest memory organization. The quadrilateral basis is composed of the tensor product of two Type I functions $\phi_p(\xi_{0,i}) \phi_q(\xi_{1,j})$. This function would then be indexed as
+%
+%\begin{lstlisting}
+% basis0[p*nq0 + i] * basis1[q*nq1 + j]
+%\end{lstlisting}
+%where nq is the number of quadrature points for the $b^{\text{th}}$ basis. Unlike certain mode orderings (e.g. Karniadakis and Sherwin \cite{KaSh05}), the two hat functions are accessed as the first and second modes in memory with interior modes placed afterward. Thus,
+%\begin{lstlisting}
+% basis[i], basis[nq + i]
+%\end{lstlisting}
+%correspond to $\frac{1  z}{2}$ and $\frac{1 + z}{2}$ respectively.
+%
+%\subsubsection{Triangle Element Memory Organization}
+%Due to the use of collapsed coordinates, triangular element bases are formed via the tensor product of one basis function of Type I, and one of Type II, i.e. $\phi_p(\eta_{0,i} \phi_pq(\eta{1,j}))$. Since $\phi_p$ is also a Type I function, its memory ordering is identical to that used for quads. The second function is complicated by the mixing of $\xi_0$ and $\xi_1$ in the construction of $\eta_1$.
+%
+%In particular, this means that the basis function has two modal indices, $p$ and $q$. While $p$ can run all the way to $P$, The number of $q$ modes depends on the value of the $p$ index q index such that $0 \leq q < P  p$. Thus, for $p = 0$, the q index can run all the way up to $P$. When p = 1, it runs up to $P  1$ and so on. Memory is laid out in the same way starting with $p=0$. To access all values in order, we write
+%\begin{lstlisting}
+% mode = 0
+% for p in P:
+% for q in P  p:
+% out[mode*nq + q] = basis0[p*nq]*basis1[mode*nq + q]
+% mode += Pp
+%\end{lstlisting}
+%Notice the use of the extra ``mode'' variable. Since the maximum value of $q$ changes with $p$, basis1 is not simply a linearized matrix and instead has a triangular structure which necessitates keeping track of our current memory location.
+%
+%The collapsed coordinate system introduces one extra subtlety. The mode
+%\[
+% \phi_1(\eta_1) \phi_1(\eta_2)
+%\]
+%represents the top right vertex in the standard basis. However, when we move to the standard element basis, we are dealing with a triangle which only has three vertices. During the transformation, the top right vertex collapses into the top left vertex. If we naively construct an operators by iterating through all of our modes, the contribution from this vertex to mode $\Phi_{01}$ will not be included. To deal with this, we add its contribution as a correction when computing a kernel. The correction is $\Phi_{01} = \phi_0 \phi_{01} + \phi_1 \phi_{10}$ for a triangle.
+%
+%\subsection{3D Geometries}
+%
+%\subsubsection{Hexahedral Element Memory Organization}
+%The hexahedral element does not differ much from the quadrilateral as it is the simply the product of three Type I functions.
+%\[
+% \Phi_{pqr} = \phi_p(\xi_0) \phi_q(\xi_1) \phi_r(\xi_2).
+%\]
+%
+%\subsubsection{Prismatic Element Memory Organization}
+%Cross sections of a triangular prism yield either a quad or a triangle based chosen direction. The basis, therefore, looks like a combination of the two different 2D geometries.
+%\[
+% \Phi_{pqr} = \phi_p(\eta_0)\phi_q(\xi_1)\phi_{pr}(\eta_1).
+%\]
+%Taking $\phi_p \phi_{pr}$ on its own produces a triangular face while taking $\phi_p \phi_q$ on its own produces a quadrilateral face. When the three basis functions are combined into a single array (as in the inner product kernel), modes are accessed in the order p,q,r with r being the fastest index and p the slowest. The access pattern for the prism thus looks like
+%\begin{lstlisting}
+% mode_pqr = 0
+% mode_pr = 0
+% for p in P:
+% for q in Q:
+% for r in P  p:
+% out[mode_pqr*nq + r] = basis0[p*nq]*basis1[q*nq]*basis2[mode_pr + r]
+% mode_pqr += P  p
+% mode_pr += P  p
+%\end{lstlisting}
+%
+%As with the triangle, we have to deal with complications due to collapsed coordinates. This time, the singular vertex from the triangle occurs along an entire edge of the prism. Our correction must be added to a collection of modes indexed by $q$
+%\[
+% \Phi_{0q1} += \phi_1 \phi_q \phi_{10}.
+%\]
+%
+%\subsubsection{Tetrahedral Element Memory Organization}
+%The tetrahedral element is the most complicated of the element constructions. It cannot simply be formed as the composition of multiple triangles since $\eta_2$ is constructed by mixing three coordinate directions. We thus need to introduce our first Type III function.
+%\[
+% \Phi_{pqr}(\eta_0, \eta_1, \eta_2) = \phi_{p}(\eta_0) \phi_{pq} (\eta_1) \phi_{pqr}(\eta_2).
+%\]
+%The $r$ index is constrained by both $p$ and $q$ indices. It runs from $P  p  q$ to $1$ in a similar manner to the Type II function. Our typical access pattern is thus
+%\begin{lstlisting}
+%mode_pqr = 0
+%mode_pq = 0
+%for p in P:
+% for q in P  p:
+% for r in P  p  r:
+% out[mode_pqr*nq + r] = basis0[p*nq]*basis1[mode_pq + q]*basis2[mode_pqr + r]
+% mode_pqr += (P  p  r)
+% mode_pq += (P  p)
+%\end{lstlisting}
+%The tetrahedral element also has to add a correction due to collapsed coordinates. Similar to the prism, the correction must be applied to an entire edge indexed by $r$
+%\[
+% \Phi_{01r} += \phi_1 \phi_1 \phi_{11r}.
+%\]
+%
+%\subsubsection{Pyramidic Element Memory Organization}
+%Like the tetrahedral element, a pyramid contains a collapsed coordinate direction which mixes three standard coordinates from the standard region. Unlike the tetrahedra, the collapse only occurs along one axis. Thus it is constructed from two Type I functions and one Type III function
+%\[
+% \Phi_{pqr} = \phi_p(\eta_1) \phi_q(\eta_2) \phi_{pqr}(\eta_3).
+%\]
+%The product $\phi_p \phi_q$ looks like the a quad construction which reflects the quad which serves as the base of the pyramid. A typical memory access looks like
+%\begin{lstlisting}
+%mode_pqr = 0
+%for p in P:
+% for q in P  p:
+% for r in P  p  r:
+% out[mode_pqr*nq + r] = basis0[p*nq]*basis1[q*nq]*basis2[mode_pqr*nq + r]
+% mode_pqr += (P  p  r)
+%\end{lstlisting}
+%
+

GitLab