This page was generated from unit-5.5-cuda/EulerEquations.ipynb.

5.5.3 Euler equations

state variables:

  • mass density \(\rho\)

  • momentum \(m = \rho u\), with velocity \(u\)

  • energy density \(E\)

conservation of mass, momentum and energy:

\begin{eqnarray*} \frac{\partial \rho}{\partial t} & = & -\operatorname{div} \rho u \\ \frac{\partial \rho u}{\partial t} & = & -\operatorname{div} (\rho u \otimes u + p I) \\ \frac{\partial E}{\partial t} & = & -\operatorname{div} (E+p) u \end{eqnarray*}

closure equation for pressure: \(p = (\gamma-1) (E - \rho \tfrac{1}{2} |u|^2)\)

with heat capacity ration \(\gamma\) depending on gas, \(\gamma=1.4\) for atmosphere.

Compact notation:

\[\frac{\partial U}{\partial t} + \operatorname{div} F(U) = 0\]

with vector of state variables (in \(R^{d+2}\)):

\[\begin{split}U = \left( \begin{array}{c} \rho \\ m \\ E \end{array} \right)\end{split}\]

and flux in \(R^{(d+2) \times d}\):

\[\begin{split}F = \left( \begin{array}{c} \rho u \\ \rho u \otimes u + p I \\ (E + p) u \end{array} \right)\end{split}\]
[1]:
from ngsolve import *
from ngsolve.webgui import Draw
from time import sleep

try:
    import ngsolve.ngscuda as ngscuda
except:
    pass
[2]:
mesh = Mesh(unit_square.GenerateMesh(maxh=0.02))

order=3
fesT = L2(mesh, order=order)**4
feshat = FacetFESpace(mesh, order=order)**4
fes = fesT*feshat

gfu = GridFunction(fes)
rho0 = 1+1*exp(-400*( (x-0.5)**2 + (y-0.5)**2))
with TaskManager():
    gfu.components[0].Set( (rho0, 0, 0, 1) )
[3]:
gamma = 1.4  # Gas constant
def Flux (U):
    rho = U[0]
    u = U[1:3]/rho
    E = U[3]
    p = (gamma-1)*(E-rho/2*(u*u))
    return CF ( (rho*u, rho*OuterProduct(u,u)+p*Id(2),
                 (E+p)*u)).Reshape((4,2))

ngsglobals.msg_level = 0

n = specialcf.normal(2)
stab = 1
def NumFlux(u, uo):
    return 0.5*(Flux(u)+Flux(uo))
[4]:
truecompile = False

(u,uhat), (v,vhat) = fes.TnT()
# bfa1 = BilinearForm(fes, nonassemble=True)

term1a = InnerProduct (NumFlux(u, 2*uhat-u), v.Operator("normal")).Compile(truecompile, wait=True)*dx(element_boundary=True)
term1b = (2*stab*v*(u-uhat)).Compile(truecompile, wait=True)*dx(element_vb=BND)
term2 = (-InnerProduct(Flux(u),Grad(v))).Compile(truecompile, wait=True)*dx

bfa = BilinearForm (term1a+term1b+term2, nonlinear_matrix_free_bdb=True).Assemble()

embT, embhat = fes.embeddings
resT, reshat = fes.restrictions
rangeT = fes.Range(0)
rangehat = fes.Range(1)

invm1 = embT@fesT.Mass(1).Inverse()@embT.T
# traceop = fesT.TraceOperator(feshat, average=True)
traceop = 0.5*fesT.TraceOperator(feshat, average=False)
traceop = traceop.CreateDeviceMatrix()

uT, vT = fesT.TnT()
with TaskManager():
    invm = BilinearForm(uT*vT*dx, diagonal=True).Assemble().mat.Inverse()
invm_host =  embT@invm@embT.T
invm = invm_host.CreateDeviceMatrix()

print(rangeT, rangehat)
[0,232160) [232160,373056)
[5]:
rho0 = 0.2+1*exp(-400*( (x-0.5)**2 + (y-0.5)**2))
with TaskManager():
    gfu.components[0].Set( (rho0, 0, 0, rho0) )

gfubnd = GridFunction(fes)
gfubnd.components[1].vec.data = traceop * gfu.components[0].vec
Projector(fes.GetDofs(mesh.Boundaries(".*")), True).Project(gfubnd.vec)

gf_rho = gfu.components[0][0]
gf_u = gfu.components[0][1:3] / gf_rho
gf_E = gfu.components[0][3]
gf_p = (gamma-1)*(gf_E-gf_rho/2*(gf_u*gf_u))  # pressure
gf_a = sqrt(gamma*gf_p/gf_rho)        # speed of sound
gf_M = Norm(gf_u) / gf_a              # Mach number

print ("Density")
scene_rho = Draw (gf_rho, mesh, deformation=True)
print ("Velocity")
scene_u = Draw(gf_u, mesh, vectors={"grid_size":100})
print ("Mach number")
scene_M = Draw(Norm(gf_u)/gf_a, mesh)

t = 0
tend = 0.3
tau = 1e-4
i = 0
bfamat = bfa.mat.CreateDeviceMatrix()
traceop = traceop.CreateDeviceMatrix()
vec = gfu.vec.CreateDeviceVector()
vecbnd = gfubnd.vec.CreateDeviceVector()
hv = gfu.vec.CreateDeviceVector()
from time import time

with TaskManager():
    while t < tend:
        vec[rangehat] = traceop * vec[rangeT] + vecbnd[rangehat]
        hv.data = bfamat * vec
        vec -= tau * invm * hv

        t += tau
        i += 1
        if i%20 == 0:
            gfu.vec.data = vec
            scene_rho.Redraw()
            scene_u.Redraw()
            scene_M.Redraw()
Density
Velocity
Mach number
[6]:
print (bfamat.GetOperatorInfo())
SumMatrix, h = 373056, w = 373056
  SumMatrix, h = 373056, w = 373056
    SumMatrix, h = 373056, w = 373056
      SumMatrix, h = 373056, w = 373056
        SumMatrix, h = 373056, w = 373056
          ProductMatrix, h = 373056, w = 373056
            ProductMatrix, h = 373056, w = 287808
              Transpose, h = 373056, w = 287808
                ProductMatrix, h = 287808, w = 373056
                  BlockDiagonalMatrixSoA (bs = 8x4), h = 287808, w = 143904
                  ConstantEBEMatrix (bs = 48x40), h = 143904, w = 373056
              N6ngcomp22ApplyIntegrationPointsE, h = 287808, w = 287808
            ProductMatrix, h = 287808, w = 373056
              BlockDiagonalMatrixSoA (bs = 8x8), h = 287808, w = 287808
              ConstantEBEMatrix (bs = 96x88), h = 287808, w = 373056
          ProductMatrix, h = 373056, w = 373056
            ProductMatrix, h = 373056, w = 269376
              Transpose, h = 373056, w = 269376
                ProductMatrix, h = 269376, w = 373056
                  BlockDiagonalMatrixSoA (bs = 8x4), h = 269376, w = 134688
                  ConstantEBEMatrix (bs = 48x40), h = 134688, w = 373056
              N6ngcomp22ApplyIntegrationPointsE, h = 269376, w = 269376
            ProductMatrix, h = 269376, w = 373056
              BlockDiagonalMatrixSoA (bs = 8x8), h = 269376, w = 269376
              ConstantEBEMatrix (bs = 96x88), h = 269376, w = 373056
        ProductMatrix, h = 373056, w = 373056
          ProductMatrix, h = 373056, w = 287808
            Transpose, h = 373056, w = 143904
              ProductMatrix, h = 143904, w = 373056
                BlockDiagonalMatrixSoA (bs = 4x4), h = 143904, w = 143904
                ConstantEBEMatrix (bs = 48x40), h = 143904, w = 373056
            N6ngcomp22ApplyIntegrationPointsE, h = 143904, w = 287808
          ProductMatrix, h = 287808, w = 373056
            BlockDiagonalMatrixSoA (bs = 8x8), h = 287808, w = 287808
            ConstantEBEMatrix (bs = 96x88), h = 287808, w = 373056
      ProductMatrix, h = 373056, w = 373056
        ProductMatrix, h = 373056, w = 269376
          Transpose, h = 373056, w = 134688
            ProductMatrix, h = 134688, w = 373056
              BlockDiagonalMatrixSoA (bs = 4x4), h = 134688, w = 134688
              ConstantEBEMatrix (bs = 48x40), h = 134688, w = 373056
          N6ngcomp22ApplyIntegrationPointsE, h = 134688, w = 269376
        ProductMatrix, h = 269376, w = 373056
          BlockDiagonalMatrixSoA (bs = 8x8), h = 269376, w = 269376
          ConstantEBEMatrix (bs = 96x88), h = 269376, w = 373056
    ProductMatrix, h = 373056, w = 373056
      ProductMatrix, h = 373056, w = 143904
        Transpose, h = 373056, w = 287808
          ProductMatrix, h = 287808, w = 373056
            BlockDiagonalMatrixSoA (bs = 8x8), h = 287808, w = 287808
            ConstantEBEMatrix (bs = 96x36), h = 287808, w = 373056
        N6ngcomp22ApplyIntegrationPointsE, h = 287808, w = 143904
      ProductMatrix, h = 143904, w = 373056
        BlockDiagonalMatrixSoA (bs = 4x4), h = 143904, w = 143904
        ConstantEBEMatrix (bs = 48x40), h = 143904, w = 373056
  ProductMatrix, h = 373056, w = 373056
    ProductMatrix, h = 373056, w = 134688
      Transpose, h = 373056, w = 269376
        ProductMatrix, h = 269376, w = 373056
          BlockDiagonalMatrixSoA (bs = 8x8), h = 269376, w = 269376
          ConstantEBEMatrix (bs = 96x36), h = 269376, w = 373056
      N6ngcomp22ApplyIntegrationPointsE, h = 269376, w = 134688
    ProductMatrix, h = 134688, w = 373056
      BlockDiagonalMatrixSoA (bs = 4x4), h = 134688, w = 134688
      ConstantEBEMatrix (bs = 48x40), h = 134688, w = 373056

Code generation for the device

the volume-part of the bilinear-form

\[\begin{split}\int_T F(u) \nabla v \qquad \text{with} \qquad F = \left( \begin{array}{c} \rho u \\ \rho u \otimes u + p I \\ (E + p) u \end{array} \right)\end{split}\]

is represented in NGSolve as a list of operations:

[7]:
print (term2)
Compiled CF:
Step 0: trial-function diffop = Id, dim=4
Step 1: ComponentCoefficientFunction 0
     input: 0
Step 2: 1
Step 3: binary operation '/'
     input: 2 1
Step 4: subtensor [ first: 1, num: ( 2,), dist: ( 1,) ], dim=2
     input: 0
Step 5: scalar-vector multiply, dim=2
     input: 3 4
Step 6: scalar-vector multiply, dim=2
     input: 1 5
Step 7: reshape, dims = 2 x 1
     input: 5
Step 8: reshape, dims = 1 x 2
     input: 5
Step 9: matrix-matrix multiply, dims = 2 x 2
     input: 7 8
Step 10: scalar-matrix multiply, dims = 2 x 2
     input: 1 9
Step 11: ComponentCoefficientFunction 3
     input: 0
Step 12: 2
Step 13: binary operation '/'
     input: 1 12
Step 14: innerproduct, fix size = 2
     input: 5 5
Step 15: binary operation '*'
     input: 13 14
Step 16: binary operation '-'
     input: 11 15
Step 17: scale 0.4
     input: 16
Step 18: Identity matrix, dims = 2 x 2
Step 19: scalar-matrix multiply, dims = 2 x 2
     input: 17 18
Step 20: binary operation '+', dims = 2 x 2
     input: 10 19
Step 21: binary operation '+'
     input: 11 17
Step 22: scalar-vector multiply, dim=2
     input: 21 5
Step 23: VectorialCoefficientFunction, dim=8
     input: 6 20 22
Step 24: unary operation ' ', dim=8
     input: 23
Step 25: reshape, dims = 4 x 2
     input: 24
Step 26: test-function diffop = grad, dims = 4 x 2
Step 27: innerproduct, fix size = 8
     input: 25 26
Step 28: scale -1
     input: 27
 VOL

to extract the flux, we differentiate w.r.t. the test-function

[8]:
print (term2[0].coef.Diff(Grad(v)))
Compiled CF:
Step 0: trial-function diffop = Id, dim=4
Step 1: ComponentCoefficientFunction 0
     input: 0
Step 2: 1
Step 3: binary operation '/'
     input: 2 1
Step 4: subtensor [ first: 1, num: ( 2,), dist: ( 1,) ], dim=2
     input: 0
Step 5: scalar-vector multiply, dim=2
     input: 3 4
Step 6: scalar-vector multiply, dim=2
     input: 1 5
Step 7: reshape, dims = 2 x 1
     input: 5
Step 8: reshape, dims = 1 x 2
     input: 5
Step 9: matrix-matrix multiply, dims = 2 x 2
     input: 7 8
Step 10: scalar-matrix multiply, dims = 2 x 2
     input: 1 9
Step 11: ComponentCoefficientFunction 3
     input: 0
Step 12: 2
Step 13: binary operation '/'
     input: 1 12
Step 14: innerproduct, fix size = 2
     input: 5 5
Step 15: binary operation '*'
     input: 13 14
Step 16: binary operation '-'
     input: 11 15
Step 17: scale 0.4
     input: 16
Step 18: Identity matrix, dims = 2 x 2
Step 19: scalar-matrix multiply, dims = 2 x 2
     input: 17 18
Step 20: binary operation '+', dims = 2 x 2
     input: 10 19
Step 21: binary operation '+'
     input: 11 17
Step 22: scalar-vector multiply, dim=2
     input: 21 5
Step 23: VectorialCoefficientFunction, dim=8
     input: 6 20 22
Step 24: unary operation ' ', dim=8
     input: 23
Step 25: reshape, dims = 4 x 2
     input: 24
Step 26: scale -1, dims = 4 x 2
     input: 25

And for this program we generate low-level C-code. Only the function header and loop had to be changed for a cuda-kernel:

#include <cstddef>
__global__ void ApplyIPFunctionKernel (size_t nip, double * input, size_t dist_input,
                                       double * output, size_t dist_output) {
  {
    auto values_0 = [dist_input,input](size_t i, int comp)
      { return input[i + (comp+0)*dist_input]; };
    bool constexpr has_values_0 = true;

    int tid = blockIdx.x*blockDim.x+threadIdx.x;
    for (int i = tid; i < nip; i += blockDim.x*gridDim.x) {
      // step 0: trial-function diffop = Id
      double var_0_0(0x0p+0 /* (0.0000000000000000e+00) */);
      var_0_0 = 0.0;
      double var_0_1(0x0p+0 /* (0.0000000000000000e+00) */);
      var_0_1 = 0.0;
      double var_0_2(0x0p+0 /* (0.0000000000000000e+00) */);
      var_0_2 = 0.0;
      double var_0_3(0x0p+0 /* (0.0000000000000000e+00) */);
      var_0_3 = 0.0;

      if (has_values_0) {
        var_0_0 = values_0(i,0);
        var_0_1 = values_0(i,1);
        var_0_2 = values_0(i,2);
        var_0_3 = values_0(i,3);
      }
      // step 1: ComponentCoefficientFunction 0
      double var_1;
      var_1 = var_0_0;
      // step 2: 1
      double var_2;
      var_2 = 0x1p+0 /* (1.0000000000000000e+00) */;
      // step 3: binary operation '/'
      double var_3;
      var_3 = var_2 / var_1;
      // step 4: subtensor [ first: 1, num: ( 2,), dist: ( 1,) ]
      double var_4_0;
      double var_4_1;
      var_4_0 = var_0_1;
      var_4_1 = var_0_2;
      // step 5: scalar-vector multiply
      double var_5_0;
      double var_5_1;
      var_5_0 = (var_3 * var_4_0);
      var_5_1 = (var_3 * var_4_1);
      // step 6: scalar-vector multiply
      double var_6_0;
      double var_6_1;
      var_6_0 = (var_1 * var_5_0);
      var_6_1 = (var_1 * var_5_1);
      // step 7: reshape
      double var_7_0_0;
      double var_7_1_0;
      var_7_0_0 =  (var_5_0);
      var_7_1_0 =  (var_5_1);
      // step 8: reshape
      double var_8_0_0;
      double var_8_0_1;
      var_8_0_0 =  (var_5_0);
      var_8_0_1 =  (var_5_1);
      // step 9: matrix-matrix multiply
      double var_9_0_0;
      double var_9_0_1;
      double var_9_1_0;
      double var_9_1_1;
      var_9_0_0 = ((var_7_0_0 * var_8_0_0));
      var_9_0_1 = ((var_7_0_0 * var_8_0_1));
      var_9_1_0 = ((var_7_1_0 * var_8_0_0));
      var_9_1_1 = ((var_7_1_0 * var_8_0_1));
      // step 10: scalar-matrix multiply
      double var_10_0_0;
      double var_10_0_1;
      double var_10_1_0;
      double var_10_1_1;
      var_10_0_0 = (var_1 * var_9_0_0);
      var_10_0_1 = (var_1 * var_9_0_1);
      var_10_1_0 = (var_1 * var_9_1_0);
      var_10_1_1 = (var_1 * var_9_1_1);
      // step 11: ComponentCoefficientFunction 3
      double var_11;
      var_11 = var_0_3;
      // step 12: 2
      double var_12;
      var_12 = 0x1p+1 /* (2.0000000000000000e+00) */;
      // step 13: binary operation '/'
      double var_13;
      var_13 = var_1 / var_12;
      // step 14: innerproduct, fix size = 2
      double var_14;
      var_14 = (((var_5_0 * var_5_0)) + (var_5_1 * var_5_1));
      // step 15: binary operation '*'
      double var_15;
      var_15 = var_13 * var_14;
      // step 16: binary operation '-'
      double var_16;
      var_16 = var_11 - var_15;
      // step 17: scale 0.4
      double var_17;
      var_17 = (0x1.9999999999998p-2 /* (3.9999999999999991e-01) */ * var_16);
      // step 18: Identity matrix
      double var_18_0_0;
      double var_18_0_1;
      double var_18_1_0;
      double var_18_1_1;
      var_18_0_0 = 1.0;
      var_18_0_1 = 0.0;
      var_18_1_0 = 0.0;
      var_18_1_1 = 1.0;
      // step 19: scalar-matrix multiply
      double var_19_0_0;
      double var_19_0_1;
      double var_19_1_0;
      double var_19_1_1;
      var_19_0_0 = (var_17 * var_18_0_0);
      var_19_0_1 = (var_17 * var_18_0_1);
      var_19_1_0 = (var_17 * var_18_1_0);
      var_19_1_1 = (var_17 * var_18_1_1);
      // step 20: binary operation '+'
      double var_20_0_0;
      double var_20_0_1;
      double var_20_1_0;
      double var_20_1_1;
      var_20_0_0 = var_10_0_0 + var_19_0_0;
      var_20_0_1 = var_10_0_1 + var_19_0_1;
      var_20_1_0 = var_10_1_0 + var_19_1_0;
      var_20_1_1 = var_10_1_1 + var_19_1_1;
      // step 21: binary operation '+'
      double var_21;
      var_21 = var_11 + var_17;
      // step 22: scalar-vector multiply
      double var_22_0;
      double var_22_1;
      var_22_0 = (var_21 * var_5_0);
      var_22_1 = (var_21 * var_5_1);
      // step 23: VectorialCoefficientFunction
      double var_23_0;
      double var_23_1;
      double var_23_2;
      double var_23_3;
      double var_23_4;
      double var_23_5;
      double var_23_6;
      double var_23_7;
      var_23_0 = var_6_0;
      var_23_1 = var_6_1;
      var_23_2 = var_20_0_0;
      var_23_3 = var_20_0_1;
      var_23_4 = var_20_1_0;
      var_23_5 = var_20_1_1;
      var_23_6 = var_22_0;
      var_23_7 = var_22_1;
      // step 24: unary operation ' '
      double var_24_0;
      double var_24_1;
      double var_24_2;
      double var_24_3;
      double var_24_4;
      double var_24_5;
      double var_24_6;
      double var_24_7;
      var_24_0 =  (var_23_0);
      var_24_1 =  (var_23_1);
      var_24_2 =  (var_23_2);
      var_24_3 =  (var_23_3);
      var_24_4 =  (var_23_4);
      var_24_5 =  (var_23_5);
      var_24_6 =  (var_23_6);
      var_24_7 =  (var_23_7);
      // step 25: reshape
      double var_25_0_0;
      double var_25_0_1;
      double var_25_1_0;
      double var_25_1_1;
      double var_25_2_0;
      double var_25_2_1;
      double var_25_3_0;
      double var_25_3_1;
      var_25_0_0 =  (var_24_0);
      var_25_0_1 =  (var_24_1);
      var_25_1_0 =  (var_24_2);
      var_25_1_1 =  (var_24_3);
      var_25_2_0 =  (var_24_4);
      var_25_2_1 =  (var_24_5);
      var_25_3_0 =  (var_24_6);
      var_25_3_1 =  (var_24_7);
      // step 26: scale -1
      double var_26_0_0;
      double var_26_0_1;
      double var_26_1_0;
      double var_26_1_1;
      double var_26_2_0;
      double var_26_2_1;
      double var_26_3_0;
      double var_26_3_1;
      var_26_0_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_0_0);
      var_26_0_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_0_1);
      var_26_1_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_1_0);
      var_26_1_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_1_1);
      var_26_2_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_2_0);
      var_26_2_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_2_1);
      var_26_3_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_3_0);
      var_26_3_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_3_1);

      output[i+0*dist_output] = var_26_0_0;
      output[i+1*dist_output] = var_26_0_1;
      output[i+2*dist_output] = var_26_1_0;
      output[i+3*dist_output] = var_26_1_1;
      output[i+4*dist_output] = var_26_2_0;
      output[i+5*dist_output] = var_26_2_1;
      output[i+6*dist_output] = var_26_3_0;
      output[i+7*dist_output] = var_26_3_1;
    }
  }}
extern "C" void ApplyIPFunction (size_t nip, double * input, size_t dist_input,
                                 double * output, size_t dist_output) {
  ApplyIPFunctionKernel<<<256,256>>> (nip, input, dist_input, output, dist_output); }