{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "0",
   "metadata": {},
   "source": [
    "# 5.5.3 Euler equations\n",
    "\n",
    "\n",
    "state variables: \n",
    "* mass density $\\rho$\n",
    "* momentum $m = \\rho u$, with velocity $u$\n",
    "* energy density $E$\n",
    "\n",
    "conservation of mass, momentum and energy:\n",
    "\n",
    "\\begin{eqnarray*}\n",
    "\\frac{\\partial \\rho}{\\partial t} & = & -\\operatorname{div} \\rho u \\\\\n",
    "\\frac{\\partial \\rho u}{\\partial t} & = & -\\operatorname{div} (\\rho u \\otimes u + p I) \\\\\n",
    "\\frac{\\partial E}{\\partial t} & = & -\\operatorname{div} (E+p) u\n",
    "\\end{eqnarray*}\n",
    "\n",
    "\n",
    "closure equation for pressure: $p = (\\gamma-1) (E - \\rho \\tfrac{1}{2} |u|^2)$ \n",
    "\n",
    "with heat capacity ration $\\gamma$ depending on gas, $\\gamma=1.4$ for atmosphere.\n",
    "\n",
    "\n",
    "Compact notation:\n",
    "$$\n",
    "\\frac{\\partial U}{\\partial t} + \\operatorname{div} F(U) = 0\n",
    "$$\n",
    "\n",
    "with vector of state variables (in $R^{d+2}$):\n",
    "$$\n",
    "U = \\left( \\begin{array}{c}\n",
    "    \\rho \\\\ m \\\\ E\n",
    "    \\end{array} \\right)\n",
    "$$\n",
    "and flux in $R^{(d+2) \\times d}$:\n",
    "$$\n",
    "F = \\left( \\begin{array}{c}\n",
    "    \\rho u \\\\ \n",
    "    \\rho u \\otimes u + p I \\\\\n",
    "    (E + p) u\n",
    "    \\end{array} \\right)\n",
    "$$\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1",
   "metadata": {},
   "outputs": [],
   "source": [
    "from ngsolve import *\n",
    "from ngsolve.webgui import Draw\n",
    "from time import sleep\n",
    "\n",
    "try:\n",
    "    import ngsolve.ngscuda as ngscuda\n",
    "except:\n",
    "    pass"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2",
   "metadata": {},
   "outputs": [],
   "source": [
    "mesh = Mesh(unit_square.GenerateMesh(maxh=0.02))\n",
    "\n",
    "order=3\n",
    "fesT = L2(mesh, order=order)**4\n",
    "feshat = FacetFESpace(mesh, order=order)**4\n",
    "fes = fesT*feshat\n",
    "\n",
    "gfu = GridFunction(fes)\n",
    "rho0 = 1+1*exp(-400*( (x-0.5)**2 + (y-0.5)**2))\n",
    "with TaskManager():\n",
    "    gfu.components[0].Set( (rho0, 0, 0, 1) )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3",
   "metadata": {},
   "outputs": [],
   "source": [
    "gamma = 1.4  # Gas constant\n",
    "def Flux (U):\n",
    "    rho = U[0]\n",
    "    u = U[1:3]/rho\n",
    "    E = U[3]\n",
    "    p = (gamma-1)*(E-rho/2*(u*u))\n",
    "    return CF ( (rho*u, rho*OuterProduct(u,u)+p*Id(2), \n",
    "                 (E+p)*u)).Reshape((4,2))\n",
    "\n",
    "ngsglobals.msg_level = 0\n",
    "\n",
    "n = specialcf.normal(2)\n",
    "stab = 1\n",
    "def NumFlux(u, uo): \n",
    "    return 0.5*(Flux(u)+Flux(uo)) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4",
   "metadata": {},
   "outputs": [],
   "source": [
    "truecompile = False\n",
    "\n",
    "(u,uhat), (v,vhat) = fes.TnT()\n",
    "# bfa1 = BilinearForm(fes, nonassemble=True)\n",
    "\n",
    "term1a = InnerProduct (NumFlux(u, 2*uhat-u), v.Operator(\"normal\")).Compile(truecompile, wait=True)*dx(element_boundary=True)\n",
    "term1b = (2*stab*v*(u-uhat)).Compile(truecompile, wait=True)*dx(element_vb=BND)\n",
    "term2 = (-InnerProduct(Flux(u),Grad(v))).Compile(truecompile, wait=True)*dx\n",
    "\n",
    "bfa = BilinearForm (term1a+term1b+term2, nonlinear_matrix_free_bdb=True).Assemble()\n",
    "\n",
    "embT, embhat = fes.embeddings\n",
    "resT, reshat = fes.restrictions\n",
    "rangeT = fes.Range(0)\n",
    "rangehat = fes.Range(1)\n",
    "\n",
    "invm1 = embT@fesT.Mass(1).Inverse()@embT.T\n",
    "# traceop = fesT.TraceOperator(feshat, average=True)\n",
    "traceop = 0.5*fesT.TraceOperator(feshat, average=False)\n",
    "traceop = traceop.CreateDeviceMatrix()\n",
    "\n",
    "uT, vT = fesT.TnT()\n",
    "with TaskManager():\n",
    "    invm = BilinearForm(uT*vT*dx, diagonal=True).Assemble().mat.Inverse()\n",
    "invm_host =  embT@invm@embT.T\n",
    "invm = invm_host.CreateDeviceMatrix()\n",
    "\n",
    "print(rangeT, rangehat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5",
   "metadata": {},
   "outputs": [],
   "source": [
    "rho0 = 0.2+1*exp(-400*( (x-0.5)**2 + (y-0.5)**2))\n",
    "with TaskManager():\n",
    "    gfu.components[0].Set( (rho0, 0, 0, rho0) )\n",
    "\n",
    "gfubnd = GridFunction(fes)\n",
    "gfubnd.components[1].vec.data = traceop * gfu.components[0].vec\n",
    "Projector(fes.GetDofs(mesh.Boundaries(\".*\")), True).Project(gfubnd.vec)\n",
    "\n",
    "gf_rho = gfu.components[0][0]\n",
    "gf_u = gfu.components[0][1:3] / gf_rho\n",
    "gf_E = gfu.components[0][3]\n",
    "gf_p = (gamma-1)*(gf_E-gf_rho/2*(gf_u*gf_u))  # pressure\n",
    "gf_a = sqrt(gamma*gf_p/gf_rho)        # speed of sound\n",
    "gf_M = Norm(gf_u) / gf_a              # Mach number\n",
    "\n",
    "print (\"Density\")\n",
    "scene_rho = Draw (gf_rho, mesh, deformation=True)\n",
    "print (\"Velocity\")\n",
    "scene_u = Draw(gf_u, mesh, vectors={\"grid_size\":100})\n",
    "print (\"Mach number\")\n",
    "scene_M = Draw(Norm(gf_u)/gf_a, mesh)\n",
    "\n",
    "t = 0\n",
    "tend = 0.3\n",
    "tau = 1e-4 \n",
    "i = 0\n",
    "bfamat = bfa.mat.CreateDeviceMatrix()\n",
    "traceop = traceop.CreateDeviceMatrix()\n",
    "vec = gfu.vec.CreateDeviceVector()\n",
    "vecbnd = gfubnd.vec.CreateDeviceVector()\n",
    "hv = gfu.vec.CreateDeviceVector()\n",
    "from time import time\n",
    "\n",
    "with TaskManager():\n",
    "    while t < tend:\n",
    "        vec[rangehat] = traceop * vec[rangeT] + vecbnd[rangehat]\n",
    "        hv.data = bfamat * vec\n",
    "        vec -= tau * invm * hv\n",
    "        \n",
    "        t += tau\n",
    "        i += 1\n",
    "        if i%20 == 0:\n",
    "            gfu.vec.data = vec\n",
    "            scene_rho.Redraw()\n",
    "            scene_u.Redraw()\n",
    "            scene_M.Redraw()           "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6",
   "metadata": {},
   "outputs": [],
   "source": [
    "print (bfamat.GetOperatorInfo())"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7",
   "metadata": {},
   "source": [
    "## Code generation for the device"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8",
   "metadata": {},
   "source": [
    "the volume-part of the bilinear-form\n",
    "\n",
    "$$\n",
    "\\int_T F(u)  \\nabla v\n",
    "\\qquad\n",
    "\\text{with}\n",
    "\\qquad\n",
    "F = \\left( \\begin{array}{c}\n",
    "    \\rho u \\\\ \n",
    "    \\rho u \\otimes u + p I \\\\\n",
    "    (E + p) u\n",
    "    \\end{array} \\right)\n",
    "$$\n",
    "\n",
    "is represented in NGSolve as a list of operations:\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9",
   "metadata": {},
   "outputs": [],
   "source": [
    "print (term2)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "10",
   "metadata": {},
   "source": [
    "to extract the flux, we differentiate w.r.t. the test-function"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "11",
   "metadata": {},
   "outputs": [],
   "source": [
    "print (term2[0].coef.Diff(Grad(v)))"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "12",
   "metadata": {},
   "source": [
    "And for this *program* we generate low-level C-code. Only the function header and loop had to be changed for a cuda-kernel:"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "13",
   "metadata": {},
   "source": [
    "```cpp\n",
    "#include <cstddef>\n",
    "__global__ void ApplyIPFunctionKernel (size_t nip, double * input, size_t dist_input,\n",
    "                                       double * output, size_t dist_output) {\n",
    "  {\n",
    "    auto values_0 = [dist_input,input](size_t i, int comp)\n",
    "      { return input[i + (comp+0)*dist_input]; };\n",
    "    bool constexpr has_values_0 = true;\n",
    "\n",
    "    int tid = blockIdx.x*blockDim.x+threadIdx.x;\n",
    "    for (int i = tid; i < nip; i += blockDim.x*gridDim.x) {\n",
    "      // step 0: trial-function diffop = Id\n",
    "      double var_0_0(0x0p+0 /* (0.0000000000000000e+00) */);\n",
    "      var_0_0 = 0.0;\n",
    "      double var_0_1(0x0p+0 /* (0.0000000000000000e+00) */);\n",
    "      var_0_1 = 0.0;\n",
    "      double var_0_2(0x0p+0 /* (0.0000000000000000e+00) */);\n",
    "      var_0_2 = 0.0;\n",
    "      double var_0_3(0x0p+0 /* (0.0000000000000000e+00) */);\n",
    "      var_0_3 = 0.0;\n",
    "\n",
    "      if (has_values_0) {\n",
    "        var_0_0 = values_0(i,0);\n",
    "        var_0_1 = values_0(i,1);\n",
    "        var_0_2 = values_0(i,2);\n",
    "        var_0_3 = values_0(i,3);\n",
    "      }\n",
    "      // step 1: ComponentCoefficientFunction 0\n",
    "      double var_1;\n",
    "      var_1 = var_0_0;\n",
    "      // step 2: 1\n",
    "      double var_2;\n",
    "      var_2 = 0x1p+0 /* (1.0000000000000000e+00) */;\n",
    "      // step 3: binary operation '/'\n",
    "      double var_3;\n",
    "      var_3 = var_2 / var_1;\n",
    "      // step 4: subtensor [ first: 1, num: ( 2,), dist: ( 1,) ]\n",
    "      double var_4_0;\n",
    "      double var_4_1;\n",
    "      var_4_0 = var_0_1;\n",
    "      var_4_1 = var_0_2;\n",
    "      // step 5: scalar-vector multiply\n",
    "      double var_5_0;\n",
    "      double var_5_1;\n",
    "      var_5_0 = (var_3 * var_4_0);\n",
    "      var_5_1 = (var_3 * var_4_1);\n",
    "      // step 6: scalar-vector multiply\n",
    "      double var_6_0;\n",
    "      double var_6_1;\n",
    "      var_6_0 = (var_1 * var_5_0);\n",
    "      var_6_1 = (var_1 * var_5_1);\n",
    "      // step 7: reshape\n",
    "      double var_7_0_0;\n",
    "      double var_7_1_0;\n",
    "      var_7_0_0 =  (var_5_0);\n",
    "      var_7_1_0 =  (var_5_1);\n",
    "      // step 8: reshape\n",
    "      double var_8_0_0;\n",
    "      double var_8_0_1;\n",
    "      var_8_0_0 =  (var_5_0);\n",
    "      var_8_0_1 =  (var_5_1);\n",
    "      // step 9: matrix-matrix multiply\n",
    "      double var_9_0_0;\n",
    "      double var_9_0_1;\n",
    "      double var_9_1_0;\n",
    "      double var_9_1_1;\n",
    "      var_9_0_0 = ((var_7_0_0 * var_8_0_0));\n",
    "      var_9_0_1 = ((var_7_0_0 * var_8_0_1));\n",
    "      var_9_1_0 = ((var_7_1_0 * var_8_0_0));\n",
    "      var_9_1_1 = ((var_7_1_0 * var_8_0_1));\n",
    "      // step 10: scalar-matrix multiply\n",
    "      double var_10_0_0;\n",
    "      double var_10_0_1;\n",
    "      double var_10_1_0;\n",
    "      double var_10_1_1;\n",
    "      var_10_0_0 = (var_1 * var_9_0_0);\n",
    "      var_10_0_1 = (var_1 * var_9_0_1);\n",
    "      var_10_1_0 = (var_1 * var_9_1_0);\n",
    "      var_10_1_1 = (var_1 * var_9_1_1);\n",
    "      // step 11: ComponentCoefficientFunction 3\n",
    "      double var_11;\n",
    "      var_11 = var_0_3;\n",
    "      // step 12: 2\n",
    "      double var_12;\n",
    "      var_12 = 0x1p+1 /* (2.0000000000000000e+00) */;\n",
    "      // step 13: binary operation '/'\n",
    "      double var_13;\n",
    "      var_13 = var_1 / var_12;\n",
    "      // step 14: innerproduct, fix size = 2\n",
    "      double var_14;\n",
    "      var_14 = (((var_5_0 * var_5_0)) + (var_5_1 * var_5_1));\n",
    "      // step 15: binary operation '*'\n",
    "      double var_15;\n",
    "      var_15 = var_13 * var_14;\n",
    "      // step 16: binary operation '-'\n",
    "      double var_16;\n",
    "      var_16 = var_11 - var_15;\n",
    "      // step 17: scale 0.4\n",
    "      double var_17;\n",
    "      var_17 = (0x1.9999999999998p-2 /* (3.9999999999999991e-01) */ * var_16);\n",
    "      // step 18: Identity matrix\n",
    "      double var_18_0_0;\n",
    "      double var_18_0_1;\n",
    "      double var_18_1_0;\n",
    "      double var_18_1_1;\n",
    "      var_18_0_0 = 1.0;\n",
    "      var_18_0_1 = 0.0;\n",
    "      var_18_1_0 = 0.0;\n",
    "      var_18_1_1 = 1.0;\n",
    "      // step 19: scalar-matrix multiply\n",
    "      double var_19_0_0;\n",
    "      double var_19_0_1;\n",
    "      double var_19_1_0;\n",
    "      double var_19_1_1;\n",
    "      var_19_0_0 = (var_17 * var_18_0_0);\n",
    "      var_19_0_1 = (var_17 * var_18_0_1);\n",
    "      var_19_1_0 = (var_17 * var_18_1_0);\n",
    "      var_19_1_1 = (var_17 * var_18_1_1);\n",
    "      // step 20: binary operation '+'\n",
    "      double var_20_0_0;\n",
    "      double var_20_0_1;\n",
    "      double var_20_1_0;\n",
    "      double var_20_1_1;\n",
    "      var_20_0_0 = var_10_0_0 + var_19_0_0;\n",
    "      var_20_0_1 = var_10_0_1 + var_19_0_1;\n",
    "      var_20_1_0 = var_10_1_0 + var_19_1_0;\n",
    "      var_20_1_1 = var_10_1_1 + var_19_1_1;\n",
    "      // step 21: binary operation '+'\n",
    "      double var_21;\n",
    "      var_21 = var_11 + var_17;\n",
    "      // step 22: scalar-vector multiply\n",
    "      double var_22_0;\n",
    "      double var_22_1;\n",
    "      var_22_0 = (var_21 * var_5_0);\n",
    "      var_22_1 = (var_21 * var_5_1);\n",
    "      // step 23: VectorialCoefficientFunction\n",
    "      double var_23_0;\n",
    "      double var_23_1;\n",
    "      double var_23_2;\n",
    "      double var_23_3;\n",
    "      double var_23_4;\n",
    "      double var_23_5;\n",
    "      double var_23_6;\n",
    "      double var_23_7;\n",
    "      var_23_0 = var_6_0;\n",
    "      var_23_1 = var_6_1;\n",
    "      var_23_2 = var_20_0_0;\n",
    "      var_23_3 = var_20_0_1;\n",
    "      var_23_4 = var_20_1_0;\n",
    "      var_23_5 = var_20_1_1;\n",
    "      var_23_6 = var_22_0;\n",
    "      var_23_7 = var_22_1;\n",
    "      // step 24: unary operation ' '\n",
    "      double var_24_0;\n",
    "      double var_24_1;\n",
    "      double var_24_2;\n",
    "      double var_24_3;\n",
    "      double var_24_4;\n",
    "      double var_24_5;\n",
    "      double var_24_6;\n",
    "      double var_24_7;\n",
    "      var_24_0 =  (var_23_0);\n",
    "      var_24_1 =  (var_23_1);\n",
    "      var_24_2 =  (var_23_2);\n",
    "      var_24_3 =  (var_23_3);\n",
    "      var_24_4 =  (var_23_4);\n",
    "      var_24_5 =  (var_23_5);\n",
    "      var_24_6 =  (var_23_6);\n",
    "      var_24_7 =  (var_23_7);\n",
    "      // step 25: reshape\n",
    "      double var_25_0_0;\n",
    "      double var_25_0_1;\n",
    "      double var_25_1_0;\n",
    "      double var_25_1_1;\n",
    "      double var_25_2_0;\n",
    "      double var_25_2_1;\n",
    "      double var_25_3_0;\n",
    "      double var_25_3_1;\n",
    "      var_25_0_0 =  (var_24_0);\n",
    "      var_25_0_1 =  (var_24_1);\n",
    "      var_25_1_0 =  (var_24_2);\n",
    "      var_25_1_1 =  (var_24_3);\n",
    "      var_25_2_0 =  (var_24_4);\n",
    "      var_25_2_1 =  (var_24_5);\n",
    "      var_25_3_0 =  (var_24_6);\n",
    "      var_25_3_1 =  (var_24_7);\n",
    "      // step 26: scale -1\n",
    "      double var_26_0_0;\n",
    "      double var_26_0_1;\n",
    "      double var_26_1_0;\n",
    "      double var_26_1_1;\n",
    "      double var_26_2_0;\n",
    "      double var_26_2_1;\n",
    "      double var_26_3_0;\n",
    "      double var_26_3_1;\n",
    "      var_26_0_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_0_0);\n",
    "      var_26_0_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_0_1);\n",
    "      var_26_1_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_1_0);\n",
    "      var_26_1_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_1_1);\n",
    "      var_26_2_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_2_0);\n",
    "      var_26_2_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_2_1);\n",
    "      var_26_3_0 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_3_0);\n",
    "      var_26_3_1 = (-0x1p+0 /* (-1.0000000000000000e+00) */ * var_25_3_1);\n",
    "\n",
    "      output[i+0*dist_output] = var_26_0_0;\n",
    "      output[i+1*dist_output] = var_26_0_1;\n",
    "      output[i+2*dist_output] = var_26_1_0;\n",
    "      output[i+3*dist_output] = var_26_1_1;\n",
    "      output[i+4*dist_output] = var_26_2_0;\n",
    "      output[i+5*dist_output] = var_26_2_1;\n",
    "      output[i+6*dist_output] = var_26_3_0;\n",
    "      output[i+7*dist_output] = var_26_3_1;\n",
    "    }\n",
    "  }}\n",
    "extern \"C\" void ApplyIPFunction (size_t nip, double * input, size_t dist_input,\n",
    "                                 double * output, size_t dist_output) {\n",
    "  ApplyIPFunctionKernel<<<256,256>>> (nip, input, dist_input, output, dist_output); } \n",
    "```"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
