From 272a6b1a18155bb3097e0cae84bc966706da759e Mon Sep 17 00:00:00 2001
From: Sebastian Grimberg <sjg@amazon.com>
Date: Mon, 19 Feb 2024 10:41:28 -0800
Subject: [PATCH] Fix HostRead bug for error estimator

---
 palace/linalg/errorestimator.cpp | 155 +++++++++++++++----------------
 1 file changed, 76 insertions(+), 79 deletions(-)
diff --git a/palace/linalg/errorestimator.cpp b/palace/linalg/errorestimator.cpp
index 5a78ce4c4..03035418d 100644
--- a/palace/linalg/errorestimator.cpp
+++ b/palace/linalg/errorestimator.cpp
@@ -130,6 +130,7 @@ void FluxProjector<VecType>::Mult(const VecType &x, VecType &y) const
   {
     if constexpr (std::is_same<VecType, Vector>::value)
     {
+      y.Write();  // Ensure memory is allocated on device before aliasing
       for (int i = 0; i < vdim; i++)
       {
         // Mpi::Print(" Computing smooth flux projection of flux component {:d}/{:d} for "
@@ -161,54 +162,76 @@ template <typename VecType>
 void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
                                                         ErrorIndicator &indicator) const
 {
-  // Compute the projection of the discontinuous flux onto the smooth finite element space.
+  // Compute the projection of the discontinuous flux onto the smooth finite element space
+  // and populate the corresponding grid functions.
   BlockTimer bt(Timer::ESTIMATION);
-  auto F = workspace::NewVector<VecType>(nd_fespace.GetTrueVSize());
-  projector.Mult(U, F);
+  auto U_gf = workspace::NewVector<VecType>(nd_fespace.GetVSize());
+  auto F_gf = workspace::NewVector<VecType>(nd_fespace.GetVSize());
+  {
+    auto F = workspace::NewVector<VecType>(nd_fespace.GetTrueVSize());
+    projector.Mult(U, F);
+    if constexpr (std::is_same<VecType, ComplexVector>::value)
+    {
+      nd_fespace.GetProlongationMatrix()->Mult(U.Real(), U_gf.Real());
+      nd_fespace.GetProlongationMatrix()->Mult(U.Imag(), U_gf.Imag());
+      nd_fespace.GetProlongationMatrix()->Mult(F.Real(), F_gf.Real());
+      nd_fespace.GetProlongationMatrix()->Mult(F.Imag(), F_gf.Imag());
+      U_gf.Real().HostRead();
+      U_gf.Imag().HostRead();
+      F_gf.Real().HostRead();
+      F_gf.Imag().HostRead();
+    }
+    else
+    {
+      nd_fespace.GetProlongationMatrix()->Mult(U, U_gf);
+      nd_fespace.GetProlongationMatrix()->Mult(F, F_gf);
+      U_gf.HostRead();
+      F_gf.HostRead();
+    }
+  }
 
   // Loop over elements and accumulate the estimates from this component. The discontinuous
   // flux is μ⁻¹ ∇ × U.
-  auto AddErrorIndicatorImpl = [this](const mfem::ParGridFunction &U_gf,
-                                      const mfem::ParGridFunction &F_gf, Vector &estimates,
-                                      const bool add = false)
+  const auto &mesh = nd_fespace.GetParMesh();
+  Vector estimates(mesh.GetNE());
+  auto *h_estimates = estimates.HostWrite();
+  double norm2 = 0.0;
+  PalacePragmaOmp(parallel reduction(+ : norm2))
   {
-    const auto &mesh = nd_fespace.GetParMesh();
-    auto *h_estimates = estimates.HostWrite();
-    double norm2 = 0.0;
-    PalacePragmaOmp(parallel reduction(+ : norm2))
+    // Assuming dim == space_dim == curl_dim
+    mfem::IsoparametricTransformation T;
+    mfem::Array<int> dofs;
+    mfem::DofTransformation dof_trans;
+    mfem::Vector V_ip(mesh.SpaceDimension()), V_smooth(mesh.SpaceDimension()),
+        V_tmp(mesh.SpaceDimension()), loc_gf;
+    mfem::DenseMatrix Interp, Curl;
+
+    double loc_norm2 = 0.0;
+    PalacePragmaOmp(for schedule(static))
+    for (int e = 0; e < mesh.GetNE(); e++)
     {
-      // Assuming dim == space_dim == curl_dim
-      mfem::IsoparametricTransformation T;
-      mfem::Array<int> dofs;
-      mfem::DofTransformation dof_trans;
-      mfem::Vector V_ip(mesh.SpaceDimension()), V_smooth(mesh.SpaceDimension()),
-          V_tmp(mesh.SpaceDimension()), loc_gf;
-      mfem::DenseMatrix Interp, Curl;
-
-      double loc_norm2 = 0.0;
-      PalacePragmaOmp(for schedule(static))
-      for (int e = 0; e < mesh.GetNE(); e++)
+      const mfem::FiniteElement &fe = *nd_fespace.Get().GetFE(e);
+      mesh.GetElementTransformation(e, &T);
+      nd_fespace.Get().GetElementDofs(e, dofs, dof_trans);
+      Interp.SetSize(fe.GetDof(), V_ip.Size());
+      Curl.SetSize(fe.GetDof(), V_ip.Size());
+      const int q_order = fem::DefaultIntegrationOrder::Get(T);
+      const mfem::IntegrationRule &ir =
+          mfem::IntRules.Get(mesh.GetElementGeometry(e), q_order);
+
+      double elem_err = 0.0;
+      for (int i = 0; i < ir.GetNPoints(); i++)
       {
-        const mfem::FiniteElement &fe = *nd_fespace.Get().GetFE(e);
-        mesh.GetElementTransformation(e, &T);
-        nd_fespace.Get().GetElementDofs(e, dofs, dof_trans);
-        Interp.SetSize(fe.GetDof(), V_ip.Size());
-        Curl.SetSize(fe.GetDof(), V_ip.Size());
-        const int q_order = fem::DefaultIntegrationOrder::Get(T);
-        const mfem::IntegrationRule &ir =
-            mfem::IntRules.Get(mesh.GetElementGeometry(e), q_order);
-
-        double elem_err = 0.0;
-        for (int i = 0; i < ir.GetNPoints(); i++)
-        {
-          const mfem::IntegrationPoint &ip = ir.IntPoint(i);
-          T.SetIntPoint(&ip);
-          fe.CalcVShape(ip, Interp);
-          fe.CalcCurlShape(ip, Curl);
-          const double w = ip.weight * T.Weight();
+        const mfem::IntegrationPoint &ip = ir.IntPoint(i);
+        T.SetIntPoint(&ip);
+        fe.CalcVShape(ip, Interp);
+        fe.CalcCurlShape(ip, Curl);
+        const double w = ip.weight * T.Weight();
 
+        auto AccumulateError = [&](const Vector &U_gf_, const Vector &F_gf_)
+        {
           // μ⁻¹ ∇ × U
-          U_gf.GetSubVector(dofs, loc_gf);
+          U_gf_.GetSubVector(dofs, loc_gf);
           if (dof_trans.GetDofTransformation())
           {
             dof_trans.InvTransformPrimal(loc_gf);
@@ -219,7 +242,7 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
           V_ip *= 1.0 / T.Weight();
 
           // Smooth flux
-          F_gf.GetSubVector(dofs, loc_gf);
+          F_gf_.GetSubVector(dofs, loc_gf);
           if (dof_trans.GetDofTransformation())
           {
             dof_trans.InvTransformPrimal(loc_gf);
@@ -229,45 +252,21 @@ void CurlFluxErrorEstimator<VecType>::AddErrorIndicator(const VecType &U,
 
           V_smooth -= V_ip;
           elem_err += w * (V_smooth * V_smooth);
-          loc_norm2 += w * (V_ip * V_ip);
-        }
-        if (add)
+          return w * (V_ip * V_ip);
+        };
+        if constexpr (std::is_same<VecType, ComplexVector>::value)
         {
-          h_estimates[e] = std::sqrt(h_estimates[e] * h_estimates[e] + elem_err);
+          loc_norm2 += AccumulateError(U_gf.Real(), F_gf.Real());
+          loc_norm2 += AccumulateError(U_gf.Imag(), F_gf.Imag());
         }
         else
         {
-          h_estimates[e] = std::sqrt(elem_err);
+          loc_norm2 += AccumulateError(U_gf, F_gf);
         }
       }
-      norm2 += loc_norm2;
+      h_estimates[e] = std::sqrt(elem_err);
     }
-    return norm2;
-  };
-
-  // Populate the corresponding grid functions and perform integration on the real and
-  // imaginary parts separately and accumulate the result.
-  auto data_U_gf = workspace::NewVector<Vector>(nd_fespace.GetVSize());
-  auto data_F_gf = workspace::NewVector<Vector>(nd_fespace.GetVSize());
-  mfem::ParGridFunction U_gf(&nd_fespace.Get(), data_U_gf);
-  mfem::ParGridFunction F_gf(&nd_fespace.Get(), data_F_gf);
-  const auto &mesh = nd_fespace.GetParMesh();
-  Vector estimates(mesh.GetNE());
-  double norm2 = 0.0;
-  if constexpr (std::is_same<VecType, ComplexVector>::value)
-  {
-    F_gf.SetFromTrueDofs(F.Real());
-    U_gf.SetFromTrueDofs(U.Real());
-    norm2 += AddErrorIndicatorImpl(U_gf, F_gf, estimates);
-    F_gf.SetFromTrueDofs(F.Imag());
-    U_gf.SetFromTrueDofs(U.Imag());
-    norm2 += AddErrorIndicatorImpl(U_gf, F_gf, estimates, true);
-  }
-  else
-  {
-    F_gf.SetFromTrueDofs(F);
-    U_gf.SetFromTrueDofs(U);
-    norm2 += AddErrorIndicatorImpl(U_gf, F_gf, estimates);
+    norm2 += loc_norm2;
   }
   estimates.UseDevice(true);
 
@@ -297,18 +296,16 @@ void GradFluxErrorEstimator::AddErrorIndicator(const Vector &U,
   // Compute the projection of the discontinuous flux onto the smooth finite element space
   // and populate the corresponding grid functions.
   BlockTimer bt(Timer::ESTIMATION);
-  auto data_U_gf = workspace::NewVector<Vector>(h1_fespace.GetVSize());
-  auto data_F_gf = workspace::NewVector<Vector>(h1d_fespace->GetVSize());
-  mfem::ParGridFunction U_gf(&h1_fespace.Get(), data_U_gf);
-  mfem::ParGridFunction F_gf(&h1d_fespace->Get(), data_F_gf);
+  auto U_gf = workspace::NewVector<Vector>(h1_fespace.GetVSize());
+  auto F_gf = workspace::NewVector<Vector>(h1d_fespace->GetVSize());
   {
     auto F = workspace::NewVector<Vector>(h1d_fespace->GetTrueVSize());
     projector.Mult(U, F);
-    F_gf.SetFromTrueDofs(F);
+    h1_fespace.GetProlongationMatrix()->Mult(U, U_gf);
+    h1d_fespace->GetProlongationMatrix()->Mult(F, F_gf);
+    U_gf.HostRead();
     F_gf.HostRead();
   }
-  U_gf.SetFromTrueDofs(U);
-  U_gf.HostRead();
 
   // Loop over elements and accumulate the estimates from this component. The discontinuous
   // flux is ε ∇U.