From 17e523b10adbe3c7028e636c48825aea3208c5c7 Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 09:20:39 -0800 Subject: [PATCH 01/24] including extra directory in makefile --- makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/makefile b/makefile index 81957b7..7cc1314 100644 --- a/makefile +++ b/makefile @@ -102,7 +102,8 @@ ifeq ($(REMHOS_DEBUG),YES) endif LIBS = $(strip $(REMHOS_LIBS) $(LDFLAGS)) -CCC = $(strip $(CXX) $(REMHOS_FLAGS)) +EXTRA_INC_DIR = $(or $(wildcard $(MFEM_DIR)/include/mfem),$(MFEM_DIR)) +CCC = $(strip $(CXX) $(REMHOS_FLAGS) $(if $(EXTRA_INC_DIR),-I$(EXTRA_INC_DIR))) Ccc = $(strip $(CC) $(CFLAGS) $(GL_OPTS)) SOURCE_FILES = remhos.cpp remhos_tools.cpp remhos_lo.cpp remhos_ho.cpp \ From 105b06dbe634f7a16fb7c3ef630e24da1b2c9e3d Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 15:11:46 -0800 Subject: [PATCH 02/24] initial commit of caliper --- makefile | 15 +++++++++++++-- remhos.cpp | 50 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 62 insertions(+), 3 deletions(-) diff --git a/makefile b/makefile index 7cc1314..821c94b 100644 --- a/makefile +++ b/makefile @@ -64,6 +64,16 @@ TEST_MK = $(MFEM_DIR)/config/test.mk MFEM_DIR1 := $(MFEM_DIR) MFEM_DIR2 := $(realpath $(MFEM_DIR)) +# Caliper/Adiak flags + +CALIPER_DIR = $(spack location --install-dir caliper) +ADIAK_DIR = $(spack location --install-dir adiak) +CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER +ADIAK_INCLUDE = -I${ADIAK_DIR}/include +ADIAK_LDFLAGS = -L${ADIAK_DIR}/lib -ladiak +CALIPER_LDFLAGS = -L${CALIPER_DIR}/lib64 -lcaliper + + # Use the compiler used by MFEM. Get the compiler and the options for compiling # and linking from MFEM's config.mk. (Skip this if the target does not require # building.) @@ -73,7 +83,7 @@ ifeq (,$(filter help clean distclean style,$(MAKECMDGOALS))) endif CXX = $(MFEM_CXX) -CPPFLAGS = $(MFEM_CPPFLAGS) +CPPFLAGS = $(MFEM_CPPFLAGS) $(CALIPER_FLAGS) $(ADIAK_FLAGS) CXXFLAGS = $(MFEM_CXXFLAGS) # MFEM config does not define C compiler @@ -81,7 +91,8 @@ CC = gcc CFLAGS = -O3 # Optional link flags -LDFLAGS = +LDFLAGS = $(CALIPER_LDFLAGS) $(ADIAK_LDFLAGS) + OPTIM_OPTS = -O3 DEBUG_OPTS = -g -Wall -std=c++11 diff --git a/remhos.cpp b/remhos.cpp index 977d5d2..81d5ec5 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -39,6 +39,13 @@ #include "remhos_tools.hpp" #include "remhos_sync.hpp" +#ifdef USE_CALIPER +#include +#include +#ifdef HAVE_MPI +#include +#endif + using namespace std; using namespace mfem; @@ -145,6 +152,19 @@ int main(int argc, char *argv[]) mfem::MPI_Session mpi(argc, argv); const int myid = mpi.WorldRank(); +#ifdef USE_CALIPER + setupCaliper(); + + cali::ConfigManager calimgr(params.simulationParams.caliperConfig.c_str()); + if (calimgr.error()) + std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; + calimgr.start(); + adiak::init(nullptr); + adiak::cmdline(); + adiak::hostname(); + +#endif + const char *mesh_file = "data/periodic-square.mesh"; int rs_levels = 2; int rp_levels = 0; @@ -368,14 +388,24 @@ int main(int argc, char *argv[]) v.ProjectCoefficient(vcoeff); double t = 0.0; +#ifdef USE_CALIPER + CALI_CXX_MARK_LOOP_BEGIN(mainloop, "rem.mainloop"); +#endif while (t < t_final) { +#ifdef USE_CALIPER + CALI_CXX_MARK_LOOP_ITERATION(mainloop, t); +#endif t += dt; // Move the mesh nodes. x.Add(std::min(dt, t_final-t), v); // Update the node velocities. v.ProjectCoefficient(vcoeff); } +#ifdef USE_CALIPER + CALI_CXX_MARK_LOOP_END(mainloop); +#endif + // Pseudotime velocity. add(x, -1.0, x0, v_gf); @@ -1246,7 +1276,9 @@ int main(int argc, char *argv[]) delete lom.SubFes1; delete lom.VolumeTerms; } - +#ifdef USE_CALIPER + calimgr.flush(); +#endif return 0; } @@ -1925,3 +1957,19 @@ double inflow_function(const Vector &x) } else { return 0.0; } } + +void setupCaliper() +{ +#ifdef USE_CALIPER +#ifdef HAVE_MPI + cali_mpi_init(); +#endif + + cali_config_preset("CALI_LOG_VERBOSITY", "0"); + cali_config_preset("CALI_CALIPER_ATTRIBUTE_DEFAULT_SCOPE", "process"); + + cali_set_global_string_byname("rem.git_vers", GIT_VERS); + cali_set_global_string_byname("rem.git_hash", GIT_HASH); +#endif +} + From 35f46f23498e54241a7fc464e3346a96ac84eace Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 16:06:38 -0800 Subject: [PATCH 03/24] adding adiak import --- remhos.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/remhos.cpp b/remhos.cpp index 81d5ec5..135a975 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -42,9 +42,11 @@ #ifdef USE_CALIPER #include #include +#include #ifdef HAVE_MPI #include #endif +#endif using namespace std; using namespace mfem; From b3672a817ac19c5c290cc20f04e1ea0623a6905a Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 16:18:23 -0800 Subject: [PATCH 04/24] fix? --- remhos.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/remhos.cpp b/remhos.cpp index 135a975..d3b0720 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -155,15 +155,15 @@ int main(int argc, char *argv[]) const int myid = mpi.WorldRank(); #ifdef USE_CALIPER - setupCaliper(); - - cali::ConfigManager calimgr(params.simulationParams.caliperConfig.c_str()); - if (calimgr.error()) - std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; - calimgr.start(); - adiak::init(nullptr); - adiak::cmdline(); - adiak::hostname(); + setupCaliper(); + + cali::ConfigManager calimgr(params.simulationParams.caliperConfig.c_str()); + if (calimgr.error()) + std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; + calimgr.start(); + adiak::init(nullptr); + adiak::cmdline(); + adiak::hostname(); #endif @@ -1970,8 +1970,8 @@ void setupCaliper() cali_config_preset("CALI_LOG_VERBOSITY", "0"); cali_config_preset("CALI_CALIPER_ATTRIBUTE_DEFAULT_SCOPE", "process"); - cali_set_global_string_byname("rem.git_vers", GIT_VERS); - cali_set_global_string_byname("rem.git_hash", GIT_HASH); + //cali_set_global_string_byname("rem.git_vers", GIT_VERS); + //cali_set_global_string_byname("rem.git_hash", GIT_HASH); #endif } From 2a1494e1ad14e8d354e64862aea1aa0d7b6f930b Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 16:24:06 -0800 Subject: [PATCH 05/24] adding instantiation --- remhos.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/remhos.cpp b/remhos.cpp index d3b0720..92961ce 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -81,6 +81,9 @@ double inflow_function(const Vector &x); // Mesh bounding box Vector bb_min, bb_max; +//Caliper setup +void setupCaliper(); + class AdvectionOperator : public TimeDependentOperator { private: From a8b8bf9e3d3e0df8f01c98797575ccdc0d3c46a7 Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 16:33:57 -0800 Subject: [PATCH 06/24] moving configmanager --- remhos.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/remhos.cpp b/remhos.cpp index 92961ce..5e31aca 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -157,19 +157,6 @@ int main(int argc, char *argv[]) mfem::MPI_Session mpi(argc, argv); const int myid = mpi.WorldRank(); -#ifdef USE_CALIPER - setupCaliper(); - - cali::ConfigManager calimgr(params.simulationParams.caliperConfig.c_str()); - if (calimgr.error()) - std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; - calimgr.start(); - adiak::init(nullptr); - adiak::cmdline(); - adiak::hostname(); - -#endif - const char *mesh_file = "data/periodic-square.mesh"; int rs_levels = 2; int rp_levels = 0; @@ -281,6 +268,20 @@ int main(int argc, char *argv[]) } if (myid == 0) { args.PrintOptions(cout); } +//setup caliper config manager +#ifdef USE_CALIPER + setupCaliper(); + + cali::ConfigManager calimgr(params.Parse().caliperConfig.c_str()); + if (calimgr.error()) + std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; + calimgr.start(); + adiak::init(nullptr); + adiak::cmdline(); + adiak::hostname(); + +#endif + // Enable hardware devices such as GPUs, and programming models such as // CUDA, OCCA, RAJA and OpenMP based on command line options. Device device(device_config); From 9b31e0c6a10aaa82699595125a94ca51c73bd50f Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 14 Nov 2024 16:38:48 -0800 Subject: [PATCH 07/24] fixing calimanager --- remhos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remhos.cpp b/remhos.cpp index 5e31aca..6c5eb89 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -272,7 +272,7 @@ int main(int argc, char *argv[]) #ifdef USE_CALIPER setupCaliper(); - cali::ConfigManager calimgr(params.Parse().caliperConfig.c_str()); + cali::ConfigManager calimgr; if (calimgr.error()) std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; calimgr.start(); From 38ac1acab0bdc610925362803c9ad43a24b9777b Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 11:56:35 -0800 Subject: [PATCH 08/24] rermoving unnecessary adiak import --- remhos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remhos.cpp b/remhos.cpp index c54314b..d3b4198 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -42,7 +42,7 @@ #ifdef USE_CALIPER #include #include -#include +//#include #ifdef HAVE_MPI #include #endif From e27e71f6e746acae87198d6159c93a99f3a66160 Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 12:11:56 -0800 Subject: [PATCH 09/24] rmoving problematic adiak line --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 821c94b..1c81b68 100644 --- a/makefile +++ b/makefile @@ -67,7 +67,7 @@ MFEM_DIR2 := $(realpath $(MFEM_DIR)) # Caliper/Adiak flags CALIPER_DIR = $(spack location --install-dir caliper) -ADIAK_DIR = $(spack location --install-dir adiak) +#ADIAK_DIR = $(spack location --install-dir adiak) CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER ADIAK_INCLUDE = -I${ADIAK_DIR}/include ADIAK_LDFLAGS = -L${ADIAK_DIR}/lib -ladiak From 1ba87a0921e943d6cfc9354472e937457d771b26 Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 12:31:44 -0800 Subject: [PATCH 10/24] removing adiak annotations --- remhos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/remhos.cpp b/remhos.cpp index d3b4198..f00e841 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -42,7 +42,7 @@ #ifdef USE_CALIPER #include #include -//#include +#include #ifdef HAVE_MPI #include #endif @@ -297,9 +297,9 @@ MFEM_EXPORT int remhos(int argc, char *argv[], double &final_mass_u) if (calimgr.error()) std::cerr << "caliper config error: " << calimgr.error_msg() << std::endl; calimgr.start(); - adiak::init(nullptr); - adiak::cmdline(); - adiak::hostname(); + //adiak::init(nullptr); + //adiak::cmdline(); + //adiak::hostname(); #endif From 91b1cb5818cd8d4ccb166b0b0f23a73b0c295588 Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 12:35:24 -0800 Subject: [PATCH 11/24] removing other import --- remhos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remhos.cpp b/remhos.cpp index f00e841..d1f239a 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -42,7 +42,7 @@ #ifdef USE_CALIPER #include #include -#include +//#include #ifdef HAVE_MPI #include #endif From 41d5217deed46539bbd82b9ee3f1f75436a45d3f Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 14:43:56 -0800 Subject: [PATCH 12/24] updating remhos_tools --- remhos_tools.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remhos_tools.hpp b/remhos_tools.hpp index 6b29897..6bb2f63 100644 --- a/remhos_tools.hpp +++ b/remhos_tools.hpp @@ -90,7 +90,7 @@ class SmoothnessIndicator ~SmoothnessIndicator(); void ComputeSmoothnessIndicator(const Vector &u, ParGridFunction &si_vals_u); - void UpdateBounds(int dof_id, double u_HO, + __host__ __device__ void UpdateBounds(int dof_id, double u_HO, const ParGridFunction &si_vals, double &u_min, double &u_max); From 6b571d36f5af49654eb2d0c5ba72cf02788f73bb Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 14:46:55 -0800 Subject: [PATCH 13/24] updating class as well --- remhos_tools.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/remhos_tools.cpp b/remhos_tools.cpp index 4422eb2..77db06b 100644 --- a/remhos_tools.cpp +++ b/remhos_tools.cpp @@ -180,7 +180,7 @@ void SmoothnessIndicator::ComputeSmoothnessIndicator(const Vector &u, } } -void SmoothnessIndicator::UpdateBounds(int dof_id, double u_HO, +__host__ __device__ void SmoothnessIndicator::UpdateBounds(int dof_id, double u_HO, const ParGridFunction &si_vals, double &u_min, double &u_max) { From 93085fa350383bf5ff73553f077c6ec8f11b4a71 Mon Sep 17 00:00:00 2001 From: august-knox Date: Thu, 19 Dec 2024 14:52:04 -0800 Subject: [PATCH 14/24] undoing changes --- remhos_tools.cpp | 2 +- remhos_tools.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/remhos_tools.cpp b/remhos_tools.cpp index 77db06b..4422eb2 100644 --- a/remhos_tools.cpp +++ b/remhos_tools.cpp @@ -180,7 +180,7 @@ void SmoothnessIndicator::ComputeSmoothnessIndicator(const Vector &u, } } -__host__ __device__ void SmoothnessIndicator::UpdateBounds(int dof_id, double u_HO, +void SmoothnessIndicator::UpdateBounds(int dof_id, double u_HO, const ParGridFunction &si_vals, double &u_min, double &u_max) { diff --git a/remhos_tools.hpp b/remhos_tools.hpp index 6bb2f63..6b29897 100644 --- a/remhos_tools.hpp +++ b/remhos_tools.hpp @@ -90,7 +90,7 @@ class SmoothnessIndicator ~SmoothnessIndicator(); void ComputeSmoothnessIndicator(const Vector &u, ParGridFunction &si_vals_u); - __host__ __device__ void UpdateBounds(int dof_id, double u_HO, + void UpdateBounds(int dof_id, double u_HO, const ParGridFunction &si_vals, double &u_min, double &u_max); From 0690282ec45dc94af949bc03e1f06b1981b8ebba Mon Sep 17 00:00:00 2001 From: august-knox Date: Mon, 6 Jan 2025 12:31:28 -0800 Subject: [PATCH 15/24] make caliper optional --- makefile | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/makefile b/makefile index 1c81b68..2d888c7 100644 --- a/makefile +++ b/makefile @@ -66,12 +66,14 @@ MFEM_DIR2 := $(realpath $(MFEM_DIR)) # Caliper/Adiak flags -CALIPER_DIR = $(spack location --install-dir caliper) +#CALIPER_DIR = $(spack location --install-dir caliper) #ADIAK_DIR = $(spack location --install-dir adiak) +ifeq ($(CALIPER_DIR),) CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER ADIAK_INCLUDE = -I${ADIAK_DIR}/include ADIAK_LDFLAGS = -L${ADIAK_DIR}/lib -ladiak CALIPER_LDFLAGS = -L${CALIPER_DIR}/lib64 -lcaliper +endif # Use the compiler used by MFEM. Get the compiler and the options for compiling From ca08bf8b10c85c2da799b8afc01e2ad84be9aaaf Mon Sep 17 00:00:00 2001 From: august-knox Date: Mon, 6 Jan 2025 14:48:09 -0800 Subject: [PATCH 16/24] making caliper optional --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 2d888c7..af825f5 100644 --- a/makefile +++ b/makefile @@ -68,7 +68,7 @@ MFEM_DIR2 := $(realpath $(MFEM_DIR)) #CALIPER_DIR = $(spack location --install-dir caliper) #ADIAK_DIR = $(spack location --install-dir adiak) -ifeq ($(CALIPER_DIR),) +ifdef CALIPER_DIR CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER ADIAK_INCLUDE = -I${ADIAK_DIR}/include ADIAK_LDFLAGS = -L${ADIAK_DIR}/lib -ladiak From ad75451c0c190f556314a09456818f3b3f155f63 Mon Sep 17 00:00:00 2001 From: august-knox Date: Mon, 6 Jan 2025 15:52:08 -0800 Subject: [PATCH 17/24] fixing conflict --- remhos.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/remhos.cpp b/remhos.cpp index 75518f7..de1e514 100644 --- a/remhos.cpp +++ b/remhos.cpp @@ -291,7 +291,6 @@ MFEM_EXPORT int remhos(int argc, char *argv[], double &final_mass_u) } if (myid == 0) { args.PrintOptions(cout); } -<<<<<<< HEAD //setup caliper config manager #ifdef USE_CALIPER setupCaliper(); @@ -303,13 +302,12 @@ MFEM_EXPORT int remhos(int argc, char *argv[], double &final_mass_u) //adiak::init(nullptr); //adiak::cmdline(); //adiak::hostname(); +#endif -======= #ifdef REMHOS_GPU_SETUP MFEM_VERIFY(ho_type == HOSolverType::LocalInverse && lo_type == LOSolverType::MassBased && fct_type == FCTSolverType::ClipScale, "Wrong GPU setup."); ->>>>>>> f102edb64e5f4ddaf8898da28e9e275fe57cdf5d #endif // Enable hardware devices such as GPUs, and programming models such as From 63c6bcaf6eac42a06aae53bf31aad7d76c967d2b Mon Sep 17 00:00:00 2001 From: august-knox Date: Mon, 6 Jan 2025 15:56:02 -0800 Subject: [PATCH 18/24] adding back remhos_fct --- remhos_fct.cpp | 1012 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1012 insertions(+) create mode 100644 remhos_fct.cpp diff --git a/remhos_fct.cpp b/remhos_fct.cpp new file mode 100644 index 0000000..be2454b --- /dev/null +++ b/remhos_fct.cpp @@ -0,0 +1,1012 @@ +// Copyright (c) 2017, Lawrence Livermore National Security, LLC. Produced at +// the Lawrence Livermore National Laboratory. LLNL-CODE-734707. All Rights +// reserved. See files LICENSE and NOTICE for details. +// +// This file is part of CEED, a collection of benchmarks, miniapps, software +// libraries and APIs for efficient high-order finite element and spectral +// element discretizations for exascale applications. For more information and +// source code availability see http://github.com/ceed. +// +// The CEED research is supported by the Exascale Computing Project 17-SC-20-SC, +// a collaborative effort of two U.S. Department of Energy organizations (Office +// of Science and the National Nuclear Security Administration) responsible for +// the planning and preparation of a capable exascale ecosystem, including +// software, applications, hardware, advanced system engineering and early +// testbed platforms, in support of the nation's exascale computing imperative. + +#include "remhos_fct.hpp" +#include "remhos_tools.hpp" +#include "remhos_sync.hpp" + +using namespace std; + +namespace mfem +{ + +void FCTSolver::CalcCompatibleLOProduct(const ParGridFunction &us, + const Vector &m, const Vector &d_us_HO, + Vector &s_min, Vector &s_max, + const Vector &u_new, + const Array &active_el, + const Array &active_dofs, + Vector &d_us_LO_new) +{ + const double eps = 1e-12; + int dof_id; + + // Compute a compatible low-order solution. + const int NE = us.ParFESpace()->GetNE(); + const int ndofs = us.Size() / NE; + + Vector s_min_loc, s_max_loc; + + d_us_LO_new = 0.0; + + for (int k = 0; k < NE; k++) + { + if (active_el[k] == false) { continue; } + + double mass_us = 0.0, mass_u = 0.0; + for (int j = 0; j < ndofs; j++) + { + const double us_new_HO = us(k*ndofs + j) + dt * d_us_HO(k*ndofs + j); + mass_us += us_new_HO * m(k*ndofs + j); + mass_u += u_new(k*ndofs + j) * m(k*ndofs + j); + } + double s_avg = mass_us / mass_u; + + // Min and max of s using the full stencil of active dofs. + s_min_loc.SetDataAndSize(s_min.GetData() + k*ndofs, ndofs); + s_max_loc.SetDataAndSize(s_max.GetData() + k*ndofs, ndofs); + double smin = numeric_limits::infinity(), + smax = -numeric_limits::infinity(); + for (int j = 0; j < ndofs; j++) + { + if (active_dofs[k*ndofs + j] == false) { continue; } + smin = min(smin, s_min_loc(j)); + smax = max(smax, s_max_loc(j)); + } + + // Fix inconsistencies due to round-off and the usage of local bounds. + for (int j = 0; j < ndofs; j++) + { + if (active_dofs[k*ndofs + j] == false) { continue; } + + // Check if there's a violation, s_avg < s_min, due to round-offs that + // are inflated by the division of a small number (the 2nd check means + // s_avg = mass_us / mass_u > s_min up to round-off in mass_us). + if (s_avg < smin && + mass_us + eps > smin * mass_u) { s_avg = smin; } + // As above for the s_max. + if (s_avg > smax && + mass_us - eps < smax * mass_u) { s_avg = smax; } + +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check if s_avg = mass_us / mass_u is within the bounds of the full + // stencil of active dofs. + if (mass_us + eps < smin * mass_u || + mass_us - eps > smax * mass_u || + s_avg + eps < smin || + s_avg - eps > smax) + { + std::cout << "---\ns_avg element bounds: " + << smin << " " << s_avg << " " << smax << std::endl; + std::cout << "Element " << k << std::endl; + std::cout << "Masses " << mass_us << " " << mass_u << std::endl; + PrintCellValues(k, NE, u_new, "u_loc: "); + + MFEM_ABORT("s_avg is not in the full stencil bounds!"); + } +#endif + + // When s_avg is not in the local bounds for some dof (it should be + // within the full stencil of active dofs), reset the bounds to s_avg. + if (s_avg + eps < s_min_loc(j)) { s_min_loc(j) = s_avg; } + if (s_avg - eps > s_max_loc(j)) { s_max_loc(j) = s_avg; } + } + + // Take into account the compatible low-order solution. + for (int j = 0; j < ndofs; j++) + { + // In inactive dofs we get u_new*s_avg ~ 0, which should be fine. + + // Compatible LO solution. + dof_id = k*ndofs + j; + d_us_LO_new(dof_id) = (u_new(dof_id) * s_avg - us(dof_id)) / dt; + } + +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check the LO product solution. + double us_min, us_max; + for (int j = 0; j < ndofs; j++) + { + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } + + us_min = s_min_loc(j) * u_new(dof_id); + us_max = s_max_loc(j) * u_new(dof_id); + + if (s_avg * u_new(dof_id) + eps < us_min || + s_avg * u_new(dof_id) - eps > us_max) + { + std::cout << "---\ns_avg * u: " << k << " " + << us_min << " " + << s_avg * u_new(dof_id) << " " + << us_max << endl + << u_new(dof_id) << " " << s_avg << endl + << s_min_loc(j) << " " << s_max_loc(j) << "\n---\n"; + + MFEM_ABORT("s_avg * u not in bounds"); + } + } +#endif + } +} + +void FCTSolver::ScaleProductBounds(const Vector &s_min, const Vector &s_max, + const Vector &u_new, + const Array &active_el, + const Array &active_dofs, + Vector &us_min, Vector &us_max) +{ + const int NE = pfes.GetNE(); + const int ndofs = u_new.Size() / NE; + int dof_id; + us_min = 0.0; + us_max = 0.0; + for (int k = 0; k < NE; k++) + { + if (active_el[k] == false) { continue; } + + // Rescale the bounds (s_min, s_max) -> (u*s_min, u*s_max). + for (int j = 0; j < ndofs; j++) + { + dof_id = k*ndofs + j; + + // For inactive dofs, s_min and s_max are undefined (inf values). + if (active_dofs[dof_id] == false) + { + us_min(dof_id) = 0.0; + us_max(dof_id) = 0.0; + continue; + } + + us_min(dof_id) = s_min(dof_id) * u_new(dof_id); + us_max(dof_id) = s_max(dof_id) * u_new(dof_id); + } + } +} + +void FluxBasedFCT::CalcFCTSolution(const ParGridFunction &u, const Vector &m, + const Vector &du_ho, const Vector &du_lo, + const Vector &u_min, const Vector &u_max, + Vector &du) const +{ + MFEM_VERIFY(smth_indicator == NULL, "TODO: update SI bounds."); + + // Construct the flux matrix (it gets recomputed every time). + ComputeFluxMatrix(u, du_ho, flux_ij); + + // Iterated FCT correction. + Vector du_lo_fct(du_lo); + for (int fct_iter = 0; fct_iter < iter_cnt; fct_iter++) + { + // Compute sums of incoming/outgoing fluxes at each DOF. + AddFluxesAtDofs(flux_ij, gp, gm); + + // Compute the flux coefficients (aka alphas) into gp and gm. + ComputeFluxCoefficients(u, du_lo_fct, m, u_min, u_max, gp, gm); + + // Apply the alpha coefficients to get the final solution. + // Update the flux matrix for iterative FCT (when iter_cnt > 1). + UpdateSolutionAndFlux(du_lo_fct, m, gp, gm, flux_ij, du); + + du_lo_fct = du; + } +} + +void FluxBasedFCT::CalcFCTProduct(const ParGridFunction &us, const Vector &m, + const Vector &d_us_HO, const Vector &d_us_LO, + Vector &s_min, Vector &s_max, + const Vector &u_new, + const Array &active_el, + const Array &active_dofs, Vector &d_us) +{ + // Construct the flux matrix (it gets recomputed every time). + ComputeFluxMatrix(us, d_us_HO, flux_ij); + + us.HostRead(); + d_us_LO.HostRead(); + s_min.HostReadWrite(); + s_max.HostReadWrite(); + u_new.HostRead(); + active_el.HostRead(); + active_dofs.HostRead(); + + // Compute a compatible low-order solution. + Vector dus_lo_fct(us.Size()), us_min(us.Size()), us_max(us.Size()); + CalcCompatibleLOProduct(us, m, d_us_HO, s_min, s_max, u_new, + active_el, active_dofs, dus_lo_fct); + ScaleProductBounds(s_min, s_max, u_new, active_el, active_dofs, + us_min, us_max); + + // Update the flux matrix to a product-compatible version. + // Compute a compatible low-order solution. + const int NE = us.ParFESpace()->GetNE(); + const int ndofs = us.Size() / NE; + Vector flux_el(ndofs), beta(ndofs); + DenseMatrix fij_el(ndofs); + fij_el = 0.0; + Array dofs; + int dof_id; + for (int k = 0; k < NE; k++) + { + if (active_el[k] == false) { continue; } + + // Take into account the compatible low-order solution. + for (int j = 0; j < ndofs; j++) + { + // In inactive dofs we get u_new*s_avg ~ 0, which should be fine. + + dof_id = k*ndofs + j; + flux_el(j) = m(dof_id) * dt * (d_us_LO(dof_id) - dus_lo_fct(dof_id)); + beta(j) = m(dof_id) * u_new(dof_id); + } + + // Make the betas sum to 1, add the new compatible fluxes. + beta /= beta.Sum(); + for (int j = 1; j < ndofs; j++) + { + for (int i = 0; i < j; i++) + { + fij_el(i, j) = beta(j) * flux_el(i) - beta(i) * flux_el(j); + } + } + pfes.GetElementDofs(k, dofs); + flux_ij.AddSubMatrix(dofs, dofs, fij_el); + } + + // Iterated FCT correction. + // To get the LO compatible product solution (with s_avg), just do + // d_us = dus_lo_fct instead of the loop below. + for (int fct_iter = 0; fct_iter < iter_cnt; fct_iter++) + { + // Compute sums of incoming fluxes at each DOF. + AddFluxesAtDofs(flux_ij, gp, gm); + + // Compute the flux coefficients (aka alphas) into gp and gm. + ComputeFluxCoefficients(us, dus_lo_fct, m, us_min, us_max, gp, gm); + + // Apply the alpha coefficients to get the final solution. + // Update the fluxes for iterative FCT (when iter_cnt > 1). + UpdateSolutionAndFlux(dus_lo_fct, m, gp, gm, flux_ij, d_us); + + ZeroOutEmptyDofs(active_el, active_dofs, d_us); + + dus_lo_fct = d_us; + } + +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check the bounds of the final solution. + const double eps = 1e-12; + Vector us_new(d_us.Size()); + add(1.0, us, dt, d_us, us_new); + for (int k = 0; k < NE; k++) + { + if (active_el[k] == false) { continue; } + + for (int j = 0; j < ndofs; j++) + { + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } + + double s = us_new(dof_id) / u_new(dof_id); + if (s + eps < s_min(dof_id) || + s - eps > s_max(dof_id)) + { + std::cout << "Final s " << j << " " << k << " " + << s_min(dof_id) << " " + << s << " " + << s_max(dof_id) << std::endl; + std::cout << "---\n"; + } + + if (us_new(dof_id) + eps < us_min(dof_id) || + us_new(dof_id) - eps > us_max(dof_id)) + { + std::cout << "Final us " << j << " " << k << " " + << us_min(dof_id) << " " + << us_new(dof_id) << " " + << us_max(dof_id) << std::endl; + std::cout << "---\n"; + } + } + } +#endif +} + +void FluxBasedFCT::ComputeFluxMatrix(const ParGridFunction &u, + const Vector &du_ho, + SparseMatrix &flux_mat) const +{ + const int s = u.Size(); + double *flux_data = flux_mat.HostReadWriteData(); + flux_mat.HostReadI(); flux_mat.HostReadJ(); + const int *K_I = K.HostReadI(), *K_J = K.HostReadJ(); + const double *K_data = K.HostReadData(); + const double *u_np = u.FaceNbrData().HostRead(); + u.HostRead(); + du_ho.HostRead(); + for (int i = 0; i < s; i++) + { + for (int k = K_I[i]; k < K_I[i + 1]; k++) + { + int j = K_J[k]; + if (j <= i) { continue; } + + double kij = K_data[k], kji = K_data[K_smap[k]]; + double dij = max(max(0.0, -kij), -kji); + double u_ij = (j < s) ? u(i) - u(j) + : u(i) - u_np[j - s]; + + flux_data[k] = dt * dij * u_ij; + } + } + + const int NE = pfes.GetMesh()->GetNE(); + const int ndof = s / NE; + Array dofs; + DenseMatrix Mz(ndof); + Vector du_z(ndof); + for (int k = 0; k < NE; k++) + { + pfes.GetElementDofs(k, dofs); + M.GetSubMatrix(dofs, dofs, Mz); + du_ho.GetSubVector(dofs, du_z); + for (int i = 0; i < ndof; i++) + { + int j = 0; + for (; j <= i; j++) { Mz(i, j) = 0.0; } + for (; j < ndof; j++) { Mz(i, j) *= dt * (du_z(i) - du_z(j)); } + } + flux_mat.AddSubMatrix(dofs, dofs, Mz, 0); + } +} + +// Compute sums of incoming fluxes for every DOF. +void FluxBasedFCT::AddFluxesAtDofs(const SparseMatrix &flux_mat, + Vector &flux_pos, Vector &flux_neg) const +{ + const int s = flux_pos.Size(); + const double *flux_data = flux_mat.GetData(); + const int *flux_I = flux_mat.GetI(), *flux_J = flux_mat.GetJ(); + flux_pos = 0.0; + flux_neg = 0.0; + flux_pos.HostReadWrite(); + flux_neg.HostReadWrite(); + for (int i = 0; i < s; i++) + { + for (int k = flux_I[i]; k < flux_I[i + 1]; k++) + { + int j = flux_J[k]; + + // The skipped fluxes will be added when the outer loop is at j as + // the flux matrix is always symmetric. + if (j <= i) { continue; } + + const double f_ij = flux_data[k]; + + if (f_ij >= 0.0) + { + flux_pos(i) += f_ij; + // Modify j if it's on the same MPI task (prevents x2 counting). + if (j < s) { flux_neg(j) -= f_ij; } + } + else + { + flux_neg(i) += f_ij; + // Modify j if it's on the same MPI task (prevents x2 counting). + if (j < s) { flux_pos(j) -= f_ij; } + } + } + } +} + +// Compute the so-called alpha coefficients that scale the fluxes into gp, gm. +void FluxBasedFCT:: +ComputeFluxCoefficients(const Vector &u, const Vector &du_lo, const Vector &m, + const Vector &u_min, const Vector &u_max, + Vector &coeff_pos, Vector &coeff_neg) const +{ + const int s = u.Size(); + for (int i = 0; i < s; i++) + { + const double u_lo = u(i) + dt * du_lo(i); + const double max_pos_diff = max((u_max(i) - u_lo) * m(i), 0.0), + min_neg_diff = min((u_min(i) - u_lo) * m(i), 0.0); + const double sum_pos = coeff_pos(i), sum_neg = coeff_neg(i); + + coeff_pos(i) = (sum_pos > max_pos_diff) ? max_pos_diff / sum_pos : 1.0; + coeff_neg(i) = (sum_neg < min_neg_diff) ? min_neg_diff / sum_neg : 1.0; + } +} + +void FluxBasedFCT:: +UpdateSolutionAndFlux(const Vector &du_lo, const Vector &m, + ParGridFunction &coeff_pos, ParGridFunction &coeff_neg, + SparseMatrix &flux_mat, Vector &du) const +{ + Vector &a_pos_n = coeff_pos.FaceNbrData(); + Vector &a_neg_n = coeff_neg.FaceNbrData(); + coeff_pos.ExchangeFaceNbrData(); + coeff_neg.ExchangeFaceNbrData(); + + du = du_lo; + + coeff_pos.HostReadWrite(); + coeff_neg.HostReadWrite(); + du.HostReadWrite(); + + double *flux_data = flux_mat.HostReadWriteData(); + const int *flux_I = flux_mat.HostReadI(), *flux_J = flux_mat.HostReadJ(); + const int s = du.Size(); + for (int i = 0; i < s; i++) + { + for (int k = flux_I[i]; k < flux_I[i + 1]; k++) + { + int j = flux_J[k]; + if (j <= i) { continue; } + + double fij = flux_data[k], a_ij; + if (fij >= 0.0) + { + a_ij = (j < s) ? min(coeff_pos(i), coeff_neg(j)) + : min(coeff_pos(i), a_neg_n(j - s)); + } + else + { + a_ij = (j < s) ? min(coeff_neg(i), coeff_pos(j)) + : min(coeff_neg(i), a_pos_n(j - s)); + } + fij *= a_ij; + + du(i) += fij / m(i) / dt; + if (j < s) { du(j) -= fij / m(j) / dt; } + + flux_data[k] -= fij; + } + } +} + + +void ClipScaleSolver::CalcFCTSolution(const ParGridFunction &u, const Vector &m, + const Vector &du_ho, const Vector &du_lo, + const Vector &u_min, const Vector &u_max, + Vector &du) const +{ + const int NE = pfes.GetMesh()->GetNE(); + const int nd = pfes.GetFE(0)->GetDof(); + Vector f_clip(nd); + + int dof_id; + double sumPos, sumNeg, u_new_ho, u_new_lo, new_mass, f_clip_min, f_clip_max; + double umin, umax; + const double eps = 1.0e-15; + + // Smoothness indicator. + ParGridFunction si_val; + if (smth_indicator) + { + smth_indicator->ComputeSmoothnessIndicator(u, si_val); + } + + u.HostRead(); + m.HostRead(); + du.HostReadWrite(); + du_lo.HostRead(); du_ho.HostRead(); + for (int k = 0; k < NE; k++) + { + sumPos = sumNeg = 0.0; + + // Clip. + for (int j = 0; j < nd; j++) + { + dof_id = k*nd+j; + + u_new_ho = u(dof_id) + dt * du_ho(dof_id); + u_new_lo = u(dof_id) + dt * du_lo(dof_id); + + umin = u_min(dof_id); + umax = u_max(dof_id); + if (smth_indicator) + { + smth_indicator->UpdateBounds(dof_id, u_new_ho, si_val, umin, umax); + } + + f_clip_min = m(dof_id) / dt * (umin - u_new_lo); + f_clip_max = m(dof_id) / dt * (umax - u_new_lo); + + f_clip(j) = m(dof_id) * (du_ho(dof_id) - du_lo(dof_id)); + f_clip(j) = min(f_clip_max, max(f_clip_min, f_clip(j))); + + sumNeg += min(f_clip(j), 0.0); + sumPos += max(f_clip(j), 0.0); + } + + new_mass = sumNeg + sumPos; + + // Rescale. + for (int j = 0; j < nd; j++) + { + if (new_mass > eps) + { + f_clip(j) = min(0.0, f_clip(j)) - + max(0.0, f_clip(j)) * sumNeg / sumPos; + } + if (new_mass < -eps) + { + f_clip(j) = max(0.0, f_clip(j)) - + min(0.0, f_clip(j)) * sumPos / sumNeg; + } + + // Set du to the discrete time derivative featuring the high order + // anti-diffusive reconstruction that leads to an forward Euler + // updated admissible solution. + dof_id = k*nd+j; + du(dof_id) = du_lo(dof_id) + f_clip(j) / m(dof_id); + } + } +} + +void ClipScaleSolver::CalcFCTProduct(const ParGridFunction &us, const Vector &m, + const Vector &d_us_HO, const Vector &d_us_LO, + Vector &s_min, Vector &s_max, + const Vector &u_new, + const Array &active_el, + const Array &active_dofs, Vector &d_us) +{ + us.HostRead(); + s_min.HostReadWrite(); + s_max.HostReadWrite(); + u_new.HostRead(); + active_el.HostRead(); + active_dofs.HostRead(); + + // Compute a compatible low-order solution. + Vector dus_lo_fct(us.Size()), us_min(us.Size()), us_max(us.Size()); + CalcCompatibleLOProduct(us, m, d_us_HO, s_min, s_max, u_new, + active_el, active_dofs, dus_lo_fct); + ScaleProductBounds(s_min, s_max, u_new, active_el, active_dofs, + us_min, us_max); + + // ClipScale solve for d_us. + CalcFCTSolution(us, m, d_us_HO, dus_lo_fct, us_min, us_max, d_us); + ZeroOutEmptyDofs(active_el, active_dofs, d_us); + +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check the bounds of the final solution. + const int NE = pfes.GetNE(); + const int ndofs = u_new.Size() / NE; + int dof_id; + const double eps = 1e-12; + Vector us_new(d_us.Size()); + add(1.0, us, dt, d_us, us_new); + for (int k = 0; k < NE; k++) + { + if (active_el[k] == false) { continue; } + + for (int j = 0; j < ndofs; j++) + { + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } + + double s = us_new(dof_id) / u_new(dof_id); + if (s + eps < s_min(dof_id) || + s - eps > s_max(dof_id)) + { + std::cout << "Final s " << j << " " << k << " " + << s_min(dof_id) << " " + << s << " " + << s_max(dof_id) << std::endl; + std::cout << "---\n"; + } + + if (us_new(dof_id) + eps < us_min(dof_id) || + us_new(dof_id) - eps > us_max(dof_id)) + { + std::cout << "Final us " << j << " " << k << " " + << us_min(dof_id) << " " + << us_new(dof_id) << " " + << us_max(dof_id) << std::endl; + std::cout << "---\n"; + } + } + } +#endif +} + +void ElementFCTProjection::CalcFCTSolution(const ParGridFunction &u, + const Vector &m, + const Vector &du_HO, + const Vector &du_LO, + const Vector &u_min, + const Vector &u_max, + Vector &du) const +{ + const int NE = pfes.GetMesh()->GetNE(); + const int s = pfes.GetFE(0)->GetDof(); + int dof_id; + + DenseMatrix M(s); + Vector ML(s), rhs(s), beta(s), z(s), u_loc, du_HO_loc, du_LO_loc, du_loc, + du_max_loc(s), du_min_loc(s); + MassIntegrator mass_integ; + + for (int k = 0; k < NE; k++) + { + u_loc.SetDataAndSize(u.GetData() + k*s, s); + du_HO_loc.SetDataAndSize(du_HO.GetData() + k*s, s); + du_LO_loc.SetDataAndSize(du_LO.GetData() + k*s, s); + du_loc.SetDataAndSize(du.GetData() + k*s, s); + + // Local max/min increments. + for (int i = 0; i < s; i++) + { + dof_id = k*s + i; + du_max_loc(i) = (u_max(dof_id) - u(dof_id)) / dt; // positive + du_min_loc(i) = (u_min(dof_id) - u(dof_id)) / dt; // negative + } + + // Construct the local mass matrix. + ElementTransformation *T = pfes.GetMesh()->GetElementTransformation(k); + const FiniteElement *el = pfes.GetFE(k); + mass_integ.AssembleElementMatrix(*el, *T, M); + + M.Mult(du_HO_loc, rhs); + M.GetRowSums(ML); + + for (int i = 0; i < s; i++) + { + // Some different options for beta: + //beta(i) = 1.0; + beta(i) = ML(i); + //beta(i) = Mxy(i); + + // The low order flux correction + z(i) = rhs(i) - ML(i) * du_LO_loc(i); + } + + // Make beta_i sum to 1. + beta /= beta.Sum(); + + DenseMatrix F(s); + for (int i = 1; i < s; i++) + { + for (int j = 0; j < i; j++) + { + F(i, j) = M(i, j) * (du_HO_loc(i) - du_HO_loc(j)) + + (beta(j) * z(i) - beta(i) * z(j)); + } + } + + Vector gp(s), gm(s); + gp = 0.0; + gm = 0.0; + for (int i = 1; i < s; i++) + { + for (int j = 0; j < i; j++) + { + double fij = F(i, j); + if (fij >= 0.0) + { + gp(i) += fij; + gm(j) -= fij; + } + else + { + gm(i) += fij; + gp(j) -= fij; + } + } + } + + du_loc = du_LO_loc; + + for (int i = 0; i < s; i++) + { + double rp = std::max(ML(i) * (du_max_loc(i) - du_loc(i)), 0.0); + double rm = std::min(ML(i) * (du_min_loc(i) - du_loc(i)), 0.0); + double sp = gp(i), sm = gm(i); + + gp(i) = (rp < sp) ? rp / sp : 1.0; + gm(i) = (rm > sm) ? rm / sm : 1.0; + } + + for (int i = 1; i < s; i++) + { + for (int j = 0; j < i; j++) + { + double fij = F(i, j), aij; + + if (fij >= 0.0) + { + aij = std::min(gp(i), gm(j)); + } + else + { + aij = std::min(gm(i), gp(j)); + } + + fij *= aij; + du_loc(i) += fij / ML(i); + du_loc(j) -= fij / ML(j); + } + } + } // element loop +} + +void ElementFCTProjection::CalcFCTProduct(const ParGridFunction &us, const Vector &m, + const Vector &d_us_HO, const Vector &d_us_LO, + Vector &s_min, Vector &s_max, + const Vector &u_new, + const Array &active_el, + const Array &active_dofs, Vector &d_us) +{ + us.HostRead(); + s_min.HostReadWrite(); + s_max.HostReadWrite(); + u_new.HostRead(); + active_el.HostRead(); + active_dofs.HostRead(); + + // Compute a compatible low-order solution. + Vector dus_lo_fct(us.Size()), us_min(us.Size()), us_max(us.Size()); + CalcCompatibleLOProduct(us, m, d_us_HO, s_min, s_max, u_new, + active_el, active_dofs, dus_lo_fct); + ScaleProductBounds(s_min, s_max, u_new, active_el, active_dofs, + us_min, us_max); + + // ClipScale solve for d_us. + CalcFCTSolution(us, m, d_us_HO, dus_lo_fct, us_min, us_max, d_us); + ZeroOutEmptyDofs(active_el, active_dofs, d_us); +} + +void NonlinearPenaltySolver::CalcFCTSolution(const ParGridFunction &u, + const Vector &m, + const Vector &du_ho, + const Vector &du_lo, + const Vector &u_min, + const Vector &u_max, + Vector &du) const +{ + const int size = u.Size(); + Vector du_ho_star(size); + + double umin, umax; + + u.HostRead(); m.HostRead(); + du_ho.HostRead(); du_lo.HostRead(); + u_min.HostRead(); u_max.HostRead(); + du.HostReadWrite(); + + // Smoothness indicator. + ParGridFunction si_val; + if (smth_indicator) + { + smth_indicator->ComputeSmoothnessIndicator(u, si_val); + } + + // Clipped flux. + for (int i = 0; i < size; i++) + { + umin = u_min(i); + umax = u_max(i); + if (smth_indicator) + { + smth_indicator->UpdateBounds(i, u(i) + dt * du_ho(i), + si_val, umin, umax); + } + + // Note that this uses u(i) at the old time. + du_ho_star(i) = min( (umax - u(i)) / dt, + max(du_ho(i), (umin - u(i)) / dt) ); + } + + // Non-conservative fluxes. + Vector fL(size), fH(size); + for (int i = 0; i < size; i++) + { + fL(i) = m(i) * (du_ho_star(i) - du_lo(i)); + fH(i) = m(i) * (du_ho_star(i) - du_ho(i)); + } + + // Restore conservation. + Vector flux_correction(size); + CorrectFlux(fL, fH, flux_correction); + + for (int i = 0; i < size; i++) + { + fL(i) += flux_correction(i); + } + + for (int i = 0; i < size; i++) + { + du(i) = du_lo(i) + fL(i) / m(i); + } +} + +void get_z(double lambda, const Vector &w, const Vector &flux, Vector &zz) +{ + for (int j=0; j= lambda*abs(w(j))) ? lambda * w(j) : flux(j); + } +} + +double get_lambda_times_sum_z(double lambda, + const Vector &w, const Vector &fluxL) +{ + double lambda_times_z; + double lambda_times_sum_z = 0.0; + for (int j=0; j= lambda*abs(w(j))) + ? lambda * w(j) : fluxL(j); + + lambda_times_sum_z += lambda_times_z; + } + return lambda_times_sum_z; +} + +void get_lambda(double delta, const Vector &w, + const Vector &fluxL, Vector &zz) +{ + // solve nonlinearity F(lambda)=0 + double tol=1e-15; + double lambdaLower=0., lambdaUpper = 0.; + double FLower=0., FUpper=0.; + + // compute starting F and check F at the min (lambda = 0). + double lambda = 1.0; + double F = delta - get_lambda_times_sum_z(lambda, w, fluxL); + double F0 = delta-get_lambda_times_sum_z(0.0, w, fluxL); + if (abs(F) <= tol) + { + get_z(lambda, w, fluxL, zz); + } + else if (abs(F0)<=tol) + { + get_z(0.0, w, fluxL, zz); + } + + // solve non-linearity + // Get lambda values that give Fs on both sides of the zero. + double factor = 1.0; + do + { + factor *= 2.0; + lambdaLower = lambda/factor; + lambdaUpper = factor*lambda; + FLower = delta - get_lambda_times_sum_z(lambdaLower,w,fluxL); + FUpper = delta - get_lambda_times_sum_z(lambdaUpper,w,fluxL); + } + while (F*FLower > 0 && F*FUpper > 0); + + // check if either of lambdaLower or lambdaUpper hit the solution + if (FLower==0.0) + { + get_z(lambdaLower, w, fluxL, zz); + } + else if (FUpper==0.0) + { + get_z(lambdaUpper, w, fluxL, zz); + } + + // get STARTING lower and upper bounds for lambda + if (F*FLower < 0) + { + lambdaUpper = lambda; + } + else + { + lambdaLower = lambda; + } + + // get STARTING lower and upper bounds on F + FLower = delta - get_lambda_times_sum_z(lambdaLower,w,fluxL); + FUpper = delta - get_lambda_times_sum_z(lambdaUpper,w,fluxL); + + do + { + // compute new lambda and new F + lambda = 0.5*(lambdaLower+lambdaUpper); + F = delta - get_lambda_times_sum_z(lambda,w,fluxL); + if (F*FLower < 0) + { + lambdaUpper = lambda; + FUpper = F; + } + else + { + lambdaLower = lambda; + FLower = F; + } + } + while (abs(F)>tol); + + lambda = 0.5*(lambdaLower+lambdaUpper); + get_z(lambda, w, fluxL, zz); +} + +void NonlinearPenaltySolver::CorrectFlux(Vector &fluxL, Vector &fluxH, + Vector &flux_fix) const +{ + // This consider any definition of wi. If a violation on MPP is created, + // then wi is s.t. fi=0. + // The idea is to relax the penalization wi in favor of MPP + const int num_cells = pfes.GetNE(); + const int xd = pfes.GetFE(0)->GetDof(); + + Array ldofs; + Vector fluxL_z(xd), fluxH_z(xd), flux_correction_z(xd); + for (int i = 0; i < num_cells; i++) + { + pfes.GetElementDofs(i, ldofs); + fluxL.GetSubVector(ldofs, fluxL_z); + fluxH.GetSubVector(ldofs, fluxH_z); + + double fp = 0.0, fn = 0.0; + for (int j = 0; j < xd; j++) + { + if (fluxL_z(j) >= 0.0) { fp += fluxL_z(j); } + else { fn += fluxL_z(j); } + } + + double delta = fp + fn; + + if (delta == 0.0) + { + flux_correction_z = 0.0; + flux_fix.SetSubVector(ldofs, flux_correction_z); + continue; + } + + // compute penalization terms wi's as desired + Vector w(xd); + const double eps = pfes.GetMesh()->GetElementSize(0,0) / pfes.GetOrder(0); + for (int j = 0; j < xd; j++) + { + if (delta > 0.0) + { + w(j) = (fluxL_z(j) > 0.0) + ? eps * abs(fluxL_z(j)) + abs(get_max_on_cellNi(fluxH_z)) + : 0.0; + } + else + { + w(j) = (fluxL_z(j) < 0.0) + ? - eps * abs(fluxL_z(j)) - abs(get_max_on_cellNi(fluxH_z)) + : 0.0; + } + } + + // compute lambda and the flux correction. + get_lambda(delta, w, fluxL_z, flux_correction_z); + flux_correction_z.Neg(); + + flux_fix.SetSubVector(ldofs, flux_correction_z); + } +} + +double NonlinearPenaltySolver::get_max_on_cellNi(Vector &fluxH) const +{ + double MAX = -1.0; + for (int i = 0; i < fluxH.Size(); i++) + { + MAX = max(fabs(fluxH(i)), MAX); + } + return MAX; +} + +} // namespace mfem From 47ec6d853a89463811579a59c94f645d877d64fb Mon Sep 17 00:00:00 2001 From: august-knox Date: Mon, 6 Jan 2025 16:04:42 -0800 Subject: [PATCH 19/24] replacing remhos_fct.cpp --- remhos_fct.cpp | 185 +++++++++++++++++++++---------------------------- 1 file changed, 78 insertions(+), 107 deletions(-) diff --git a/remhos_fct.cpp b/remhos_fct.cpp index be2454b..53dc595 100644 --- a/remhos_fct.cpp +++ b/remhos_fct.cpp @@ -81,23 +81,24 @@ void FCTSolver::CalcCompatibleLOProduct(const ParGridFunction &us, if (s_avg > smax && mass_us - eps < smax * mass_u) { s_avg = smax; } -#ifdef REMHOS_FCT_PRODUCT_DEBUG - // Check if s_avg = mass_us / mass_u is within the bounds of the full - // stencil of active dofs. - if (mass_us + eps < smin * mass_u || - mass_us - eps > smax * mass_u || - s_avg + eps < smin || - s_avg - eps > smax) + if (verify_bounds) { - std::cout << "---\ns_avg element bounds: " - << smin << " " << s_avg << " " << smax << std::endl; - std::cout << "Element " << k << std::endl; - std::cout << "Masses " << mass_us << " " << mass_u << std::endl; - PrintCellValues(k, NE, u_new, "u_loc: "); + // Check if s_avg = mass_us / mass_u is within the bounds of the + // full stencil of active dofs. + if (mass_us + eps < smin * mass_u || + mass_us - eps > smax * mass_u || + s_avg + eps < smin || + s_avg - eps > smax) + { + std::cout << "---\ns_avg element bounds: " + << smin << " " << s_avg << " " << smax << std::endl; + std::cout << "Element " << k << std::endl; + std::cout << "Masses " << mass_us << " " << mass_u << std::endl; + PrintCellValues(k, NE, u_new, "u_loc: "); - MFEM_ABORT("s_avg is not in the full stencil bounds!"); + MFEM_ABORT("s_avg is not in the full stencil bounds!"); + } } -#endif // When s_avg is not in the local bounds for some dof (it should be // within the full stencil of active dofs), reset the bounds to s_avg. @@ -114,32 +115,6 @@ void FCTSolver::CalcCompatibleLOProduct(const ParGridFunction &us, dof_id = k*ndofs + j; d_us_LO_new(dof_id) = (u_new(dof_id) * s_avg - us(dof_id)) / dt; } - -#ifdef REMHOS_FCT_PRODUCT_DEBUG - // Check the LO product solution. - double us_min, us_max; - for (int j = 0; j < ndofs; j++) - { - dof_id = k*ndofs + j; - if (active_dofs[dof_id] == false) { continue; } - - us_min = s_min_loc(j) * u_new(dof_id); - us_max = s_max_loc(j) * u_new(dof_id); - - if (s_avg * u_new(dof_id) + eps < us_min || - s_avg * u_new(dof_id) - eps > us_max) - { - std::cout << "---\ns_avg * u: " << k << " " - << us_min << " " - << s_avg * u_new(dof_id) << " " - << us_max << endl - << u_new(dof_id) << " " << s_avg << endl - << s_min_loc(j) << " " << s_max_loc(j) << "\n---\n"; - - MFEM_ABORT("s_avg * u not in bounds"); - } - } -#endif } } @@ -286,43 +261,35 @@ void FluxBasedFCT::CalcFCTProduct(const ParGridFunction &us, const Vector &m, dus_lo_fct = d_us; } -#ifdef REMHOS_FCT_PRODUCT_DEBUG - // Check the bounds of the final solution. - const double eps = 1e-12; - Vector us_new(d_us.Size()); - add(1.0, us, dt, d_us, us_new); - for (int k = 0; k < NE; k++) + if (verify_bounds) { - if (active_el[k] == false) { continue; } - - for (int j = 0; j < ndofs; j++) + // Check the bounds of the final solution. + const double eps = 1e-12; + Vector us_new(d_us.Size()); + add(1.0, us, dt, d_us, us_new); + for (int k = 0; k < NE; k++) { - dof_id = k*ndofs + j; - if (active_dofs[dof_id] == false) { continue; } + if (active_el[k] == false) { continue; } - double s = us_new(dof_id) / u_new(dof_id); - if (s + eps < s_min(dof_id) || - s - eps > s_max(dof_id)) + for (int j = 0; j < ndofs; j++) { - std::cout << "Final s " << j << " " << k << " " - << s_min(dof_id) << " " - << s << " " - << s_max(dof_id) << std::endl; - std::cout << "---\n"; - } + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } - if (us_new(dof_id) + eps < us_min(dof_id) || - us_new(dof_id) - eps > us_max(dof_id)) - { - std::cout << "Final us " << j << " " << k << " " - << us_min(dof_id) << " " - << us_new(dof_id) << " " - << us_max(dof_id) << std::endl; - std::cout << "---\n"; + if (us_new(dof_id) + eps < us_min(dof_id) || + us_new(dof_id) - eps > us_max(dof_id)) + { + std::cout << "Final us " << j << " " << k << " " + << us_min(dof_id) << " " + << us_new(dof_id) << " " + << us_max(dof_id) << std::endl; + std::cout << "---\n"; + + MFEM_ABORT("us not in bounds after FCT."); + } } } } -#endif } void FluxBasedFCT::ComputeFluxMatrix(const ParGridFunction &u, @@ -583,46 +550,49 @@ void ClipScaleSolver::CalcFCTProduct(const ParGridFunction &us, const Vector &m, CalcFCTSolution(us, m, d_us_HO, dus_lo_fct, us_min, us_max, d_us); ZeroOutEmptyDofs(active_el, active_dofs, d_us); -#ifdef REMHOS_FCT_PRODUCT_DEBUG - // Check the bounds of the final solution. - const int NE = pfes.GetNE(); - const int ndofs = u_new.Size() / NE; - int dof_id; - const double eps = 1e-12; - Vector us_new(d_us.Size()); - add(1.0, us, dt, d_us, us_new); - for (int k = 0; k < NE; k++) + if (verify_bounds) { - if (active_el[k] == false) { continue; } - - for (int j = 0; j < ndofs; j++) + // Check the bounds of the final solution. + const int NE = pfes.GetNE(); + const int ndofs = u_new.Size() / NE; + int dof_id; + const double eps = 1e-12; + Vector us_new(d_us.Size()); + add(1.0, us, dt, d_us, us_new); + for (int k = 0; k < NE; k++) { - dof_id = k*ndofs + j; - if (active_dofs[dof_id] == false) { continue; } + if (active_el[k] == false) { continue; } - double s = us_new(dof_id) / u_new(dof_id); - if (s + eps < s_min(dof_id) || - s - eps > s_max(dof_id)) + for (int j = 0; j < ndofs; j++) { - std::cout << "Final s " << j << " " << k << " " - << s_min(dof_id) << " " - << s << " " - << s_max(dof_id) << std::endl; - std::cout << "---\n"; - } + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } - if (us_new(dof_id) + eps < us_min(dof_id) || - us_new(dof_id) - eps > us_max(dof_id)) - { - std::cout << "Final us " << j << " " << k << " " - << us_min(dof_id) << " " - << us_new(dof_id) << " " - << us_max(dof_id) << std::endl; - std::cout << "---\n"; + /* // this doesn't check round-offs in the division. + double s = us_new(dof_id) / u_new(dof_id); + if (s + eps < s_min(dof_id) || + s - eps > s_max(dof_id)) + { + std::cout << "Final s " << j << " " << k << " " + << s_min(dof_id) << " " + << s << " " + << s_max(dof_id) << std::endl; + std::cout << "---\n"; + }*/ + + if (us_new(dof_id) + eps < us_min(dof_id) || + us_new(dof_id) - eps > us_max(dof_id)) + { + std::cout << "Final us " << j << " " << k << " " + << us_min(dof_id) << " " + << us_new(dof_id) << " " + << us_max(dof_id) << std::endl; + std::cout << "---\n"; + MFEM_ABORT("Bounds violation FCT us."); + } } } } -#endif } void ElementFCTProjection::CalcFCTSolution(const ParGridFunction &u, @@ -745,12 +715,13 @@ void ElementFCTProjection::CalcFCTSolution(const ParGridFunction &u, } // element loop } -void ElementFCTProjection::CalcFCTProduct(const ParGridFunction &us, const Vector &m, - const Vector &d_us_HO, const Vector &d_us_LO, - Vector &s_min, Vector &s_max, - const Vector &u_new, - const Array &active_el, - const Array &active_dofs, Vector &d_us) +void ElementFCTProjection::CalcFCTProduct(const ParGridFunction &us, + const Vector &m, + const Vector &d_us_HO, const Vector &d_us_LO, + Vector &s_min, Vector &s_max, + const Vector &u_new, + const Array &active_el, + const Array &active_dofs, Vector &d_us) { us.HostRead(); s_min.HostReadWrite(); From 28c7b8c7d628121c512bff306231bd11b912d1ff Mon Sep 17 00:00:00 2001 From: august-knox Date: Mon, 6 Jan 2025 16:15:18 -0800 Subject: [PATCH 20/24] upstream remhos_fct.cpp --- remhos_fct.cpp | 255 +++++++++++++++++++++++++++++-------------------- 1 file changed, 150 insertions(+), 105 deletions(-) diff --git a/remhos_fct.cpp b/remhos_fct.cpp index 53dc595..9621e44 100644 --- a/remhos_fct.cpp +++ b/remhos_fct.cpp @@ -56,8 +56,8 @@ void FCTSolver::CalcCompatibleLOProduct(const ParGridFunction &us, double s_avg = mass_us / mass_u; // Min and max of s using the full stencil of active dofs. - s_min_loc.SetDataAndSize(s_min.GetData() + k*ndofs, ndofs); - s_max_loc.SetDataAndSize(s_max.GetData() + k*ndofs, ndofs); + s_min_loc.SetDataAndSize(s_min.HostReadWrite() + k*ndofs, ndofs); + s_max_loc.SetDataAndSize(s_max.HostReadWrite() + k*ndofs, ndofs); double smin = numeric_limits::infinity(), smax = -numeric_limits::infinity(); for (int j = 0; j < ndofs; j++) @@ -81,24 +81,23 @@ void FCTSolver::CalcCompatibleLOProduct(const ParGridFunction &us, if (s_avg > smax && mass_us - eps < smax * mass_u) { s_avg = smax; } - if (verify_bounds) +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check if s_avg = mass_us / mass_u is within the bounds of the full + // stencil of active dofs. + if (mass_us + eps < smin * mass_u || + mass_us - eps > smax * mass_u || + s_avg + eps < smin || + s_avg - eps > smax) { - // Check if s_avg = mass_us / mass_u is within the bounds of the - // full stencil of active dofs. - if (mass_us + eps < smin * mass_u || - mass_us - eps > smax * mass_u || - s_avg + eps < smin || - s_avg - eps > smax) - { - std::cout << "---\ns_avg element bounds: " - << smin << " " << s_avg << " " << smax << std::endl; - std::cout << "Element " << k << std::endl; - std::cout << "Masses " << mass_us << " " << mass_u << std::endl; - PrintCellValues(k, NE, u_new, "u_loc: "); + std::cout << "---\ns_avg element bounds: " + << smin << " " << s_avg << " " << smax << std::endl; + std::cout << "Element " << k << std::endl; + std::cout << "Masses " << mass_us << " " << mass_u << std::endl; + PrintCellValues(k, NE, u_new, "u_loc: "); - MFEM_ABORT("s_avg is not in the full stencil bounds!"); - } + MFEM_ABORT("s_avg is not in the full stencil bounds!"); } +#endif // When s_avg is not in the local bounds for some dof (it should be // within the full stencil of active dofs), reset the bounds to s_avg. @@ -115,6 +114,32 @@ void FCTSolver::CalcCompatibleLOProduct(const ParGridFunction &us, dof_id = k*ndofs + j; d_us_LO_new(dof_id) = (u_new(dof_id) * s_avg - us(dof_id)) / dt; } + +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check the LO product solution. + double us_min, us_max; + for (int j = 0; j < ndofs; j++) + { + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } + + us_min = s_min_loc(j) * u_new(dof_id); + us_max = s_max_loc(j) * u_new(dof_id); + + if (s_avg * u_new(dof_id) + eps < us_min || + s_avg * u_new(dof_id) - eps > us_max) + { + std::cout << "---\ns_avg * u: " << k << " " + << us_min << " " + << s_avg * u_new(dof_id) << " " + << us_max << endl + << u_new(dof_id) << " " << s_avg << endl + << s_min_loc(j) << " " << s_max_loc(j) << "\n---\n"; + + MFEM_ABORT("s_avg * u not in bounds"); + } + } +#endif } } @@ -261,35 +286,43 @@ void FluxBasedFCT::CalcFCTProduct(const ParGridFunction &us, const Vector &m, dus_lo_fct = d_us; } - if (verify_bounds) +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check the bounds of the final solution. + const double eps = 1e-12; + Vector us_new(d_us.Size()); + add(1.0, us, dt, d_us, us_new); + for (int k = 0; k < NE; k++) { - // Check the bounds of the final solution. - const double eps = 1e-12; - Vector us_new(d_us.Size()); - add(1.0, us, dt, d_us, us_new); - for (int k = 0; k < NE; k++) + if (active_el[k] == false) { continue; } + + for (int j = 0; j < ndofs; j++) { - if (active_el[k] == false) { continue; } + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } - for (int j = 0; j < ndofs; j++) + double s = us_new(dof_id) / u_new(dof_id); + if (s + eps < s_min(dof_id) || + s - eps > s_max(dof_id)) { - dof_id = k*ndofs + j; - if (active_dofs[dof_id] == false) { continue; } - - if (us_new(dof_id) + eps < us_min(dof_id) || - us_new(dof_id) - eps > us_max(dof_id)) - { - std::cout << "Final us " << j << " " << k << " " - << us_min(dof_id) << " " - << us_new(dof_id) << " " - << us_max(dof_id) << std::endl; - std::cout << "---\n"; + std::cout << "Final s " << j << " " << k << " " + << s_min(dof_id) << " " + << s << " " + << s_max(dof_id) << std::endl; + std::cout << "---\n"; + } - MFEM_ABORT("us not in bounds after FCT."); - } + if (us_new(dof_id) + eps < us_min(dof_id) || + us_new(dof_id) - eps > us_max(dof_id)) + { + std::cout << "Final us " << j << " " << k << " " + << us_min(dof_id) << " " + << us_new(dof_id) << " " + << us_max(dof_id) << std::endl; + std::cout << "---\n"; } } } +#endif } void FluxBasedFCT::ComputeFluxMatrix(const ParGridFunction &u, @@ -451,78 +484,93 @@ void ClipScaleSolver::CalcFCTSolution(const ParGridFunction &u, const Vector &m, const Vector &u_min, const Vector &u_max, Vector &du) const { + timer->sw_FCT.Start(); + const int NE = pfes.GetMesh()->GetNE(); const int nd = pfes.GetFE(0)->GetDof(); - Vector f_clip(nd); - - int dof_id; - double sumPos, sumNeg, u_new_ho, u_new_lo, new_mass, f_clip_min, f_clip_max; - double umin, umax; - const double eps = 1.0e-15; + Vector f_clip(nd*NE); // Smoothness indicator. ParGridFunction si_val; + if (smth_indicator) { + MFEM_ABORT("smth_indicator not supported in kernel!"); smth_indicator->ComputeSmoothnessIndicator(u, si_val); } - u.HostRead(); - m.HostRead(); - du.HostReadWrite(); - du_lo.HostRead(); du_ho.HostRead(); - for (int k = 0; k < NE; k++) + const auto N4t = dt; + + const auto U = mfem::Reshape(u.Read(), NE*nd); + const auto M = mfem::Reshape(m.Read(), NE*nd); + const auto DU_LO = mfem::Reshape(du_lo.Read(), NE*nd); + const auto DU_HO = mfem::Reshape(du_ho.Read(), NE*nd); + const auto U_MIN = mfem::Reshape(u_min.Read(), NE*nd); + const auto U_MAX = mfem::Reshape(u_max.Read(), NE*nd); + + auto F_CLIP = mfem::Reshape(f_clip.Write(), NE, nd); + auto DU = mfem::Reshape(du.Write(), NE*nd); + + const bool update_bounds = smth_indicator != nullptr; + if (update_bounds) MFEM_ABORT("UpdateBounds not ported to device") + + mfem::forall(NE, [=] MFEM_HOST_DEVICE (int k) { - sumPos = sumNeg = 0.0; + constexpr auto eps = 1.0e-15; + double sumPos = 0.0, sumNeg = 0.0; // Clip. for (int j = 0; j < nd; j++) { - dof_id = k*nd+j; + const int dof_id = k*nd+j; + + const auto u_new_lo = U(dof_id) + N4t * DU_LO(dof_id); - u_new_ho = u(dof_id) + dt * du_ho(dof_id); - u_new_lo = u(dof_id) + dt * du_lo(dof_id); + auto umin = U_MIN(dof_id); + auto umax = U_MAX(dof_id); - umin = u_min(dof_id); - umax = u_max(dof_id); - if (smth_indicator) +#if !defined(MFEM_USE_CUDA) && !defined(MFEM_USE_HIP) + if (update_bounds) { + const auto u_new_ho = U(dof_id) + N4t * DU_HO(dof_id); smth_indicator->UpdateBounds(dof_id, u_new_ho, si_val, umin, umax); } +#endif - f_clip_min = m(dof_id) / dt * (umin - u_new_lo); - f_clip_max = m(dof_id) / dt * (umax - u_new_lo); + const auto f_clip_min = M(dof_id) / N4t * (umin - u_new_lo); + const auto f_clip_max = M(dof_id) / N4t * (umax - u_new_lo); - f_clip(j) = m(dof_id) * (du_ho(dof_id) - du_lo(dof_id)); - f_clip(j) = min(f_clip_max, max(f_clip_min, f_clip(j))); + F_CLIP(k,j) = M(dof_id) * (DU_HO(dof_id) - DU_LO(dof_id)); + F_CLIP(k,j) = fmin(f_clip_max, fmax(f_clip_min, F_CLIP(k,j))); - sumNeg += min(f_clip(j), 0.0); - sumPos += max(f_clip(j), 0.0); + sumNeg += fmin(F_CLIP(k,j), 0.0); + sumPos += fmax(F_CLIP(k,j), 0.0); } - new_mass = sumNeg + sumPos; + double new_mass = sumNeg + sumPos; // Rescale. for (int j = 0; j < nd; j++) { if (new_mass > eps) { - f_clip(j) = min(0.0, f_clip(j)) - - max(0.0, f_clip(j)) * sumNeg / sumPos; + F_CLIP(k,j) = fmin(0.0, F_CLIP(k,j)) - + fmax(0.0, F_CLIP(k,j)) * sumNeg / sumPos; } if (new_mass < -eps) { - f_clip(j) = max(0.0, f_clip(j)) - - min(0.0, f_clip(j)) * sumPos / sumNeg; + F_CLIP(k,j) = fmax(0.0, F_CLIP(k,j)) - + fmin(0.0, F_CLIP(k,j)) * sumPos / sumNeg; } // Set du to the discrete time derivative featuring the high order // anti-diffusive reconstruction that leads to an forward Euler // updated admissible solution. - dof_id = k*nd+j; - du(dof_id) = du_lo(dof_id) + f_clip(j) / m(dof_id); + const int dof_id = k*nd+j; + DU(dof_id) = DU_LO(dof_id) + F_CLIP(k,j) / M(dof_id); } - } + }); + timer->sw_FCT.Stop(); } void ClipScaleSolver::CalcFCTProduct(const ParGridFunction &us, const Vector &m, @@ -550,49 +598,46 @@ void ClipScaleSolver::CalcFCTProduct(const ParGridFunction &us, const Vector &m, CalcFCTSolution(us, m, d_us_HO, dus_lo_fct, us_min, us_max, d_us); ZeroOutEmptyDofs(active_el, active_dofs, d_us); - if (verify_bounds) +#ifdef REMHOS_FCT_PRODUCT_DEBUG + // Check the bounds of the final solution. + const int NE = pfes.GetNE(); + const int ndofs = u_new.Size() / NE; + int dof_id; + const double eps = 1e-12; + Vector us_new(d_us.Size()); + add(1.0, us, dt, d_us, us_new); + for (int k = 0; k < NE; k++) { - // Check the bounds of the final solution. - const int NE = pfes.GetNE(); - const int ndofs = u_new.Size() / NE; - int dof_id; - const double eps = 1e-12; - Vector us_new(d_us.Size()); - add(1.0, us, dt, d_us, us_new); - for (int k = 0; k < NE; k++) + if (active_el[k] == false) { continue; } + + for (int j = 0; j < ndofs; j++) { - if (active_el[k] == false) { continue; } + dof_id = k*ndofs + j; + if (active_dofs[dof_id] == false) { continue; } - for (int j = 0; j < ndofs; j++) + double s = us_new(dof_id) / u_new(dof_id); + if (s + eps < s_min(dof_id) || + s - eps > s_max(dof_id)) { - dof_id = k*ndofs + j; - if (active_dofs[dof_id] == false) { continue; } + std::cout << "Final s " << j << " " << k << " " + << s_min(dof_id) << " " + << s << " " + << s_max(dof_id) << std::endl; + std::cout << "---\n"; + } - /* // this doesn't check round-offs in the division. - double s = us_new(dof_id) / u_new(dof_id); - if (s + eps < s_min(dof_id) || - s - eps > s_max(dof_id)) - { - std::cout << "Final s " << j << " " << k << " " - << s_min(dof_id) << " " - << s << " " - << s_max(dof_id) << std::endl; - std::cout << "---\n"; - }*/ - - if (us_new(dof_id) + eps < us_min(dof_id) || - us_new(dof_id) - eps > us_max(dof_id)) - { - std::cout << "Final us " << j << " " << k << " " - << us_min(dof_id) << " " - << us_new(dof_id) << " " - << us_max(dof_id) << std::endl; - std::cout << "---\n"; - MFEM_ABORT("Bounds violation FCT us."); - } + if (us_new(dof_id) + eps < us_min(dof_id) || + us_new(dof_id) - eps > us_max(dof_id)) + { + std::cout << "Final us " << j << " " << k << " " + << us_min(dof_id) << " " + << us_new(dof_id) << " " + << us_max(dof_id) << std::endl; + std::cout << "---\n"; } } } +#endif } void ElementFCTProjection::CalcFCTSolution(const ParGridFunction &u, From 4a89f5abd3c44a0b2cb91c88a8d7acbf2aa7cb38 Mon Sep 17 00:00:00 2001 From: august-knox Date: Tue, 7 Jan 2025 09:08:15 -0800 Subject: [PATCH 21/24] making caliper option clearer --- makefile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/makefile b/makefile index af825f5..72a41df 100644 --- a/makefile +++ b/makefile @@ -64,11 +64,11 @@ TEST_MK = $(MFEM_DIR)/config/test.mk MFEM_DIR1 := $(MFEM_DIR) MFEM_DIR2 := $(realpath $(MFEM_DIR)) -# Caliper/Adiak flags +# Use Caliper annotations -#CALIPER_DIR = $(spack location --install-dir caliper) -#ADIAK_DIR = $(spack location --install-dir adiak) -ifdef CALIPER_DIR +ifdef -DUSE_CALIPER +CALIPER_DIR = $(spack location --install-dir caliper) +ADIAK_DIR = $(spack location --install-dir adiak) CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER ADIAK_INCLUDE = -I${ADIAK_DIR}/include ADIAK_LDFLAGS = -L${ADIAK_DIR}/lib -ladiak From 61aeca020b96375adfa6354d5b39308f710d7e40 Mon Sep 17 00:00:00 2001 From: august-knox Date: Tue, 7 Jan 2025 09:16:48 -0800 Subject: [PATCH 22/24] changing use_caliper syntax --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index 72a41df..a1865f2 100644 --- a/makefile +++ b/makefile @@ -66,7 +66,7 @@ MFEM_DIR2 := $(realpath $(MFEM_DIR)) # Use Caliper annotations -ifdef -DUSE_CALIPER +ifdef USE_CALIPER CALIPER_DIR = $(spack location --install-dir caliper) ADIAK_DIR = $(spack location --install-dir adiak) CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER From 12b052be24a5aa9c7765b0695d385602bd2df617 Mon Sep 17 00:00:00 2001 From: august-knox Date: Tue, 7 Jan 2025 09:29:12 -0800 Subject: [PATCH 23/24] returning to original method --- makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/makefile b/makefile index a1865f2..1434602 100644 --- a/makefile +++ b/makefile @@ -66,7 +66,7 @@ MFEM_DIR2 := $(realpath $(MFEM_DIR)) # Use Caliper annotations -ifdef USE_CALIPER +ifdef CALIPER_DIR CALIPER_DIR = $(spack location --install-dir caliper) ADIAK_DIR = $(spack location --install-dir adiak) CALIPER_FLAGS = -I${CALIPER_DIR}/include -DUSE_CALIPER From 777013652ac1711d360a8431edc8232beb21e63c Mon Sep 17 00:00:00 2001 From: august-knox <112430443+august-knox@users.noreply.github.com> Date: Tue, 7 Jan 2025 10:02:31 -0800 Subject: [PATCH 24/24] Update remhos_fct.cpp adding delta symbols back --- remhos_fct.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/remhos_fct.cpp b/remhos_fct.cpp index 9621e44..d3cb1cc 100644 --- a/remhos_fct.cpp +++ b/remhos_fct.cpp @@ -499,7 +499,7 @@ void ClipScaleSolver::CalcFCTSolution(const ParGridFunction &u, const Vector &m, smth_indicator->ComputeSmoothnessIndicator(u, si_val); } - const auto N4t = dt; + const auto δt = dt; const auto U = mfem::Reshape(u.Read(), NE*nd); const auto M = mfem::Reshape(m.Read(), NE*nd); @@ -524,7 +524,7 @@ void ClipScaleSolver::CalcFCTSolution(const ParGridFunction &u, const Vector &m, { const int dof_id = k*nd+j; - const auto u_new_lo = U(dof_id) + N4t * DU_LO(dof_id); + const auto u_new_lo = U(dof_id) + δt * DU_LO(dof_id); auto umin = U_MIN(dof_id); auto umax = U_MAX(dof_id); @@ -532,13 +532,13 @@ void ClipScaleSolver::CalcFCTSolution(const ParGridFunction &u, const Vector &m, #if !defined(MFEM_USE_CUDA) && !defined(MFEM_USE_HIP) if (update_bounds) { - const auto u_new_ho = U(dof_id) + N4t * DU_HO(dof_id); + const auto u_new_ho = U(dof_id) + δt * DU_HO(dof_id); smth_indicator->UpdateBounds(dof_id, u_new_ho, si_val, umin, umax); } #endif - const auto f_clip_min = M(dof_id) / N4t * (umin - u_new_lo); - const auto f_clip_max = M(dof_id) / N4t * (umax - u_new_lo); + const auto f_clip_min = M(dof_id) / δt * (umin - u_new_lo); + const auto f_clip_max = M(dof_id) / δt * (umax - u_new_lo); F_CLIP(k,j) = M(dof_id) * (DU_HO(dof_id) - DU_LO(dof_id)); F_CLIP(k,j) = fmin(f_clip_max, fmax(f_clip_min, F_CLIP(k,j)));