From ef0678d83d5426d42f5e0e18ce00b928a5cb16a6 Mon Sep 17 00:00:00 2001
From: Vladimir Z Tomov
Date: Tue, 3 Dec 2024 11:57:04 -0800
Subject: [PATCH] Automatic creation of well-partitioned meshes based on
 #tasks and #elements-per-task.

---
 remhos.cpp       |  51 ++++++++++++++------
 remhos_tools.cpp | 121 +++++++++++++++++++++++++++++++++++++++++++++++
 remhos_tools.hpp |   3 ++
 3 files changed, 161 insertions(+), 14 deletions(-)

diff --git a/remhos.cpp b/remhos.cpp
index 7e157db..cd9c4ca 100644
--- a/remhos.cpp
+++ b/remhos.cpp
@@ -157,7 +157,9 @@ MFEM_EXPORT int remhos(int argc, char *argv[], double &final_mass_u)
    mfem::MPI_Session mpi(argc, argv);
    const int myid = mpi.WorldRank();
 
-   const char *mesh_file = "data/periodic-square.mesh";
+   const char *mesh_file = "default";
+   int dim = 3;
+   int elem_per_mpi = 1;
    int rs_levels = 2;
    int rp_levels = 0;
    int order = 3;
@@ -189,6 +191,9 @@ MFEM_EXPORT int remhos(int argc, char *argv[], double &final_mass_u)
    OptionsParser args(argc, argv);
    args.AddOption(&mesh_file, "-m", "--mesh",
                   "Mesh file to use.");
+   args.AddOption(&dim, "-dim", "--dimension", "Dimension of the problem.");
+   args.AddOption(&elem_per_mpi, "-epm", "--elem-per-mpi",
+                  "Number of elements per MPI task.");
    args.AddOption(&problem_num, "-p", "--problem",
                   "Problem setup to use. See options in velocity_function().");
    args.AddOption(&rs_levels, "-rs", "--refine-serial",
@@ -282,29 +287,47 @@
    else if (problem_num < 20) { exec_mode = 1; }
    else { MFEM_ABORT("Unspecified execution mode."); }
 
-   // Read the serial mesh from the given mesh file on all processors.
-   // Refine the mesh in serial to increase the resolution.
-   Mesh *mesh = new Mesh(Mesh::LoadFromFile(mesh_file, 1, 1));
-   const int dim = mesh->Dimension();
-   for (int lev = 0; lev < rs_levels; lev++) { mesh->UniformRefinement(); }
-   mesh->GetBoundingBox(bb_min, bb_max, max(order, 1));
-
-   // Only standard assembly in 1D (some mfem functions just abort in 1D).
-   if ((pa || next_gen_full) && dim == 1)
+   Mesh *mesh = nullptr;
+   int *mpi_partitioning = nullptr;
+   if (strncmp(mesh_file, "default", 7) != 0)
    {
-      MFEM_WARNING("Disabling PA / FA for 1D.");
-      pa = false;
-      next_gen_full = false;
+      // Read the serial mesh from the given mesh file on all processors.
+      // Refine the mesh in serial to increase the resolution.
+      mesh = new Mesh(Mesh::LoadFromFile(mesh_file, 1, 1));
+      for (int lev = 0; lev < rs_levels; lev++) { mesh->UniformRefinement(); }
    }
+   else
+   {
+      mesh = CartesianMesh(dim, Mpi::WorldSize(), elem_per_mpi, myid == 0,
+                           rp_levels, &mpi_partitioning);
+   }
+   dim = mesh->Dimension();
+   mesh->GetBoundingBox(bb_min, bb_max, max(order, 1));
 
    // Parallel partitioning of the mesh.
    // Refine the mesh further in parallel to increase the resolution.
-   ParMesh pmesh(MPI_COMM_WORLD, *mesh);
+   ParMesh pmesh(MPI_COMM_WORLD, *mesh, mpi_partitioning);
    delete mesh;
+   delete [] mpi_partitioning;
    for (int lev = 0; lev < rp_levels; lev++) { pmesh.UniformRefinement(); }
    MPI_Comm comm = pmesh.GetComm();
    const int NE = pmesh.GetNE();
 
+   if (strncmp(mesh_file, "default", 7) == 0)
+   {
+      MFEM_VERIFY(pmesh.GetGlobalNE() == Mpi::WorldSize() * elem_per_mpi,
+                  "Mesh generation error.");
+      MFEM_VERIFY(NE == elem_per_mpi, "Mesh generation error.");
+   }
+
+   // Only standard assembly in 1D (some mfem functions just abort in 1D).
+   if ((pa || next_gen_full) && dim == 1)
+   {
+      MFEM_WARNING("Disabling PA / FA for 1D.");
+      pa = false;
+      next_gen_full = false;
+   }
+
    // Define the ODE solver used for time integration. Several explicit
    // Runge-Kutta methods are available.
    ODESolver *ode_solver = NULL;
diff --git a/remhos_tools.cpp b/remhos_tools.cpp
index 0a180eb..4422eb2 100644
--- a/remhos_tools.cpp
+++ b/remhos_tools.cpp
@@ -1075,6 +1075,127 @@ void MixedConvectionIntegrator::AssembleElementMatrix2(
    }
 }
 
+Mesh *CartesianMesh(int dim, int mpi_cnt, int elem_per_mpi, bool print,
+                    int &par_ref, int **partitioning)
+{
+   MFEM_VERIFY(dim > 1, "Not implemented for 1D meshes.");
+
+   auto factor = [&](int N)
+   {
+      for (int i = static_cast<int>(sqrt(N)); i > 0; i--)
+      { if (N % i == 0) { return i; } }
+      return 1;
+   };
+
+   par_ref = 0;
+   const int ref_factor = (dim == 2) ? 4 : 8;
+
+   // Elements per task before performing parallel refinements.
+   // This will be used to form the serial mesh.
+   int el0 = elem_per_mpi;
+   while (el0 % ref_factor == 0)
+   {
+      el0 /= ref_factor;
+      par_ref++;
+   }
+
+   // In the serial mesh we have:
+   // The number of MPI blocks is mpi_cnt = mpi_x * mpi_y * mpi_z.
+   // The size of each MPI block is el0 = el0_x * el0_y * el0_z.
+   int mpi_x, mpi_y, mpi_z;
+   int el0_x, el0_y, el0_z;
+   if (dim == 2)
+   {
+      mpi_x = factor(mpi_cnt);
+      mpi_y = mpi_cnt / mpi_x;
+
+      // Switch order for better balance.
+      el0_y = factor(el0);
+      el0_x = el0 / el0_y;
+   }
+   else
+   {
+      mpi_x = factor(mpi_cnt);
+      mpi_y = factor(mpi_cnt / mpi_x);
+      mpi_z = mpi_cnt / mpi_x / mpi_y;
+
+      // Switch order for better balance.
+      el0_z = factor(el0);
+      el0_y = factor(el0 / el0_z);
+      el0_x = el0 / el0_y / el0_z;
+   }
+
+   if (print && dim == 2)
+   {
+      int elem_par_x = mpi_x * el0_x * pow(2, par_ref),
+          elem_par_y = mpi_y * el0_y * pow(2, par_ref);
+
+      std::cout << "--- Mesh generation: \n";
+      std::cout << "Par mesh: " << elem_par_x << " x " << elem_par_y
+                << " (" << elem_par_x * elem_par_y << " elements)\n"
+                << "Elem / task: "
+                << el0_x * pow(2, par_ref) << " x "
+                << el0_y * pow(2, par_ref)
+                << " (" << el0_x * pow(2, 2*par_ref) * el0_y << " elements)\n"
+                << "MPI blocks: " << mpi_x << " x " << mpi_y
+                << " (" << mpi_x * mpi_y << " mpi tasks)\n" << "-\n"
+                << "Serial mesh: "
+                << mpi_x * el0_x << " x " << mpi_y * el0_y
+                << " (" << mpi_x * el0_x * mpi_y * el0_y << " elements)\n"
+                << "Elem / task: " << el0_x << " x " << el0_y << std::endl
+                << "Par refine: " << par_ref << std::endl;
+      std::cout << "--- \n";
+   }
+
+   if (print && dim == 3)
+   {
+      int elem_par_x = mpi_x * el0_x * pow(2, par_ref),
+          elem_par_y = mpi_y * el0_y * pow(2, par_ref),
+          elem_par_z = mpi_z * el0_z * pow(2, par_ref);
+
+      std::cout << "--- Mesh generation: \n";
+      std::cout << "Par mesh: "
+                << elem_par_x << " x " << elem_par_y << " x " << elem_par_z
+                << " (" << elem_par_x*elem_par_y*elem_par_z << " elements)\n"
+                << "Elem / task: "
+                << el0_x * pow(2, par_ref) << " x "
+                << el0_y * pow(2, par_ref) << " x "
+                << el0_z * pow(2, par_ref)
+                << " (" << el0_x*pow(2, 3*par_ref)*el0_y*el0_z << " elements)\n"
+                << "MPI blocks: " << mpi_x << " x " << mpi_y << " x " << mpi_z
+                << " (" << mpi_x * mpi_y * mpi_z << " mpi tasks)\n" << "-\n"
+                << "Serial mesh: "
+                << mpi_x*el0_x << " x " << mpi_y*el0_y << " x " << mpi_z*el0_z
+                << " (" << mpi_x*el0_x*mpi_y*el0_y*mpi_z*el0_z << " elements)\n"
+                << "Elem / task: "
+                << el0_x << " x " << el0_y << " x " << el0_z << std::endl
+                << "Par refine: " << par_ref << std::endl;
+      std::cout << "--- \n";
+   }
+
+   Mesh *mesh;
+   if (dim == 2)
+   {
+      mesh = new Mesh(Mesh::MakeCartesian2D(mpi_x * el0_x,
+                                            mpi_y * el0_y,
+                                            Element::QUADRILATERAL, true));
+   }
+   else
+   {
+      mesh = new Mesh(Mesh::MakeCartesian3D(mpi_x * el0_x,
+                                            mpi_y * el0_y,
+                                            mpi_z * el0_z,
+                                            Element::HEXAHEDRON, true));
+   }
+
+   int nxyz[3];
+   if (dim == 2) { nxyz[0] = mpi_x; nxyz[1] = mpi_y; }
+   else { nxyz[0] = mpi_x; nxyz[1] = mpi_y; nxyz[2] = mpi_z; }
+   *partitioning = mesh->CartesianPartitioning(nxyz);
+
+   return mesh;
+}
+
 int GetLocalFaceDofIndex3D(int loc_face_id, int face_orient,
                            int face_dof_id, int face_dof1D_cnt)
 {
diff --git a/remhos_tools.hpp b/remhos_tools.hpp
index 4aa88bf..6b29897 100644
--- a/remhos_tools.hpp
+++ b/remhos_tools.hpp
@@ -23,6 +23,9 @@
 namespace mfem
 {
 
+Mesh *CartesianMesh(int dim, int mpi_cnt, int elem_per_mpi, bool print,
+                    int &par_ref, int **partitioning);
+
 int GetLocalFaceDofIndex(int dim, int loc_face_id, int face_orient,
                          int face_dof_id, int face_dof1D_cnt);
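
Reviewer note (not part of the patch): the decomposition that CartesianMesh
performs can be checked in isolation. The standalone sketch below mirrors the
patch's logic: elem_per_mpi is first reduced by factors of 4 (2D) or 8 (3D) to
determine the number of parallel refinements, and the remaining counts are
factored into near-square grids. The factor() helper and the refinement
peeling are copied from the function above; main() and the concrete inputs
(12 MPI tasks, 32 elements per task, dim = 2) are illustrative assumptions.

#include <cmath>
#include <iostream>

// Largest divisor of N not exceeding sqrt(N); mirrors the factor()
// lambda in CartesianMesh.
static int factor(int N)
{
   for (int i = static_cast<int>(std::sqrt(N)); i > 0; i--)
   {
      if (N % i == 0) { return i; }
   }
   return 1;
}

int main()
{
   // Assumed inputs: 12 MPI tasks, 32 elements per task, dim = 2.
   const int dim = 2, mpi_cnt = 12;
   int el0 = 32, par_ref = 0;

   // Peel off uniform parallel refinements; each 2D refinement
   // multiplies the element count per task by 4 (8 in 3D).
   const int ref_factor = (dim == 2) ? 4 : 8;
   while (el0 % ref_factor == 0) { el0 /= ref_factor; par_ref++; }
   // Here: el0 = 2, par_ref = 2.

   // Factor the MPI grid and the per-task block of the serial mesh.
   const int mpi_x = factor(mpi_cnt), mpi_y = mpi_cnt / mpi_x; // 3 x 4 blocks
   const int el0_y = factor(el0), el0_x = el0 / el0_y;         // 2 x 1 elements

   // Serial mesh: 6 x 4 = 24 elements; two parallel refinements give
   // 24 x 16 = 384 elements = 12 tasks * 32 elements per task.
   std::cout << "serial mesh: " << mpi_x * el0_x << " x " << mpi_y * el0_y
             << ", parallel refinements: " << par_ref << std::endl;
   return 0;
}

With the patch applied, the corresponding run would be along the lines of
mpirun -np 12 remhos -dim 2 -epm 32 (plus the usual problem options), and the
printed "Mesh generation" summary should report the same serial mesh, MPI
block layout, and parallel refinement count as the sketch.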