From ef88a00d899009283143dfb64c3f9629c0b6e31d Mon Sep 17 00:00:00 2001 From: Joseph Coffland Date: Thu, 9 Mar 2023 18:36:14 +0200 Subject: [PATCH] Fix CPU allocation when there are more GPUs than CPUs. #129, Don't reserve a CPU for each disabled GPU. --- CHANGELOG.md | 4 +++- src/fah/client/App.cpp | 2 +- src/fah/client/Units.cpp | 14 ++++---------- 3 files changed, 8 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ae47260..355e361 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,9 @@ Folding@home Client Changelog ## v8.1.16 - Fix core download retry logic. - Only add client executable directory to lib path on Windows. - - Retry WU if core crashes. #127 + - Retry WU if core crashes. #127 + - Fix CPU allocation when there are more GPUs than CPUs. #129 + - Don't reserve a CPU for each disabled GPU. ## v8.1.15 - Fix CUDA/OpenCL driver mixup from v8.1.14. diff --git a/src/fah/client/App.cpp b/src/fah/client/App.cpp index 2324235..8992f04 100644 --- a/src/fah/client/App.cpp +++ b/src/fah/client/App.cpp @@ -301,7 +301,7 @@ void App::updateResources() { uint32_t cpus = min(config.getCPUs(), availableCPUs); info->insert("cpus", cpus + remainingCPUs); - availableCPUs -= cpus; + availableCPUs -= min(availableCPUs, cpus); if (cpus < config.getCPUs()) config.insert("cpus", cpus); JSON::ValuePtr groupGPUs = new JSON::Dict; diff --git a/src/fah/client/Units.cpp b/src/fah/client/Units.cpp index 251c1aa..90f2a82 100644 --- a/src/fah/client/Units.cpp +++ b/src/fah/client/Units.cpp @@ -200,22 +200,18 @@ void Units::update() { if (unitGPUs.empty()) continue; uint32_t minCPUs = unit.getMinCPUs(); - bool runable = minCPUs <= remainingCPUs; + bool runable = minCPUs <= remainingCPUs || minCPUs < 2; std::set gpusWithWU = remainingGPUs; for (auto id: unitGPUs) runable |= gpusWithWU.erase(id); if (runable) { remainingGPUs = gpusWithWU; - remainingCPUs -= minCPUs; // Initially allocate only minimum CPUs + remainingCPUs -= min(remainingCPUs, minCPUs); // 
Allocate minimum CPUs enabledWUs.insert(i); } } - // Reserve one CPU for any unused GPUs - uint32_t reservedCPUs = min(remainingCPUs, (uint32_t)remainingGPUs.size()); - remainingCPUs -= reservedCPUs; - // Allocate extra CPUs to enabled GPU WUs for (unsigned i = 0; i < size(); i++) { if (!enabledWUs.count(i)) continue; // GPU WUs that were enabled above @@ -226,7 +222,8 @@ void Units::update() { uint32_t cpus = min(maxCPUs, remainingCPUs + minCPUs); unit.setCPUs(cpus); - remainingCPUs -= cpus - minCPUs; // Minimum CPUs subtracted above + // minCPUs was subtracted above or remainingCPUs is already zero + remainingCPUs -= min(remainingCPUs, cpus - minCPUs); } // Allocate remaining CPUs to existing CPU WUs @@ -243,9 +240,6 @@ void Units::update() { enabledWUs.insert(i); } - // Restore reserved CPUs - remainingCPUs += reservedCPUs; - // Start and stop WUs for (unsigned i = 0; i < size(); i++) getUnit(i).setPause(getUnit(i).atRunState() && !enabledWUs.count(i));