Skip to content

Commit

Permalink
- Don't allow invalid machine name option. #282
Browse files Browse the repository at this point in the history
 - Track WU end state.
 - Clear WU retry count after WU has run successfully for some time.
 - Set final WU progress correctly.
  • Loading branch information
jcoffland committed Sep 17, 2024
1 parent 28ff80a commit 138cb16
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 58 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
Folding@home Client Changelog
=============================

## v8.4.5
- Don't allow invalid machine name option. #282
- Track WU end state.
- Clear WU retry count after WU has run successfully for some time.
- Set final WU progress correctly.

## v8.4.4
- Acquire client DB lock on startup. #269
- Added ``fahctl`` command line client control script. #119
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "fah-client",
"version": "8.4.4",
"version": "8.4.5",
"bin": {"fah-client": "./fah-client"},
"author": "Joseph Coffland <[email protected]>",
"homepage": "https://foldingathome.org/",
Expand Down
5 changes: 4 additions & 1 deletion src/fah/client/Account.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
#include <cbang/comp/Press.h>
#include <cbang/os/SystemInfo.h>
#include <cbang/http/Conn.h>
#include <cbang/config/RegexConstraint.h>

using namespace FAH::Client;
using namespace cb;
Expand All @@ -53,7 +54,9 @@ Account::Account(App &app) : app(app) {
auto &options = app.getOptions();
options.pushCategory("Account");
options.add("account-token", "Folding@home account token.");
options.add("machine-name", "Name used to identify this machine.");
options.add("machine-name", "Name used to identify this machine.",
new RegexConstraint(Regex("[^<>;&'\\\"]{1,64}"),
"Must be between 1 and 64 characters and cannot include any of <>;&'\""));
options.popCategory();

updateEvent = app.getEventBase().newEvent(this, &Account::update, 0);
Expand Down
10 changes: 6 additions & 4 deletions src/fah/client/App.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
#include "Cores.h"
#include "Config.h"
#include "OS.h"
#include "PasskeyConstraint.h"
#include "Remote.h"
#include "LogTracker.h"

Expand Down Expand Up @@ -64,7 +63,7 @@
#include <cbang/openssl/CertificateStoreContext.h>

#include <cbang/config/MinMaxConstraint.h>
#include <cbang/config/MinConstraint.h>
#include <cbang/config/RegexConstraint.h>

#include <set>
#include <csignal>
Expand Down Expand Up @@ -143,7 +142,10 @@ App::App() :
options.add("user", "Your user name.")->setDefault("Anonymous");
options.add("team", "Your team number.",
new MinMaxConstraint<int32_t>(0, 2147483647))->setDefault(0);
opt = options.add("passkey", "Your passkey.", new PasskeyConstraint);
opt = options.add("passkey", "Your passkey.",
new RegexConstraint(Regex("[a-fA-F0-9]{32}"),
"Passkey must be 32 characters long and can only contain hexadecimal "
"characters."));
opt->setDefault("");
opt->setObscured();
options.popCategory();
Expand Down Expand Up @@ -204,7 +206,7 @@ App::App() :


App::~App() {
clear(); // Deallocate object in ObservableDict before Event::Base
clear(); // Deallocate objects in ObservableDict before Event::Base
}


Expand Down
93 changes: 44 additions & 49 deletions src/fah/client/Unit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,6 @@ bool Unit::isFinished() const {
switch (getState()) {
case UNIT_UPLOAD:
case UNIT_DUMP:
case UNIT_CLEAN:
case UNIT_DONE:
return true;

Expand All @@ -374,7 +373,6 @@ bool Unit::isExpired() const {
switch (getState()) {
case UNIT_ASSIGN:
case UNIT_DUMP:
case UNIT_CLEAN:
case UNIT_DONE:
return false;

Expand All @@ -389,19 +387,17 @@ void Unit::triggerNext(double secs) {event->add(secs);}
void Unit::dumpWU() {
LOG_INFO(3, "Dumping " << id);

switch (getState()) {
case UNIT_ASSIGN: case UNIT_DOWNLOAD: setState(UNIT_CLEAN); break;
case UNIT_CORE: case UNIT_RUN: case UNIT_UPLOAD: setState(UNIT_DUMP); break;
case UNIT_DUMP: case UNIT_CLEAN: case UNIT_DONE: return; // Do nothing
}

// Stop waiting
setWait(0);
setWait(0); // Stop waiting
retries = 0;
success = true; // Don't delay group retry
event->del();

cancelRequest(); // Terminate any active connections

switch (getState()) {
case UNIT_ASSIGN: case UNIT_DOWNLOAD: clean("dumped"); break;
case UNIT_CORE: case UNIT_RUN: case UNIT_UPLOAD: setState(UNIT_DUMP); break;
case UNIT_DUMP: case UNIT_DONE: return; // Do nothing
}

save();
triggerNext();
}
Expand Down Expand Up @@ -429,15 +425,15 @@ void Unit::setState(UnitState state) {
if (hasString("state") && state == getState()) return;
if (group.isSet()) group->triggerUpdate();
insert("state", state.toString());
setProgress(0, 0);
clearProgress();
}


void Unit::next() {
// Check if WU has expired
if (isExpired()) {
LOG_INFO(1, "Unit expired, deleting");
setState(UNIT_CLEAN);
return clean("expired");
}

// Update pause reason
Expand All @@ -457,7 +453,7 @@ void Unit::next() {

// Handle pause
if (isPaused() && getState() < UNIT_DUMP) {
if (!pr.isSet() && getState() == UNIT_ASSIGN) setState(UNIT_CLEAN);
if (!pr.isSet() && getState() == UNIT_ASSIGN) return clean("aborted");
else {
setWait(0); // Stop waiting
retries = 0;
Expand All @@ -466,7 +462,7 @@ void Unit::next() {
}

// Handle event backoff
if (getState() < UNIT_CLEAN && isWaiting())
if (getState() <= UNIT_DUMP && isWaiting())
return triggerNext(wait - Time::now());

try {
Expand All @@ -477,7 +473,6 @@ void Unit::next() {
case UNIT_RUN: return run();
case UNIT_UPLOAD: return upload();
case UNIT_DUMP: return dump();
case UNIT_CLEAN: return clean();
case UNIT_DONE: return;
}
} CATCH_ERROR;
Expand All @@ -496,7 +491,6 @@ void Unit::processStarted(const SmartPointer<CoreProcess> &process) {


void Unit::processEnded() {
if (Time::SEC_PER_MIN * 5 < getRunTimeDelta()) retries = 0;
insert("run_time", getRunTime());
erase("start_time");
processStartTime = 0;
Expand Down Expand Up @@ -528,26 +522,23 @@ void Unit::updateKnownProgress(uint64_t done, uint64_t total) {
if (!total || total < done) return;

if (lastKnownDone != done || lastKnownTotal != total) {
lastKnownDone = done;
lastKnownTotal = total;
lastKnownProgressUpdate = Time::now();
lastKnownDone = done;
lastKnownTotal = total;
lastKnownProgressUpdate = Time::now();
lastKnownProgressUpdateRunTime = getRunTime();
}
}


void Unit::setProgress(double done, double total) {
const char *key = getState() == UNIT_RUN ? "wu_progress" : "progress";
double progress = total ? done / total : 0;
void Unit::setProgress(double done, double total, bool wu) {
const char *key = wu ? "wu_progress" : "progress";
double progress = round((total ? done / total : 0) * 1000) / 1000;
double oldValue = getNumber(key, 0);

progress = round(progress * 1000) / 1000;

if (oldValue != progress) {
insert(key, progress);

if (floor(oldValue * 100) < floor(progress * 100) &&
getState() != UNIT_RUN && 1 < total)
if (floor(oldValue * 100) < floor(progress * 100) && !wu && 1 < total)
LOG_INFO(1, getState() << String::printf(" %0.0f%% ", progress * 100)
<< HumanSize(done) << "B of " << HumanSize(total) << 'B');
}
Expand Down Expand Up @@ -589,7 +580,7 @@ void Unit::run() {
// Make sure WU data exists
if (!SystemUtilities::exists(getDirectory() + "/wudata_01.dat")) {
LOG_ERROR("Missing WU data");
return setState(UNIT_CLEAN);
return clean("missing");
}

// Remove old results if exists
Expand Down Expand Up @@ -769,10 +760,11 @@ void Unit::finalizeRun() {
}

process.release();
if (code == ExitCode::FINISHED_UNIT) setProgress(1, 1);
if (code == ExitCode::FINISHED_UNIT) setProgress(1, 1, true);
processEnded();

bool ok = code == ExitCode::FINISHED_UNIT || code == ExitCode::INTERRUPTED;
success = code == ExitCode::FINISHED_UNIT;
bool ok = success || code == ExitCode::INTERRUPTED;
LOG(CBANG_LOG_DOMAIN, ok ? LOG_INFO_LEVEL(1) : Logger::LEVEL_WARNING,
"Core returned " << code << " (" << (unsigned)code << ')');

Expand All @@ -798,9 +790,8 @@ void Unit::finalizeRun() {
string resultData = SystemUtilities::read(filename);
string hash64 = Digest::base64(resultData, "sha256");

// TODO Set status "ok" once WS are upgraded
// TODO Send multi-part data with JSON followed by binary data
setResults("", hash64);
setResults(ok ? "ok" : "failed", hash64);
data->insert("data", Base64().encode(resultData));

return setState(UNIT_UPLOAD);
Expand Down Expand Up @@ -833,20 +824,24 @@ void Unit::monitorRun() {
auto ppd = getPPD();
if (eta != getString("eta", "")) insert("eta", eta);
if (ppd != getU64("ppd", -1)) insert("ppd", ppd);
setProgress(getEstimatedProgress(), 1);
setProgress(getEstimatedProgress(), 1, true);

// Clear retries after running long enough
if (retries && Time::SEC_PER_MIN * 2 < getRunTimeDelta()) retries = 0;
} CATCH_ERROR;

triggerNext(1);
}


void Unit::clean() {
void Unit::clean(const string &result) {
LOG_DEBUG(3, "Cleaning WU");

// Log WU
if (!id.empty()) {
insert("end_time", Time().toString());
app.logWU(*this);
insert("result", result);
TRY_CATCH_ERROR(app.logWU(*this));
}

// Remove from disk
Expand Down Expand Up @@ -900,19 +895,21 @@ void Unit::retry() {
LOG_INFO(1, "Too many retries (" << (retries - 1) << "), failing WU");
setWait(0);
retries = 0;
setState(UNIT_CLEAN);
success = false;
return clean("retries");
}

insert("retries", retries);
return;

} CATCH_ERROR;

// Retry failed
success = false;
switch (getState()) {
case UNIT_DONE: break;
case UNIT_CLEAN: setState(UNIT_DONE); break;
case UNIT_DUMP: setState(UNIT_CLEAN); break;
default: setState(UNIT_DUMP); break;
case UNIT_DONE: return;
case UNIT_DUMP: return clean("failed");
default: return setState(UNIT_DUMP);
}
}

Expand Down Expand Up @@ -1099,17 +1096,16 @@ void Unit::download() {
&Unit::response);

data->write(*pr->getJSONWriter());
setProgress(0, 0);
clearProgress();
pr->getConnection()->getReadProgress().setCallback(progressCB, 1);
pr->send();
}


void Unit::uploadResponse(const JSON::ValuePtr &data) {
LOG_INFO(1, "Credited");
setState(UNIT_CLEAN);
success = true;
logCredit(data);
clean(success ? "credited" : "failed");
}


Expand Down Expand Up @@ -1138,16 +1134,16 @@ void Unit::upload() {
data->write(*writer);
writer->close();

setProgress(0, 0);
clearProgress();
pr->getConnection()->getWriteProgress().setCallback(progressCB, 1);
pr->send();
}


void Unit::dumpResponse(const JSON::ValuePtr &data) {
LOG_INFO(1, "Dumped");
setState(UNIT_CLEAN);
logCredit(data);
clean("dumped");
}


Expand Down Expand Up @@ -1200,9 +1196,7 @@ void Unit::response(HTTP::Request &req) {
break;

case HTTP_BAD_REQUEST: case HTTP_NOT_ACCEPTABLE: case HTTP_GONE:
default:
setState(UNIT_CLEAN);
break;
default: return clean("rejected");
}

} else {
Expand Down Expand Up @@ -1233,6 +1227,7 @@ void Unit::logCredit(const JSON::ValuePtr &data) {
} CATCH_ERROR;
}


void Unit::startLogCopy(const string &filename) {
bytesCopiedToLog = 0;
endLogCopy();
Expand Down
5 changes: 3 additions & 2 deletions src/fah/client/Unit.h
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,8 @@ namespace FAH {
double getKnownProgress() const;
void updateKnownProgress(uint64_t done, uint64_t total);

void setProgress(double done, double total);
void clearProgress() {setProgress(0, 0);}
void setProgress(double done, double total, bool wu = false);
void getCore();
void run();
void readInfo();
Expand All @@ -167,7 +168,7 @@ namespace FAH {
void finalizeRun();
void stopRun();
void monitorRun();
void clean();
void clean(const std::string &result);
void setWait(double delay);
void retry();

Expand Down
1 change: 0 additions & 1 deletion src/fah/client/UnitState.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ CBANG_ENUM(UNIT_CORE)
CBANG_ENUM(UNIT_RUN)
CBANG_ENUM(UNIT_UPLOAD)
CBANG_ENUM(UNIT_DUMP)
CBANG_ENUM(UNIT_CLEAN)
CBANG_ENUM(UNIT_DONE)

#endif // CBANG_ENUM

0 comments on commit 138cb16

Please sign in to comment.