Skip to content

Commit b354273

Browse files
authored
Return TRYAGAIN for cfg changes until log replay (#165)
1 parent 9a0c6c0 commit b354273

File tree

5 files changed

+78
-14
lines changed

5 files changed

+78
-14
lines changed

include/raft.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ typedef enum {
2626
RAFT_ERR_DONE = -10,
2727
RAFT_ERR_NOTFOUND = -11,
2828
RAFT_ERR_MISUSE = -12,
29+
RAFT_ERR_TRYAGAIN = -13,
2930
} raft_error_e;
3031

3132
typedef enum {

src/raft_server.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1152,10 +1152,22 @@ int raft_recv_entry(raft_server_t *me,
11521152
raft_entry_req_t *ety,
11531153
raft_entry_resp_t *r)
11541154
{
1155+
if (!raft_is_leader(me)) {
1156+
return RAFT_ERR_NOT_LEADER;
1157+
}
1158+
11551159
if (raft_entry_is_voting_cfg_change(ety)) {
11561160
/* Only one voting cfg change at a time */
11571161
if (raft_voting_change_is_in_progress(me)) {
1158-
return RAFT_ERR_ONE_VOTING_CHANGE_ONLY;
1162+
/* On restart, if logs are not replayed yet, we don't know whether
1163+
* cfg change entry was applied in the previous run. Returning
1164+
* ONE_VOTING_CHANGE_ONLY might be confusing if there is no ongoing
1165+
* config change. So, we check if we've applied an entry from the
1166+
* current term which implicitly means we replayed previous logs.
1167+
* Then, returning TRYAGAIN if log replay is not completed yet. */
1168+
int log_replayed = (me->current_term == me->last_applied_term);
1169+
return log_replayed ? RAFT_ERR_ONE_VOTING_CHANGE_ONLY:
1170+
RAFT_ERR_TRYAGAIN;
11591171
}
11601172

11611173
/* Multi-threading: need to fail here because user might be
@@ -1165,10 +1177,6 @@ int raft_recv_entry(raft_server_t *me,
11651177
}
11661178
}
11671179

1168-
if (!raft_is_leader(me)) {
1169-
return RAFT_ERR_NOT_LEADER;
1170-
}
1171-
11721180
if (raft_get_transfer_leader(me) != RAFT_NODE_ID_NONE) {
11731181
return RAFT_ERR_LEADER_TRANSFER_IN_PROGRESS;
11741182
}

src/raft_server_properties.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,8 @@ const char *raft_get_error_str(int err)
161161
return "not found";
162162
case RAFT_ERR_MISUSE:
163163
return "misuse";
164+
case RAFT_ERR_TRYAGAIN:
165+
return "try again";
164166
default:
165167
return "unknown error";
166168
}

tests/test_server.c

Lines changed: 59 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5070,22 +5070,23 @@ void TestRaft_delete_configuration_change_entries(CuTest *tc)
50705070

50715071
void *r = raft_new();
50725072
raft_add_node(r, NULL, 1, 1);
5073-
raft_add_node(r, NULL, 2, 0);
5074-
50755073
raft_set_callbacks(r, &funcs, NULL);
50765074
raft_set_current_term(r, 1);
5077-
raft_set_state(r, RAFT_STATE_LEADER);
5075+
raft_become_leader(r);
5076+
raft_apply_all(r);
5077+
5078+
raft_add_node(r, NULL, 2, 0);
50785079

50795080
/* If there is no entry, delete should return immediately. */
5080-
CuAssertIntEquals(tc, 0, raft_delete_entry_from_idx(r, 2));
5081+
CuAssertIntEquals(tc, 0, raft_delete_entry_from_idx(r, 3));
50815082

50825083
/* Add the non-voting node. */
50835084
raft_entry_t *ety = __MAKE_ENTRY(1, 1, "3");
50845085
ety->type = RAFT_LOGTYPE_ADD_NONVOTING_NODE;
50855086
CuAssertIntEquals(tc, 0, raft_recv_entry(r, ety, NULL));
50865087

50875088
/* If there idx is out of bounds, delete should return immediately. */
5088-
CuAssertIntEquals(tc, 0, raft_delete_entry_from_idx(r, 2));
5089+
CuAssertIntEquals(tc, 0, raft_delete_entry_from_idx(r, 3));
50895090

50905091
/* Append a removal log entry for the non-voting node we just added. */
50915092
ety = __MAKE_ENTRY(1, 1, "3");
@@ -5100,12 +5101,12 @@ void TestRaft_delete_configuration_change_entries(CuTest *tc)
51005101

51015102
/* Add some random entries. */
51025103
__RAFT_APPEND_ENTRIES_SEQ_ID(r, 10, 1, 1, "data");
5103-
CuAssertIntEquals(tc, 12, raft_get_log_count(r));
5104+
CuAssertIntEquals(tc, 13, raft_get_log_count(r));
51045105

51055106
/* Delete the removal log entry and others after it. */
5106-
CuAssertIntEquals(tc, 0, raft_delete_entry_from_idx(r, 2));
5107+
CuAssertIntEquals(tc, 0, raft_delete_entry_from_idx(r, 3));
51075108

5108-
CuAssertIntEquals(tc, 1, raft_get_log_count(r));
5109+
CuAssertIntEquals(tc, 2, raft_get_log_count(r));
51095110
CuAssertIntEquals(tc, 1, raft_node_is_active(raft_get_node(r, 3)));
51105111
CuAssertIntEquals(tc, 0, raft_node_is_voting(raft_get_node(r, 3)));
51115112

@@ -5152,6 +5153,55 @@ void TestRaft_propagate_persist_metadata_failure(CuTest *tc)
51525153
CuAssertIntEquals(tc, e, -1);
51535154
}
51545155

5156+
void TestRaft_reject_config_change_before_log_replay(CuTest *tc)
5157+
{
5158+
raft_cbs_t funcs = {
5159+
.get_node_id = __raft_get_node_id
5160+
};
5161+
5162+
raft_server_t *r = raft_new();
5163+
raft_set_callbacks(r, &funcs, NULL);
5164+
raft_add_non_voting_node(r, NULL, 1, 1);
5165+
5166+
raft_become_leader(r);
5167+
5168+
raft_entry_req_t *ety = __MAKE_ENTRY(1, 1, "1");
5169+
ety->type = RAFT_LOGTYPE_ADD_NODE;
5170+
CuAssertIntEquals(tc, 0, raft_recv_entry(r, ety, NULL));
5171+
raft_periodic_internal(r, 2000);
5172+
5173+
/* To simulate restart, restore log with the first node's data.*/
5174+
raft_server_t *r2 = raft_new();
5175+
r2->log = raft_get_log(r);
5176+
5177+
raft_set_callbacks(r2, &funcs, NULL);
5178+
raft_add_non_voting_node(r2, NULL, 1, 1);
5179+
5180+
raft_restore_metadata(r2, 1, 0);
5181+
raft_restore_log(r2);
5182+
raft_become_leader(r2);
5183+
5184+
ety = __MAKE_ENTRY(2, 2, "2");
5185+
ety->type = RAFT_LOGTYPE_ADD_NODE;
5186+
5187+
/* Node has cfg change entry in the log, but it doesn't know the commit idx.
5188+
* As server does not know whether cfg change entry in the log was applied
5189+
* in the previous run, it should reply with TRYAGAIN instead of
5190+
* ONE_VOTING_CHANGE_ONLY which might be confusing if there is no ongoing
5191+
* cfg change. */
5192+
CuAssertIntEquals(tc, RAFT_ERR_TRYAGAIN, raft_recv_entry(r2, ety, NULL));
5193+
5194+
raft_set_commit_idx(r2, raft_get_current_idx(r2));
5195+
raft_apply_all(r2);
5196+
5197+
/* After log replay is completed (server applied NOOP entry of the term),
5198+
* multiple config change requests should be rejected with
5199+
* ONE_VOTING_CHANGE_ONLY. */
5200+
CuAssertIntEquals(tc, 0, raft_recv_entry(r2, ety, NULL));
5201+
CuAssertIntEquals(tc, RAFT_ERR_ONE_VOTING_CHANGE_ONLY,
5202+
raft_recv_entry(r2, ety, NULL));
5203+
}
5204+
51555205
int main(void)
51565206
{
51575207
CuString *output = CuStringNew();
@@ -5309,7 +5359,7 @@ int main(void)
53095359
SUITE_ADD_TEST(suite, TestRaft_test_metadata_on_restart);
53105360
SUITE_ADD_TEST(suite, TestRaft_rebuild_config_after_restart);
53115361
SUITE_ADD_TEST(suite, TestRaft_delete_configuration_change_entries);
5312-
SUITE_ADD_TEST(suite, TestRaft_propagate_persist_metadata_failure);
5362+
SUITE_ADD_TEST(suite, TestRaft_reject_config_change_before_log_replay);
53135363
CuSuiteRun(suite);
53145364
CuSuiteDetails(suite, output);
53155365
printf("%s\n", output->buffer);

tests/virtraft2.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ def err2str(err):
133133
lib.RAFT_INVALID_NODEID: 'RAFT_INVALID_NODEID',
134134
lib.RAFT_ERR_LEADER_TRANSFER_IN_PROGRESS: 'RAFT_ERR_LEADER_TRANSFER_IN_PROGRESS',
135135
lib.RAFT_ERR_DONE: 'RAFT_ERR_DONE',
136+
lib.RAFT_ERR_NOT_FOUND: 'RAFT_ERR_NOT_FOUND',
137+
lib.RAFT_ERR_MISUSE: 'RAFT_ERR_MISUSE',
138+
lib.RAFT_ERR_TRYAGAIN: 'RAFT_ERR_TRYAGAIN',
136139
lib.RAFT_ERR_LAST: 'RAFT_ERR_LAST',
137140
}[err]
138141

0 commit comments

Comments
 (0)