1
1
#include " raft.h"
2
2
3
- #include " db.h"
4
-
5
3
#include < cstdint>
6
4
#include < cstdlib>
7
5
#include < memory>
25
23
#include < fmt/format.h>
26
24
#include < spdlog/spdlog.h>
27
25
28
- static int gFirstElection = 0 ;
26
+ static bool gFirstElection = true ;
29
27
30
28
NodeClient::NodeClient (ID nodeId, IP nodeIp)
31
29
: m_id{nodeId},
32
30
m_ip{std::move (nodeIp)},
33
31
m_channel (grpc::CreateChannel(m_ip, grpc::InsecureChannelCredentials())),
34
- m_stub(RaftService::NewStub(m_channel))
32
+ m_stub(RaftService::NewStub(m_channel)),
33
+ m_kvStub(TinyKVPPService::NewStub(m_channel))
35
34
{
36
35
assert (m_id > 0 );
37
36
assert (!m_ip.empty ());
@@ -45,6 +44,11 @@ NodeClient::NodeClient(ID nodeId, IP nodeIp)
45
44
{
46
45
throw std::runtime_error (fmt::format (" Failed to create a stub for node={} ip={}" , m_id, m_id));
47
46
}
47
+
48
+ if (!m_kvStub)
49
+ {
50
+ throw std::runtime_error (fmt::format (" Failed to create a KV stub for node={} ip={}" , m_id, m_id));
51
+ }
48
52
}
49
53
50
54
auto NodeClient::appendEntries (const AppendEntriesRequest &request, AppendEntriesResponse *response) -> bool
@@ -77,6 +81,22 @@ auto NodeClient::requestVote(const RequestVoteRequest &request, RequestVoteRespo
77
81
return true ;
78
82
}
79
83
84
+ auto NodeClient::put (const PutRequest &request, PutResponse *pResponse) -> bool
85
+ {
86
+ grpc::ClientContext context;
87
+ context.set_deadline (std::chrono::system_clock::now () + std::chrono::seconds (generateRandomTimeout ()));
88
+ grpc::Status status = m_kvStub->Put (&context, request, pResponse);
89
+ if (!status.ok ())
90
+ {
91
+ spdlog::error (" Put RPC call failed. Error code={} and message={}" ,
92
+ static_cast <int >(status.error_code ()),
93
+ status.error_message ());
94
+ return false ;
95
+ }
96
+
97
+ return true ;
98
+ }
99
+
80
100
auto NodeClient::getId () const -> ID
81
101
{
82
102
return m_id;
@@ -126,10 +146,9 @@ auto ConsensusModule::AppendEntries(grpc::ServerContext *pContext,
126
146
(void )pRequest;
127
147
(void )pResponse;
128
148
129
- spdlog::info (" Recevied AppendEntries RPC from leader={} during term={}" , pRequest->senderid (), pRequest->term ());
149
+ spdlog::debug (" Recevied AppendEntries RPC from leader={} during term={}" , pRequest->senderid (), pRequest->term ());
130
150
131
151
absl::MutexLock locker (&m_stateMutex);
132
- /* absl::MutexLock timerLocker(&m_timerMutex);*/
133
152
134
153
// 1. Term check
135
154
if (pRequest->term () < m_currentTerm)
@@ -172,19 +191,34 @@ auto ConsensusModule::AppendEntries(grpc::ServerContext *pContext,
172
191
173
192
m_log.insert (m_log.end (), pRequest->entries ().begin (), pRequest->entries ().end ());
174
193
194
+ /* const auto &entries = pRequest->entries();*/
195
+ /* for (const auto &entry : entries)*/
196
+ /* {*/
197
+ /* m_wal.add(db::wal::wal_t::record_t{.op = db::wal::wal_t::operation_k::add_k,*/
198
+ /* .kv = {structures::memtable::memtable_t::record_t::key_t{entry.key()},*/
199
+ /* structures::memtable::memtable_t::record_t::value_t{entry.value()}}});*/
200
+ /* }*/
201
+
175
202
if (pRequest->leadercommit () > m_commitIndex)
176
203
{
177
204
m_commitIndex = std::min (pRequest->leadercommit (), (uint32_t )m_log.size ());
205
+
206
+ while (m_lastApplied < m_commitIndex)
207
+ {
208
+ ++m_lastApplied;
209
+ m_kv[m_log[m_lastApplied - 1 ].key ()] = m_log[m_lastApplied - 1 ].value ();
210
+ }
178
211
}
179
212
180
213
pResponse->set_term (m_currentTerm);
181
214
pResponse->set_success (true );
182
215
pResponse->set_responderid (m_id);
183
216
pResponse->set_match_index (m_log.size ());
184
217
218
+ m_votedFor = pRequest->senderid ();
185
219
m_leaderHeartbeatReceived.store (true );
186
220
187
- spdlog::info (" Node={} is resetting election timeout at term={}" , m_id, m_currentTerm);
221
+ spdlog::debug (" Node={} is resetting election timeout at term={}" , m_id, m_currentTerm);
188
222
189
223
return grpc::Status::OK;
190
224
}
@@ -197,10 +231,10 @@ auto ConsensusModule::RequestVote(grpc::ServerContext *pContext,
197
231
198
232
absl::WriterMutexLock locker (&m_stateMutex);
199
233
200
- spdlog::info (" Received RequestVote RPC from candidate={} during term={} peerTerm={}" ,
201
- pRequest->candidateid (),
202
- m_currentTerm,
203
- pRequest->term ());
234
+ spdlog::debug (" Received RequestVote RPC from candidate={} during term={} peerTerm={}" ,
235
+ pRequest->candidateid (),
236
+ m_currentTerm,
237
+ pRequest->term ());
204
238
205
239
pResponse->set_term (m_currentTerm);
206
240
pResponse->set_votegranted (0 );
@@ -245,19 +279,42 @@ auto ConsensusModule::Put(grpc::ServerContext *pContext, const PutRequest *pRequ
245
279
246
280
uint32_t currentTerm = 0 ;
247
281
uint32_t lastLogIndex = 0 ;
282
+ uint32_t votedFor = 0 ;
248
283
{
249
284
absl::MutexLock locker{&m_stateMutex};
250
285
if (m_state != NodeState::LEADER)
251
286
{
252
- spdlog::error (" Non-leader node={} received a put request" , m_id);
253
- pResponse->set_status (" " );
254
- return grpc::Status::OK;
287
+ if (m_votedFor != invalidId)
288
+ {
289
+ votedFor = m_votedFor;
290
+ }
291
+ else
292
+ {
293
+ spdlog::error (" Non-leader node={} received a put request. Leader at current term is unkown." , m_id);
294
+ pResponse->set_status (" " );
295
+ return grpc::Status::OK;
296
+ }
255
297
}
256
298
257
299
currentTerm = m_currentTerm;
258
300
lastLogIndex = getLastLogIndex () + 1 ;
259
301
}
260
302
303
+ if (votedFor != invalidId)
304
+ {
305
+ spdlog::error (" Non-leader node={} received a put request. Forwarding to leader={} during currentTerm={}" ,
306
+ m_id,
307
+ votedFor,
308
+ currentTerm);
309
+
310
+ if (!m_replicas[votedFor]->put (*pRequest, pResponse))
311
+ {
312
+ spdlog::error (" Non-leader node={} was unable to forward put RPC to leader={}" , m_id, votedFor);
313
+ }
314
+
315
+ return grpc::Status::OK;
316
+ }
317
+
261
318
LogEntry logEntry;
262
319
logEntry.set_term (currentTerm);
263
320
logEntry.set_index (lastLogIndex);
@@ -272,21 +329,14 @@ auto ConsensusModule::Put(grpc::ServerContext *pContext, const PutRequest *pRequ
272
329
273
330
for (auto &[id, client] : m_replicas)
274
331
{
275
- sendAppendEntriesRPC (client, {logEntry});
332
+ sendAppendEntriesRPC (client. value () , {logEntry});
276
333
}
277
334
278
335
absl::MutexLock locker{&m_stateMutex};
279
336
bool success = waitForMajorityReplication (logEntry.index ());
280
337
if (success)
281
338
{
282
339
spdlog::info (" Node={} majority agreed on logEntry={}" , m_id, logEntry.index ());
283
-
284
- // Apply successfull replication to the state machine e.g. in-memory hash-table
285
- while (m_lastApplied < m_commitIndex)
286
- {
287
- ++m_lastApplied;
288
- m_kv[m_log[m_lastApplied - 1 ].key ()] = m_log[m_lastApplied - 1 ].value ();
289
- }
290
340
}
291
341
else
292
342
{
@@ -354,16 +404,16 @@ void ConsensusModule::start()
354
404
absl::MutexLock locker (&m_timerMutex); // Lock the mutex using Abseil's MutexLock
355
405
356
406
// Determine the timeout duration
357
- int64_t timeToWaitMs = gFirstElection == 0 ? generateRandomTimeout () : 1'000'000'000 ;
407
+ int64_t timeToWaitMs = gFirstElection ? generateRandomTimeout () : 1'000'000'000 ;
358
408
int64_t timeToWaitDeadlineMs = currentTimeMs () + timeToWaitMs;
359
409
360
410
// Define the condition to wait for leader's heartbeat
361
411
auto heartbeatReceivedCondition = [this , &timeToWaitDeadlineMs, currentTimeMs]()
362
412
{ return m_leaderHeartbeatReceived.load () || currentTimeMs () >= timeToWaitDeadlineMs; };
363
413
364
- spdlog::info (" Timer thread at node={} will block for {}ms for the leader to send a heartbeat" ,
365
- m_id,
366
- timeToWaitMs);
414
+ spdlog::debug (" Timer thread at node={} will block for {}ms for the leader to send a heartbeat" ,
415
+ m_id,
416
+ timeToWaitMs);
367
417
368
418
// Wait for the condition to be met or timeout
369
419
bool heartbeatReceived = m_timerMutex.AwaitWithTimeout (absl::Condition (&heartbeatReceivedCondition),
@@ -373,8 +423,8 @@ void ConsensusModule::start()
373
423
// Otherwise, heartbeat timed out and node needs to start the new leader election
374
424
if (heartbeatReceived && m_leaderHeartbeatReceived.load ())
375
425
{
376
- gFirstElection = 1 ;
377
- spdlog::info (" Node={} received heartbeat" , m_id);
426
+ gFirstElection = false ;
427
+ spdlog::debug (" Node={} received heartbeat" , m_id);
378
428
m_leaderHeartbeatReceived.store (false );
379
429
}
380
430
else
@@ -387,7 +437,7 @@ void ConsensusModule::start()
387
437
388
438
{
389
439
assert (m_raftServer);
390
- spdlog::info (" Listening for RPC requests on " );
440
+ spdlog::debug (" Listening for RPC requests on " );
391
441
m_raftServer->Wait ();
392
442
}
393
443
}
@@ -436,7 +486,7 @@ void ConsensusModule::startElection()
436
486
m_currentTerm++;
437
487
m_state = NodeState::CANDIDATE;
438
488
439
- spdlog::info (" Node={} starts election. New term={}" , m_id, m_currentTerm);
489
+ spdlog::debug (" Node={} starts election. New term={}" , m_id, m_currentTerm);
440
490
441
491
// Node in a canditate state should vote for itself.
442
492
m_voteCount++;
@@ -453,7 +503,7 @@ void ConsensusModule::startElection()
453
503
for (auto &[id, client] : m_replicas)
454
504
{
455
505
RequestVoteResponse response;
456
- if (!client. requestVote (request, &response))
506
+ if (!client-> requestVote (request, &response))
457
507
{
458
508
spdlog::error (" RequestVote RPC failed in requester thread" );
459
509
return ;
@@ -462,10 +512,10 @@ void ConsensusModule::startElection()
462
512
auto responseTerm = response.term ();
463
513
auto voteGranted = response.votegranted ();
464
514
465
- spdlog::info (" Received RequestVoteResponse in requester thread peerTerm={} voteGranted={} responseTerm={}" ,
466
- responseTerm,
467
- voteGranted,
468
- response.responderid ());
515
+ spdlog::debug (" Received RequestVoteResponse in requester thread peerTerm={} voteGranted={} responseTerm={}" ,
516
+ responseTerm,
517
+ voteGranted,
518
+ response.responderid ());
469
519
470
520
absl::MutexLock locker (&m_stateMutex);
471
521
if (responseTerm > m_currentTerm)
@@ -503,7 +553,7 @@ void ConsensusModule::becomeFollower(uint32_t newTerm)
503
553
}
504
554
m_heartbeatThreads.clear ();
505
555
506
- spdlog::info (" Server reverted to FOLLOWER state in term={}" , m_currentTerm);
556
+ spdlog::debug (" Server reverted to FOLLOWER state in term={}" , m_currentTerm);
507
557
}
508
558
509
559
auto ConsensusModule::hasMajority (uint32_t votes) const -> bool
@@ -525,11 +575,11 @@ void ConsensusModule::becomeLeader()
525
575
526
576
m_state = NodeState::LEADER;
527
577
528
- spdlog::info (" Node={} become a leader at term={}" , m_id, m_currentTerm);
578
+ spdlog::debug (" Node={} become a leader at term={}" , m_id, m_currentTerm);
529
579
530
580
for (auto &[id, client] : m_replicas)
531
581
{
532
- sendHeartbeat (client);
582
+ sendHeartbeat (client. value () );
533
583
}
534
584
}
535
585
@@ -541,7 +591,7 @@ void ConsensusModule::sendHeartbeat(NodeClient &client)
541
591
m_heartbeatThreads.emplace_back (
542
592
[this , maxRetries, &client, heartbeatInterval](std::stop_token token)
543
593
{
544
- spdlog::info (" Node={} is starting a heartbeat thread for client={}" , m_id, client.getId ());
594
+ spdlog::debug (" Node={} is starting a heartbeat thread for client={}" , m_id, client.getId ());
545
595
546
596
int consecutiveFailures = 0 ;
547
597
while (!token.stop_requested ())
@@ -551,7 +601,7 @@ void ConsensusModule::sendHeartbeat(NodeClient &client)
551
601
absl::ReaderMutexLock locker (&m_stateMutex);
552
602
if (m_state != NodeState::LEADER)
553
603
{
554
- spdlog::info (" Node={} is no longer a leader. Stopping the heartbeat thread" );
604
+ spdlog::debug (" Node={} is no longer a leader. Stopping the heartbeat thread" );
555
605
break ;
556
606
}
557
607
@@ -585,7 +635,7 @@ void ConsensusModule::sendHeartbeat(NodeClient &client)
585
635
auto responseTerm = response.term ();
586
636
auto success = response.success ();
587
637
588
- spdlog::info (
638
+ spdlog::debug (
589
639
" Received AppendEntriesResponse in requester thread peerTerm={} success={} responderId={}" ,
590
640
responseTerm,
591
641
success,
@@ -599,18 +649,13 @@ void ConsensusModule::sendHeartbeat(NodeClient &client)
599
649
becomeFollower (responseTerm);
600
650
break ;
601
651
}
602
-
603
- if (!success)
604
- {
605
- /* decrementNextIndex(client.getId());*/
606
- }
607
652
}
608
653
}
609
654
610
655
std::this_thread::sleep_for (heartbeatInterval);
611
656
}
612
657
613
- spdlog::info (" Stopping heartbeat thread for on the node={} for the client={}" , m_id, client.getId ());
658
+ spdlog::debug (" Stopping heartbeat thread for on the node={} for the client={}" , m_id, client.getId ());
614
659
});
615
660
}
616
661
@@ -631,16 +676,7 @@ void ConsensusModule::sendAppendEntriesRPC(NodeClient &client, std::vector<LogEn
631
676
632
677
for (auto logEntry : logEntries)
633
678
{
634
- spdlog::info (" VAGAG: logEntry.command={} logEntry.command.size={}" ,
635
- logEntry.command (),
636
- logEntry.command ().size ());
637
679
request.add_entries ()->CopyFrom (logEntry);
638
- /* auto *entry = request.add_entries();*/
639
- /* entry->set_term(logEntry.term());*/
640
- /* entry->set_index(logEntry.index());*/
641
- /* entry->set_command(logEntry.command());*/
642
- /* entry->set_key(logEntry.key());*/
643
- /* entry->set_value(logEntry.value());*/
644
680
}
645
681
}
646
682
@@ -677,11 +713,11 @@ void ConsensusModule::sendAppendEntriesRPC(NodeClient &client, std::vector<LogEn
677
713
m_commitIndex = majorityIndex;
678
714
679
715
// Apply successfull replication to the state machine e.g. in-memory hash-table
680
- /* while (m_lastApplied < m_commitIndex)*/
681
- /* { */
682
- /* ++m_lastApplied;*/
683
- /* m_kv[m_log[m_lastApplied - 1].key()] = m_log[m_lastApplied - 1].value();*/
684
- /* } */
716
+ while (m_lastApplied < m_commitIndex)
717
+ {
718
+ ++m_lastApplied;
719
+ m_kv[m_log[m_lastApplied - 1 ].key ()] = m_log[m_lastApplied - 1 ].value ();
720
+ }
685
721
686
722
return ;
687
723
}
0 commit comments