Skip to content

Commit a3d7ec1

Browse files
authored
Remove RequestBatches RPC, combine with RequestBatch (MystenLabs#5435)
1 parent 02cae19 commit a3d7ec1

File tree

8 files changed

+79
-195
lines changed

8 files changed

+79
-195
lines changed

narwhal/network/src/p2p.rs

Lines changed: 3 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@ use tokio::{runtime::Handle, task::JoinHandle};
1818
use types::{
1919
Batch, BatchDigest, FetchCertificatesRequest, FetchCertificatesResponse, PrimaryMessage,
2020
PrimaryToPrimaryClient, PrimaryToWorkerClient, RequestBatchRequest, WorkerBatchMessage,
21-
WorkerBatchRequest, WorkerBatchResponse, WorkerDeleteBatchesMessage, WorkerOthersBatchMessage,
22-
WorkerOurBatchMessage, WorkerReconfigureMessage, WorkerSynchronizeMessage,
23-
WorkerToPrimaryClient, WorkerToWorkerClient,
21+
WorkerDeleteBatchesMessage, WorkerOthersBatchMessage, WorkerOurBatchMessage,
22+
WorkerReconfigureMessage, WorkerSynchronizeMessage, WorkerToPrimaryClient,
23+
WorkerToWorkerClient,
2424
};
2525

2626
fn default_executor() -> BoundedExecutor {
@@ -378,23 +378,6 @@ impl ReliableNetwork<WorkerBatchMessage> for P2pNetwork {
378378
}
379379
}
380380

381-
impl UnreliableNetwork<WorkerBatchRequest> for P2pNetwork {
382-
type Response = WorkerBatchResponse;
383-
fn unreliable_send(
384-
&mut self,
385-
peer: NetworkPublicKey,
386-
message: &WorkerBatchRequest,
387-
) -> Result<JoinHandle<Result<anemo::Response<WorkerBatchResponse>>>> {
388-
let message = message.to_owned();
389-
let f = move |peer| async move {
390-
WorkerToWorkerClient::new(peer)
391-
.request_batches(message)
392-
.await
393-
};
394-
self.unreliable_send(peer, f)
395-
}
396-
}
397-
398381
#[async_trait]
399382
impl PrimaryToWorkerRpc for P2pNetwork {
400383
async fn delete_batches(
@@ -442,25 +425,3 @@ impl WorkerRpc for P2pNetwork {
442425
Ok(response.into_body().batch)
443426
}
444427
}
445-
446-
#[async_trait]
447-
impl ReliableNetwork<WorkerBatchRequest> for P2pNetwork {
448-
type Response = WorkerBatchResponse;
449-
async fn send(
450-
&mut self,
451-
peer: NetworkPublicKey,
452-
message: &WorkerBatchRequest,
453-
) -> CancelOnDropHandler<Result<anemo::Response<WorkerBatchResponse>>> {
454-
let message = message.to_owned();
455-
let f = move |peer| {
456-
let message = message.clone();
457-
async move {
458-
WorkerToWorkerClient::new(peer)
459-
.request_batches(message)
460-
.await
461-
}
462-
};
463-
464-
self.send(peer, f).await
465-
}
466-
}

narwhal/test-utils/src/lib.rs

Lines changed: 5 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,8 @@ use types::{
2929
FetchCertificatesResponse, Header, HeaderBuilder, PrimaryMessage, PrimaryToPrimary,
3030
PrimaryToPrimaryServer, PrimaryToWorker, PrimaryToWorkerServer, RequestBatchRequest,
3131
RequestBatchResponse, Round, SequenceNumber, Transaction, Vote, WorkerBatchMessage,
32-
WorkerBatchRequest, WorkerBatchResponse, WorkerDeleteBatchesMessage, WorkerReconfigureMessage,
33-
WorkerSynchronizeMessage, WorkerToWorker, WorkerToWorkerServer,
32+
WorkerDeleteBatchesMessage, WorkerReconfigureMessage, WorkerSynchronizeMessage, WorkerToWorker,
33+
WorkerToWorkerServer,
3434
};
3535

3636
pub mod cluster;
@@ -269,25 +269,16 @@ impl PrimaryToWorker for PrimaryToWorkerMockServer {
269269

270270
pub struct WorkerToWorkerMockServer {
271271
batch_sender: Sender<WorkerBatchMessage>,
272-
batch_request_sender: Sender<WorkerBatchRequest>,
273272
}
274273

275274
impl WorkerToWorkerMockServer {
276275
pub fn spawn(
277276
keypair: NetworkKeyPair,
278277
address: Multiaddr,
279-
) -> (
280-
Receiver<WorkerBatchMessage>,
281-
Receiver<WorkerBatchRequest>,
282-
anemo::Network,
283-
) {
278+
) -> (Receiver<WorkerBatchMessage>, anemo::Network) {
284279
let addr = network::multiaddr_to_address(&address).unwrap();
285280
let (batch_sender, batch_receiver) = channel(1);
286-
let (batch_request_sender, batch_request_receiver) = channel(1);
287-
let service = WorkerToWorkerServer::new(Self {
288-
batch_sender,
289-
batch_request_sender,
290-
});
281+
let service = WorkerToWorkerServer::new(Self { batch_sender });
291282

292283
let routes = anemo::Router::new().add_rpc_service(service);
293284
let network = anemo::Network::bind(addr)
@@ -296,7 +287,7 @@ impl WorkerToWorkerMockServer {
296287
.start(routes)
297288
.unwrap();
298289
info!("starting network on: {}", network.local_addr());
299-
(batch_receiver, batch_request_receiver, network)
290+
(batch_receiver, network)
300291
}
301292
}
302293

@@ -312,20 +303,6 @@ impl WorkerToWorker for WorkerToWorkerMockServer {
312303

313304
Ok(anemo::Response::new(()))
314305
}
315-
async fn request_batches(
316-
&self,
317-
request: anemo::Request<WorkerBatchRequest>,
318-
) -> Result<anemo::Response<WorkerBatchResponse>, anemo::rpc::Status> {
319-
let message = request.into_body();
320-
321-
self.batch_request_sender.send(message).await.unwrap();
322-
323-
// For testing stub, just always reply with no batches.
324-
Ok(anemo::Response::new(WorkerBatchResponse {
325-
batches: vec![],
326-
}))
327-
}
328-
329306
async fn request_batch(
330307
&self,
331308
_request: anemo::Request<RequestBatchRequest>,

narwhal/types/build.rs

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -149,15 +149,6 @@ fn build_anemo_services(out_dir: &Path) {
149149
.codec_path("anemo::rpc::codec::BincodeCodec")
150150
.build(),
151151
)
152-
.method(
153-
anemo_build::manual::Method::builder()
154-
.name("request_batches")
155-
.route_name("RequestBatches")
156-
.request_type("crate::WorkerBatchRequest")
157-
.response_type("crate::WorkerBatchResponse")
158-
.codec_path("anemo::rpc::codec::BincodeCodec")
159-
.build(),
160-
)
161152
.method(
162153
anemo_build::manual::Method::builder()
163154
.name("request_batch")

narwhal/types/src/worker.rs

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,6 @@ pub struct WorkerBatchMessage {
1717
pub batch: Batch,
1818
}
1919

20-
/// Used by workers to request batches from other workers.
21-
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
22-
pub struct WorkerBatchRequest {
23-
pub digests: Vec<BatchDigest>,
24-
}
25-
26-
/// Used by workers to provide batches to other workers.
27-
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, Eq)]
28-
pub struct WorkerBatchResponse {
29-
pub batches: Vec<Batch>,
30-
}
31-
3220
/// Used by primary to ask worker for the request.
3321
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
3422
pub struct RequestBatchRequest {

narwhal/worker/src/handlers.rs

Lines changed: 47 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,9 @@ use tokio::{sync::watch, time::sleep};
2020
use tracing::{debug, error, info, trace, warn};
2121
use types::{
2222
metered_channel::Sender, Batch, BatchDigest, PrimaryToWorker, ReconfigureNotification,
23-
RequestBatchRequest, RequestBatchResponse, WorkerBatchMessage, WorkerBatchRequest,
24-
WorkerBatchResponse, WorkerDeleteBatchesMessage, WorkerOthersBatchMessage,
25-
WorkerReconfigureMessage, WorkerSynchronizeMessage, WorkerToWorker, WorkerToWorkerClient,
23+
RequestBatchRequest, RequestBatchResponse, WorkerBatchMessage, WorkerDeleteBatchesMessage,
24+
WorkerOthersBatchMessage, WorkerReconfigureMessage, WorkerSynchronizeMessage, WorkerToWorker,
25+
WorkerToWorkerClient,
2626
};
2727

2828
#[cfg(test)]
@@ -56,28 +56,11 @@ impl WorkerToWorker for WorkerReceiverHandler {
5656
.map_err(|e| anemo::rpc::Status::internal(e.to_string()))
5757
}
5858

59-
async fn request_batches(
60-
&self,
61-
request: anemo::Request<WorkerBatchRequest>,
62-
) -> Result<anemo::Response<WorkerBatchResponse>, anemo::rpc::Status> {
63-
let message = request.into_body();
64-
// TODO [issue #7]: Do some accounting to prevent bad actors from monopolizing our resources
65-
// TODO: Add a limit on number of requested batches
66-
let batches: Vec<Batch> = self
67-
.store
68-
.read_all(message.digests)
69-
.await
70-
.map_err(|e| anemo::rpc::Status::from_error(Box::new(e)))?
71-
.into_iter()
72-
.flatten()
73-
.collect();
74-
Ok(anemo::Response::new(WorkerBatchResponse { batches }))
75-
}
76-
7759
async fn request_batch(
7860
&self,
7961
request: anemo::Request<RequestBatchRequest>,
8062
) -> Result<anemo::Response<RequestBatchResponse>, anemo::rpc::Status> {
63+
// TODO [issue #7]: Do some accounting to prevent bad actors from monopolizing our resources
8164
let batch = request.into_body().batch;
8265
let batch = self
8366
.store
@@ -100,10 +83,10 @@ pub struct PrimaryReceiverHandler {
10083
// The worker information cache.
10184
pub worker_cache: SharedWorkerCache,
10285
pub store: Store<BatchDigest, Batch>,
103-
// Timeout on RequestBatches RPC.
104-
pub request_batches_timeout: Duration,
86+
// Timeout on RequestBatch RPC.
87+
pub request_batch_timeout: Duration,
10588
// Number of random nodes to query when retrying batch requests.
106-
pub request_batches_retry_nodes: usize,
89+
pub request_batch_retry_nodes: usize,
10790
/// Send reconfiguration update to other tasks.
10891
pub tx_reconfigure: watch::Sender<ReconfigureNotification>,
10992
}
@@ -172,9 +155,11 @@ impl PrimaryToWorker for PrimaryReceiverHandler {
172155
return Ok(anemo::Response::new(()));
173156
}
174157

175-
let batch_request = WorkerBatchRequest {
176-
digests: missing.iter().cloned().collect(),
177-
};
158+
let batch_requests: Vec<_> = missing
159+
.iter()
160+
.cloned()
161+
.map(|batch| RequestBatchRequest { batch })
162+
.collect();
178163
let network = request
179164
.extensions()
180165
.get::<anemo::NetworkRef>()
@@ -184,16 +169,15 @@ impl PrimaryToWorker for PrimaryReceiverHandler {
184169
})?;
185170

186171
let mut handles = FuturesUnordered::new();
187-
let request_batches_fn = |mut client: WorkerToWorkerClient<anemo::Peer>,
188-
batch_request,
189-
timeout| {
190-
// Wrapper function enables us to move `client` into the future.
191-
async move {
192-
client
193-
.request_batches(anemo::Request::new(batch_request).with_timeout(timeout))
194-
.await
195-
}
196-
};
172+
let request_batch_fn =
173+
|mut client: WorkerToWorkerClient<anemo::Peer>, batch_request, timeout| {
174+
// Wrapper function enables us to move `client` into the future.
175+
async move {
176+
client
177+
.request_batch(anemo::Request::new(batch_request).with_timeout(timeout))
178+
.await
179+
}
180+
};
197181
if first_attempt {
198182
// Send first sync request to a single node.
199183
let worker_name = match self.worker_cache.load().worker(&message.target, &self.id) {
@@ -207,14 +191,16 @@ impl PrimaryToWorker for PrimaryReceiverHandler {
207191
let peer_id = anemo::PeerId(worker_name.0.to_bytes());
208192
if let Some(peer) = network.peer(peer_id) {
209193
debug!(
210-
"Sending WorkerBatchRequest message to {worker_name} for missing batches {:?}",
211-
batch_request.digests
194+
"Sending BatchRequests to {worker_name}: {:?}",
195+
batch_requests
212196
);
213-
handles.push(request_batches_fn(
214-
WorkerToWorkerClient::new(peer),
215-
batch_request,
216-
self.request_batches_timeout,
217-
));
197+
handles.extend(batch_requests.into_iter().map(|request| {
198+
request_batch_fn(
199+
WorkerToWorkerClient::new(peer.clone()),
200+
request,
201+
self.request_batch_timeout,
202+
)
203+
}));
218204
} else {
219205
warn!("Unable to reach primary peer {worker_name} on the network");
220206
}
@@ -229,19 +215,22 @@ impl PrimaryToWorker for PrimaryReceiverHandler {
229215
.collect();
230216
handles.extend(
231217
names
232-
.choose_multiple(&mut rand::thread_rng(), self.request_batches_retry_nodes)
218+
.choose_multiple(&mut rand::thread_rng(), self.request_batch_retry_nodes)
233219
.filter_map(|name| network.peer(anemo::PeerId(name.0.to_bytes())))
234-
.map(|peer| {
235-
request_batches_fn(
236-
WorkerToWorkerClient::new(peer),
237-
batch_request.clone(),
238-
self.request_batches_timeout,
239-
)
220+
.flat_map(|peer| {
221+
batch_requests.iter().cloned().map(move |request| {
222+
let peer = peer.clone();
223+
request_batch_fn(
224+
WorkerToWorkerClient::new(peer),
225+
request,
226+
self.request_batch_timeout,
227+
)
228+
})
240229
}),
241230
);
242231
debug!(
243-
"Sending WorkerBatchRequest retries to workers {names:?} for missing batches {:?}",
244-
batch_request.digests
232+
"Sending BatchRequest retries to workers {names:?}: {:?}",
233+
batch_requests
245234
);
246235
}
247236

@@ -250,7 +239,7 @@ impl PrimaryToWorker for PrimaryReceiverHandler {
250239
while let Some(result) = handles.next().await {
251240
match result {
252241
Ok(response) => {
253-
for batch in response.into_body().batches {
242+
if let Some(batch) = response.into_body().batch {
254243
let digest = batch.digest();
255244
if missing.remove(&digest) {
256245
self.store.write(digest, batch).await;
@@ -261,9 +250,10 @@ impl PrimaryToWorker for PrimaryReceiverHandler {
261250
}
262251
}
263252
Err(e) => {
264-
// TODO: add info on target peer when anemo supports retrieving it from
265-
// anemo::rpc::Status.
266-
info!("WorkerBatchRequest failed: {e:?}");
253+
info!(
254+
"RequestBatchRequest to worker {:?} failed: {e:?}",
255+
e.peer_id()
256+
)
267257
}
268258
}
269259
}

0 commit comments

Comments
 (0)