From c2d1fa533a24a8b31b96d85a835caeabc326552c Mon Sep 17 00:00:00 2001 From: Rafi KC Date: Mon, 20 Nov 2023 18:01:23 +0530 Subject: [PATCH] glusterfsd/mgmt: Connect backup volfile-server in round robin (#4167) Once we establish a connection with a volfile-server, if that connection is lost, we try to connect to the remaining servers one by one until the last one on the list. Once we reach the last one, we never go back to the first one. So if the last one is down, we will fail to connect to that node until it comes back. Change-Id: I8fe801fb536879d13a3c84eb2d935932b44dbaf5 Fixes: #4166 Signed-off-by: Mohammed Rafi KC --- api/src/glfs-mgmt.c | 26 ++++++++++++++++++++------ glusterfsd/src/glusterfsd-mgmt.c | 18 +++++++++++++----- 2 files changed, 33 insertions(+), 11 deletions(-) diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c index b26644b82a..faab78ddf7 100644 --- a/api/src/glfs-mgmt.c +++ b/api/src/glfs-mgmt.c @@ -819,6 +819,7 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, rpc_transport_t *rpc_trans = NULL; struct glfs *fs = NULL; int ret = 0; + static int log_ctr2; struct dnscache6 *dnscache = NULL; this = mydata; @@ -854,13 +855,26 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, } server = ctx->cmd_args.curr_server; if (server->list.next == &ctx->cmd_args.volfile_servers) { - errno = ENOTCONN; - gf_smsg("glfs-mgmt", GF_LOG_INFO, ENOTCONN, - API_MSG_VOLFILE_SERVER_EXHAUST, NULL); - glfs_init_done(fs, -1); - break; + if (!ctx->active) { + errno = ENOTCONN; + gf_smsg("glfs-mgmt", GF_LOG_INFO, ENOTCONN, + API_MSG_VOLFILE_SERVER_EXHAUST, NULL); + glfs_init_done(fs, -1); + break; + } else { + server = list_first_entry( + &ctx->cmd_args.volfile_servers, typeof(*server), + list); + GF_LOG_OCCASIONALLY(log_ctr2, "glusterfsd-mgmt", + GF_LOG_INFO, + "Exhausted all volfile servers, " + "Retrying from again!"); + } + + } else { + server = list_entry(server->list.next, typeof(*server), + list); } - server = 
list_entry(server->list.next, typeof(*server), list); ctx->cmd_args.curr_server = server; ctx->cmd_args.volfile_server_port = server->port; ctx->cmd_args.volfile_server = server->volfile_server; diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c index dc271b74a8..fdd37c5fb6 100644 --- a/glusterfsd/src/glusterfsd-mgmt.c +++ b/glusterfsd/src/glusterfsd-mgmt.c @@ -2817,13 +2817,21 @@ mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event, if (server->list.next == &ctx->cmd_args.volfile_servers) { if (!ctx->active) { need_term = 1; + gf_log("glusterfsd-mgmt", GF_LOG_INFO, + "Exhausted all volfile servers, Exiting"); + emval = ENOTCONN; + break; + } else { + server = list_first_entry(&ctx->cmd_args.volfile_servers, + typeof(*server), list); + emval = ENOTCONN; + GF_LOG_OCCASIONALLY( + log_ctr2, "glusterfsd-mgmt", GF_LOG_INFO, + "Exhausted all volfile servers, Retrying from again!"); } - emval = ENOTCONN; - GF_LOG_OCCASIONALLY(log_ctr2, "glusterfsd-mgmt", GF_LOG_INFO, - "Exhausted all volfile servers"); - break; + } else { + server = list_entry(server->list.next, typeof(*server), list); } - server = list_entry(server->list.next, typeof(*server), list); ctx->cmd_args.curr_server = server; ctx->cmd_args.volfile_server = server->volfile_server;