Skip to content

Commit 94a5742

Browse files
committed
Merge branch 'dgud/mnesia/restart-add-tab-copy/OTP-18850' into maint
* dgud/mnesia/restart-add-tab-copy/OTP-18850: Retry to early add_table_copy
2 parents 12b2474 + d17819b commit 94a5742

File tree

4 files changed

+32
-6
lines changed

4 files changed

+32
-6
lines changed

lib/mnesia/src/mnesia_lib.erl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1065,7 +1065,7 @@ save2(DbgInfo) ->
10651065
Key = {'$$$_report', current_pos},
10661066
P =
10671067
case ?ets_lookup_element(mnesia_gvar, Key, 2) of
1068-
30 -> -1;
1068+
100 -> -1;
10691069
I -> I
10701070
end,
10711071
set({'$$$_report', current_pos}, P+1),

lib/mnesia/src/mnesia_loader.erl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,7 @@ do_get_network_copy(Tab, Reason, Ns, Storage, Cs) ->
225225
dbg_out("Table ~tp copied from ~p to ~p~n", [Tab, Node, node()]),
226226
{loaded, ok};
227227
Err = {error, _} when element(1, Reason) == dumper ->
228+
verbose("Copy failed: ~tp ~p~n", [Tab, Err]),
228229
{not_loaded,Err};
229230
restart ->
230231
try_net_load_table(Tab, Reason, Tail ++ [Node], Cs);
@@ -342,6 +343,7 @@ start_receiver(Tab,Storage,Cs,SenderPid,TabSize,DetsData,{dumper,{add_table_copy
342343
Init = table_init_fun(SenderPid, Storage),
343344
case do_init_table(Tab,Storage,Cs,SenderPid,TabSize,DetsData,self(), Init) of
344345
Err = {error, _} ->
346+
verbose("Init table failed: ~tp ~p~n", [Tab, Err]),
345347
SenderPid ! {copier_done, node()},
346348
Err;
347349
Else ->
@@ -366,6 +368,7 @@ wait_on_load_complete(Pid) ->
366368
{Pid, Res} ->
367369
Res;
368370
{'EXIT', Pid, Reason} ->
371+
verbose("Loader crashed : ~tp ~p~n", [Pid, Reason]),
369372
error(Reason);
370373
Else ->
371374
Pid ! Else,

lib/mnesia/src/mnesia_schema.erl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2466,14 +2466,15 @@ prepare_op(Tid, {op, add_table_copy, Storage, Node, TabDef}, _WaitFor) ->
24662466
_ ->
24672467
ok
24682468
end,
2469-
mnesia_lib:verbose("~w:~w Adding table~n",[?MODULE,?LINE]),
24702469

24712470
case mnesia_controller:get_network_copy(Tid, Tab, Cs) of
24722471
{loaded, ok} ->
24732472
%% Tables are created by mnesia_loader get_network code
24742473
insert_cstruct(Tid, Cs, true),
24752474
mnesia_controller:i_have_tab(Tab, Cs),
24762475
{true, optional};
2476+
{not_loaded, {not_active, schema, Node}} ->
2477+
mnesia:abort({node_not_running, Node});
24772478
{not_loaded, ErrReason} ->
24782479
Reason = {system_limit, Tab, {Node, ErrReason}},
24792480
mnesia:abort(Reason)

lib/mnesia/src/mnesia_tm.erl

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -939,27 +939,26 @@ restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
939939
return_abort(Fun, Args, Why),
940940
Factor = 1,
941941
SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
942-
dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
942+
log_restart("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
943943
timer:sleep(SleepTime),
944944
execute_outer(Mod, Fun, Args, Factor, Retries, Type);
945945
{node_not_running, _N} -> %% Avoids hanging in receive_release_tid_ack
946946
return_abort(Fun, Args, Why),
947947
Factor = 1,
948948
SleepTime = mnesia_lib:random_time(Factor, Tid#tid.counter),
949-
dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
949+
log_restart("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
950950
timer:sleep(SleepTime),
951951
execute_outer(Mod, Fun, Args, Factor, Retries, Type);
952952
_ ->
953953
SleepTime = mnesia_lib:random_time(Factor0, Tid#tid.counter),
954954
dbg_out("Restarting transaction ~w: in ~wms ~w~n", [Tid, SleepTime, Why]),
955-
955+
956956
if
957957
Factor0 /= 10 ->
958958
ignore;
959959
true ->
960960
%% Our serial may be much larger than other nodes ditto
961961
AllNodes = val({current, db_nodes}),
962-
verbose("Sync serial ~p~n", [Tid]),
963962
rpc:abcast(AllNodes, ?MODULE, {sync_trans_serial, Tid})
964963
end,
965964
intercept_friends(Tid, Ts),
@@ -978,6 +977,24 @@ restart(Mod, Tid, Ts, Fun, Args, Factor0, Retries0, Type, Why) ->
978977
end
979978
end.
980979

980+
log_restart(F,A) ->
981+
case get(transaction_client) of
982+
undefined ->
983+
dbg_out(F,A);
984+
_ ->
985+
case get(transaction_count) of
986+
undefined ->
987+
put(transaction_count, 1),
988+
verbose(F,A);
989+
N when (N rem 10) == 0 ->
990+
put(transaction_count, N+1),
991+
verbose(F,A);
992+
N ->
993+
put(transaction_count, N+1),
994+
dbg_out(F,A)
995+
end
996+
end.
997+
981998
get_restarted(Tid) ->
982999
case Res = rec() of
9831000
{restarted, Tid} ->
@@ -2133,6 +2150,7 @@ new_cr_format(#commit{ext=Snmp}=Cr) ->
21332150
Cr#commit{ext=[{snmp,Snmp}]}.
21342151

21352152
rec_all([Node | Tail], Tid, Res, Pids) ->
2153+
put({?MODULE, ?FUNCTION_NAME}, {Node, Tail}),
21362154
receive
21372155
{?MODULE, Node, {vote_yes, Tid}} ->
21382156
rec_all(Tail, Tid, Res, Pids);
@@ -2151,8 +2169,12 @@ rec_all([Node | Tail], Tid, Res, Pids) ->
21512169
Abort = {do_abort, {bad_commit, Node}},
21522170
?SAFE({?MODULE, Node} ! {Tid, Abort}),
21532171
rec_all(Tail, Tid, Abort, Pids)
2172+
after 15000 ->
2173+
mnesia_lib:verbose("~p: trans ~p waiting ~p~n", [self(), Tid, Node]),
2174+
rec_all([Node | Tail], Tid, Res, Pids)
21542175
end;
21552176
rec_all([], _Tid, Res, Pids) ->
2177+
erase({?MODULE, ?FUNCTION_NAME}),
21562178
{Res, Pids}.
21572179

21582180
get_transactions() ->

0 commit comments

Comments
 (0)