Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CouchDB Scanner Plugin: conflict finder #5393

Open
janl opened this issue Jan 10, 2025 · 0 comments
Open

CouchDB Scanner Plugin: conflict finder #5393

janl opened this issue Jan 10, 2025 · 0 comments

Comments

@janl
Copy link
Member

janl commented Jan 10, 2025

Heya, for hot-fixing a cluster I had to write a _conflicts and _deleted_conflicts finder that I could paste into remsh. I thought this might be a good idea to turn into a couch scanner plugin, but I don’t have time to do this right now. If someone would like to take this on, you’re all very welcome.

This is “make it work for me” levels of good code, but I ran this across ~2TB worth of shard files without issue. The “progress bar” can probably be taken out and the docs/s reporting needs to go into stats, but that’s all.

Here’s my code:

% Load the record definitions from couch_db and couch_changes into the shell;
% the funs below use #user_ctx{}, #doc_info{}, #rev_info{} and #changes_acc{}.
rr(couch_db).
rr(couch_changes).

% Threshold used by MinConflictsFilter: entries whose count is below this
% value are dropped from the report.
MinConflicts = 5.
% NOTE(review): presumably classifies this shell process's I/O as
% compaction traffic so the scan is queued accordingly; confirm against ioq.
ioq:set_io_priority({compaction, self()}).
 
% Ordering fun for lists:sort/2 over {Id, Count} tuples: highest Count first.
% lists:sort/2 expects the fun to return true when the first element compares
% less than *or equal to* the second in the desired ordering, hence >= and
% not >. With a strict comparison, entries with equal counts are treated as
% out of order and have their relative order flipped.
Sorter = fun({_IdA, CountA}, {_IdB, CountB}) -> CountA >= CountB end.
% Predicate for lists:filter/2. A {Id, Count} tuple is kept only when its
% Count reaches MinConflicts; anything that is not a 2-tuple is kept as is.
MinConflictsFilter = fun
	({_Id, Count}) -> Count >= MinConflicts;
	(_Other) -> true
end.

% Prepends {Id, Count} to the accumulator unless Count is 0, in which case
% the accumulator is returned unchanged.
MaybeAppend = fun
	(_Id, 0, Found) -> Found;
	(Id, Count, Found) -> [{Id, Count} | Found]
end.

% Returns doc_count + del_doc_count as reported by couch_db:get_db_info/1.
% A key missing from the info proplist counts as 0.
GetDocCount = fun(Db) ->
	{ok, Info} = couch_db:get_db_info(Db),
	Live = proplists:get_value(doc_count, Info, 0),
	Deleted = proplists:get_value(del_doc_count, Info, 0),
	Live + Deleted
end.

% Prints a progress marker ("<percent>% (<rate> docs/s) ") each time progress
% has advanced by more than 10 percentage points since the last marker.
%
% Begin         - os:timestamp() tuple taken when the scan started
% DocCount      - total number of documents expected
% DocsProcessed - number of documents handled so far
% LastPrinted   - percentage at which the previous marker was printed
%
% Returns the percentage to remember as LastPrinted: the current percentage
% when a marker was printed, otherwise LastPrinted unchanged. Nothing is
% printed while DocCount or DocsProcessed is 0.
MaybePrintStats = fun
	(_Begin, 0, _DocsProcessed, LastPrinted) ->
		LastPrinted;
	(_Begin, _DocCount, 0, LastPrinted) ->
		LastPrinted;
	(Begin, DocCount, DocsProcessed, LastPrinted) ->
		Perc = 100 / DocCount * DocsProcessed,
		case Perc - LastPrinted > 10 of
			true ->
				% Clamp to at least 1 microsecond: a zero elapsed time would
				% otherwise raise badarith in the division below.
				ElapsedMicros = max(1, timer:now_diff(os:timestamp(), Begin)),
				DocsPerSecond = DocsProcessed * 1000000 / ElapsedMicros,
				io:format("~p% (~.2f docs/s) ", [trunc(Perc), DocsPerSecond]),
				Perc;
			false ->
				LastPrinted
		end
end.

% Scans a single database and collects the documents that carry conflicts.
%
% DbName is passed to couch_db:open/2 together with an admin user context.
% Each entry delivered by couch_db:fold_changes/5 is converted with
% couch_doc:to_doc_info/1; every revision after the first one in
% #doc_info.revs is counted as a conflict and classified by its
% #rev_info.deleted flag. Progress is reported through MaybePrintStats.
%
% Returns {Conflicts, DeletedConflicts}. Each is a list of {DocId, Count}
% tuples ordered by Sorter and reduced by MinConflictsFilter.
%
% The handle is closed in an `after` clause so that a crash while reading
% the database info or folding does not skip couch_db:close/1.
Scanner = fun(DbName) ->
	OpenOpts = [{user_ctx, #user_ctx{name = <<"admin">>, roles = [<<"_admin">>]}}],
	{ok, Db} = couch_db:open(DbName, OpenOpts),
	{ok, ChangesAcc} =
		try
			Begin = os:timestamp(),
			io:format("~n ~p: ", [DbName]),
			DocCount = GetDocCount(Db),
			UserFun = fun(FullDocInfo, Acc) ->
				DocInfo = couch_doc:to_doc_info(FullDocInfo),
				% NOTE(review): assumes the first revision is the winning
				% one, so only the remaining ones are conflicts; confirm.
				[_Winner | Losers] = DocInfo#doc_info.revs,
				{DeletedLosers, LiveLosers} = lists:partition(
					fun(RevInfo) -> RevInfo#rev_info.deleted end,
					Losers
				),
				{DeletedAcc, LiveAcc, Seen, LastPrinted} = Acc#changes_acc.user_acc,
				NewLastPrinted = MaybePrintStats(Begin, DocCount, Seen, LastPrinted),
				DocId = DocInfo#doc_info.id,
				{ok, Acc#changes_acc{
					user_acc = {
						MaybeAppend(DocId, length(DeletedLosers), DeletedAcc),
						MaybeAppend(DocId, length(LiveLosers), LiveAcc),
						Seen + 1,
						NewLastPrinted
					}
				}}
			end,
			StartSeq = 0,
			% #changes_acc{} only serves as a carrier for user_acc:
			% {DeletedConflictsAcc, ConflictsAcc, DocsProcessed, LastPrinted}.
			UserAcc = #changes_acc{user_acc = {[], [], 0, 0}},
			% NOTE(review): whether fold_changes honours these options is
			% not visible here; confirm.
			Opts = [{include_docs, true},{deleted, true}],
			couch_db:fold_changes(Db, StartSeq, UserFun, UserAcc, Opts)
		after
			couch_db:close(Db)
		end,
	{DeletedConflicts, Conflicts, DocsProcessed, _LastPrinted} = ChangesAcc#changes_acc.user_acc,
	io:format("Total Docs Processed: ~p ", [DocsProcessed]),
	Rank = fun(Entries) -> lists:filter(MinConflictsFilter, lists:sort(Sorter, Entries)) end,
	{Rank(Conflicts), Rank(DeletedConflicts)}
end.

% Runs Scanner over every database name returned by
% couch_server:all_databases/0 and prints both result lists for each one.
Enumerator = fun() ->
	{ok, DbNames} = couch_server:all_databases(),
	Report = fun(DbName) ->
		{Live, Deleted} = Scanner(DbName),
		io:format("~n      Conflicts: ~n     ~p ~n      DeletedConflicts: ~n     ~p", [Live, Deleted])
	end,
	lists:foreach(Report, DbNames)
end.

% Start the scan: runs Scanner on every database and prints each result.
Enumerator().
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

No branches or pull requests

1 participant