From 6dcfb4d0302a959bb5a2730f60c8ccd8c235d7f8 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Nov 2024 10:11:59 +0100 Subject: [PATCH 1/8] refs/reftable: encapsulate reftable stack The reftable ref store needs to keep track of multiple stacks, one for the main worktree and an arbitrary number of stacks for worktrees. This is done by storing pointers to `struct reftable_stack`, which we then access directly. Wrap the stack in a new `struct reftable_backend`. This will allow us to attach more data to each respective stack in subsequent commits. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 129 +++++++++++++++++++++++----------------- 1 file changed, 73 insertions(+), 56 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index f560bc2b67857d..116cc5ec23c4ee 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -34,6 +34,23 @@ */ #define REF_UPDATE_VIA_HEAD (1 << 8) +struct reftable_backend { + struct reftable_stack *stack; +}; + +static int reftable_backend_init(struct reftable_backend *be, + const char *path, + const struct reftable_write_options *opts) +{ + return reftable_new_stack(&be->stack, path, opts); +} + +static void reftable_backend_release(struct reftable_backend *be) +{ + reftable_stack_destroy(be->stack); + be->stack = NULL; +} + struct reftable_ref_store { struct ref_store base; @@ -41,17 +58,17 @@ struct reftable_ref_store { * The main stack refers to the common dir and thus contains common * refs as well as refs of the main repository. */ - struct reftable_stack *main_stack; + struct reftable_backend main_backend; /* * The worktree stack refers to the gitdir in case the refdb is opened * via a worktree. It thus contains the per-worktree refs. */ - struct reftable_stack *worktree_stack; + struct reftable_backend worktree_backend; /* * Map of worktree stacks by their respective worktree names. The map * is populated lazily when we try to resolve `worktrees/$worktree` refs. */ - struct strmap worktree_stacks; + struct strmap worktree_backends; struct reftable_write_options write_options; unsigned int store_flags; @@ -97,21 +114,21 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store * those references in their normalized form. */ -static struct reftable_stack *stack_for(struct reftable_ref_store *store, - const char *refname, - const char **rewritten_ref) +static struct reftable_backend *backend_for(struct reftable_ref_store *store, + const char *refname, + const char **rewritten_ref) { const char *wtname; int wtname_len; if (!refname) - return store->main_stack; + return &store->main_backend; switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) { case REF_WORKTREE_OTHER: { static struct strbuf wtname_buf = STRBUF_INIT; struct strbuf wt_dir = STRBUF_INIT; - struct reftable_stack *stack; + struct reftable_backend *be; /* * We're using a static buffer here so that we don't need to @@ -125,37 +142,39 @@ static struct reftable_stack *stack_for(struct reftable_ref_store *store, /* * There is an edge case here: when the worktree references the * current worktree, then we set up the stack once via - * `worktree_stacks` and once via `worktree_stack`. This is + * `worktree_backends` and once via `worktree_backend`. This is * wasteful, but in the reading case it shouldn't matter. 
And * in the writing case we would notice that the stack is locked * already and error out when trying to write a reference via * both stacks. */ - stack = strmap_get(&store->worktree_stacks, wtname_buf.buf); - if (!stack) { + be = strmap_get(&store->worktree_backends, wtname_buf.buf); + if (!be) { strbuf_addf(&wt_dir, "%s/worktrees/%s/reftable", store->base.repo->commondir, wtname_buf.buf); - store->err = reftable_new_stack(&stack, wt_dir.buf, - &store->write_options); + CALLOC_ARRAY(be, 1); + store->err = reftable_backend_init(be, wt_dir.buf, + &store->write_options); assert(store->err != REFTABLE_API_ERROR); - strmap_put(&store->worktree_stacks, wtname_buf.buf, stack); + + strmap_put(&store->worktree_backends, wtname_buf.buf, be); } strbuf_release(&wt_dir); - return stack; + return be; } case REF_WORKTREE_CURRENT: /* * If there is no worktree stack then we're currently in the * main worktree. We thus return the main stack in that case. */ - if (!store->worktree_stack) - return store->main_stack; - return store->worktree_stack; + if (!store->worktree_backend.stack) + return &store->main_backend; + return &store->worktree_backend; case REF_WORKTREE_MAIN: case REF_WORKTREE_SHARED: - return store->main_stack; + return &store->main_backend; default: BUG("unhandled worktree reference type"); } @@ -292,7 +311,7 @@ static struct ref_store *reftable_be_init(struct repository *repo, umask(mask); base_ref_store_init(&refs->base, repo, gitdir, &refs_be_reftable); - strmap_init(&refs->worktree_stacks); + strmap_init(&refs->worktree_backends); refs->store_flags = store_flags; refs->log_all_ref_updates = repo_settings_get_log_all_ref_updates(repo); @@ -337,8 +356,8 @@ static struct ref_store *reftable_be_init(struct repository *repo, strbuf_realpath(&path, gitdir, 0); } strbuf_addstr(&path, "/reftable"); - refs->err = reftable_new_stack(&refs->main_stack, path.buf, - &refs->write_options); + refs->err = reftable_backend_init(&refs->main_backend, path.buf, + &refs->write_options); if (refs->err) goto done; @@ -354,8 +373,8 @@ static struct ref_store *reftable_be_init(struct repository *repo, strbuf_reset(&path); strbuf_addf(&path, "%s/reftable", gitdir); - refs->err = reftable_new_stack(&refs->worktree_stack, path.buf, - &refs->write_options); + refs->err = reftable_backend_init(&refs->worktree_backend, path.buf, + &refs->write_options); if (refs->err) goto done; } @@ -374,19 +393,17 @@ static void reftable_be_release(struct ref_store *ref_store) struct strmap_entry *entry; struct hashmap_iter iter; - if (refs->main_stack) { - reftable_stack_destroy(refs->main_stack); - refs->main_stack = NULL; - } + if (refs->main_backend.stack) + reftable_backend_release(&refs->main_backend); + if (refs->worktree_backend.stack) + reftable_backend_release(&refs->worktree_backend); - if (refs->worktree_stack) { - reftable_stack_destroy(refs->worktree_stack); - refs->worktree_stack = NULL; + strmap_for_each_entry(&refs->worktree_backends, &iter, entry) { + struct reftable_backend *be = entry->value; + reftable_backend_release(be); + free(be); } - - strmap_for_each_entry(&refs->worktree_stacks, &iter, entry) - reftable_stack_destroy(entry->value); - strmap_clear(&refs->worktree_stacks, 0); + strmap_clear(&refs->worktree_backends, 0); } static int reftable_be_create_on_disk(struct ref_store *ref_store, @@ -781,7 +798,7 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto required_flags |= REF_STORE_ODB; refs = reftable_be_downcast(ref_store, required_flags, "ref_iterator_begin"); - main_iter 
= ref_iterator_for_stack(refs, refs->main_stack, prefix, + main_iter = ref_iterator_for_stack(refs, refs->main_backend.stack, prefix, exclude_patterns, flags); /* @@ -789,14 +806,14 @@ static struct ref_iterator *reftable_be_iterator_begin(struct ref_store *ref_sto * right now. If we aren't, then we return the common reftable * iterator, only. */ - if (!refs->worktree_stack) + if (!refs->worktree_backend.stack) return &main_iter->base; /* * Otherwise we merge both the common and the per-worktree refs into a * single iterator. */ - worktree_iter = ref_iterator_for_stack(refs, refs->worktree_stack, prefix, + worktree_iter = ref_iterator_for_stack(refs, refs->worktree_backend.stack, prefix, exclude_patterns, flags); return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base, ref_iterator_select, NULL); @@ -811,7 +828,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; int ret; if (refs->err < 0) @@ -838,7 +855,7 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_ref_record ref = {0}; int ret; @@ -898,7 +915,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out, struct ref_update *update, struct strbuf *err) { - struct reftable_stack *stack = stack_for(refs, update->refname, NULL); + struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack; struct write_transaction_table_arg *arg = NULL; size_t i; int ret; @@ -1031,7 +1048,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, goto done; } - ret = read_ref_without_reload(refs, stack_for(refs, "HEAD", NULL), "HEAD", + ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD", &head_oid, &head_referent, &head_type); if (ret < 0) goto done; @@ -1043,7 +1060,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, struct reftable_stack *stack; const char *rewritten_ref; - stack = stack_for(refs, u->refname, &rewritten_ref); + stack = backend_for(refs, u->refname, &rewritten_ref)->stack; /* Verify that the new object ID is valid. 
*/ if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) && @@ -1525,9 +1542,9 @@ static int reftable_be_pack_refs(struct ref_store *ref_store, if (refs->err) return refs->err; - stack = refs->worktree_stack; + stack = refs->worktree_backend.stack; if (!stack) - stack = refs->main_stack; + stack = refs->main_backend.stack; if (opts->flags & PACK_REFS_AUTO) ret = reftable_stack_auto_compact(stack); @@ -1782,7 +1799,7 @@ static int reftable_be_rename_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref"); - struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname); + struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack; struct write_copy_arg arg = { .refs = refs, .stack = stack, @@ -1814,7 +1831,7 @@ static int reftable_be_copy_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref"); - struct reftable_stack *stack = stack_for(refs, newrefname, &newrefname); + struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack; struct write_copy_arg arg = { .refs = refs, .stack = stack, @@ -1952,11 +1969,11 @@ static struct ref_iterator *reftable_be_reflog_iterator_begin(struct ref_store * reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_iterator_begin"); struct reftable_reflog_iterator *main_iter, *worktree_iter; - main_iter = reflog_iterator_for_stack(refs, refs->main_stack); - if (!refs->worktree_stack) + main_iter = reflog_iterator_for_stack(refs, refs->main_backend.stack); + if (!refs->worktree_backend.stack) return &main_iter->base; - worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_stack); + worktree_iter = reflog_iterator_for_stack(refs, refs->worktree_backend.stack); return merge_ref_iterator_begin(&worktree_iter->base, &main_iter->base, ref_iterator_select, NULL); @@ -1995,7 +2012,7 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record log = {0}; struct reftable_iterator it = {0}; int ret; @@ -2035,7 +2052,7 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record *logs = NULL; struct reftable_iterator it = {0}; size_t logs_alloc = 0, logs_nr = 0, i; @@ -2084,7 +2101,7 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record log = {0}; struct reftable_iterator it = {0}; int ret; @@ -2169,7 +2186,7 @@ static int reftable_be_create_reflog(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = 
backend_for(refs, refname, &refname)->stack; struct write_reflog_existence_arg arg = { .refs = refs, .stack = stack, @@ -2243,7 +2260,7 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct write_reflog_delete_arg arg = { .stack = stack, .refname = refname, @@ -2352,7 +2369,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, */ struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire"); - struct reftable_stack *stack = stack_for(refs, refname, &refname); + struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record *logs = NULL; struct reftable_log_record *rewritten = NULL; struct reftable_ref_record ref_record = {0}; From 7d219904c0a142a2e2e62fa85e5df2e680748262 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Nov 2024 10:12:02 +0100 Subject: [PATCH 2/8] refs/reftable: handle reloading stacks in the reftable backend When accessing a stack we almost always have to reload the stack before reading data from it. This is mostly because Git does not have a notification mechanism for when underlying data has been changed, and thus we are forced to opportunistically reload the stack every single time to account for any changes that may have happened concurrently. Handle the reload internally in `backend_for()`. For one this forces callsites to think about whether or not they need to reload the stack. But second this makes the logic to access stacks more self-contained by letting the `struct reftable_backend` manage themselves. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 150 +++++++++++++++++++++++++--------------- 1 file changed, 93 insertions(+), 57 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 116cc5ec23c4ee..4a28dc8a9dfad7 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -114,21 +114,25 @@ static struct reftable_ref_store *reftable_be_downcast(struct ref_store *ref_sto * like `worktrees/$worktree/refs/heads/foo` as worktree stacks will store * those references in their normalized form. */ -static struct reftable_backend *backend_for(struct reftable_ref_store *store, - const char *refname, - const char **rewritten_ref) +static int backend_for(struct reftable_backend **out, + struct reftable_ref_store *store, + const char *refname, + const char **rewritten_ref, + int reload) { + struct reftable_backend *be; const char *wtname; int wtname_len; - if (!refname) - return &store->main_backend; + if (!refname) { + be = &store->main_backend; + goto out; + } switch (parse_worktree_ref(refname, &wtname, &wtname_len, rewritten_ref)) { case REF_WORKTREE_OTHER: { static struct strbuf wtname_buf = STRBUF_INIT; struct strbuf wt_dir = STRBUF_INIT; - struct reftable_backend *be; /* * We're using a static buffer here so that we don't need to @@ -162,7 +166,7 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store, } strbuf_release(&wt_dir); - return be; + goto out; } case REF_WORKTREE_CURRENT: /* @@ -170,14 +174,27 @@ static struct reftable_backend *backend_for(struct reftable_ref_store *store, * main worktree. We thus return the main stack in that case. 
*/ if (!store->worktree_backend.stack) - return &store->main_backend; - return &store->worktree_backend; + be = &store->main_backend; + else + be = &store->worktree_backend; + goto out; case REF_WORKTREE_MAIN: case REF_WORKTREE_SHARED: - return &store->main_backend; + be = &store->main_backend; + goto out; default: BUG("unhandled worktree reference type"); } + +out: + if (reload) { + int ret = reftable_stack_reload(be->stack); + if (ret) + return ret; + } + *out = be; + + return 0; } static int should_write_log(struct reftable_ref_store *refs, const char *refname) @@ -828,17 +845,17 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "read_raw_ref"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; + struct reftable_backend *be; int ret; if (refs->err < 0) return refs->err; - ret = reftable_stack_reload(stack); + ret = backend_for(&be, refs, refname, &refname, 1); if (ret) return ret; - ret = read_ref_without_reload(refs, stack, refname, oid, referent, type); + ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type); if (ret < 0) return ret; if (ret > 0) { @@ -855,15 +872,15 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_ref_record ref = {0}; + struct reftable_backend *be; int ret; - ret = reftable_stack_reload(stack); + ret = backend_for(&be, refs, refname, &refname, 1); if (ret) return ret; - ret = reftable_stack_read_ref(stack, refname, &ref); + ret = reftable_stack_read_ref(be->stack, refname, &ref); if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF) strbuf_addstr(referent, ref.value.symref); else @@ -880,7 +897,7 @@ struct reftable_transaction_update { struct write_transaction_table_arg { struct reftable_ref_store *refs; - struct reftable_stack *stack; + struct reftable_backend *be; struct reftable_addition *addition; struct reftable_transaction_update *updates; size_t updates_nr; @@ -915,27 +932,31 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out, struct ref_update *update, struct strbuf *err) { - struct reftable_stack *stack = backend_for(refs, update->refname, NULL)->stack; struct write_transaction_table_arg *arg = NULL; + struct reftable_backend *be; size_t i; int ret; + ret = backend_for(&be, refs, update->refname, NULL, 0); + if (ret) + return ret; + /* * Search for a preexisting stack update. If there is one then we add * the update to it, otherwise we set up a new stack update. 
*/ for (i = 0; !arg && i < tx_data->args_nr; i++) - if (tx_data->args[i].stack == stack) + if (tx_data->args[i].be == be) arg = &tx_data->args[i]; if (!arg) { struct reftable_addition *addition; - ret = reftable_stack_reload(stack); + ret = backend_for(&be, refs, update->refname, NULL, 1); if (ret) return ret; - ret = reftable_stack_new_addition(&addition, stack, + ret = reftable_stack_new_addition(&addition, be->stack, REFTABLE_STACK_NEW_ADDITION_RELOAD); if (ret) { if (ret == REFTABLE_LOCK_ERROR) @@ -947,7 +968,7 @@ static int prepare_transaction_update(struct write_transaction_table_arg **out, tx_data->args_alloc); arg = &tx_data->args[tx_data->args_nr++]; arg->refs = refs; - arg->stack = stack; + arg->be = be; arg->addition = addition; arg->updates = NULL; arg->updates_nr = 0; @@ -1002,6 +1023,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, struct strbuf referent = STRBUF_INIT, head_referent = STRBUF_INIT; struct string_list affected_refnames = STRING_LIST_INIT_NODUP; struct reftable_transaction_data *tx_data = NULL; + struct reftable_backend *be; struct object_id head_oid; unsigned int head_type = 0; size_t i; @@ -1048,7 +1070,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, goto done; } - ret = read_ref_without_reload(refs, backend_for(refs, "HEAD", NULL)->stack, "HEAD", + ret = backend_for(&be, refs, "HEAD", NULL, 0); + if (ret) + goto done; + + ret = read_ref_without_reload(refs, be->stack, "HEAD", &head_oid, &head_referent, &head_type); if (ret < 0) goto done; @@ -1057,10 +1083,11 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store, for (i = 0; i < transaction->nr; i++) { struct ref_update *u = transaction->updates[i]; struct object_id current_oid = {0}; - struct reftable_stack *stack; const char *rewritten_ref; - stack = backend_for(refs, u->refname, &rewritten_ref)->stack; + ret = backend_for(&be, refs, u->refname, &rewritten_ref, 0); + if (ret) + goto done; /* Verify that the new object ID is valid. 
*/
 		if ((u->flags & REF_HAVE_NEW) && !is_null_oid(&u->new_oid) &&
@@ -1116,7 +1143,7 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 				string_list_insert(&affected_refnames, new_update->refname);
 			}
 
-			ret = read_ref_without_reload(refs, stack, rewritten_ref,
+			ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
 					      &current_oid, &referent, &u->type);
 			if (ret < 0)
 				goto done;
@@ -1318,7 +1345,7 @@ static int transaction_update_cmp(const void *a, const void *b)
 static int write_transaction_table(struct reftable_writer *writer, void *cb_data)
 {
 	struct write_transaction_table_arg *arg = cb_data;
-	uint64_t ts = reftable_stack_next_update_index(arg->stack);
+	uint64_t ts = reftable_stack_next_update_index(arg->be->stack);
 	struct reftable_log_record *logs = NULL;
 	struct ident_split committer_ident = {0};
 	size_t logs_nr = 0, logs_alloc = 0, i;
@@ -1354,7 +1381,7 @@ static int write_transaction_table(struct reftable_writer *writer, void *cb_data
 		struct reftable_log_record log = {0};
 		struct reftable_iterator it = {0};
 
-		ret = reftable_stack_init_log_iterator(arg->stack, &it);
+		ret = reftable_stack_init_log_iterator(arg->be->stack, &it);
 		if (ret < 0)
 			goto done;
@@ -1799,10 +1826,9 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1814,10 +1840,11 @@ static int reftable_be_rename_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -1831,10 +1858,9 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref");
-	struct reftable_stack *stack = backend_for(refs, newrefname, &newrefname)->stack;
+	struct reftable_backend *be;
 	struct write_copy_arg arg = {
 		.refs = refs,
-		.stack = stack,
 		.oldname = oldrefname,
 		.newname = newrefname,
 		.logmsg = logmsg,
@@ -1845,10 +1871,11 @@ static int reftable_be_copy_ref(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, newrefname, &newrefname, 1);
 	if (ret)
 		goto done;
-	ret = reftable_stack_add(stack, &write_copy_table, &arg);
+	arg.stack = be->stack;
+	ret = reftable_stack_add(be->stack, &write_copy_table, &arg);
 
 done:
 	assert(ret != REFTABLE_API_ERROR);
@@ -2012,15 +2039,19 @@ static int reftable_be_for_each_reflog_ent_reverse(struct ref_store *ref_store,
 {
 	struct reftable_ref_store *refs =
 		reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent_reverse");
-	struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack;
 	struct reftable_log_record log = {0};
 	struct reftable_iterator it = {0};
+	struct reftable_backend *be;
 	int ret;
 
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = backend_for(&be, refs, refname, &refname, 0);
+	if (ret)
+		goto done;
+
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2052,16 
+2083,20 @@ static int reftable_be_for_each_reflog_ent(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "for_each_reflog_ent"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record *logs = NULL; struct reftable_iterator it = {0}; + struct reftable_backend *be; size_t logs_alloc = 0, logs_nr = 0, i; int ret; if (refs->err < 0) return refs->err; - ret = reftable_stack_init_log_iterator(stack, &it); + ret = backend_for(&be, refs, refname, &refname, 0); + if (ret) + goto done; + + ret = reftable_stack_init_log_iterator(be->stack, &it); if (ret < 0) goto done; @@ -2101,20 +2136,20 @@ static int reftable_be_reflog_exists(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "reflog_exists"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record log = {0}; struct reftable_iterator it = {0}; + struct reftable_backend *be; int ret; ret = refs->err; if (ret < 0) goto done; - ret = reftable_stack_reload(stack); + ret = backend_for(&be, refs, refname, &refname, 1); if (ret < 0) goto done; - ret = reftable_stack_init_log_iterator(stack, &it); + ret = reftable_stack_init_log_iterator(be->stack, &it); if (ret < 0) goto done; @@ -2186,10 +2221,9 @@ static int reftable_be_create_reflog(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "create_reflog"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; + struct reftable_backend *be; struct write_reflog_existence_arg arg = { .refs = refs, - .stack = stack, .refname = refname, }; int ret; @@ -2198,11 +2232,12 @@ static int reftable_be_create_reflog(struct ref_store *ref_store, if (ret < 0) goto done; - ret = reftable_stack_reload(stack); + ret = backend_for(&be, refs, refname, &refname, 1); if (ret) goto done; + arg.stack = be->stack; - ret = reftable_stack_add(stack, &write_reflog_existence_table, &arg); + ret = reftable_stack_add(be->stack, &write_reflog_existence_table, &arg); done: return ret; @@ -2260,17 +2295,18 @@ static int reftable_be_delete_reflog(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "delete_reflog"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; + struct reftable_backend *be; struct write_reflog_delete_arg arg = { - .stack = stack, .refname = refname, }; int ret; - ret = reftable_stack_reload(stack); + ret = backend_for(&be, refs, refname, &refname, 1); if (ret) return ret; - ret = reftable_stack_add(stack, &write_reflog_delete_table, &arg); + arg.stack = be->stack; + + ret = reftable_stack_add(be->stack, &write_reflog_delete_table, &arg); assert(ret != REFTABLE_API_ERROR); return ret; @@ -2369,13 +2405,13 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, */ struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire"); - struct reftable_stack *stack = backend_for(refs, refname, &refname)->stack; struct reftable_log_record *logs = NULL; struct reftable_log_record *rewritten = NULL; struct reftable_ref_record ref_record = {0}; struct reftable_iterator it = {0}; struct reftable_addition *add = NULL; struct reflog_expiry_arg arg = {0}; + struct reftable_backend *be; struct object_id oid = {0}; uint8_t *last_hash = NULL; size_t logs_nr = 0, logs_alloc = 0, i; @@ -2384,11 
+2420,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (refs->err < 0)
 		return refs->err;
 
-	ret = reftable_stack_reload(stack);
+	ret = backend_for(&be, refs, refname, &refname, 1);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_init_log_iterator(stack, &it);
+	ret = reftable_stack_init_log_iterator(be->stack, &it);
 	if (ret < 0)
 		goto done;
 
@@ -2396,11 +2432,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_new_addition(&add, stack, 0);
+	ret = reftable_stack_new_addition(&add, be->stack, 0);
 	if (ret < 0)
 		goto done;
 
-	ret = reftable_stack_read_ref(stack, refname, &ref_record);
+	ret = reftable_stack_read_ref(be->stack, refname, &ref_record);
 	if (ret < 0)
 		goto done;
 	if (reftable_ref_record_val1(&ref_record))
@@ -2479,7 +2515,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store,
 	arg.refs = refs;
 	arg.records = rewritten;
 	arg.len = logs_nr;
-	arg.stack = stack,
+	arg.stack = be->stack,
 	arg.refname = refname,
 
 	ret = reftable_addition_add(add, &write_reflog_expiry_table, &arg);

From 9641f64325339367b69f8714ebc1d1a6867d48aa Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt
Date: Tue, 5 Nov 2024 10:12:04 +0100
Subject: [PATCH 3/8] refs/reftable: read references via `struct reftable_backend`

Refactor `read_ref_without_reload()` to accept a `struct reftable_backend`
as input instead of a `struct reftable_stack`. This allows us
to implement an additional caching layer when reading refs where we can
reuse reftable iterators.

Signed-off-by: Patrick Steinhardt
Signed-off-by: Junio C Hamano
---
 refs/reftable-backend.c   | 110 ++++++++++++++++++++------------------
 reftable/reftable-stack.h |   3 ++
 reftable/stack.c          |   5 ++
 3 files changed, 67 insertions(+), 51 deletions(-)

diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c
index 4a28dc8a9dfad7..230adb690d9866 100644
--- a/refs/reftable-backend.c
+++ b/refs/reftable-backend.c
@@ -51,6 +51,50 @@ static void reftable_backend_release(struct reftable_backend *be)
 	be->stack = NULL;
 }
 
+static int reftable_backend_read_ref(struct reftable_backend *be,
+				     const char *refname,
+				     struct object_id *oid,
+				     struct strbuf *referent,
+				     unsigned int *type)
+{
+	struct reftable_ref_record ref = {0};
+	int ret;
+
+	ret = reftable_stack_read_ref(be->stack, refname, &ref);
+	if (ret)
+		goto done;
+
+	if (ref.value_type == REFTABLE_REF_SYMREF) {
+		strbuf_reset(referent);
+		strbuf_addstr(referent, ref.value.symref);
+		*type |= REF_ISSYMREF;
+	} else if (reftable_ref_record_val1(&ref)) {
+		unsigned int hash_id;
+
+		switch (reftable_stack_hash_id(be->stack)) {
+		case REFTABLE_HASH_SHA1:
+			hash_id = GIT_HASH_SHA1;
+			break;
+		case REFTABLE_HASH_SHA256:
+			hash_id = GIT_HASH_SHA256;
+			break;
+		default:
+			BUG("unhandled hash ID %d", reftable_stack_hash_id(be->stack));
+		}
+
+		oidread(oid, reftable_ref_record_val1(&ref),
+			&hash_algos[hash_id]);
+	} else {
+		/* We got a tombstone, which should not happen. 
*/
+		BUG("unhandled reference value type %d", ref.value_type);
+	}
+
+done:
+	assert(ret != REFTABLE_API_ERROR);
+	reftable_ref_record_release(&ref);
+	return ret;
+}
+
 struct reftable_ref_store {
 	struct ref_store base;
 
@@ -243,38 +287,6 @@ static void fill_reftable_log_record(struct reftable_log_record *log, const stru
 	log->value.update.tz_offset = sign * atoi(tz_begin);
 }
 
-static int read_ref_without_reload(struct reftable_ref_store *refs,
-				   struct reftable_stack *stack,
-				   const char *refname,
-				   struct object_id *oid,
-				   struct strbuf *referent,
-				   unsigned int *type)
-{
-	struct reftable_ref_record ref = {0};
-	int ret;
-
-	ret = reftable_stack_read_ref(stack, refname, &ref);
-	if (ret)
-		goto done;
-
-	if (ref.value_type == REFTABLE_REF_SYMREF) {
-		strbuf_reset(referent);
-		strbuf_addstr(referent, ref.value.symref);
-		*type |= REF_ISSYMREF;
-	} else if (reftable_ref_record_val1(&ref)) {
-		oidread(oid, reftable_ref_record_val1(&ref),
-			refs->base.repo->hash_algo);
-	} else {
-		/* We got a tombstone, which should not happen. */
-		BUG("unhandled reference value type %d", ref.value_type);
-	}
-
-done:
-	assert(ret != REFTABLE_API_ERROR);
-	reftable_ref_record_release(&ref);
-	return ret;
-}
-
 static int reftable_be_config(const char *var, const char *value,
 			      const struct config_context *ctx,
 			      void *_opts)
@@ -855,7 +867,7 @@ static int reftable_be_read_raw_ref(struct ref_store *ref_store,
 	if (ret)
 		return ret;
 
-	ret = read_ref_without_reload(refs, be->stack, refname, oid, referent, type);
+	ret = reftable_backend_read_ref(be, refname, oid, referent, type);
 	if (ret < 0)
 		return ret;
 	if (ret > 0) {
@@ -1074,8 +1086,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 	if (ret)
 		goto done;
 
-	ret = read_ref_without_reload(refs, be->stack, "HEAD",
-				      &head_oid, &head_referent, &head_type);
+	ret = reftable_backend_read_ref(be, "HEAD", &head_oid,
+					&head_referent, &head_type);
 	if (ret < 0)
 		goto done;
 	ret = 0;
@@ -1143,8 +1155,8 @@ static int reftable_be_transaction_prepare(struct ref_store *ref_store,
 				string_list_insert(&affected_refnames, new_update->refname);
 			}
 
-			ret = read_ref_without_reload(refs, be->stack, rewritten_ref,
-					      &current_oid, &referent, &u->type);
+			ret = reftable_backend_read_ref(be, rewritten_ref,
+							&current_oid, &referent, &u->type);
 			if (ret < 0)
 				goto done;
 			if (ret > 0 && !ref_update_expects_existing_old_ref(u)) {
@@ -1602,7 +1614,7 @@ struct write_create_symref_arg {
 
 struct write_copy_arg {
 	struct reftable_ref_store *refs;
-	struct reftable_stack *stack;
+	struct reftable_backend *be;
 	const char *oldname;
 	const char *newname;
 	const char *logmsg;
@@ -1627,7 +1639,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	if (split_ident_line(&committer_ident, committer_info, strlen(committer_info)))
 		BUG("failed splitting committer info");
 
-	if (reftable_stack_read_ref(arg->stack, arg->oldname, &old_ref)) {
+	if (reftable_stack_read_ref(arg->be->stack, arg->oldname, &old_ref)) {
 		ret = error(_("refname %s not found"), arg->oldname);
 		goto done;
 	}
@@ -1666,7 +1678,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data)
 	 * the old branch and the creation of the new branch, and we cannot do
	 * two changes to a reflog in a single update.
	 
*/ - deletion_ts = creation_ts = reftable_stack_next_update_index(arg->stack); + deletion_ts = creation_ts = reftable_stack_next_update_index(arg->be->stack); if (arg->delete_old) creation_ts++; reftable_writer_set_limits(writer, deletion_ts, creation_ts); @@ -1709,8 +1721,8 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) memcpy(logs[logs_nr].value.update.old_hash, old_ref.value.val1, GIT_MAX_RAWSZ); logs_nr++; - ret = read_ref_without_reload(arg->refs, arg->stack, "HEAD", &head_oid, - &head_referent, &head_type); + ret = reftable_backend_read_ref(arg->be, "HEAD", &head_oid, + &head_referent, &head_type); if (ret < 0) goto done; append_head_reflog = (head_type & REF_ISSYMREF) && !strcmp(head_referent.buf, arg->oldname); @@ -1753,7 +1765,7 @@ static int write_copy_table(struct reftable_writer *writer, void *cb_data) * copy over all log entries from the old reflog. Last but not least, * when renaming we also have to delete all the old reflog entries. */ - ret = reftable_stack_init_log_iterator(arg->stack, &it); + ret = reftable_stack_init_log_iterator(arg->be->stack, &it); if (ret < 0) goto done; @@ -1826,7 +1838,6 @@ static int reftable_be_rename_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "rename_ref"); - struct reftable_backend *be; struct write_copy_arg arg = { .refs = refs, .oldname = oldrefname, @@ -1840,11 +1851,10 @@ static int reftable_be_rename_ref(struct ref_store *ref_store, if (ret < 0) goto done; - ret = backend_for(&be, refs, newrefname, &newrefname, 1); + ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1); if (ret) goto done; - arg.stack = be->stack; - ret = reftable_stack_add(be->stack, &write_copy_table, &arg); + ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg); done: assert(ret != REFTABLE_API_ERROR); @@ -1858,7 +1868,6 @@ static int reftable_be_copy_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_WRITE, "copy_ref"); - struct reftable_backend *be; struct write_copy_arg arg = { .refs = refs, .oldname = oldrefname, @@ -1871,11 +1880,10 @@ static int reftable_be_copy_ref(struct ref_store *ref_store, if (ret < 0) goto done; - ret = backend_for(&be, refs, newrefname, &newrefname, 1); + ret = backend_for(&arg.be, refs, newrefname, &newrefname, 1); if (ret) goto done; - arg.stack = be->stack; - ret = reftable_stack_add(be->stack, &write_copy_table, &arg); + ret = reftable_stack_add(arg.be->stack, &write_copy_table, &arg); done: assert(ret != REFTABLE_API_ERROR); diff --git a/reftable/reftable-stack.h b/reftable/reftable-stack.h index 54787f2ef53040..ae14270ea74108 100644 --- a/reftable/reftable-stack.h +++ b/reftable/reftable-stack.h @@ -149,4 +149,7 @@ struct reftable_compaction_stats { struct reftable_compaction_stats * reftable_stack_compaction_stats(struct reftable_stack *st); +/* Return the hash of the stack. 
*/ +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st); + #endif diff --git a/reftable/stack.c b/reftable/stack.c index 1fffd75630266c..d97b64a40d4ad0 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -1791,3 +1791,8 @@ int reftable_stack_clean(struct reftable_stack *st) reftable_addition_destroy(add); return err; } + +enum reftable_hash reftable_stack_hash_id(struct reftable_stack *st) +{ + return reftable_merged_table_hash_id(st->merged); +} From 56a5252503bf10f3b437aceba8acec5323ea9157 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Nov 2024 10:12:10 +0100 Subject: [PATCH 4/8] refs/reftable: refactor reading symbolic refs to use reftable backend Refactor the callback function that reads symbolic references in the reftable backend to use `reftable_backend_read_ref()` instead of accessing the reftable stack directly. This ensures that the function will benefit from the new caching layer that we're about to introduce. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 230adb690d9866..dfe94ff9692679 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -884,21 +884,18 @@ static int reftable_be_read_symbolic_ref(struct ref_store *ref_store, { struct reftable_ref_store *refs = reftable_be_downcast(ref_store, REF_STORE_READ, "read_symbolic_ref"); - struct reftable_ref_record ref = {0}; struct reftable_backend *be; + struct object_id oid; + unsigned int type = 0; int ret; ret = backend_for(&be, refs, refname, &refname, 1); if (ret) return ret; - ret = reftable_stack_read_ref(be->stack, refname, &ref); - if (ret == 0 && ref.value_type == REFTABLE_REF_SYMREF) - strbuf_addstr(referent, ref.value.symref); - else + ret = reftable_backend_read_ref(be, refname, &oid, referent, &type); + if (type != REF_ISSYMREF) ret = -1; - - reftable_ref_record_release(&ref); return ret; } From f3f20f488bd2ad41642f1b55c41ca33e0f0ac54b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Nov 2024 10:12:13 +0100 Subject: [PATCH 5/8] refs/reftable: refactor reflog expiry to use reftable backend Refactor the callback function that expires reflog entries in the reftable backend to use `reftable_backend_read_ref()` instead of accessing the reftable stack directly. This ensures that the function will benefit from the new caching layer that we're about to introduce. 
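
For reference, the consolidated read path now follows a single calling
convention everywhere. A sketch, not part of the diff below, with error
handling omitted:

    struct reftable_backend *be;
    struct object_id oid = {0};
    struct strbuf referent = STRBUF_INIT;
    unsigned int type = 0;

    /* Resolve the backend for the ref, reloading its stack up front. */
    ret = backend_for(&be, refs, refname, &refname, 1);

    /*
     * On success this fills in `oid`, or `referent` plus the
     * REF_ISSYMREF bit in `type` for symbolic refs.
     */
    ret = reftable_backend_read_ref(be, refname, &oid, &referent, &type);
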
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index dfe94ff9692679..9c6e9c83744f69 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -2412,14 +2412,15 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, reftable_be_downcast(ref_store, REF_STORE_WRITE, "reflog_expire"); struct reftable_log_record *logs = NULL; struct reftable_log_record *rewritten = NULL; - struct reftable_ref_record ref_record = {0}; struct reftable_iterator it = {0}; struct reftable_addition *add = NULL; struct reflog_expiry_arg arg = {0}; struct reftable_backend *be; struct object_id oid = {0}; + struct strbuf referent = STRBUF_INIT; uint8_t *last_hash = NULL; size_t logs_nr = 0, logs_alloc = 0, i; + unsigned int type = 0; int ret; if (refs->err < 0) @@ -2441,12 +2442,9 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, if (ret < 0) goto done; - ret = reftable_stack_read_ref(be->stack, refname, &ref_record); + ret = reftable_backend_read_ref(be, refname, &oid, &referent, &type); if (ret < 0) goto done; - if (reftable_ref_record_val1(&ref_record)) - oidread(&oid, reftable_ref_record_val1(&ref_record), - ref_store->repo->hash_algo); prepare_fn(refname, &oid, policy_cb_data); while (1) { @@ -2513,8 +2511,7 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, } } - if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash && - reftable_ref_record_val1(&ref_record)) + if (flags & EXPIRE_REFLOGS_UPDATE_REF && last_hash && !is_null_oid(&oid)) oidread(&arg.update_oid, last_hash, ref_store->repo->hash_algo); arg.refs = refs; @@ -2539,11 +2536,11 @@ static int reftable_be_reflog_expire(struct ref_store *ref_store, cleanup_fn(policy_cb_data); assert(ret != REFTABLE_API_ERROR); - reftable_ref_record_release(&ref_record); reftable_iterator_destroy(&it); reftable_addition_destroy(add); for (i = 0; i < logs_nr; i++) reftable_log_record_release(&logs[i]); + strbuf_release(&referent); free(logs); free(rewritten); return ret; From 251ac42da39a01a4b454ae34b3d9eba9807a8fc7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Nov 2024 10:12:15 +0100 Subject: [PATCH 6/8] reftable/stack: add mechanism to notify callers on reload Reftable stacks are reloaded in two cases: - When calling `reftable_stack_reload()`, if the stat-cache tells us that the stack has been modified. - When committing a reftable addition. While callers can figure out the second case, they do not have a mechanism to figure out whether `reftable_stack_reload()` led to an actual reload of the on-disk data. All they can do is thus to assume that data is always being reloaded in that case. Improve the situation by introducing a new `on_reload()` callback to the reftable options. If provided, the function will be invoked every time the stack has indeed been reloaded. This allows callers to invalidate data that depends on the current stack data. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/reftable-writer.h | 9 +++++++++ reftable/stack.c | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/reftable/reftable-writer.h b/reftable/reftable-writer.h index c85ef5a5bd1459..5f9afa620bb00d 100644 --- a/reftable/reftable-writer.h +++ b/reftable/reftable-writer.h @@ -68,6 +68,15 @@ struct reftable_write_options { * fsync(3P) when unset. 
*/ int (*fsync)(int fd); + + /* + * Callback function to execute whenever the stack is being reloaded. + * This can be used e.g. to discard cached information that relies on + * the old stack's data. The payload data will be passed as argument to + * the callback. + */ + void (*on_reload)(void *payload); + void *on_reload_payload; }; /* reftable_block_stats holds statistics for a single block type */ diff --git a/reftable/stack.c b/reftable/stack.c index d97b64a40d4ad0..5384ca9de0d1f0 100644 --- a/reftable/stack.c +++ b/reftable/stack.c @@ -548,6 +548,10 @@ static int reftable_stack_reload_maybe_reuse(struct reftable_stack *st, close(fd); free_names(names); free_names(names_after); + + if (st->opts.on_reload) + st->opts.on_reload(st->opts.on_reload_payload); + return err; } From d6b1e08d3e162ea883f39e37ecc8647906d5451c Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Tue, 5 Nov 2024 10:12:18 +0100 Subject: [PATCH 7/8] reftable/merged: drain priority queue on reseek In 5bf96e0c39 (reftable/generic: move seeking of records into the iterator, 2024-05-13) we have refactored the reftable codebase such that iterators can be initialized once and then re-seeked multiple times. This feature is used by 1869525066 (refs/reftable: wire up support for exclude patterns, 2024-09-16) in order to skip records based on exclude patterns provided by the caller. The logic to re-seek the merged iterator is insufficient though because we don't drain the priority queue on a re-seek. This means that the queue may contain stale entries and thus reading the next record in the queue will return the wrong entry. While this is an obvious bug, it is harmless in the context of above exclude patterns: - If the queue contained stale entries that match the pattern then the caller would already know to filter out such refs. This is because our codebase is prepared to handle backends that don't have a way to efficiently implement exclude patterns. - If the queue contained stale entries that don't match the pattern we'd eventually filter out any duplicates. This is because the reftable code discards items with the same ref name and sorts any remaining entries properly. So things happen to work in this context regardless of the bug, and there is no other use case yet where we re-seek iterators. We're about to introduce a caching mechanism though where iterators are reused by the reftable backend, and that will expose the bug. Fix the issue by draining the priority queue when seeking and add a testcase that surfaces the issue. 
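
The access pattern that exercises the bug is a seek, a read, and then a
second seek on the same iterator. A sketch based on the new testcase,
with error checks omitted:

    struct reftable_iterator it = { 0 };
    struct reftable_ref_record rec = { 0 };

    merged_table_init_iter(mt, &it, BLOCK_TYPE_REF);

    /* The first seek starts out with an empty priority queue. */
    reftable_iterator_seek_ref(&it, "b");
    reftable_iterator_next_ref(&it, &rec);  /* returns "b" */

    /*
     * Without draining, entries queued while iterating from "b" linger
     * in the priority queue and can be yielded in place of the record
     * we actually re-seeked to.
     */
    reftable_iterator_seek_ref(&it, "a");
    reftable_iterator_next_ref(&it, &rec);  /* must return "a" */
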
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- reftable/merged.c | 2 + t/unit-tests/t-reftable-merged.c | 73 ++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+) diff --git a/reftable/merged.c b/reftable/merged.c index 5b93e20f429453..bb0836e3443271 100644 --- a/reftable/merged.c +++ b/reftable/merged.c @@ -66,6 +66,8 @@ static int merged_iter_seek(struct merged_iter *mi, struct reftable_record *want int err; mi->advance_index = -1; + while (!merged_iter_pqueue_is_empty(mi->pq)) + merged_iter_pqueue_remove(&mi->pq); for (size_t i = 0; i < mi->subiters_len; i++) { err = iterator_seek(&mi->subiters[i].iter, want); diff --git a/t/unit-tests/t-reftable-merged.c b/t/unit-tests/t-reftable-merged.c index 2591b5e5974553..a12bd0e1a3bded 100644 --- a/t/unit-tests/t-reftable-merged.c +++ b/t/unit-tests/t-reftable-merged.c @@ -273,6 +273,78 @@ static void t_merged_seek_multiple_times(void) reftable_free(sources); } +static void t_merged_seek_multiple_times_without_draining(void) +{ + struct reftable_ref_record r1[] = { + { + .refname = (char *) "a", + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 1 }, + }, + { + .refname = (char *) "c", + .update_index = 1, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 2 }, + } + }; + struct reftable_ref_record r2[] = { + { + .refname = (char *) "b", + .update_index = 2, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 3 }, + }, + { + .refname = (char *) "d", + .update_index = 2, + .value_type = REFTABLE_REF_VAL1, + .value.val1 = { 4 }, + }, + }; + struct reftable_ref_record *refs[] = { + r1, r2, + }; + size_t sizes[] = { + ARRAY_SIZE(r1), ARRAY_SIZE(r2), + }; + struct reftable_buf bufs[] = { + REFTABLE_BUF_INIT, REFTABLE_BUF_INIT, + }; + struct reftable_block_source *sources = NULL; + struct reftable_reader **readers = NULL; + struct reftable_ref_record rec = { 0 }; + struct reftable_iterator it = { 0 }; + struct reftable_merged_table *mt; + int err; + + mt = merged_table_from_records(refs, &sources, &readers, sizes, bufs, 2); + merged_table_init_iter(mt, &it, BLOCK_TYPE_REF); + + err = reftable_iterator_seek_ref(&it, "b"); + check(!err); + err = reftable_iterator_next_ref(&it, &rec); + check(!err); + err = reftable_ref_record_equal(&rec, &r2[0], REFTABLE_HASH_SIZE_SHA1); + check(err == 1); + + err = reftable_iterator_seek_ref(&it, "a"); + check(!err); + err = reftable_iterator_next_ref(&it, &rec); + check(!err); + err = reftable_ref_record_equal(&rec, &r1[0], REFTABLE_HASH_SIZE_SHA1); + check(err == 1); + + for (size_t i = 0; i < ARRAY_SIZE(bufs); i++) + reftable_buf_release(&bufs[i]); + readers_destroy(readers, ARRAY_SIZE(refs)); + reftable_ref_record_release(&rec); + reftable_iterator_destroy(&it); + reftable_merged_table_free(mt); + reftable_free(sources); +} + static struct reftable_merged_table * merged_table_from_log_records(struct reftable_log_record **logs, struct reftable_block_source **source, @@ -467,6 +539,7 @@ int cmd_main(int argc UNUSED, const char *argv[] UNUSED) TEST(t_merged_logs(), "merged table with multiple log updates for same ref"); TEST(t_merged_refs(), "merged table with multiple updates to same ref"); TEST(t_merged_seek_multiple_times(), "merged table can seek multiple times"); + TEST(t_merged_seek_multiple_times_without_draining(), "merged table can seek multiple times without draining"); TEST(t_merged_single_record(), "ref occurring in only one record can be fetched"); return test_done(); From 1db7412ef88a2a6e362cb1a4474d05f569920f83 Mon Sep 17 00:00:00 2001 From: 
Patrick Steinhardt Date: Tue, 5 Nov 2024 10:12:20 +0100 Subject: [PATCH 8/8] refs/reftable: reuse iterators when reading refs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When reading references the reftable backend has to: 1. Create a new ref iterator. 2. Seek the iterator to the record we're searching for. 3. Read the record. We cannot really avoid the last two steps, but re-creating the iterator every single time we want to read a reference is kind of expensive and a waste of resources. We couldn't help it in the past though because it was not possible to reuse iterators. But starting with 5bf96e0c39 (reftable/generic: move seeking of records into the iterator, 2024-05-13) we have split up the iterator lifecycle such that creating the iterator and seeking are two different concerns. Refactor the code such that we cache iterators in the reftable backend. This cache is invalidated whenever the respective stack is reloaded such that we know to recreate the iterator in that case. This leads to a sizeable speedup when creating many refs, which requires a lot of random reference reads: Benchmark 1: update-ref: create many refs (refcount = 100000, revision = master) Time (mean ± σ): 1.793 s ± 0.010 s [User: 0.954 s, System: 0.835 s] Range (min … max): 1.781 s … 1.811 s 10 runs Benchmark 2: update-ref: create many refs (refcount = 100000, revision = HEAD) Time (mean ± σ): 1.680 s ± 0.013 s [User: 0.846 s, System: 0.831 s] Range (min … max): 1.664 s … 1.702 s 10 runs Summary update-ref: create many refs (refcount = 100000, revision = HEAD) ran 1.07 ± 0.01 times faster than update-ref: create many refs (refcount = 100000, revision = master) While 7% is not a huge win, you have to consider that the benchmark is _writing_ data, so _reading_ references is only one part of what we do. Flame graphs show that we spend around 40% of our time reading refs, so the speedup when reading refs is approximately ~2.5x that. I could not find better benchmarks where we perform a lot of random ref reads. You can also see a sizeable impact on memory usage when creating 100k references. Before this change: HEAP SUMMARY: in use at exit: 19,112,538 bytes in 200,170 blocks total heap usage: 8,400,426 allocs, 8,200,256 frees, 454,367,048 bytes allocated After this change: HEAP SUMMARY: in use at exit: 674,416 bytes in 169 blocks total heap usage: 7,929,872 allocs, 7,929,703 frees, 281,509,985 bytes allocated As an additional factor, this refactoring opens up the possibility for more performance optimizations in how we re-seek iterators. Any change that allows us to optimize re-seeking by e.g. reusing data structures would thus also directly speed up random reads. 
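
The resulting lifecycle can be summarized as follows (a sketch of the
logic in the diff below; error handling omitted):

    /*
     * A zero-initialized iterator has a NULL `ops` pointer, which
     * doubles as the "no cached iterator yet" marker.
     */
    if (!be->it.ops)
        ret = reftable_stack_init_ref_iterator(be->stack, &be->it);

    /* Subsequent reads only have to re-seek the cached iterator. */
    ret = reftable_iterator_seek_ref(&be->it, refname);

    /*
     * When the stack is reloaded, the on_reload() callback installed
     * at backend initialization destroys the cached iterator via
     * reftable_iterator_destroy(&be->it), so the next read recreates
     * it against the new tables.
     */
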
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- refs/reftable-backend.c | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/refs/reftable-backend.c b/refs/reftable-backend.c index 9c6e9c83744f69..49423637122b90 100644 --- a/refs/reftable-backend.c +++ b/refs/reftable-backend.c @@ -36,19 +36,30 @@ struct reftable_backend { struct reftable_stack *stack; + struct reftable_iterator it; }; +static void reftable_backend_on_reload(void *payload) +{ + struct reftable_backend *be = payload; + reftable_iterator_destroy(&be->it); +} + static int reftable_backend_init(struct reftable_backend *be, const char *path, - const struct reftable_write_options *opts) + const struct reftable_write_options *_opts) { - return reftable_new_stack(&be->stack, path, opts); + struct reftable_write_options opts = *_opts; + opts.on_reload = reftable_backend_on_reload; + opts.on_reload_payload = be; + return reftable_new_stack(&be->stack, path, &opts); } static void reftable_backend_release(struct reftable_backend *be) { reftable_stack_destroy(be->stack); be->stack = NULL; + reftable_iterator_destroy(&be->it); } static int reftable_backend_read_ref(struct reftable_backend *be, @@ -60,10 +71,25 @@ static int reftable_backend_read_ref(struct reftable_backend *be, struct reftable_ref_record ref = {0}; int ret; - ret = reftable_stack_read_ref(be->stack, refname, &ref); + if (!be->it.ops) { + ret = reftable_stack_init_ref_iterator(be->stack, &be->it); + if (ret) + goto done; + } + + ret = reftable_iterator_seek_ref(&be->it, refname); if (ret) goto done; + ret = reftable_iterator_next_ref(&be->it, &ref); + if (ret) + goto done; + + if (strcmp(ref.refname, refname)) { + ret = 1; + goto done; + } + if (ref.value_type == REFTABLE_REF_SYMREF) { strbuf_reset(referent); strbuf_addstr(referent, ref.value.symref);