diff --git a/Documentation/git-index-pack.txt b/Documentation/git-index-pack.txt index 5a20deefd5f5da..4be09e58e72145 100644 --- a/Documentation/git-index-pack.txt +++ b/Documentation/git-index-pack.txt @@ -139,6 +139,11 @@ include::object-format-disclaimer.txt[] written. If a `` is provided, then that content will be written to the .promisor file for future reference. See link:technical/partial-clone.html[partial clone] for more information. ++ +Also, if there are objects in the given pack that references non-promisor +objects (in the repo), repacks those non-promisor objects into a promisor +pack. This avoids a situation in which a repo has non-promisor objects that are +accessible through promisor objects. NOTES ----- diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 9d23b41b3a2306..e4afd6725f5ac1 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -9,6 +9,7 @@ #include "csum-file.h" #include "blob.h" #include "commit.h" +#include "tag.h" #include "tree.h" #include "progress.h" #include "fsck.h" @@ -20,9 +21,14 @@ #include "object-file.h" #include "object-store-ll.h" #include "oid-array.h" +#include "oidset.h" +#include "path.h" #include "replace-object.h" +#include "tree-walk.h" #include "promisor-remote.h" +#include "run-command.h" #include "setup.h" +#include "strvec.h" static const char index_pack_usage[] = "git index-pack [-v] [-o ] [--keep | --keep=] [--[no-]rev-index] [--verify] [--strict[==...]] [--fsck-objects[==...]] ( | --stdin [--fix-thin] [])"; @@ -148,6 +154,13 @@ static uint32_t input_crc32; static int input_fd, output_fd; static const char *curr_pack; +/* + * local_links is guarded by read_mutex, and record_local_links is read-only in + * a thread. + */ +static struct oidset local_links = OIDSET_INIT; +static int record_local_links; + static struct thread_local *thread_data; static int nr_dispatched; static int threads_active; @@ -799,6 +812,44 @@ static int check_collison(struct object_entry *entry) return 0; } +static void record_if_local_object(const struct object_id *oid) +{ + struct object_info info = OBJECT_INFO_INIT; + if (oid_object_info_extended(the_repository, oid, &info, 0)) + /* Missing; assume it is a promisor object */ + return; + if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor) + return; + oidset_insert(&local_links, oid); +} + +static void do_record_local_links(struct object *obj) +{ + if (obj->type == OBJ_TREE) { + struct tree *tree = (struct tree *)obj; + struct tree_desc desc; + struct name_entry entry; + if (init_tree_desc_gently(&desc, &tree->object.oid, + tree->buffer, tree->size, 0)) + /* + * Error messages are given when packs are + * verified, so do not print any here. + */ + return; + while (tree_entry_gently(&desc, &entry)) + record_if_local_object(&entry.oid); + } else if (obj->type == OBJ_COMMIT) { + struct commit *commit = (struct commit *) obj; + struct commit_list *parents = commit->parents; + + for (; parents; parents = parents->next) + record_if_local_object(&parents->item->object.oid); + } else if (obj->type == OBJ_TAG) { + struct tag *tag = (struct tag *) obj; + record_if_local_object(get_tagged_oid(tag)); + } +} + static void sha1_object(const void *data, struct object_entry *obj_entry, unsigned long size, enum object_type type, const struct object_id *oid) @@ -845,7 +896,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, free(has_data); } - if (strict || do_fsck_object) { + if (strict || do_fsck_object || record_local_links) { read_lock(); if (type == OBJ_BLOB) { struct blob *blob = lookup_blob(the_repository, oid); @@ -877,6 +928,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry, die(_("fsck error in packed object")); if (strict && fsck_walk(obj, NULL, &fsck_options)) die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid)); + if (record_local_links) + do_record_local_links(obj); if (obj->type == OBJ_TREE) { struct tree *item = (struct tree *) obj; @@ -1719,6 +1772,57 @@ static void show_pack_info(int stat_only) free(chain_histogram); } +static void repack_local_links(void) +{ + struct child_process cmd = CHILD_PROCESS_INIT; + FILE *out; + struct strbuf line = STRBUF_INIT; + struct oidset_iter iter; + struct object_id *oid; + char *base_name; + + if (!oidset_size(&local_links)) + return; + + base_name = mkpathdup("%s/pack/pack", repo_get_object_directory(the_repository)); + + strvec_push(&cmd.args, "pack-objects"); + strvec_push(&cmd.args, "--exclude-promisor-objects-best-effort"); + strvec_push(&cmd.args, base_name); + cmd.git_cmd = 1; + cmd.in = -1; + cmd.out = -1; + if (start_command(&cmd)) + die(_("could not start pack-objects to repack local links")); + + oidset_iter_init(&local_links, &iter); + while ((oid = oidset_iter_next(&iter))) { + if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 || + write_in_full(cmd.in, "\n", 1) < 0) + die(_("failed to feed local object to pack-objects")); + } + close(cmd.in); + + out = xfdopen(cmd.out, "r"); + while (strbuf_getline_lf(&line, out) != EOF) { + unsigned char binary[GIT_MAX_RAWSZ]; + if (line.len != the_hash_algo->hexsz || + !hex_to_bytes(binary, line.buf, line.len)) + die(_("index-pack: Expecting full hex object ID lines only from pack-objects.")); + + /* + * pack-objects creates the .pack and .idx files, but not the + * .promisor file. Create the .promisor file, which is empty. + */ + write_special_file("promisor", "", NULL, binary, NULL); + } + + fclose(out); + if (finish_command(&cmd)) + die(_("could not finish pack-objects to repack local links")); + strbuf_release(&line); +} + int cmd_index_pack(int argc, const char **argv, const char *prefix, @@ -1794,7 +1898,7 @@ int cmd_index_pack(int argc, } else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) { ; /* nothing to do */ } else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) { - ; /* already parsed */ + record_local_links = 1; } else if (starts_with(arg, "--threads=")) { char *end; nr_threads = strtoul(arg+10, &end, 0); @@ -1970,6 +2074,8 @@ int cmd_index_pack(int argc, free((void *) curr_index); free(curr_rev_index); + repack_local_links(); + /* * Let the caller know this pack is not self contained */ diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 080071426710da..762949e4c8f73c 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -239,6 +239,7 @@ static enum { static uint16_t write_bitmap_options = BITMAP_OPT_HASH_CACHE; static int exclude_promisor_objects; +static int exclude_promisor_objects_best_effort; static int use_delta_islands; @@ -4312,6 +4313,18 @@ static int option_parse_cruft_expiration(const struct option *opt UNUSED, return 0; } +static int is_not_in_promisor_pack_obj(struct object *obj, void *data UNUSED) +{ + struct object_info info = OBJECT_INFO_INIT; + if (oid_object_info_extended(the_repository, &obj->oid, &info, 0)) + BUG("should_include_obj should only be called on existing objects"); + return info.whence != OI_PACKED || !info.u.packed.pack->pack_promisor; +} + +static int is_not_in_promisor_pack(struct commit *commit, void *data) { + return is_not_in_promisor_pack_obj((struct object *) commit, data); +} + int cmd_pack_objects(int argc, const char **argv, const char *prefix, @@ -4424,6 +4437,9 @@ int cmd_pack_objects(int argc, option_parse_missing_action), OPT_BOOL(0, "exclude-promisor-objects", &exclude_promisor_objects, N_("do not pack objects in promisor packfiles")), + OPT_BOOL(0, "exclude-promisor-objects-best-effort", + &exclude_promisor_objects_best_effort, + N_("implies --missing=allow-any")), OPT_BOOL(0, "delta-islands", &use_delta_islands, N_("respect islands during delta compression")), OPT_STRING_LIST(0, "uri-protocol", &uri_protocols, @@ -4504,10 +4520,18 @@ int cmd_pack_objects(int argc, strvec_push(&rp, "--unpacked"); } + if (exclude_promisor_objects && exclude_promisor_objects_best_effort) + die(_("options '%s' and '%s' cannot be used together"), + "--exclude-promisor-objects", "--exclude-promisor-objects-best-effort"); if (exclude_promisor_objects) { use_internal_rev_list = 1; fetch_if_missing = 0; strvec_push(&rp, "--exclude-promisor-objects"); + } else if (exclude_promisor_objects_best_effort) { + use_internal_rev_list = 1; + fetch_if_missing = 0; + option_parse_missing_action(NULL, "allow-any", 0); + /* revs configured below */ } if (unpack_unreachable || keep_unreachable || pack_loose_unreachable) use_internal_rev_list = 1; @@ -4627,6 +4651,10 @@ int cmd_pack_objects(int argc, repo_init_revisions(the_repository, &revs, NULL); list_objects_filter_copy(&revs.filter, &filter_options); + if (exclude_promisor_objects_best_effort) { + revs.include_check = is_not_in_promisor_pack; + revs.include_check_obj = is_not_in_promisor_pack_obj; + } get_object_list(&revs, rp.nr, rp.v); release_revisions(&revs); } diff --git a/t/t0410-partial-clone.sh b/t/t0410-partial-clone.sh index 818700fbecd457..e2b317db650b24 100755 --- a/t/t0410-partial-clone.sh +++ b/t/t0410-partial-clone.sh @@ -241,7 +241,7 @@ test_expect_success 'fetching of missing objects works with ref-in-want enabled' grep "fetch< fetch=.*ref-in-want" trace ' -test_expect_success 'fetching of missing objects from another promisor remote' ' +test_expect_success 'fetching from another promisor remote' ' git clone "file://$(pwd)/server" server2 && test_commit -C server2 bar && git -C server2 repack -a -d --write-bitmap-index && @@ -264,8 +264,8 @@ test_expect_success 'fetching of missing objects from another promisor remote' ' grep "$HASH2" out ' -test_expect_success 'fetching of missing objects configures a promisor remote' ' - git clone "file://$(pwd)/server" server3 && +test_expect_success 'fetching with --filter configures a promisor remote' ' + test_create_repo server3 && test_commit -C server3 baz && git -C server3 repack -a -d --write-bitmap-index && HASH3=$(git -C server3 rev-parse baz) && diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh index 51fed26cc4c941..fee4684ceb30d5 100755 --- a/t/t5300-pack-object.sh +++ b/t/t5300-pack-object.sh @@ -156,6 +156,11 @@ test_expect_success 'pack without delta' ' check_deltas stderr = 0 ' +test_expect_success 'negative window clamps to 0' ' + git pack-objects --progress --window=-1 neg-window stderr && + check_deltas stderr = 0 +' + test_expect_success 'pack-objects with bogus arguments' ' test_must_fail git pack-objects --window=0 test-1 blah blah stderr && - check_deltas stderr = 0 -' - for hash in sha1 sha256 do test_expect_success "verify-pack with $hash packfile" ' diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh index c53e93be2f7779..91d1808883615f 100755 --- a/t/t5616-partial-clone.sh +++ b/t/t5616-partial-clone.sh @@ -694,6 +694,36 @@ test_expect_success 'lazy-fetch in submodule succeeds' ' git -C client restore --recurse-submodules --source=HEAD^ :/ ' +test_expect_success 'after fetching descendants of non-promisor commits, gc works' ' + # Setup + git init full && + git -C full config uploadpack.allowfilter 1 && + git -C full config uploadpack.allowanysha1inwant 1 && + touch full/foo && + git -C full add foo && + git -C full commit -m "commit 1" && + git -C full checkout --detach && + + # Partial clone and push commit to remote + git clone "file://$(pwd)/full" --filter=blob:none partial && + echo "hello" > partial/foo && + git -C partial commit -a -m "commit 2" && + git -C partial push && + + # gc in partial repo + git -C partial gc --prune=now && + + # Create another commit in normal repo + git -C full checkout main && + echo " world" >> full/foo && + git -C full commit -a -m "commit 3" && + + # Pull from remote in partial repo, and run gc again + git -C partial pull && + git -C partial gc --prune=now +' + + . "$TEST_DIRECTORY"/lib-httpd.sh start_httpd