Skip to content

Commit ededd0d

Browse files
committed
Merge branch 'jt/fix-fattening-promisor-fetch'
Fix performance regression of a recent "fatten promisor pack with local objects" protection against an unwanted gc. * jt/fix-fattening-promisor-fetch: index-pack --promisor: also check commits' trees index-pack --promisor: don't check blobs index-pack --promisor: dedup before checking links
2 parents 4007617 + 1a14c85 commit ededd0d

File tree

1 file changed

+72
-33
lines changed

1 file changed

+72
-33
lines changed

builtin/index-pack.c

+72-33
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,11 @@ static int input_fd, output_fd;
155155
static const char *curr_pack;
156156

157157
/*
158-
* local_links is guarded by read_mutex, and record_local_links is read-only in
159-
* a thread.
158+
* outgoing_links is guarded by read_mutex, and record_outgoing_links is
159+
* read-only in a thread.
160160
*/
161-
static struct oidset local_links = OIDSET_INIT;
162-
static int record_local_links;
161+
static struct oidset outgoing_links = OIDSET_INIT;
162+
static int record_outgoing_links;
163163

164164
static struct thread_local_data *thread_data;
165165
static int nr_dispatched;
@@ -812,18 +812,41 @@ static int check_collison(struct object_entry *entry)
812812
return 0;
813813
}
814814

815-
static void record_if_local_object(const struct object_id *oid)
815+
static void record_outgoing_link(const struct object_id *oid)
816816
{
817-
struct object_info info = OBJECT_INFO_INIT;
818-
if (oid_object_info_extended(the_repository, oid, &info, 0))
819-
/* Missing; assume it is a promisor object */
820-
return;
821-
if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
822-
return;
823-
oidset_insert(&local_links, oid);
817+
oidset_insert(&outgoing_links, oid);
824818
}
825819

826-
static void do_record_local_links(struct object *obj)
820+
static void maybe_record_name_entry(const struct name_entry *entry)
821+
{
822+
/*
823+
* Checking only trees here results in a significantly faster packfile
824+
* indexing, but the drawback is that if the packfile to be indexed
825+
* references a local blob only directly (that is, never through a
826+
* local tree), that local blob is in danger of being garbage
827+
* collected. Such a situation may arise if we push local commits,
828+
* including one with a change to a blob in the root tree, and then the
829+
* server incorporates them into its main branch through a "rebase" or
830+
* "squash" merge strategy, and then we fetch the new main branch from
831+
* the server.
832+
*
833+
* This situation has not been observed yet - we have only noticed
834+
* missing commits, not missing trees or blobs. (In fact, if it were
835+
* believed that only missing commits are problematic, one could argue
836+
* that we should also exclude trees during the outgoing link check;
837+
* but it is safer to include them.)
838+
*
839+
* Due to the rarity of the situation (it has not been observed to
840+
* happen in real life), and because the "penalty" in such a situation
841+
* is merely to refetch the missing blob when it's needed (and this
842+
* happens only once - when refetched, the blob goes into a promisor
843+
* pack, so it won't be GC-ed, the tradeoff seems worth it.
844+
*/
845+
if (S_ISDIR(entry->mode))
846+
record_outgoing_link(&entry->oid);
847+
}
848+
849+
static void do_record_outgoing_links(struct object *obj)
827850
{
828851
if (obj->type == OBJ_TREE) {
829852
struct tree *tree = (struct tree *)obj;
@@ -837,16 +860,17 @@ static void do_record_local_links(struct object *obj)
837860
*/
838861
return;
839862
while (tree_entry_gently(&desc, &entry))
840-
record_if_local_object(&entry.oid);
863+
maybe_record_name_entry(&entry);
841864
} else if (obj->type == OBJ_COMMIT) {
842865
struct commit *commit = (struct commit *) obj;
843866
struct commit_list *parents = commit->parents;
844867

868+
record_outgoing_link(get_commit_tree_oid(commit));
845869
for (; parents; parents = parents->next)
846-
record_if_local_object(&parents->item->object.oid);
870+
record_outgoing_link(&parents->item->object.oid);
847871
} else if (obj->type == OBJ_TAG) {
848872
struct tag *tag = (struct tag *) obj;
849-
record_if_local_object(get_tagged_oid(tag));
873+
record_outgoing_link(get_tagged_oid(tag));
850874
}
851875
}
852876

@@ -896,7 +920,7 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
896920
free(has_data);
897921
}
898922

899-
if (strict || do_fsck_object || record_local_links) {
923+
if (strict || do_fsck_object || record_outgoing_links) {
900924
read_lock();
901925
if (type == OBJ_BLOB) {
902926
struct blob *blob = lookup_blob(the_repository, oid);
@@ -928,8 +952,8 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
928952
die(_("fsck error in packed object"));
929953
if (strict && fsck_walk(obj, NULL, &fsck_options))
930954
die(_("Not all child objects of %s are reachable"), oid_to_hex(&obj->oid));
931-
if (record_local_links)
932-
do_record_local_links(obj);
955+
if (record_outgoing_links)
956+
do_record_outgoing_links(obj);
933957

934958
if (obj->type == OBJ_TREE) {
935959
struct tree *item = (struct tree *) obj;
@@ -1785,28 +1809,43 @@ static void repack_local_links(void)
17851809
struct strbuf line = STRBUF_INIT;
17861810
struct oidset_iter iter;
17871811
struct object_id *oid;
1788-
char *base_name;
1812+
char *base_name = NULL;
17891813

1790-
if (!oidset_size(&local_links))
1814+
if (!oidset_size(&outgoing_links))
17911815
return;
17921816

1793-
base_name = mkpathdup("%s/pack/pack", repo_get_object_directory(the_repository));
1817+
oidset_iter_init(&outgoing_links, &iter);
1818+
while ((oid = oidset_iter_next(&iter))) {
1819+
struct object_info info = OBJECT_INFO_INIT;
1820+
if (oid_object_info_extended(the_repository, oid, &info, 0))
1821+
/* Missing; assume it is a promisor object */
1822+
continue;
1823+
if (info.whence == OI_PACKED && info.u.packed.pack->pack_promisor)
1824+
continue;
17941825

1795-
strvec_push(&cmd.args, "pack-objects");
1796-
strvec_push(&cmd.args, "--exclude-promisor-objects-best-effort");
1797-
strvec_push(&cmd.args, base_name);
1798-
cmd.git_cmd = 1;
1799-
cmd.in = -1;
1800-
cmd.out = -1;
1801-
if (start_command(&cmd))
1802-
die(_("could not start pack-objects to repack local links"));
1826+
if (!cmd.args.nr) {
1827+
base_name = mkpathdup(
1828+
"%s/pack/pack",
1829+
repo_get_object_directory(the_repository));
1830+
strvec_push(&cmd.args, "pack-objects");
1831+
strvec_push(&cmd.args,
1832+
"--exclude-promisor-objects-best-effort");
1833+
strvec_push(&cmd.args, base_name);
1834+
cmd.git_cmd = 1;
1835+
cmd.in = -1;
1836+
cmd.out = -1;
1837+
if (start_command(&cmd))
1838+
die(_("could not start pack-objects to repack local links"));
1839+
}
18031840

1804-
oidset_iter_init(&local_links, &iter);
1805-
while ((oid = oidset_iter_next(&iter))) {
18061841
if (write_in_full(cmd.in, oid_to_hex(oid), the_hash_algo->hexsz) < 0 ||
18071842
write_in_full(cmd.in, "\n", 1) < 0)
18081843
die(_("failed to feed local object to pack-objects"));
18091844
}
1845+
1846+
if (!cmd.args.nr)
1847+
return;
1848+
18101849
close(cmd.in);
18111850

18121851
out = xfdopen(cmd.out, "r");
@@ -1905,7 +1944,7 @@ int cmd_index_pack(int argc,
19051944
} else if (skip_to_optional_arg(arg, "--keep", &keep_msg)) {
19061945
; /* nothing to do */
19071946
} else if (skip_to_optional_arg(arg, "--promisor", &promisor_msg)) {
1908-
record_local_links = 1;
1947+
record_outgoing_links = 1;
19091948
} else if (starts_with(arg, "--threads=")) {
19101949
char *end;
19111950
nr_threads = strtoul(arg+10, &end, 0);

0 commit comments

Comments
 (0)