From 52ba119d862a3e4a1d7afcc0d552777681fb01d7 Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 11 Dec 2019 12:19:04 +0100 Subject: [PATCH 1/2] rebase-merges: move labels' whitespace mangling into `label_oid()` One of the trickier aspects of the design of `git rebase --rebase-merges` is the way labels are generated for the initial todo list: those labels are supposed to be intuitive and first and foremost unique. To that end, `label_oid()` appends a unique suffix when necessary. Those labels not only need to be unique, but they also need to be valid refs. To make sure of that, `make_script_with_merges()` replaces whitespace by dashes. That would appear to be the wrong layer for that sanitizing step, though: all callers of `label_oid()` should get that same benefit. Even if it does not make a difference currently (the only called of `label_oid()` that passes a label that might need to be sanitized _is_ `make_script_with_merges()`), let's move the responsibility for sanitizing labels into the `label_oid()` function. This commit is best viewed with `-w` because it unfortunately needs to change the indentation of a large block of code in `label_oid()`. Signed-off-by: Johannes Schindelin --- sequencer.c | 56 ++++++++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/sequencer.c b/sequencer.c index 9d5964fd81fe09..3122d7e642ee21 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4425,7 +4425,6 @@ static const char *label_oid(struct object_id *oid, const char *label, struct labels_entry *labels_entry; struct string_entry *string_entry; struct object_id dummy; - size_t len; int i; string_entry = oidmap_get(&state->commit2label, oid); @@ -4445,10 +4444,10 @@ static const char *label_oid(struct object_id *oid, const char *label, * abbreviation for any uninteresting commit's names that does not * clash with any other label. */ + strbuf_reset(&state->buf); if (!label) { char *p; - strbuf_reset(&state->buf); strbuf_grow(&state->buf, GIT_MAX_HEXSZ); label = p = state->buf.buf; @@ -4471,32 +4470,37 @@ static const char *label_oid(struct object_id *oid, const char *label, p[i] = save; } } - } else if (((len = strlen(label)) == the_hash_algo->hexsz && - !get_oid_hex(label, &dummy)) || - (len == 1 && *label == '#') || - hashmap_get_from_hash(&state->labels, - strihash(label), label)) { - /* - * If the label already exists, or if the label is a valid full - * OID, or the label is a '#' (which we use as a separator - * between merge heads and oneline), we append a dash and a - * number to make it unique. - */ + } else { struct strbuf *buf = &state->buf; - strbuf_reset(buf); - strbuf_add(buf, label, len); + for (; *label; label++) + strbuf_addch(buf, isspace(*label) ? '-' : *label); + label = buf->buf; - for (i = 2; ; i++) { - strbuf_setlen(buf, len); - strbuf_addf(buf, "-%d", i); - if (!hashmap_get_from_hash(&state->labels, - strihash(buf->buf), - buf->buf)) - break; - } + if ((buf->len == the_hash_algo->hexsz && + !get_oid_hex(label, &dummy)) || + (buf->len == 1 && *label == '#') || + hashmap_get_from_hash(&state->labels, + strihash(label), label)) { + /* + * If the label already exists, or if the label is a + * valid full OID, or the label is a '#' (which we use + * as a separator between merge heads and oneline), we + * append a dash and a number to make it unique. + */ + size_t len = buf->len; - label = buf->buf; + for (i = 2; ; i++) { + strbuf_setlen(buf, len); + strbuf_addf(buf, "-%d", i); + if (!hashmap_get_from_hash(&state->labels, + strihash(buf->buf), + buf->buf)) + break; + } + + label = buf->buf; + } } FLEX_ALLOC_STR(labels_entry, label, label); @@ -4598,10 +4602,6 @@ static int make_script_with_merges(struct pretty_print_context *pp, else strbuf_addbuf(&label, &oneline); - for (p1 = label.buf; *p1; p1++) - if (isspace(*p1)) - *(char *)p1 = '-'; - strbuf_reset(&buf); strbuf_addf(&buf, "%s -C %s", cmd_merge, oid_to_hex(&commit->object.oid)); From ce3be0aaab611f61e1b76bed92bf734ec623183d Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Wed, 11 Dec 2019 12:19:15 +0100 Subject: [PATCH 2/2] rebase -r: let `label` generate safer labels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `label` todo command in interactive rebases creates temporary refs in the `refs/rewritten/` namespace. These refs are stored as loose refs, i.e. as files in `.git/refs/rewritten/`, therefore they have to conform with file name limitations on the current filesystem in addition to the accepted ref format. This poses a problem in particular on NTFS/FAT, where e.g. the colon, double-quote and pipe characters are disallowed as part of a file name. Let's safeguard against this by replacing not only white-space characters by dashes, but all non-alpha-numeric ones. However, we exempt non-ASCII UTF-8 characters from that, as it should be quite possible to reflect branch names such as `↯↯↯` in refs/file names. Signed-off-by: Matthew Rogers Signed-off-by: Johannes Schindelin --- sequencer.c | 20 +++++++++++++++++++- t/t3430-rebase-merges.sh | 6 ++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/sequencer.c b/sequencer.c index 3122d7e642ee21..1160e935073726 100644 --- a/sequencer.c +++ b/sequencer.c @@ -4473,8 +4473,26 @@ static const char *label_oid(struct object_id *oid, const char *label, } else { struct strbuf *buf = &state->buf; + /* + * Sanitize labels by replacing non-alpha-numeric characters + * (including white-space ones) by dashes, as they might be + * illegal in file names (and hence in ref names). + * + * Note that we retain non-ASCII UTF-8 characters (identified + * via the most significant bit). They should be all acceptable + * in file names. We do not validate the UTF-8 here, that's not + * the job of this function. + */ for (; *label; label++) - strbuf_addch(buf, isspace(*label) ? '-' : *label); + if ((*label & 0x80) || isalnum(*label)) + strbuf_addch(buf, *label); + /* avoid leading dash and double-dashes */ + else if (buf->len && buf->buf[buf->len - 1] != '-') + strbuf_addch(buf, '-'); + if (!buf->len) { + strbuf_addstr(buf, "rev-"); + strbuf_add_unique_abbrev(buf, oid, default_abbrev); + } label = buf->buf; if ((buf->len == the_hash_algo->hexsz && diff --git a/t/t3430-rebase-merges.sh b/t/t3430-rebase-merges.sh index 9efcf4808ac92f..f728aba995b2fd 100755 --- a/t/t3430-rebase-merges.sh +++ b/t/t3430-rebase-merges.sh @@ -468,4 +468,10 @@ test_expect_success '--rebase-merges with strategies' ' test_cmp expect G.t ' +test_expect_success '--rebase-merges with commit that can generate bad characters for filename' ' + git checkout -b colon-in-label E && + git merge -m "colon: this should work" G && + git rebase --rebase-merges --force-rebase E +' + test_done