From c7f6a534f0fae4a93123309e627e4d52f768b83b Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 19 Mar 2024 20:27:16 -0400 Subject: shortlog: stop setting pp.print_email_subject When shortlog processes a commit using its internal traversal, it may pretty-print the subject line for the summary view. When we do so, we set the "print_email_subject" flag in the pretty-print context. But this flag does nothing! Since we are using CMIT_FMT_USERFORMAT, we skip most of the usual formatting code entirely. This flag is there due to commit 6d167fd7cc (pretty: use fmt_output_email_subject(), 2017-03-01). But that just switched us away from setting an empty "subject" header field, which was similarly useless. That was added by dd2e794a21 (Refactor pretty_print_commit arguments into a struct, 2009-10-19). Before using the struct, we had to pass _something_ as the argument, so we passed the empty string (a NULL would have worked equally well). So this setting has never done anything, and we can drop the line. That shortens the code, but more importantly, makes it easier to reason about and refactor the other users of this flag. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/shortlog.c | 1 - 1 file changed, 1 deletion(-) diff --git a/builtin/shortlog.c b/builtin/shortlog.c index 1307ed2b88..3c7cd2d6ef 100644 --- a/builtin/shortlog.c +++ b/builtin/shortlog.c @@ -245,7 +245,6 @@ void shortlog_add_commit(struct shortlog *log, struct commit *commit) ctx.fmt = CMIT_FMT_USERFORMAT; ctx.abbrev = log->abbrev; - ctx.print_email_subject = 1; ctx.date_mode = log->date_mode; ctx.output_encoding = get_log_output_encoding(); -- cgit 1.2.3-korg From 69aff6200c51cc8a91111b80fbfb84792ce0908c Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 19 Mar 2024 20:28:51 -0400 Subject: pretty: split oneline and email subject printing The pp_title_line() function is used for two formats: the oneline format and the subject line of the email format. But most of the logic in the function does not make any sense for oneline; it is about special formatting of email headers. Lumping the two formats together made sense long ago in 4234a76167 (Extend --pretty=oneline to cover the first paragraph, 2007-06-11), when there was a lot of manual logic to paste lines together. But later, 88c44735ab (pretty: factor out format_subject(), 2008-12-27) pulled that logic into its own function. We can implement the oneline format by just calling that one function. This makes the intention of the code much more clear, as we know we only need to worry about those extra email options when dealing with actual email. While the intent here is cleanup, it is possible to trigger these cases in practice by running format-patch with an explicit --oneline option. But if you did, the results are basically nonsense. For example, with the preserve_subject flag: $ printf "%s\n" one two three | git commit --allow-empty -F - $ git format-patch -1 --stdout -k | grep ^Subject Subject: =?UTF-8?q?one=0Atwo=0Athree?= $ git format-patch -1 --stdout -k --oneline --no-signature 2af7fbe one two three Or with extra headers: $ git format-patch -1 --stdout --cc=me --oneline --no-signature 2af7fbe one two three Cc: me So I'd actually consider this to be an improvement, though you are probably crazy to use other formats with format-patch in the first place (arguably it should forbid non-email formats entirely, but that's a bigger change). As a bonus, it eliminates some pointless extra allocations for the oneline output. The email code, since it has to deal with wrapping, formats into an extra auxiliary buffer. The speedup is tiny, though like "rev-list --no-abbrev --format=oneline" seems to improve by a consistent 1-2% for me. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/log.c | 2 +- pretty.c | 22 ++++++++++++---------- pretty.h | 8 ++++---- 3 files changed, 17 insertions(+), 15 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index db1808d7c1..25ceefa255 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1297,7 +1297,7 @@ static void prepare_cover_text(struct pretty_print_context *pp, subject = subject_sb.buf; do_pp: - pp_title_line(pp, &subject, sb, encoding, need_8bit_cte); + pp_email_subject(pp, &subject, sb, encoding, need_8bit_cte); pp_remainder(pp, &body, sb, 0); strbuf_release(&description_sb); diff --git a/pretty.c b/pretty.c index cf964b060c..397d282405 100644 --- a/pretty.c +++ b/pretty.c @@ -2077,11 +2077,11 @@ static void pp_header(struct pretty_print_context *pp, } } -void pp_title_line(struct pretty_print_context *pp, - const char **msg_p, - struct strbuf *sb, - const char *encoding, - int need_8bit_cte) +void pp_email_subject(struct pretty_print_context *pp, + const char **msg_p, + struct strbuf *sb, + const char *encoding, + int need_8bit_cte) { static const int max_length = 78; /* per rfc2047 */ struct strbuf title; @@ -2126,9 +2126,8 @@ void pp_title_line(struct pretty_print_context *pp, if (pp->after_subject) { strbuf_addstr(sb, pp->after_subject); } - if (cmit_fmt_is_mail(pp->fmt)) { - strbuf_addch(sb, '\n'); - } + + strbuf_addch(sb, '\n'); if (pp->in_body_headers.nr) { int i; @@ -2328,8 +2327,11 @@ void pretty_print_commit(struct pretty_print_context *pp, msg = skip_blank_lines(msg); /* These formats treat the title line specially. */ - if (pp->fmt == CMIT_FMT_ONELINE || cmit_fmt_is_mail(pp->fmt)) - pp_title_line(pp, &msg, sb, encoding, need_8bit_cte); + if (pp->fmt == CMIT_FMT_ONELINE) { + msg = format_subject(sb, msg, " "); + strbuf_addch(sb, '\n'); + } else if (cmit_fmt_is_mail(pp->fmt)) + pp_email_subject(pp, &msg, sb, encoding, need_8bit_cte); beginning_of_body = sb->len; if (pp->fmt != CMIT_FMT_ONELINE) diff --git a/pretty.h b/pretty.h index 421209e9ec..d4ff79deb3 100644 --- a/pretty.h +++ b/pretty.h @@ -96,13 +96,13 @@ void pp_user_info(struct pretty_print_context *pp, const char *what, const char *encoding); /* - * Format title line of commit message taken from "msg_p" and + * Format subject line of commit message taken from "msg_p" and * put it into "sb". * First line of "msg_p" is also affected. */ -void pp_title_line(struct pretty_print_context *pp, const char **msg_p, - struct strbuf *sb, const char *encoding, - int need_8bit_cte); +void pp_email_subject(struct pretty_print_context *pp, const char **msg_p, + struct strbuf *sb, const char *encoding, + int need_8bit_cte); /* * Get current state of commit message from "msg_p" and continue formatting -- cgit 1.2.3-korg From d5a90d6319aeb6cb3f0b795156c4c2259373424f Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 19 Mar 2024 20:30:44 -0400 Subject: pretty: drop print_email_subject flag With one exception, the print_email_subject flag is set if and only if the commit format is email based: - in make_cover_letter() we set it along with CMIT_FMT_EMAIL explicitly - in show_log(), we set it if cmit_fmt_is_mail() is true. That covers format-patch as well as "git log --format=email" (or mboxrd). The one exception is "rev-list --format=email", which somewhat nonsensically prints the author and date as email headers, but no subject, like: $ git rev-list --format=email HEAD commit 64fc4c2cdd4db2645eaabb47aa4bac820b03cdba From: Jeff King Date: Tue, 19 Mar 2024 19:39:26 -0400 this is the subject this is the body It's doubtful that this is a useful format at all (the "commit" lines replace the "From" lines that would make it work as an actual mbox). But I think that printing the subject as a header (like this patch does) is the least surprising thing to do. So let's drop this field, making the code a little simpler and easier to reason about. Note that we do need to set the "rev" field of the pretty_print_context in rev-list, since that is used to check for subject_prefix, etc. It's not possible to set those fields via rev-list, so we'll always just print "Subject: ". But unless we pass in our rev_info, fmt_output_email_subject() would segfault trying to figure it out. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/log.c | 1 - builtin/rev-list.c | 1 + log-tree.c | 1 - pretty.c | 21 ++++++++------------- pretty.h | 1 - 5 files changed, 9 insertions(+), 16 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index 25ceefa255..de1c1cbec1 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1364,7 +1364,6 @@ static void make_cover_letter(struct rev_info *rev, int use_separate_file, pp.fmt = CMIT_FMT_EMAIL; pp.date_mode.type = DATE_RFC2822; pp.rev = rev; - pp.print_email_subject = 1; pp.encode_email_headers = rev->encode_email_headers; pp_user_info(&pp, NULL, &sb, committer, encoding); prepare_cover_text(&pp, description_file, branch_name, &sb, diff --git a/builtin/rev-list.c b/builtin/rev-list.c index b3f4783858..98024b5614 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -219,6 +219,7 @@ static void show_commit(struct commit *commit, void *data) ctx.fmt = revs->commit_format; ctx.output_encoding = get_log_output_encoding(); ctx.color = revs->diffopt.use_color; + ctx.rev = revs; pretty_print_commit(&ctx, commit, &buf); if (buf.len) { if (revs->commit_format != CMIT_FMT_ONELINE) diff --git a/log-tree.c b/log-tree.c index 337b9334cd..a765b320dc 100644 --- a/log-tree.c +++ b/log-tree.c @@ -742,7 +742,6 @@ void show_log(struct rev_info *opt) log_write_email_headers(opt, commit, &extra_headers, &ctx.need_8bit_cte, 1); ctx.rev = opt; - ctx.print_email_subject = 1; } else if (opt->commit_format != CMIT_FMT_USERFORMAT) { fputs(diff_get_color_opt(&opt->diffopt, DIFF_COMMIT), opt->diffopt.file); if (opt->commit_format != CMIT_FMT_ONELINE) diff --git a/pretty.c b/pretty.c index 397d282405..3df00bcf04 100644 --- a/pretty.c +++ b/pretty.c @@ -2091,19 +2091,14 @@ void pp_email_subject(struct pretty_print_context *pp, pp->preserve_subject ? "\n" : " "); strbuf_grow(sb, title.len + 1024); - if (pp->print_email_subject) { - if (pp->rev) - fmt_output_email_subject(sb, pp->rev); - if (pp->encode_email_headers && - needs_rfc2047_encoding(title.buf, title.len)) - add_rfc2047(sb, title.buf, title.len, - encoding, RFC2047_SUBJECT); - else - strbuf_add_wrapped_bytes(sb, title.buf, title.len, + fmt_output_email_subject(sb, pp->rev); + if (pp->encode_email_headers && + needs_rfc2047_encoding(title.buf, title.len)) + add_rfc2047(sb, title.buf, title.len, + encoding, RFC2047_SUBJECT); + else + strbuf_add_wrapped_bytes(sb, title.buf, title.len, -last_line_length(sb), 1, max_length); - } else { - strbuf_addbuf(sb, &title); - } strbuf_addch(sb, '\n'); if (need_8bit_cte == 0) { @@ -2319,7 +2314,7 @@ void pretty_print_commit(struct pretty_print_context *pp, } pp_header(pp, encoding, commit, &msg, sb); - if (pp->fmt != CMIT_FMT_ONELINE && !pp->print_email_subject) { + if (pp->fmt != CMIT_FMT_ONELINE && !cmit_fmt_is_mail(pp->fmt)) { strbuf_addch(sb, '\n'); } diff --git a/pretty.h b/pretty.h index d4ff79deb3..021bc1d658 100644 --- a/pretty.h +++ b/pretty.h @@ -39,7 +39,6 @@ struct pretty_print_context { int preserve_subject; struct date_mode date_mode; unsigned date_mode_explicit:1; - int print_email_subject; int expand_tabs_in_log; int need_8bit_cte; char *notes_message; -- cgit 1.2.3-korg From 82363d967042ca2884b087b0895bbc566704388e Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 19 Mar 2024 20:31:39 -0400 Subject: log: do not set up extra_headers for non-email formats The commit pretty-printer code has an "after_subject" parameter which it uses to insert extra headers into the email format. In show_log() we set this by calling log_write_email_headers() if we are using an email format, but otherwise default the variable to the rev_info.extra_headers variable. Since the pretty-printer code will ignore after_subject unless we are using an email format, this default is pointless. We can just set after_subject directly, eliminating an extra variable. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- log-tree.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/log-tree.c b/log-tree.c index a765b320dc..78e678e0e7 100644 --- a/log-tree.c +++ b/log-tree.c @@ -678,7 +678,6 @@ void show_log(struct rev_info *opt) struct log_info *log = opt->loginfo; struct commit *commit = log->commit, *parent = log->parent; int abbrev_commit = opt->abbrev_commit ? opt->abbrev : the_hash_algo->hexsz; - const char *extra_headers = opt->extra_headers; struct pretty_print_context ctx = {0}; opt->loginfo = NULL; @@ -739,7 +738,7 @@ void show_log(struct rev_info *opt) */ if (cmit_fmt_is_mail(opt->commit_format)) { - log_write_email_headers(opt, commit, &extra_headers, + log_write_email_headers(opt, commit, &ctx.after_subject, &ctx.need_8bit_cte, 1); ctx.rev = opt; } else if (opt->commit_format != CMIT_FMT_USERFORMAT) { @@ -807,7 +806,6 @@ void show_log(struct rev_info *opt) ctx.date_mode = opt->date_mode; ctx.date_mode_explicit = opt->date_mode_explicit; ctx.abbrev = opt->diffopt.abbrev; - ctx.after_subject = extra_headers; ctx.preserve_subject = opt->preserve_subject; ctx.encode_email_headers = opt->encode_email_headers; ctx.reflog_info = opt->reflog_info; -- cgit 1.2.3-korg From 305a68143cc0e9b714d71417efa9f0162dd07221 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 19 Mar 2024 20:35:33 -0400 Subject: format-patch: return an allocated string from log_write_email_headers() When pretty-printing a commit in the email format, we have to fill in the "after subject" field of the pretty_print_context with any extra headers the user provided (e.g., from "--to" or "--cc" options) plus any special MIME headers. We return an out-pointer that sometimes points to a newly heap-allocated string and sometimes not. To avoid leaking, we store the allocated version in a buffer with static lifetime, which is ugly. Worse, as we extend the header feature, we'll end up having to repeat this ugly pattern. Instead, let's have our out-pointer pass ownership back to the caller, and duplicate the string when necessary. This does mean one extra allocation per commit when you use extra headers, but in the context of format-patch which is showing diffs, I don't think that's even measurable. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/log.c | 1 + log-tree.c | 11 ++++++----- log-tree.h | 2 +- pretty.h | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/builtin/log.c b/builtin/log.c index de1c1cbec1..ebd3f642da 100644 --- a/builtin/log.c +++ b/builtin/log.c @@ -1370,6 +1370,7 @@ static void make_cover_letter(struct rev_info *rev, int use_separate_file, encoding, need_8bit_cte); fprintf(rev->diffopt.file, "%s\n", sb.buf); + free(pp.after_subject); strbuf_release(&sb); shortlog_init(&log); diff --git a/log-tree.c b/log-tree.c index 78e678e0e7..c3e77064e4 100644 --- a/log-tree.c +++ b/log-tree.c @@ -470,11 +470,11 @@ void fmt_output_email_subject(struct strbuf *sb, struct rev_info *opt) } void log_write_email_headers(struct rev_info *opt, struct commit *commit, - const char **extra_headers_p, + char **extra_headers_p, int *need_8bit_cte_p, int maybe_multipart) { - const char *extra_headers = opt->extra_headers; + char *extra_headers = xstrdup_or_null(opt->extra_headers); const char *name = oid_to_hex(opt->zero_commit ? null_oid() : &commit->object.oid); @@ -496,12 +496,11 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, graph_show_oneline(opt->graph); } if (opt->mime_boundary && maybe_multipart) { - static struct strbuf subject_buffer = STRBUF_INIT; + struct strbuf subject_buffer = STRBUF_INIT; static struct strbuf buffer = STRBUF_INIT; struct strbuf filename = STRBUF_INIT; *need_8bit_cte_p = -1; /* NEVER */ - strbuf_reset(&subject_buffer); strbuf_reset(&buffer); strbuf_addf(&subject_buffer, @@ -519,7 +518,8 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, extra_headers ? extra_headers : "", mime_boundary_leader, opt->mime_boundary, mime_boundary_leader, opt->mime_boundary); - extra_headers = subject_buffer.buf; + free(extra_headers); + extra_headers = strbuf_detach(&subject_buffer, NULL); if (opt->numbered_files) strbuf_addf(&filename, "%d", opt->nr); @@ -854,6 +854,7 @@ void show_log(struct rev_info *opt) strbuf_release(&msgbuf); free(ctx.notes_message); + free(ctx.after_subject); if (cmit_fmt_is_mail(ctx.fmt) && opt->idiff_oid1) { struct diff_queue_struct dq; diff --git a/log-tree.h b/log-tree.h index 41c776fea5..94978e2c83 100644 --- a/log-tree.h +++ b/log-tree.h @@ -29,7 +29,7 @@ void format_decorations(struct strbuf *sb, const struct commit *commit, int use_color, const struct decoration_options *opts); void show_decorations(struct rev_info *opt, struct commit *commit); void log_write_email_headers(struct rev_info *opt, struct commit *commit, - const char **extra_headers_p, + char **extra_headers_p, int *need_8bit_cte_p, int maybe_multipart); void load_ref_decorations(struct decoration_filter *filter, int flags); diff --git a/pretty.h b/pretty.h index 021bc1d658..9cc9e5d42b 100644 --- a/pretty.h +++ b/pretty.h @@ -35,7 +35,7 @@ struct pretty_print_context { */ enum cmit_fmt fmt; int abbrev; - const char *after_subject; + char *after_subject; int preserve_subject; struct date_mode date_mode; unsigned date_mode_explicit:1; -- cgit 1.2.3-korg From 838ba014ce8226e0fb6c87b1b1c587fc61582323 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Tue, 19 Mar 2024 20:35:57 -0400 Subject: format-patch: simplify after-subject MIME header handling In log_write_email_headers(), we append our MIME headers to the set of extra headers by creating a new strbuf, adding the existing headers, and then adding our new ones. We had to do it this way when our output buffer might point to the constant opt->extra_headers variable. But since the previous commit, we always make a local copy of that variable. Let's turn that into a strbuf, which lets the MIME code simply append to it. That simplifies the function and avoids a pointless extra copy of the headers. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- log-tree.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/log-tree.c b/log-tree.c index c3e77064e4..a102893621 100644 --- a/log-tree.c +++ b/log-tree.c @@ -474,12 +474,15 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, int *need_8bit_cte_p, int maybe_multipart) { - char *extra_headers = xstrdup_or_null(opt->extra_headers); + struct strbuf headers = STRBUF_INIT; const char *name = oid_to_hex(opt->zero_commit ? null_oid() : &commit->object.oid); *need_8bit_cte_p = 0; /* unknown */ + if (opt->extra_headers) + strbuf_addstr(&headers, opt->extra_headers); + fprintf(opt->diffopt.file, "From %s Mon Sep 17 00:00:00 2001\n", name); graph_show_oneline(opt->graph); if (opt->message_id) { @@ -496,15 +499,13 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, graph_show_oneline(opt->graph); } if (opt->mime_boundary && maybe_multipart) { - struct strbuf subject_buffer = STRBUF_INIT; static struct strbuf buffer = STRBUF_INIT; struct strbuf filename = STRBUF_INIT; *need_8bit_cte_p = -1; /* NEVER */ strbuf_reset(&buffer); - strbuf_addf(&subject_buffer, - "%s" + strbuf_addf(&headers, "MIME-Version: 1.0\n" "Content-Type: multipart/mixed;" " boundary=\"%s%s\"\n" @@ -515,11 +516,8 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, "Content-Type: text/plain; " "charset=UTF-8; format=fixed\n" "Content-Transfer-Encoding: 8bit\n\n", - extra_headers ? extra_headers : "", mime_boundary_leader, opt->mime_boundary, mime_boundary_leader, opt->mime_boundary); - free(extra_headers); - extra_headers = strbuf_detach(&subject_buffer, NULL); if (opt->numbered_files) strbuf_addf(&filename, "%d", opt->nr); @@ -539,7 +537,7 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, opt->diffopt.stat_sep = buffer.buf; strbuf_release(&filename); } - *extra_headers_p = extra_headers; + *extra_headers_p = headers.len ? strbuf_detach(&headers, NULL) : NULL; } static void show_sig_lines(struct rev_info *opt, int status, const char *bol) -- cgit 1.2.3-korg From 1c10b8e5b0819598b42c042754c7c860a8637ce3 Mon Sep 17 00:00:00 2001 From: Jeff King Date: Fri, 22 Mar 2024 05:59:51 -0400 Subject: format-patch: fix leak of empty header string The log_write_email_headers() function recently learned to return the "extra_headers_p" variable to the caller as an allocated string. We start by copying rev_info.extra_headers into a strbuf, and then detach the strbuf at the end of the function. If there are no extra headers, we leave the strbuf empty. Likewise, if there are no headers to return, we pass back NULL. This misses a corner case which can cause a leak. The "do we have any headers to copy" check is done by looking for a NULL opt->extra_headers. But the "do we have a non-empty string to return" check is done by checking the length of the strbuf. That means if opt->extra_headers is the empty string, we'll "copy" it into the strbuf, triggering an allocation, but then leak the buffer when we return NULL from the function. We can solve this in one of two ways: 1. Rather than checking headers->len at the end, we could check headers->alloc to see if we allocated anything. That retains the original behavior before the recent change, where an empty extra_headers string is "passed through" to the caller. In practice this doesn't matter, though (the code which eventually looks at the result treats NULL or the empty string the same). 2. Only bother copying a non-empty string into the strbuf. This has the added bonus of avoiding a pointless allocation. Arguably strbuf_addstr() could do this optimization itself, though it may be slightly dangerous to do so (some existing callers may not get a fresh allocation when they expect to). In theory callers are all supposed to use strbuf_detach() in such a case, but there's no guarantee that this is the case. This patch uses option 2. Without it, building with SANITIZE=leak shows many errors in t4021 and elsewhere. Signed-off-by: Jeff King Signed-off-by: Junio C Hamano --- log-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/log-tree.c b/log-tree.c index a102893621..7e65fd7026 100644 --- a/log-tree.c +++ b/log-tree.c @@ -480,7 +480,7 @@ void log_write_email_headers(struct rev_info *opt, struct commit *commit, *need_8bit_cte_p = 0; /* unknown */ - if (opt->extra_headers) + if (opt->extra_headers && *opt->extra_headers) strbuf_addstr(&headers, opt->extra_headers); fprintf(opt->diffopt.file, "From %s Mon Sep 17 00:00:00 2001\n", name); -- cgit 1.2.3-korg