From 92e5b62fec0e9b647429e8d3736c571c434dd375 Mon Sep 17 00:00:00 2001 From: Jacob Keller Date: Fri, 15 Apr 2016 16:01:45 -0700 Subject: [PATCH 1/3] xdiff: add recs_match helper function It is a common pattern in xdl_change_compact to check that hashes and strings match. The resulting code to perform this change causes very long lines and makes it hard to follow the intention. Introduce a helper function recs_match which performs both checks to increase code readability. Signed-off-by: Jacob Keller Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 2358a2d632..748eeb9919 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -400,6 +400,14 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, } +static int recs_match(xrecord_t **recs, long ixs, long ix, long flags) +{ + return (recs[ixs]->ha == recs[ix]->ha && + xdl_recmatch(recs[ixs]->ptr, recs[ixs]->size, + recs[ix]->ptr, recs[ix]->size, + flags)); +} + int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec; char *rchg = xdf->rchg, *rchgo = xdfo->rchg; @@ -442,8 +450,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { * the last line of the current change group, shift backward * the group. */ - while (ixs > 0 && recs[ixs - 1]->ha == recs[ix - 1]->ha && - xdl_recmatch(recs[ixs - 1]->ptr, recs[ixs - 1]->size, recs[ix - 1]->ptr, recs[ix - 1]->size, flags)) { + while (ixs > 0 && recs_match(recs, ixs - 1, ix - 1, flags)) { rchg[--ixs] = 1; rchg[--ix] = 0; @@ -470,8 +477,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { * the line next of the current change group, shift forward * the group. */ - while (ix < nrec && recs[ixs]->ha == recs[ix]->ha && - xdl_recmatch(recs[ixs]->ptr, recs[ixs]->size, recs[ix]->ptr, recs[ix]->size, flags)) { + while (ix < nrec && recs_match(recs, ixs, ix, flags)) { rchg[ixs++] = 0; rchg[ix++] = 1; From d634d61ed6c5f19948937012b5e3a0ed2d631d3f Mon Sep 17 00:00:00 2001 From: Stefan Beller Date: Tue, 19 Apr 2016 08:21:30 -0700 Subject: [PATCH 2/3] xdiff: implement empty line chunk heuristic In order to produce the smallest possible diff and combine several diff hunks together, we implement a heuristic from GNU Diff which moves diff hunks forward as far as possible when we find common context above and below a diff hunk. This sometimes produces less readable diffs when writing C, Shell, or other programming languages, ie: ... /* + * + * + */ + +/* ... instead of the more readable equivalent of ... +/* + * + * + */ + /* ... Implement the following heuristic to (optionally) produce the desired output. If there are diff chunks which can be shifted around, shift each hunk such that the last common empty line is below the chunk with the rest of the context above. This heuristic appears to resolve the above example and several other common issues without producing significantly weird results. However, as with any heuristic it is not really known whether this will always be more optimal. Thus, it can be disabled via diff.compactionHeuristic. Signed-off-by: Stefan Beller Signed-off-by: Jacob Keller Signed-off-by: Stefan Beller Signed-off-by: Junio C Hamano --- Documentation/diff-config.txt | 5 +++++ Documentation/diff-options.txt | 6 ++++++ diff.c | 11 +++++++++++ xdiff/xdiff.h | 2 ++ xdiff/xdiffi.c | 26 ++++++++++++++++++++++++++ 5 files changed, 50 insertions(+) diff --git a/Documentation/diff-config.txt b/Documentation/diff-config.txt index 6eaa45271c..9bf3e923cf 100644 --- a/Documentation/diff-config.txt +++ b/Documentation/diff-config.txt @@ -166,6 +166,11 @@ diff.tool:: include::mergetools-diff.txt[] +diff.compactionHeuristic:: + Set this option to enable an experimental heuristic that + shifts the hunk boundary in an attempt to make the resulting + patch easier to read. + diff.algorithm:: Choose a diff algorithm. The variants are as follows: + diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index 3ad6404dbc..b513023cd7 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -63,6 +63,12 @@ ifndef::git-format-patch[] Synonym for `-p --raw`. endif::git-format-patch[] +--compaction-heuristic:: +--no-compaction-heuristic:: + These are to help debugging and tuning an experimental + heuristic that shifts the hunk boundary in an attempt to + make the resulting patch easier to read. + --minimal:: Spend extra time to make sure the smallest possible diff is produced. diff --git a/diff.c b/diff.c index f62b7f73d8..05ca3ce08e 100644 --- a/diff.c +++ b/diff.c @@ -25,6 +25,7 @@ #endif static int diff_detect_rename_default; +static int diff_compaction_heuristic = 1; static int diff_rename_limit_default = 400; static int diff_suppress_blank_empty; static int diff_use_color_default = -1; @@ -183,6 +184,10 @@ int git_diff_ui_config(const char *var, const char *value, void *cb) diff_detect_rename_default = git_config_rename(var, value); return 0; } + if (!strcmp(var, "diff.compactionheuristic")) { + diff_compaction_heuristic = git_config_bool(var, value); + return 0; + } if (!strcmp(var, "diff.autorefreshindex")) { diff_auto_refresh_index = git_config_bool(var, value); return 0; @@ -3235,6 +3240,8 @@ void diff_setup(struct diff_options *options) options->use_color = diff_use_color_default; options->detect_rename = diff_detect_rename_default; options->xdl_opts |= diff_algorithm; + if (diff_compaction_heuristic) + DIFF_XDL_SET(options, COMPACTION_HEURISTIC); options->orderfile = diff_order_file_cfg; @@ -3712,6 +3719,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac) DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL); else if (!strcmp(arg, "--ignore-blank-lines")) DIFF_XDL_SET(options, IGNORE_BLANK_LINES); + else if (!strcmp(arg, "--compaction-heuristic")) + DIFF_XDL_SET(options, COMPACTION_HEURISTIC); + else if (!strcmp(arg, "--no-compaction-heuristic")) + DIFF_XDL_CLR(options, COMPACTION_HEURISTIC); else if (!strcmp(arg, "--patience")) options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF); else if (!strcmp(arg, "--histogram")) diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h index c0339919cc..d1dbb2750a 100644 --- a/xdiff/xdiff.h +++ b/xdiff/xdiff.h @@ -41,6 +41,8 @@ extern "C" { #define XDF_IGNORE_BLANK_LINES (1 << 7) +#define XDF_COMPACTION_HEURISTIC (1 << 8) + #define XDL_EMIT_FUNCNAMES (1 << 0) #define XDL_EMIT_COMMON (1 << 1) #define XDL_EMIT_FUNCCONTEXT (1 << 2) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 748eeb9919..b3c6848875 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -400,6 +400,11 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, } +static int is_blank_line(xrecord_t **recs, long ix, long flags) +{ + return xdl_blankline(recs[ix]->ptr, recs[ix]->size, flags); +} + static int recs_match(xrecord_t **recs, long ixs, long ix, long flags) { return (recs[ixs]->ha == recs[ix]->ha && @@ -411,6 +416,7 @@ static int recs_match(xrecord_t **recs, long ixs, long ix, long flags) int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec; char *rchg = xdf->rchg, *rchgo = xdfo->rchg; + unsigned int blank_lines; xrecord_t **recs = xdf->recs; /* @@ -444,6 +450,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { do { grpsiz = ix - ixs; + blank_lines = 0; /* * If the line before the current change group, is equal to @@ -478,6 +485,8 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { * the group. */ while (ix < nrec && recs_match(recs, ixs, ix, flags)) { + blank_lines += is_blank_line(recs, ix, flags); + rchg[ixs++] = 0; rchg[ix++] = 1; @@ -504,6 +513,23 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) { rchg[--ix] = 0; while (rchgo[--ixo]); } + + /* + * If a group can be moved back and forth, see if there is a + * blank line in the moving space. If there is a blank line, + * make sure the last blank line is the end of the group. + * + * As we already shifted the group forward as far as possible + * in the earlier loop, we need to shift it back only if at all. + */ + if ((flags & XDF_COMPACTION_HEURISTIC) && blank_lines) { + while (ixs > 0 && + !is_blank_line(recs, ix - 1, flags) && + recs_match(recs, ixs - 1, ix - 1, flags)) { + rchg[--ixs] = 1; + rchg[--ix] = 0; + } + } } return 0; From 77085a616b0fe0eaba99dfe27247ae733f1570e9 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Mon, 2 May 2016 10:36:36 -0700 Subject: [PATCH 3/3] diff: undocument the compaction heuristic knobs for experimentation It seems that people around here are all happy with the updated heuristics used to decide where the hunks are separated. Let's keep that as the default. Even though we do not expect too much trouble from the difference between the old and the new algorithms, just in case let's leave the implementation of the knobs to turn it off for emergencies. There is no longer need for documenting them, though. Signed-off-by: Junio C Hamano --- Documentation/diff-config.txt | 5 ----- Documentation/diff-options.txt | 6 ------ 2 files changed, 11 deletions(-) diff --git a/Documentation/diff-config.txt b/Documentation/diff-config.txt index 9bf3e923cf..6eaa45271c 100644 --- a/Documentation/diff-config.txt +++ b/Documentation/diff-config.txt @@ -166,11 +166,6 @@ diff.tool:: include::mergetools-diff.txt[] -diff.compactionHeuristic:: - Set this option to enable an experimental heuristic that - shifts the hunk boundary in an attempt to make the resulting - patch easier to read. - diff.algorithm:: Choose a diff algorithm. The variants are as follows: + diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt index b513023cd7..3ad6404dbc 100644 --- a/Documentation/diff-options.txt +++ b/Documentation/diff-options.txt @@ -63,12 +63,6 @@ ifndef::git-format-patch[] Synonym for `-p --raw`. endif::git-format-patch[] ---compaction-heuristic:: ---no-compaction-heuristic:: - These are to help debugging and tuning an experimental - heuristic that shifts the hunk boundary in an attempt to - make the resulting patch easier to read. - --minimal:: Spend extra time to make sure the smallest possible diff is produced.