Eric Bower
·
2025-12-13
range_diff.go
1package git
2
3import (
4 "fmt"
5 "math"
6 "sort"
7 "strings"
8
9 "github.com/bluekeyes/go-gitdiff/gitdiff"
10 ha "github.com/oddg/hungarian-algorithm"
11 "github.com/sergi/go-diff/diffmatchpatch"
12)
13
14var (
15 COST_MAX = 65536
16 RANGE_DIFF_CREATION_FACTOR_DEFAULT = 60
17)
18
19type PatchRange struct {
20 *Patch
21 Matching int
22 Diff string
23 DiffSize int
24 Shown bool
25}
26
27func NewPatchRange(patch *Patch) *PatchRange {
28 diff := patch.CalcDiff()
29 return &PatchRange{
30 Patch: patch,
31 Matching: -1,
32 Diff: diff,
33 DiffSize: len(diff),
34 Shown: false,
35 }
36}
37
38type RangeDiffOutput struct {
39 Header *RangeDiffHeader
40 Order int
41 Files []*RangeDiffFile
42 Type string
43}
44
45func output(a []*PatchRange, b []*PatchRange) []*RangeDiffOutput {
46 outputs := []*RangeDiffOutput{}
47 for i, patchA := range a {
48 if patchA.Matching == -1 {
49 hdr := NewRangeDiffHeader(patchA, nil, i+1, -1)
50 files := outputRemovedPatch(patchA)
51 outputs = append(
52 outputs,
53 &RangeDiffOutput{
54 Header: hdr,
55 Type: "rm",
56 Order: i + 1,
57 Files: files,
58 },
59 )
60 }
61 }
62
63 for j, patchB := range b {
64 if patchB.Matching == -1 {
65 hdr := NewRangeDiffHeader(nil, patchB, -1, j+1)
66 files := outputAddedPatch(patchB)
67 outputs = append(
68 outputs,
69 &RangeDiffOutput{
70 Header: hdr,
71 Type: "add",
72 Order: j + 1,
73 Files: files,
74 },
75 )
76 continue
77 }
78 patchA := a[patchB.Matching]
79 if patchB.ContentSha == patchA.ContentSha {
80 hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
81 outputs = append(
82 outputs,
83 &RangeDiffOutput{
84 Header: hdr,
85 Type: "equal",
86 Order: patchA.Matching + 1,
87 },
88 )
89 } else {
90 hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
91 diff := outputDiff(patchA, patchB)
92 outputs = append(
93 outputs,
94 &RangeDiffOutput{
95 Order: patchA.Matching + 1,
96 Header: hdr,
97 Files: diff,
98 Type: "diff",
99 },
100 )
101 }
102 }
103 sort.Slice(outputs, func(i, j int) bool {
104 return outputs[i].Order < outputs[j].Order
105 })
106 return outputs
107}
108
109type RangeDiffDiff struct {
110 OuterType string
111 InnerType string
112 Text string
113}
114
115func toRangeDiffDiff(diff []diffmatchpatch.Diff) []RangeDiffDiff {
116 result := []RangeDiffDiff{}
117
118 for _, line := range diff {
119 outerDiffType := line.Type
120
121 fmtLine := strings.Split(line.Text, "\n")
122 for idx, ln := range fmtLine {
123 text := ln
124 if idx < len(fmtLine)-1 {
125 text = ln + "\n"
126 }
127
128 // Determine inner type based on line prefix (+/-/space)
129 inner := "equal"
130 if strings.HasPrefix(text, "+") {
131 inner = "insert"
132 } else if strings.HasPrefix(text, "-") {
133 inner = "delete"
134 }
135
136 // Determine outer type based on diff result
137 outer := "equal"
138 switch outerDiffType {
139 case diffmatchpatch.DiffInsert:
140 outer = "insert"
141 case diffmatchpatch.DiffDelete:
142 outer = "delete"
143 }
144
145 st := RangeDiffDiff{
146 Text: text,
147 OuterType: outer,
148 InnerType: inner,
149 }
150
151 result = append(result, st)
152 }
153 }
154
155 return result
156}
157
158func DoDiff(src, dst string) []RangeDiffDiff {
159 dmp := diffmatchpatch.New()
160 wSrc, wDst, warray := dmp.DiffLinesToChars(src, dst)
161 diffs := dmp.DiffMain(wSrc, wDst, false)
162 diffs = dmp.DiffCharsToLines(diffs, warray)
163 return toRangeDiffDiff(diffs)
164}
165
166// extractChangedLines extracts only added and deleted lines from a file's fragments,
167// ignoring context lines. This is used for comparing patches where context lines
168// may differ due to rebasing but the actual changes are the same.
169func extractChangedLines(file *gitdiff.File) string {
170 var result strings.Builder
171 for _, frag := range file.TextFragments {
172 for _, line := range frag.Lines {
173 if line.Op == gitdiff.OpAdd || line.Op == gitdiff.OpDelete {
174 result.WriteString(line.String())
175 }
176 }
177 }
178 return result.String()
179}
180
181// extractAllLines extracts all lines (including context) from a file's fragments.
182// This is used for displaying the full diff with context.
183func extractAllLines(file *gitdiff.File) string {
184 var result strings.Builder
185 for _, frag := range file.TextFragments {
186 for _, line := range frag.Lines {
187 result.WriteString(line.String())
188 }
189 }
190 return result.String()
191}
192
193type RangeDiffFile struct {
194 OldFile *gitdiff.File
195 NewFile *gitdiff.File
196 Diff []RangeDiffDiff
197}
198
199func outputDiff(patchA, patchB *PatchRange) []*RangeDiffFile {
200 diffs := []*RangeDiffFile{}
201
202 for _, fileA := range patchA.Files {
203 found := false
204 for _, fileB := range patchB.Files {
205 if fileA.NewName == fileB.NewName {
206 found = true
207 // this means both files have been deleted so we should skip
208 if fileA.NewName == "" {
209 continue
210 }
211 // Compare only +/- lines to determine if there's a meaningful diff
212 changedA := extractChangedLines(fileA)
213 changedB := extractChangedLines(fileB)
214 if changedA == changedB {
215 // No difference in actual changes, skip this file
216 continue
217 }
218 // Use all lines (with context) for display
219 strA := extractAllLines(fileA)
220 strB := extractAllLines(fileB)
221 curDiff := DoDiff(strA, strB)
222 fp := &RangeDiffFile{
223 OldFile: fileA,
224 NewFile: fileB,
225 Diff: curDiff,
226 }
227 diffs = append(diffs, fp)
228 }
229 }
230
231 // find files in patchA but not in patchB
232 if !found {
233 strA := extractAllLines(fileA)
234 fp := &RangeDiffFile{
235 OldFile: fileA,
236 NewFile: nil,
237 Diff: DoDiff(strA, ""),
238 }
239 diffs = append(diffs, fp)
240 }
241 }
242
243 // find files in patchB not in patchA
244 for _, fileB := range patchB.Files {
245 found := false
246 for _, fileA := range patchA.Files {
247 if fileA.NewName == fileB.NewName {
248 found = true
249 break
250 }
251 }
252
253 if !found {
254 strB := extractAllLines(fileB)
255 fp := &RangeDiffFile{
256 OldFile: nil,
257 NewFile: fileB,
258 Diff: DoDiff("", strB),
259 }
260 diffs = append(diffs, fp)
261 }
262 }
263
264 return diffs
265}
266
267func outputAddedPatch(patch *PatchRange) []*RangeDiffFile {
268 diffs := []*RangeDiffFile{}
269 for _, file := range patch.Files {
270 strB := extractAllLines(file)
271 fp := &RangeDiffFile{
272 OldFile: nil,
273 NewFile: file,
274 Diff: DoDiff("", strB),
275 }
276 diffs = append(diffs, fp)
277 }
278 return diffs
279}
280
281func outputRemovedPatch(patch *PatchRange) []*RangeDiffFile {
282 diffs := []*RangeDiffFile{}
283 for _, file := range patch.Files {
284 strA := extractAllLines(file)
285 fp := &RangeDiffFile{
286 OldFile: file,
287 NewFile: nil,
288 Diff: DoDiff(strA, ""),
289 }
290 diffs = append(diffs, fp)
291 }
292 return diffs
293}
294
295// RangeDiffHeader is a header combining old and new change pairs.
296type RangeDiffHeader struct {
297 OldIdx int
298 OldSha string
299 OldAuthorName string
300 OldAuthorEmail string
301 OldTitle string
302 OldBody string
303 NewIdx int
304 NewSha string
305 NewAuthorName string
306 NewAuthorEmail string
307 NewTitle string
308 NewBody string
309 Title string
310 ContentEqual bool
311 AuthorChanged bool
312 TitleChanged bool
313 BodyChanged bool
314}
315
316func NewRangeDiffHeader(a *PatchRange, b *PatchRange, aIndex, bIndex int) *RangeDiffHeader {
317 hdr := &RangeDiffHeader{}
318 if a == nil {
319 hdr.NewIdx = bIndex
320 hdr.NewSha = b.CommitSha
321 hdr.NewAuthorName = b.AuthorName
322 hdr.NewAuthorEmail = b.AuthorEmail
323 hdr.NewTitle = b.Title
324 hdr.NewBody = b.Body
325 hdr.Title = b.Title
326 return hdr
327 }
328 if b == nil {
329 hdr.OldIdx = aIndex
330 hdr.OldSha = a.CommitSha
331 hdr.OldAuthorName = a.AuthorName
332 hdr.OldAuthorEmail = a.AuthorEmail
333 hdr.OldTitle = a.Title
334 hdr.OldBody = a.Body
335 hdr.Title = a.Title
336 return hdr
337 }
338
339 hdr.OldIdx = aIndex
340 hdr.NewIdx = bIndex
341 hdr.OldSha = a.CommitSha
342 hdr.NewSha = b.CommitSha
343 hdr.OldAuthorName = a.AuthorName
344 hdr.OldAuthorEmail = a.AuthorEmail
345 hdr.OldTitle = a.Title
346 hdr.OldBody = a.Body
347 hdr.NewAuthorName = b.AuthorName
348 hdr.NewAuthorEmail = b.AuthorEmail
349 hdr.NewTitle = b.Title
350 hdr.NewBody = b.Body
351
352 // Check what changed
353 hdr.AuthorChanged = a.AuthorName != b.AuthorName || a.AuthorEmail != b.AuthorEmail
354 hdr.TitleChanged = a.Title != b.Title
355 hdr.BodyChanged = a.Body != b.Body
356
357 if a.ContentSha == b.ContentSha {
358 hdr.Title = a.Title
359 hdr.ContentEqual = true
360 } else {
361 hdr.Title = b.Title
362 }
363
364 return hdr
365}
366
367func (hdr *RangeDiffHeader) String() string {
368 if hdr.OldIdx == 0 {
369 return fmt.Sprintf("-: ------- > %d: %s %s\n", hdr.NewIdx, truncateSha(hdr.NewSha), hdr.Title)
370 }
371 if hdr.NewIdx == 0 {
372 return fmt.Sprintf("%d: %s < -: ------- %s\n", hdr.OldIdx, truncateSha(hdr.OldSha), hdr.Title)
373 }
374 if hdr.ContentEqual {
375 return fmt.Sprintf(
376 "%d: %s = %d: %s %s\n",
377 hdr.OldIdx, truncateSha(hdr.OldSha),
378 hdr.NewIdx, truncateSha(hdr.NewSha),
379 hdr.Title,
380 )
381 }
382 return fmt.Sprintf(
383 "%d: %s ! %d: %s %s",
384 hdr.OldIdx, truncateSha(hdr.OldSha),
385 hdr.NewIdx, truncateSha(hdr.NewSha),
386 hdr.Title,
387 )
388}
389
390func RangeDiff(a []*Patch, b []*Patch) []*RangeDiffOutput {
391 aPatches := []*PatchRange{}
392 for _, patch := range a {
393 aPatches = append(aPatches, NewPatchRange(patch))
394 }
395 bPatches := []*PatchRange{}
396 for _, patch := range b {
397 bPatches = append(bPatches, NewPatchRange(patch))
398 }
399 findExactMatches(aPatches, bPatches)
400 getCorrespondences(aPatches, bPatches, RANGE_DIFF_CREATION_FACTOR_DEFAULT)
401 return output(aPatches, bPatches)
402}
403
404func RangeDiffToStr(diffs []*RangeDiffOutput) string {
405 output := ""
406 for _, diff := range diffs {
407 output += diff.Header.String()
408 for _, f := range diff.Files {
409 fileName := ""
410 if f.NewFile != nil {
411 fileName = f.NewFile.NewName
412 } else if f.OldFile != nil {
413 fileName = f.OldFile.NewName
414 }
415 output += fmt.Sprintf("\n@@ %s\n", fileName)
416 for _, d := range f.Diff {
417 switch d.OuterType {
418 case "equal":
419 output += d.Text
420 case "insert":
421 output += d.Text
422 case "delete":
423 output += d.Text
424 }
425 }
426 }
427 }
428 return output
429}
430
431func findExactMatches(a []*PatchRange, b []*PatchRange) {
432 for i, patchA := range a {
433 for j, patchB := range b {
434 if patchA.ContentSha == patchB.ContentSha {
435 patchA.Matching = j
436 patchB.Matching = i
437 }
438 }
439 }
440}
441
442func createMatrix(rows, cols int) [][]int {
443 mat := make([][]int, rows)
444 for i := range mat {
445 mat[i] = make([]int, cols)
446 }
447 return mat
448}
449
450func diffsize(a *PatchRange, b *PatchRange) int {
451 dmp := diffmatchpatch.New()
452 diffs := dmp.DiffMain(a.Diff, b.Diff, false)
453 return len(diffs)
454}
455
456func getCorrespondences(a []*PatchRange, b []*PatchRange, creationFactor int) {
457 n := len(a) + len(b)
458 cost := createMatrix(n, n)
459
460 for i, patchA := range a {
461 var c int
462 for j, patchB := range b {
463 if patchA.Matching == j {
464 c = 0
465 } else if patchA.Matching == -1 && patchB.Matching == -1 {
466 c = diffsize(patchA, patchB)
467 } else {
468 c = COST_MAX
469 }
470 cost[i][j] = c
471 }
472 }
473
474 for j, patchB := range b {
475 creationCost := (patchB.DiffSize * creationFactor) / 100
476 if patchB.Matching >= 0 {
477 creationCost = math.MaxInt32
478 }
479 for i := len(a); i < n; i++ {
480 cost[i][j] = creationCost
481 }
482 }
483
484 for i := len(a); i < n; i++ {
485 for j := len(b); j < n; j++ {
486 cost[i][j] = 0
487 }
488 }
489
490 assignment, _ := ha.Solve(cost)
491 for i := range a {
492 j := assignment[i]
493 if j >= 0 && j < len(b) {
494 a[i].Matching = j
495 b[j].Matching = i
496 }
497 }
498}