jolheiser
·
2025-08-21
range_diff.go
1package git
2
3import (
4 "fmt"
5 "math"
6 "sort"
7 "strings"
8
9 "github.com/bluekeyes/go-gitdiff/gitdiff"
10 ha "github.com/oddg/hungarian-algorithm"
11 "github.com/sergi/go-diff/diffmatchpatch"
12)
13
14var (
15 COST_MAX = 65536
16 RANGE_DIFF_CREATION_FACTOR_DEFAULT = 60
17)
18
19type PatchRange struct {
20 *Patch
21 Matching int
22 Diff string
23 DiffSize int
24 Shown bool
25}
26
27func NewPatchRange(patch *Patch) *PatchRange {
28 diff := patch.CalcDiff()
29 return &PatchRange{
30 Patch: patch,
31 Matching: -1,
32 Diff: diff,
33 DiffSize: len(diff),
34 Shown: false,
35 }
36}
37
38type RangeDiffOutput struct {
39 Header *RangeDiffHeader
40 Order int
41 Files []*RangeDiffFile
42 Type string
43}
44
45func output(a []*PatchRange, b []*PatchRange) []*RangeDiffOutput {
46 outputs := []*RangeDiffOutput{}
47 for i, patchA := range a {
48 if patchA.Matching == -1 {
49 hdr := NewRangeDiffHeader(patchA, nil, i+1, -1)
50 outputs = append(
51 outputs,
52 &RangeDiffOutput{
53 Header: hdr,
54 Type: "rm",
55 Order: i + 1,
56 },
57 )
58 }
59 }
60
61 for j, patchB := range b {
62 if patchB.Matching == -1 {
63 hdr := NewRangeDiffHeader(nil, patchB, -1, j+1)
64 outputs = append(
65 outputs,
66 &RangeDiffOutput{
67 Header: hdr,
68 Type: "add",
69 Order: j + 1,
70 },
71 )
72 continue
73 }
74 patchA := a[patchB.Matching]
75 if patchB.ContentSha == patchA.ContentSha {
76 hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
77 outputs = append(
78 outputs,
79 &RangeDiffOutput{
80 Header: hdr,
81 Type: "equal",
82 Order: patchA.Matching + 1,
83 },
84 )
85 } else {
86 hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
87 diff := outputDiff(patchA, patchB)
88 outputs = append(
89 outputs,
90 &RangeDiffOutput{
91 Order: patchA.Matching + 1,
92 Header: hdr,
93 Files: diff,
94 Type: "diff",
95 },
96 )
97 }
98 }
99 sort.Slice(outputs, func(i, j int) bool {
100 return outputs[i].Order < outputs[j].Order
101 })
102 return outputs
103}
104
105type RangeDiffDiff struct {
106 OuterType string
107 InnerType string
108 Text string
109}
110
111func toRangeDiffDiff(diff []diffmatchpatch.Diff) []RangeDiffDiff {
112 result := []RangeDiffDiff{}
113
114 for _, line := range diff {
115 outer := "equal"
116 switch line.Type {
117 case diffmatchpatch.DiffInsert:
118 outer = "insert"
119 case diffmatchpatch.DiffDelete:
120 outer = "delete"
121 }
122
123 fmtLine := strings.Split(line.Text, "\n")
124 for idx, ln := range fmtLine {
125 text := ln
126 if idx < len(fmtLine)-1 {
127 text = ln + "\n"
128 }
129 st := RangeDiffDiff{
130 Text: text,
131 OuterType: outer,
132 InnerType: "equal",
133 }
134
135 if strings.HasPrefix(text, "+") {
136 st.InnerType = "insert"
137 } else if strings.HasPrefix(text, "-") {
138 st.InnerType = "delete"
139 }
140
141 result = append(result, st)
142 }
143 }
144
145 return result
146}
147
148func DoDiff(src, dst string) []RangeDiffDiff {
149 dmp := diffmatchpatch.New()
150 wSrc, wDst, warray := dmp.DiffLinesToChars(src, dst)
151 diffs := dmp.DiffMain(wSrc, wDst, false)
152 diffs = dmp.DiffCharsToLines(diffs, warray)
153 return toRangeDiffDiff(diffs)
154}
155
156type RangeDiffFile struct {
157 OldFile *gitdiff.File
158 NewFile *gitdiff.File
159 Diff []RangeDiffDiff
160}
161
162func outputDiff(patchA, patchB *PatchRange) []*RangeDiffFile {
163 diffs := []*RangeDiffFile{}
164
165 for _, fileA := range patchA.Files {
166 found := false
167 for _, fileB := range patchB.Files {
168 if fileA.NewName == fileB.NewName {
169 found = true
170 // this means both files have been deleted so we should skip
171 if fileA.NewName == "" {
172 continue
173 }
174 strA := ""
175 for _, frag := range fileA.TextFragments {
176 for _, line := range frag.Lines {
177 strA += line.String()
178 }
179 }
180 strB := ""
181 for _, frag := range fileB.TextFragments {
182 for _, line := range frag.Lines {
183 strB += line.String()
184 }
185 }
186 curDiff := DoDiff(strA, strB)
187 hasDiff := false
188 for _, dd := range curDiff {
189 if dd.OuterType != "equal" {
190 hasDiff = true
191 break
192 }
193 }
194 if !hasDiff {
195 continue
196 }
197 fp := &RangeDiffFile{
198 OldFile: fileA,
199 NewFile: fileB,
200 Diff: curDiff,
201 }
202 diffs = append(diffs, fp)
203 }
204 }
205
206 // find files in patchA but not in patchB
207 if !found {
208 strA := ""
209 for _, frag := range fileA.TextFragments {
210 for _, line := range frag.Lines {
211 strA += line.String()
212 }
213 }
214 fp := &RangeDiffFile{
215 OldFile: fileA,
216 NewFile: nil,
217 Diff: DoDiff(strA, ""),
218 }
219 diffs = append(diffs, fp)
220 }
221 }
222
223 // find files in patchB not in patchA
224 for _, fileB := range patchB.Files {
225 found := false
226 for _, fileA := range patchA.Files {
227 if fileA.NewName == fileB.NewName {
228 found = true
229 break
230 }
231 }
232
233 if !found {
234 strB := ""
235 for _, frag := range fileB.TextFragments {
236 for _, line := range frag.Lines {
237 strB += line.String()
238 }
239 }
240 fp := &RangeDiffFile{
241 OldFile: nil,
242 NewFile: fileB,
243 Diff: DoDiff("", strB),
244 }
245 diffs = append(diffs, fp)
246 }
247 }
248
249 return diffs
250}
251
252// RangeDiffHeader is a header combining old and new change pairs.
253type RangeDiffHeader struct {
254 OldIdx int
255 OldSha string
256 NewIdx int
257 NewSha string
258 Title string
259 ContentEqual bool
260}
261
262func NewRangeDiffHeader(a *PatchRange, b *PatchRange, aIndex, bIndex int) *RangeDiffHeader {
263 hdr := &RangeDiffHeader{}
264 if a == nil {
265 hdr.NewIdx = bIndex
266 hdr.NewSha = b.CommitSha
267 hdr.Title = b.Title
268 return hdr
269 }
270 if b == nil {
271 hdr.OldIdx = aIndex
272 hdr.OldSha = a.CommitSha
273 hdr.Title = a.Title
274 return hdr
275 }
276
277 hdr.OldIdx = aIndex
278 hdr.NewIdx = bIndex
279 hdr.OldSha = a.CommitSha
280 hdr.NewSha = b.CommitSha
281
282 if a.ContentSha == b.ContentSha {
283 hdr.Title = a.Title
284 hdr.ContentEqual = true
285 } else {
286 hdr.Title = b.Title
287 }
288
289 return hdr
290}
291
292func (hdr *RangeDiffHeader) String() string {
293 if hdr.OldIdx == 0 {
294 return fmt.Sprintf("-: ------- > %d: %s %s\n", hdr.NewIdx, truncateSha(hdr.NewSha), hdr.Title)
295 }
296 if hdr.NewIdx == 0 {
297 return fmt.Sprintf("%d: %s < -: ------- %s\n", hdr.OldIdx, truncateSha(hdr.OldSha), hdr.Title)
298 }
299 if hdr.ContentEqual {
300 return fmt.Sprintf(
301 "%d: %s = %d: %s %s\n",
302 hdr.OldIdx, truncateSha(hdr.OldSha),
303 hdr.NewIdx, truncateSha(hdr.NewSha),
304 hdr.Title,
305 )
306 }
307 return fmt.Sprintf(
308 "%d: %s ! %d: %s %s",
309 hdr.OldIdx, truncateSha(hdr.OldSha),
310 hdr.NewIdx, truncateSha(hdr.NewSha),
311 hdr.Title,
312 )
313}
314
315func RangeDiff(a []*Patch, b []*Patch) []*RangeDiffOutput {
316 aPatches := []*PatchRange{}
317 for _, patch := range a {
318 aPatches = append(aPatches, NewPatchRange(patch))
319 }
320 bPatches := []*PatchRange{}
321 for _, patch := range b {
322 bPatches = append(bPatches, NewPatchRange(patch))
323 }
324 findExactMatches(aPatches, bPatches)
325 getCorrespondences(aPatches, bPatches, RANGE_DIFF_CREATION_FACTOR_DEFAULT)
326 return output(aPatches, bPatches)
327}
328
329func RangeDiffToStr(diffs []*RangeDiffOutput) string {
330 output := ""
331 for _, diff := range diffs {
332 output += diff.Header.String()
333 for _, f := range diff.Files {
334 output += fmt.Sprintf("\n@@ %s\n", f.NewFile.NewName)
335 for _, d := range f.Diff {
336 switch d.OuterType {
337 case "equal":
338 output += d.Text
339 case "insert":
340 output += d.Text
341 case "delete":
342 output += d.Text
343 }
344 }
345 }
346 }
347 return output
348}
349
350func findExactMatches(a []*PatchRange, b []*PatchRange) {
351 for i, patchA := range a {
352 for j, patchB := range b {
353 if patchA.ContentSha == patchB.ContentSha {
354 patchA.Matching = j
355 patchB.Matching = i
356 }
357 }
358 }
359}
360
361func createMatrix(rows, cols int) [][]int {
362 mat := make([][]int, rows)
363 for i := range mat {
364 mat[i] = make([]int, cols)
365 }
366 return mat
367}
368
369func diffsize(a *PatchRange, b *PatchRange) int {
370 dmp := diffmatchpatch.New()
371 diffs := dmp.DiffMain(a.Diff, b.Diff, false)
372 return len(diffs)
373}
374
375func getCorrespondences(a []*PatchRange, b []*PatchRange, creationFactor int) {
376 n := len(a) + len(b)
377 cost := createMatrix(n, n)
378
379 for i, patchA := range a {
380 var c int
381 for j, patchB := range b {
382 if patchA.Matching == j {
383 c = 0
384 } else if patchA.Matching == -1 && patchB.Matching == -1 {
385 c = diffsize(patchA, patchB)
386 } else {
387 c = COST_MAX
388 }
389 cost[i][j] = c
390 }
391 }
392
393 for j, patchB := range b {
394 creationCost := (patchB.DiffSize * creationFactor) / 100
395 if patchB.Matching >= 0 {
396 creationCost = math.MaxInt32
397 }
398 for i := len(a); i < n; i++ {
399 cost[i][j] = creationCost
400 }
401 }
402
403 for i := len(a); i < n; i++ {
404 for j := len(b); j < n; j++ {
405 cost[i][j] = 0
406 }
407 }
408
409 assignment, _ := ha.Solve(cost)
410 for i := range a {
411 j := assignment[i]
412 if j >= 0 && j < len(b) {
413 a[i].Matching = j
414 b[j].Matching = i
415 }
416 }
417}