Eric Bower
ยท
2025-02-02
range_diff.go
1package git
2
3import (
4 "fmt"
5 "math"
6 "sort"
7 "strings"
8
9 "github.com/bluekeyes/go-gitdiff/gitdiff"
10 ha "github.com/oddg/hungarian-algorithm"
11 "github.com/sergi/go-diff/diffmatchpatch"
12)
13
14var COST_MAX = 65536
15var RANGE_DIFF_CREATION_FACTOR_DEFAULT = 60
16
17type PatchRange struct {
18 *Patch
19 Matching int
20 Diff string
21 DiffSize int
22 Shown bool
23}
24
25func NewPatchRange(patch *Patch) *PatchRange {
26 diff := patch.CalcDiff()
27 return &PatchRange{
28 Patch: patch,
29 Matching: -1,
30 Diff: diff,
31 DiffSize: len(diff),
32 Shown: false,
33 }
34}
35
36type RangeDiffOutput struct {
37 Header *RangeDiffHeader
38 Order int
39 Files []*RangeDiffFile
40 Type string
41}
42
43func output(a []*PatchRange, b []*PatchRange) []*RangeDiffOutput {
44 outputs := []*RangeDiffOutput{}
45 for i, patchA := range a {
46 if patchA.Matching == -1 {
47 hdr := NewRangeDiffHeader(patchA, nil, i+1, -1)
48 outputs = append(
49 outputs,
50 &RangeDiffOutput{
51 Header: hdr,
52 Type: "rm",
53 Order: i + 1,
54 },
55 )
56 }
57 }
58
59 for j, patchB := range b {
60 if patchB.Matching == -1 {
61 hdr := NewRangeDiffHeader(nil, patchB, -1, j+1)
62 outputs = append(
63 outputs,
64 &RangeDiffOutput{
65 Header: hdr,
66 Type: "add",
67 Order: j + 1,
68 },
69 )
70 continue
71 }
72 patchA := a[patchB.Matching]
73 if patchB.ContentSha == patchA.ContentSha {
74 hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
75 outputs = append(
76 outputs,
77 &RangeDiffOutput{
78 Header: hdr,
79 Type: "equal",
80 Order: patchA.Matching + 1,
81 },
82 )
83 } else {
84 hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
85 diff := outputDiff(patchA, patchB)
86 outputs = append(
87 outputs,
88 &RangeDiffOutput{
89 Order: patchA.Matching + 1,
90 Header: hdr,
91 Files: diff,
92 Type: "diff",
93 },
94 )
95 }
96 }
97 sort.Slice(outputs, func(i, j int) bool {
98 return outputs[i].Order < outputs[j].Order
99 })
100 return outputs
101}
102
103type RangeDiffDiff struct {
104 OuterType string
105 InnerType string
106 Text string
107}
108
109func toRangeDiffDiff(diff []diffmatchpatch.Diff) []RangeDiffDiff {
110 result := []RangeDiffDiff{}
111
112 for _, line := range diff {
113 outer := "equal"
114 switch line.Type {
115 case diffmatchpatch.DiffInsert:
116 outer = "insert"
117 case diffmatchpatch.DiffDelete:
118 outer = "delete"
119 }
120
121 fmtLine := strings.Split(line.Text, "\n")
122 for idx, ln := range fmtLine {
123 text := ln
124 if idx < len(fmtLine)-1 {
125 text = ln + "\n"
126 }
127 st := RangeDiffDiff{
128 Text: text,
129 OuterType: outer,
130 InnerType: "equal",
131 }
132
133 if strings.HasPrefix(text, "+") {
134 st.InnerType = "insert"
135 } else if strings.HasPrefix(text, "-") {
136 st.InnerType = "delete"
137 }
138
139 result = append(result, st)
140 }
141 }
142
143 return result
144}
145
146func DoDiff(src, dst string) []RangeDiffDiff {
147 dmp := diffmatchpatch.New()
148 wSrc, wDst, warray := dmp.DiffLinesToChars(src, dst)
149 diffs := dmp.DiffMain(wSrc, wDst, false)
150 diffs = dmp.DiffCharsToLines(diffs, warray)
151 return toRangeDiffDiff(diffs)
152}
153
154type RangeDiffFile struct {
155 OldFile *gitdiff.File
156 NewFile *gitdiff.File
157 Diff []RangeDiffDiff
158}
159
160func outputDiff(patchA, patchB *PatchRange) []*RangeDiffFile {
161 diffs := []*RangeDiffFile{}
162
163 for _, fileA := range patchA.Files {
164 found := false
165 for _, fileB := range patchB.Files {
166 if fileA.NewName == fileB.NewName {
167 found = true
168 // this means both files have been deleted so we should skip
169 if fileA.NewName == "" {
170 continue
171 }
172 strA := ""
173 for _, frag := range fileA.TextFragments {
174 for _, line := range frag.Lines {
175 strA += line.String()
176 }
177 }
178 strB := ""
179 for _, frag := range fileB.TextFragments {
180 for _, line := range frag.Lines {
181 strB += line.String()
182 }
183 }
184 curDiff := DoDiff(strA, strB)
185 hasDiff := false
186 for _, dd := range curDiff {
187 if dd.OuterType != "equal" {
188 hasDiff = true
189 break
190 }
191 }
192 if !hasDiff {
193 continue
194 }
195 fp := &RangeDiffFile{
196 OldFile: fileA,
197 NewFile: fileB,
198 Diff: curDiff,
199 }
200 diffs = append(diffs, fp)
201 }
202 }
203
204 // find files in patchA but not in patchB
205 if !found {
206 strA := ""
207 for _, frag := range fileA.TextFragments {
208 for _, line := range frag.Lines {
209 strA += line.String()
210 }
211 }
212 fp := &RangeDiffFile{
213 OldFile: fileA,
214 NewFile: nil,
215 Diff: DoDiff(strA, ""),
216 }
217 diffs = append(diffs, fp)
218 }
219 }
220
221 // find files in patchB not in patchA
222 for _, fileB := range patchB.Files {
223 found := false
224 for _, fileA := range patchA.Files {
225 if fileA.NewName == fileB.NewName {
226 found = true
227 break
228 }
229 }
230
231 if !found {
232 strB := ""
233 for _, frag := range fileB.TextFragments {
234 for _, line := range frag.Lines {
235 strB += line.String()
236 }
237 }
238 fp := &RangeDiffFile{
239 OldFile: nil,
240 NewFile: fileB,
241 Diff: DoDiff("", strB),
242 }
243 diffs = append(diffs, fp)
244 }
245 }
246
247 return diffs
248}
249
250// RangeDiffHeader is a header combining old and new change pairs.
251type RangeDiffHeader struct {
252 OldIdx int
253 OldSha string
254 NewIdx int
255 NewSha string
256 Title string
257 ContentEqual bool
258}
259
260func NewRangeDiffHeader(a *PatchRange, b *PatchRange, aIndex, bIndex int) *RangeDiffHeader {
261 hdr := &RangeDiffHeader{}
262 if a == nil {
263 hdr.NewIdx = bIndex
264 hdr.NewSha = b.CommitSha
265 hdr.Title = b.Title
266 return hdr
267 }
268 if b == nil {
269 hdr.OldIdx = aIndex
270 hdr.OldSha = a.CommitSha
271 hdr.Title = a.Title
272 return hdr
273 }
274
275 hdr.OldIdx = aIndex
276 hdr.NewIdx = bIndex
277 hdr.OldSha = a.CommitSha
278 hdr.NewSha = b.CommitSha
279
280 if a.ContentSha == b.ContentSha {
281 hdr.Title = a.Title
282 hdr.ContentEqual = true
283 } else {
284 hdr.Title = b.Title
285 }
286
287 return hdr
288}
289
290func (hdr *RangeDiffHeader) String() string {
291 if hdr.OldIdx == 0 {
292 return fmt.Sprintf("-: ------- > %d: %s %s\n", hdr.NewIdx, truncateSha(hdr.NewSha), hdr.Title)
293 }
294 if hdr.NewIdx == 0 {
295 return fmt.Sprintf("%d: %s < -: ------- %s\n", hdr.OldIdx, truncateSha(hdr.OldSha), hdr.Title)
296 }
297 if hdr.ContentEqual {
298 return fmt.Sprintf(
299 "%d: %s = %d: %s %s\n",
300 hdr.OldIdx, truncateSha(hdr.OldSha),
301 hdr.NewIdx, truncateSha(hdr.NewSha),
302 hdr.Title,
303 )
304 }
305 return fmt.Sprintf(
306 "%d: %s ! %d: %s %s",
307 hdr.OldIdx, truncateSha(hdr.OldSha),
308 hdr.NewIdx, truncateSha(hdr.NewSha),
309 hdr.Title,
310 )
311}
312
313func RangeDiff(a []*Patch, b []*Patch) []*RangeDiffOutput {
314 aPatches := []*PatchRange{}
315 for _, patch := range a {
316 aPatches = append(aPatches, NewPatchRange(patch))
317 }
318 bPatches := []*PatchRange{}
319 for _, patch := range b {
320 bPatches = append(bPatches, NewPatchRange(patch))
321 }
322 findExactMatches(aPatches, bPatches)
323 getCorrespondences(aPatches, bPatches, RANGE_DIFF_CREATION_FACTOR_DEFAULT)
324 return output(aPatches, bPatches)
325}
326
327func RangeDiffToStr(diffs []*RangeDiffOutput) string {
328 output := ""
329 for _, diff := range diffs {
330 output += diff.Header.String()
331 for _, f := range diff.Files {
332 output += fmt.Sprintf("\n@@ %s\n", f.NewFile.NewName)
333 for _, d := range f.Diff {
334 switch d.OuterType {
335 case "equal":
336 output += d.Text
337 case "insert":
338 output += d.Text
339 case "delete":
340 output += d.Text
341 }
342 }
343 }
344 }
345 return output
346}
347
348func findExactMatches(a []*PatchRange, b []*PatchRange) {
349 for i, patchA := range a {
350 for j, patchB := range b {
351 if patchA.ContentSha == patchB.ContentSha {
352 patchA.Matching = j
353 patchB.Matching = i
354 }
355 }
356 }
357}
358
359func createMatrix(rows, cols int) [][]int {
360 mat := make([][]int, rows)
361 for i := range mat {
362 mat[i] = make([]int, cols)
363 }
364 return mat
365}
366
367func diffsize(a *PatchRange, b *PatchRange) int {
368 dmp := diffmatchpatch.New()
369 diffs := dmp.DiffMain(a.Diff, b.Diff, false)
370 return len(diffs)
371}
372
373func getCorrespondences(a []*PatchRange, b []*PatchRange, creationFactor int) {
374 n := len(a) + len(b)
375 cost := createMatrix(n, n)
376
377 for i, patchA := range a {
378 var c int
379 for j, patchB := range b {
380 if patchA.Matching == j {
381 c = 0
382 } else if patchA.Matching == -1 && patchB.Matching == -1 {
383 c = diffsize(patchA, patchB)
384 } else {
385 c = COST_MAX
386 }
387 cost[i][j] = c
388 }
389 }
390
391 for j, patchB := range b {
392 creationCost := (patchB.DiffSize * creationFactor) / 100
393 if patchB.Matching >= 0 {
394 creationCost = math.MaxInt32
395 }
396 for i := len(a); i < n; i++ {
397 cost[i][j] = creationCost
398 }
399 }
400
401 for i := len(a); i < n; i++ {
402 for j := len(b); j < n; j++ {
403 cost[i][j] = 0
404 }
405 }
406
407 assignment, _ := ha.Solve(cost)
408 for i := range a {
409 j := assignment[i]
410 if j >= 0 && j < len(b) {
411 a[i].Matching = j
412 b[j].Matching = i
413 }
414 }
415}