repos / git-pr

a self-hosted git collaboration server
git clone https://github.com/picosh/git-pr.git

Eric Bower  ยท  2025-02-02

range_diff.go

  1package git
  2
  3import (
  4	"fmt"
  5	"math"
  6	"sort"
  7	"strings"
  8
  9	"github.com/bluekeyes/go-gitdiff/gitdiff"
 10	ha "github.com/oddg/hungarian-algorithm"
 11	"github.com/sergi/go-diff/diffmatchpatch"
 12)
 13
 14var COST_MAX = 65536
 15var RANGE_DIFF_CREATION_FACTOR_DEFAULT = 60
 16
 17type PatchRange struct {
 18	*Patch
 19	Matching int
 20	Diff     string
 21	DiffSize int
 22	Shown    bool
 23}
 24
 25func NewPatchRange(patch *Patch) *PatchRange {
 26	diff := patch.CalcDiff()
 27	return &PatchRange{
 28		Patch:    patch,
 29		Matching: -1,
 30		Diff:     diff,
 31		DiffSize: len(diff),
 32		Shown:    false,
 33	}
 34}
 35
 36type RangeDiffOutput struct {
 37	Header *RangeDiffHeader
 38	Order  int
 39	Files  []*RangeDiffFile
 40	Type   string
 41}
 42
 43func output(a []*PatchRange, b []*PatchRange) []*RangeDiffOutput {
 44	outputs := []*RangeDiffOutput{}
 45	for i, patchA := range a {
 46		if patchA.Matching == -1 {
 47			hdr := NewRangeDiffHeader(patchA, nil, i+1, -1)
 48			outputs = append(
 49				outputs,
 50				&RangeDiffOutput{
 51					Header: hdr,
 52					Type:   "rm",
 53					Order:  i + 1,
 54				},
 55			)
 56		}
 57	}
 58
 59	for j, patchB := range b {
 60		if patchB.Matching == -1 {
 61			hdr := NewRangeDiffHeader(nil, patchB, -1, j+1)
 62			outputs = append(
 63				outputs,
 64				&RangeDiffOutput{
 65					Header: hdr,
 66					Type:   "add",
 67					Order:  j + 1,
 68				},
 69			)
 70			continue
 71		}
 72		patchA := a[patchB.Matching]
 73		if patchB.ContentSha == patchA.ContentSha {
 74			hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
 75			outputs = append(
 76				outputs,
 77				&RangeDiffOutput{
 78					Header: hdr,
 79					Type:   "equal",
 80					Order:  patchA.Matching + 1,
 81				},
 82			)
 83		} else {
 84			hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
 85			diff := outputDiff(patchA, patchB)
 86			outputs = append(
 87				outputs,
 88				&RangeDiffOutput{
 89					Order:  patchA.Matching + 1,
 90					Header: hdr,
 91					Files:  diff,
 92					Type:   "diff",
 93				},
 94			)
 95		}
 96	}
 97	sort.Slice(outputs, func(i, j int) bool {
 98		return outputs[i].Order < outputs[j].Order
 99	})
100	return outputs
101}
102
103type RangeDiffDiff struct {
104	OuterType string
105	InnerType string
106	Text      string
107}
108
109func toRangeDiffDiff(diff []diffmatchpatch.Diff) []RangeDiffDiff {
110	result := []RangeDiffDiff{}
111
112	for _, line := range diff {
113		outer := "equal"
114		switch line.Type {
115		case diffmatchpatch.DiffInsert:
116			outer = "insert"
117		case diffmatchpatch.DiffDelete:
118			outer = "delete"
119		}
120
121		fmtLine := strings.Split(line.Text, "\n")
122		for idx, ln := range fmtLine {
123			text := ln
124			if idx < len(fmtLine)-1 {
125				text = ln + "\n"
126			}
127			st := RangeDiffDiff{
128				Text:      text,
129				OuterType: outer,
130				InnerType: "equal",
131			}
132
133			if strings.HasPrefix(text, "+") {
134				st.InnerType = "insert"
135			} else if strings.HasPrefix(text, "-") {
136				st.InnerType = "delete"
137			}
138
139			result = append(result, st)
140		}
141	}
142
143	return result
144}
145
146func DoDiff(src, dst string) []RangeDiffDiff {
147	dmp := diffmatchpatch.New()
148	wSrc, wDst, warray := dmp.DiffLinesToChars(src, dst)
149	diffs := dmp.DiffMain(wSrc, wDst, false)
150	diffs = dmp.DiffCharsToLines(diffs, warray)
151	return toRangeDiffDiff(diffs)
152}
153
154type RangeDiffFile struct {
155	OldFile *gitdiff.File
156	NewFile *gitdiff.File
157	Diff    []RangeDiffDiff
158}
159
160func outputDiff(patchA, patchB *PatchRange) []*RangeDiffFile {
161	diffs := []*RangeDiffFile{}
162
163	for _, fileA := range patchA.Files {
164		found := false
165		for _, fileB := range patchB.Files {
166			if fileA.NewName == fileB.NewName {
167				found = true
168				// this means both files have been deleted so we should skip
169				if fileA.NewName == "" {
170					continue
171				}
172				strA := ""
173				for _, frag := range fileA.TextFragments {
174					for _, line := range frag.Lines {
175						strA += line.String()
176					}
177				}
178				strB := ""
179				for _, frag := range fileB.TextFragments {
180					for _, line := range frag.Lines {
181						strB += line.String()
182					}
183				}
184				curDiff := DoDiff(strA, strB)
185				hasDiff := false
186				for _, dd := range curDiff {
187					if dd.OuterType != "equal" {
188						hasDiff = true
189						break
190					}
191				}
192				if !hasDiff {
193					continue
194				}
195				fp := &RangeDiffFile{
196					OldFile: fileA,
197					NewFile: fileB,
198					Diff:    curDiff,
199				}
200				diffs = append(diffs, fp)
201			}
202		}
203
204		// find files in patchA but not in patchB
205		if !found {
206			strA := ""
207			for _, frag := range fileA.TextFragments {
208				for _, line := range frag.Lines {
209					strA += line.String()
210				}
211			}
212			fp := &RangeDiffFile{
213				OldFile: fileA,
214				NewFile: nil,
215				Diff:    DoDiff(strA, ""),
216			}
217			diffs = append(diffs, fp)
218		}
219	}
220
221	// find files in patchB not in patchA
222	for _, fileB := range patchB.Files {
223		found := false
224		for _, fileA := range patchA.Files {
225			if fileA.NewName == fileB.NewName {
226				found = true
227				break
228			}
229		}
230
231		if !found {
232			strB := ""
233			for _, frag := range fileB.TextFragments {
234				for _, line := range frag.Lines {
235					strB += line.String()
236				}
237			}
238			fp := &RangeDiffFile{
239				OldFile: nil,
240				NewFile: fileB,
241				Diff:    DoDiff("", strB),
242			}
243			diffs = append(diffs, fp)
244		}
245	}
246
247	return diffs
248}
249
250// RangeDiffHeader is a header combining old and new change pairs.
251type RangeDiffHeader struct {
252	OldIdx       int
253	OldSha       string
254	NewIdx       int
255	NewSha       string
256	Title        string
257	ContentEqual bool
258}
259
260func NewRangeDiffHeader(a *PatchRange, b *PatchRange, aIndex, bIndex int) *RangeDiffHeader {
261	hdr := &RangeDiffHeader{}
262	if a == nil {
263		hdr.NewIdx = bIndex
264		hdr.NewSha = b.CommitSha
265		hdr.Title = b.Title
266		return hdr
267	}
268	if b == nil {
269		hdr.OldIdx = aIndex
270		hdr.OldSha = a.CommitSha
271		hdr.Title = a.Title
272		return hdr
273	}
274
275	hdr.OldIdx = aIndex
276	hdr.NewIdx = bIndex
277	hdr.OldSha = a.CommitSha
278	hdr.NewSha = b.CommitSha
279
280	if a.ContentSha == b.ContentSha {
281		hdr.Title = a.Title
282		hdr.ContentEqual = true
283	} else {
284		hdr.Title = b.Title
285	}
286
287	return hdr
288}
289
290func (hdr *RangeDiffHeader) String() string {
291	if hdr.OldIdx == 0 {
292		return fmt.Sprintf("-:  ------- > %d:  %s %s\n", hdr.NewIdx, truncateSha(hdr.NewSha), hdr.Title)
293	}
294	if hdr.NewIdx == 0 {
295		return fmt.Sprintf("%d:  %s < -:  ------- %s\n", hdr.OldIdx, truncateSha(hdr.OldSha), hdr.Title)
296	}
297	if hdr.ContentEqual {
298		return fmt.Sprintf(
299			"%d:  %s = %d:  %s %s\n",
300			hdr.OldIdx, truncateSha(hdr.OldSha),
301			hdr.NewIdx, truncateSha(hdr.NewSha),
302			hdr.Title,
303		)
304	}
305	return fmt.Sprintf(
306		"%d:  %s ! %d:  %s %s",
307		hdr.OldIdx, truncateSha(hdr.OldSha),
308		hdr.NewIdx, truncateSha(hdr.NewSha),
309		hdr.Title,
310	)
311}
312
313func RangeDiff(a []*Patch, b []*Patch) []*RangeDiffOutput {
314	aPatches := []*PatchRange{}
315	for _, patch := range a {
316		aPatches = append(aPatches, NewPatchRange(patch))
317	}
318	bPatches := []*PatchRange{}
319	for _, patch := range b {
320		bPatches = append(bPatches, NewPatchRange(patch))
321	}
322	findExactMatches(aPatches, bPatches)
323	getCorrespondences(aPatches, bPatches, RANGE_DIFF_CREATION_FACTOR_DEFAULT)
324	return output(aPatches, bPatches)
325}
326
327func RangeDiffToStr(diffs []*RangeDiffOutput) string {
328	output := ""
329	for _, diff := range diffs {
330		output += diff.Header.String()
331		for _, f := range diff.Files {
332			output += fmt.Sprintf("\n@@ %s\n", f.NewFile.NewName)
333			for _, d := range f.Diff {
334				switch d.OuterType {
335				case "equal":
336					output += d.Text
337				case "insert":
338					output += d.Text
339				case "delete":
340					output += d.Text
341				}
342			}
343		}
344	}
345	return output
346}
347
348func findExactMatches(a []*PatchRange, b []*PatchRange) {
349	for i, patchA := range a {
350		for j, patchB := range b {
351			if patchA.ContentSha == patchB.ContentSha {
352				patchA.Matching = j
353				patchB.Matching = i
354			}
355		}
356	}
357}
358
359func createMatrix(rows, cols int) [][]int {
360	mat := make([][]int, rows)
361	for i := range mat {
362		mat[i] = make([]int, cols)
363	}
364	return mat
365}
366
367func diffsize(a *PatchRange, b *PatchRange) int {
368	dmp := diffmatchpatch.New()
369	diffs := dmp.DiffMain(a.Diff, b.Diff, false)
370	return len(diffs)
371}
372
373func getCorrespondences(a []*PatchRange, b []*PatchRange, creationFactor int) {
374	n := len(a) + len(b)
375	cost := createMatrix(n, n)
376
377	for i, patchA := range a {
378		var c int
379		for j, patchB := range b {
380			if patchA.Matching == j {
381				c = 0
382			} else if patchA.Matching == -1 && patchB.Matching == -1 {
383				c = diffsize(patchA, patchB)
384			} else {
385				c = COST_MAX
386			}
387			cost[i][j] = c
388		}
389	}
390
391	for j, patchB := range b {
392		creationCost := (patchB.DiffSize * creationFactor) / 100
393		if patchB.Matching >= 0 {
394			creationCost = math.MaxInt32
395		}
396		for i := len(a); i < n; i++ {
397			cost[i][j] = creationCost
398		}
399	}
400
401	for i := len(a); i < n; i++ {
402		for j := len(b); j < n; j++ {
403			cost[i][j] = 0
404		}
405	}
406
407	assignment, _ := ha.Solve(cost)
408	for i := range a {
409		j := assignment[i]
410		if j >= 0 && j < len(b) {
411			a[i].Matching = j
412			b[j].Matching = i
413		}
414	}
415}