repos / git-pr

a self-hosted git collaboration server
git clone https://github.com/picosh/git-pr.git

Eric Bower  ·  2025-12-13

range_diff.go

  1package git
  2
  3import (
  4	"fmt"
  5	"math"
  6	"sort"
  7	"strings"
  8
  9	"github.com/bluekeyes/go-gitdiff/gitdiff"
 10	ha "github.com/oddg/hungarian-algorithm"
 11	"github.com/sergi/go-diff/diffmatchpatch"
 12)
 13
 14var (
 15	COST_MAX                           = 65536
 16	RANGE_DIFF_CREATION_FACTOR_DEFAULT = 60
 17)
 18
 19type PatchRange struct {
 20	*Patch
 21	Matching int
 22	Diff     string
 23	DiffSize int
 24	Shown    bool
 25}
 26
 27func NewPatchRange(patch *Patch) *PatchRange {
 28	diff := patch.CalcDiff()
 29	return &PatchRange{
 30		Patch:    patch,
 31		Matching: -1,
 32		Diff:     diff,
 33		DiffSize: len(diff),
 34		Shown:    false,
 35	}
 36}
 37
 38type RangeDiffOutput struct {
 39	Header *RangeDiffHeader
 40	Order  int
 41	Files  []*RangeDiffFile
 42	Type   string
 43}
 44
 45func output(a []*PatchRange, b []*PatchRange) []*RangeDiffOutput {
 46	outputs := []*RangeDiffOutput{}
 47	for i, patchA := range a {
 48		if patchA.Matching == -1 {
 49			hdr := NewRangeDiffHeader(patchA, nil, i+1, -1)
 50			files := outputRemovedPatch(patchA)
 51			outputs = append(
 52				outputs,
 53				&RangeDiffOutput{
 54					Header: hdr,
 55					Type:   "rm",
 56					Order:  i + 1,
 57					Files:  files,
 58				},
 59			)
 60		}
 61	}
 62
 63	for j, patchB := range b {
 64		if patchB.Matching == -1 {
 65			hdr := NewRangeDiffHeader(nil, patchB, -1, j+1)
 66			files := outputAddedPatch(patchB)
 67			outputs = append(
 68				outputs,
 69				&RangeDiffOutput{
 70					Header: hdr,
 71					Type:   "add",
 72					Order:  j + 1,
 73					Files:  files,
 74				},
 75			)
 76			continue
 77		}
 78		patchA := a[patchB.Matching]
 79		if patchB.ContentSha == patchA.ContentSha {
 80			hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
 81			outputs = append(
 82				outputs,
 83				&RangeDiffOutput{
 84					Header: hdr,
 85					Type:   "equal",
 86					Order:  patchA.Matching + 1,
 87				},
 88			)
 89		} else {
 90			hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
 91			diff := outputDiff(patchA, patchB)
 92			outputs = append(
 93				outputs,
 94				&RangeDiffOutput{
 95					Order:  patchA.Matching + 1,
 96					Header: hdr,
 97					Files:  diff,
 98					Type:   "diff",
 99				},
100			)
101		}
102	}
103	sort.Slice(outputs, func(i, j int) bool {
104		return outputs[i].Order < outputs[j].Order
105	})
106	return outputs
107}
108
109type RangeDiffDiff struct {
110	OuterType string
111	InnerType string
112	Text      string
113}
114
115func toRangeDiffDiff(diff []diffmatchpatch.Diff) []RangeDiffDiff {
116	result := []RangeDiffDiff{}
117
118	for _, line := range diff {
119		outerDiffType := line.Type
120
121		fmtLine := strings.Split(line.Text, "\n")
122		for idx, ln := range fmtLine {
123			text := ln
124			if idx < len(fmtLine)-1 {
125				text = ln + "\n"
126			}
127
128			// Determine inner type based on line prefix (+/-/space)
129			inner := "equal"
130			if strings.HasPrefix(text, "+") {
131				inner = "insert"
132			} else if strings.HasPrefix(text, "-") {
133				inner = "delete"
134			}
135
136			// Determine outer type based on diff result
137			outer := "equal"
138			switch outerDiffType {
139			case diffmatchpatch.DiffInsert:
140				outer = "insert"
141			case diffmatchpatch.DiffDelete:
142				outer = "delete"
143			}
144
145			st := RangeDiffDiff{
146				Text:      text,
147				OuterType: outer,
148				InnerType: inner,
149			}
150
151			result = append(result, st)
152		}
153	}
154
155	return result
156}
157
158func DoDiff(src, dst string) []RangeDiffDiff {
159	dmp := diffmatchpatch.New()
160	wSrc, wDst, warray := dmp.DiffLinesToChars(src, dst)
161	diffs := dmp.DiffMain(wSrc, wDst, false)
162	diffs = dmp.DiffCharsToLines(diffs, warray)
163	return toRangeDiffDiff(diffs)
164}
165
166// extractChangedLines extracts only added and deleted lines from a file's fragments,
167// ignoring context lines. This is used for comparing patches where context lines
168// may differ due to rebasing but the actual changes are the same.
169func extractChangedLines(file *gitdiff.File) string {
170	var result strings.Builder
171	for _, frag := range file.TextFragments {
172		for _, line := range frag.Lines {
173			if line.Op == gitdiff.OpAdd || line.Op == gitdiff.OpDelete {
174				result.WriteString(line.String())
175			}
176		}
177	}
178	return result.String()
179}
180
181// extractAllLines extracts all lines (including context) from a file's fragments.
182// This is used for displaying the full diff with context.
183func extractAllLines(file *gitdiff.File) string {
184	var result strings.Builder
185	for _, frag := range file.TextFragments {
186		for _, line := range frag.Lines {
187			result.WriteString(line.String())
188		}
189	}
190	return result.String()
191}
192
193type RangeDiffFile struct {
194	OldFile *gitdiff.File
195	NewFile *gitdiff.File
196	Diff    []RangeDiffDiff
197}
198
199func outputDiff(patchA, patchB *PatchRange) []*RangeDiffFile {
200	diffs := []*RangeDiffFile{}
201
202	for _, fileA := range patchA.Files {
203		found := false
204		for _, fileB := range patchB.Files {
205			if fileA.NewName == fileB.NewName {
206				found = true
207				// this means both files have been deleted so we should skip
208				if fileA.NewName == "" {
209					continue
210				}
211				// Compare only +/- lines to determine if there's a meaningful diff
212				changedA := extractChangedLines(fileA)
213				changedB := extractChangedLines(fileB)
214				if changedA == changedB {
215					// No difference in actual changes, skip this file
216					continue
217				}
218				// Use all lines (with context) for display
219				strA := extractAllLines(fileA)
220				strB := extractAllLines(fileB)
221				curDiff := DoDiff(strA, strB)
222				fp := &RangeDiffFile{
223					OldFile: fileA,
224					NewFile: fileB,
225					Diff:    curDiff,
226				}
227				diffs = append(diffs, fp)
228			}
229		}
230
231		// find files in patchA but not in patchB
232		if !found {
233			strA := extractAllLines(fileA)
234			fp := &RangeDiffFile{
235				OldFile: fileA,
236				NewFile: nil,
237				Diff:    DoDiff(strA, ""),
238			}
239			diffs = append(diffs, fp)
240		}
241	}
242
243	// find files in patchB not in patchA
244	for _, fileB := range patchB.Files {
245		found := false
246		for _, fileA := range patchA.Files {
247			if fileA.NewName == fileB.NewName {
248				found = true
249				break
250			}
251		}
252
253		if !found {
254			strB := extractAllLines(fileB)
255			fp := &RangeDiffFile{
256				OldFile: nil,
257				NewFile: fileB,
258				Diff:    DoDiff("", strB),
259			}
260			diffs = append(diffs, fp)
261		}
262	}
263
264	return diffs
265}
266
267func outputAddedPatch(patch *PatchRange) []*RangeDiffFile {
268	diffs := []*RangeDiffFile{}
269	for _, file := range patch.Files {
270		strB := extractAllLines(file)
271		fp := &RangeDiffFile{
272			OldFile: nil,
273			NewFile: file,
274			Diff:    DoDiff("", strB),
275		}
276		diffs = append(diffs, fp)
277	}
278	return diffs
279}
280
281func outputRemovedPatch(patch *PatchRange) []*RangeDiffFile {
282	diffs := []*RangeDiffFile{}
283	for _, file := range patch.Files {
284		strA := extractAllLines(file)
285		fp := &RangeDiffFile{
286			OldFile: file,
287			NewFile: nil,
288			Diff:    DoDiff(strA, ""),
289		}
290		diffs = append(diffs, fp)
291	}
292	return diffs
293}
294
295// RangeDiffHeader is a header combining old and new change pairs.
296type RangeDiffHeader struct {
297	OldIdx         int
298	OldSha         string
299	OldAuthorName  string
300	OldAuthorEmail string
301	OldTitle       string
302	OldBody        string
303	NewIdx         int
304	NewSha         string
305	NewAuthorName  string
306	NewAuthorEmail string
307	NewTitle       string
308	NewBody        string
309	Title          string
310	ContentEqual   bool
311	AuthorChanged  bool
312	TitleChanged   bool
313	BodyChanged    bool
314}
315
316func NewRangeDiffHeader(a *PatchRange, b *PatchRange, aIndex, bIndex int) *RangeDiffHeader {
317	hdr := &RangeDiffHeader{}
318	if a == nil {
319		hdr.NewIdx = bIndex
320		hdr.NewSha = b.CommitSha
321		hdr.NewAuthorName = b.AuthorName
322		hdr.NewAuthorEmail = b.AuthorEmail
323		hdr.NewTitle = b.Title
324		hdr.NewBody = b.Body
325		hdr.Title = b.Title
326		return hdr
327	}
328	if b == nil {
329		hdr.OldIdx = aIndex
330		hdr.OldSha = a.CommitSha
331		hdr.OldAuthorName = a.AuthorName
332		hdr.OldAuthorEmail = a.AuthorEmail
333		hdr.OldTitle = a.Title
334		hdr.OldBody = a.Body
335		hdr.Title = a.Title
336		return hdr
337	}
338
339	hdr.OldIdx = aIndex
340	hdr.NewIdx = bIndex
341	hdr.OldSha = a.CommitSha
342	hdr.NewSha = b.CommitSha
343	hdr.OldAuthorName = a.AuthorName
344	hdr.OldAuthorEmail = a.AuthorEmail
345	hdr.OldTitle = a.Title
346	hdr.OldBody = a.Body
347	hdr.NewAuthorName = b.AuthorName
348	hdr.NewAuthorEmail = b.AuthorEmail
349	hdr.NewTitle = b.Title
350	hdr.NewBody = b.Body
351
352	// Check what changed
353	hdr.AuthorChanged = a.AuthorName != b.AuthorName || a.AuthorEmail != b.AuthorEmail
354	hdr.TitleChanged = a.Title != b.Title
355	hdr.BodyChanged = a.Body != b.Body
356
357	if a.ContentSha == b.ContentSha {
358		hdr.Title = a.Title
359		hdr.ContentEqual = true
360	} else {
361		hdr.Title = b.Title
362	}
363
364	return hdr
365}
366
367func (hdr *RangeDiffHeader) String() string {
368	if hdr.OldIdx == 0 {
369		return fmt.Sprintf("-:  ------- > %d:  %s %s\n", hdr.NewIdx, truncateSha(hdr.NewSha), hdr.Title)
370	}
371	if hdr.NewIdx == 0 {
372		return fmt.Sprintf("%d:  %s < -:  ------- %s\n", hdr.OldIdx, truncateSha(hdr.OldSha), hdr.Title)
373	}
374	if hdr.ContentEqual {
375		return fmt.Sprintf(
376			"%d:  %s = %d:  %s %s\n",
377			hdr.OldIdx, truncateSha(hdr.OldSha),
378			hdr.NewIdx, truncateSha(hdr.NewSha),
379			hdr.Title,
380		)
381	}
382	return fmt.Sprintf(
383		"%d:  %s ! %d:  %s %s",
384		hdr.OldIdx, truncateSha(hdr.OldSha),
385		hdr.NewIdx, truncateSha(hdr.NewSha),
386		hdr.Title,
387	)
388}
389
390func RangeDiff(a []*Patch, b []*Patch) []*RangeDiffOutput {
391	aPatches := []*PatchRange{}
392	for _, patch := range a {
393		aPatches = append(aPatches, NewPatchRange(patch))
394	}
395	bPatches := []*PatchRange{}
396	for _, patch := range b {
397		bPatches = append(bPatches, NewPatchRange(patch))
398	}
399	findExactMatches(aPatches, bPatches)
400	getCorrespondences(aPatches, bPatches, RANGE_DIFF_CREATION_FACTOR_DEFAULT)
401	return output(aPatches, bPatches)
402}
403
404func RangeDiffToStr(diffs []*RangeDiffOutput) string {
405	output := ""
406	for _, diff := range diffs {
407		output += diff.Header.String()
408		for _, f := range diff.Files {
409			fileName := ""
410			if f.NewFile != nil {
411				fileName = f.NewFile.NewName
412			} else if f.OldFile != nil {
413				fileName = f.OldFile.NewName
414			}
415			output += fmt.Sprintf("\n@@ %s\n", fileName)
416			for _, d := range f.Diff {
417				switch d.OuterType {
418				case "equal":
419					output += d.Text
420				case "insert":
421					output += d.Text
422				case "delete":
423					output += d.Text
424				}
425			}
426		}
427	}
428	return output
429}
430
431func findExactMatches(a []*PatchRange, b []*PatchRange) {
432	for i, patchA := range a {
433		for j, patchB := range b {
434			if patchA.ContentSha == patchB.ContentSha {
435				patchA.Matching = j
436				patchB.Matching = i
437			}
438		}
439	}
440}
441
442func createMatrix(rows, cols int) [][]int {
443	mat := make([][]int, rows)
444	for i := range mat {
445		mat[i] = make([]int, cols)
446	}
447	return mat
448}
449
450func diffsize(a *PatchRange, b *PatchRange) int {
451	dmp := diffmatchpatch.New()
452	diffs := dmp.DiffMain(a.Diff, b.Diff, false)
453	return len(diffs)
454}
455
456func getCorrespondences(a []*PatchRange, b []*PatchRange, creationFactor int) {
457	n := len(a) + len(b)
458	cost := createMatrix(n, n)
459
460	for i, patchA := range a {
461		var c int
462		for j, patchB := range b {
463			if patchA.Matching == j {
464				c = 0
465			} else if patchA.Matching == -1 && patchB.Matching == -1 {
466				c = diffsize(patchA, patchB)
467			} else {
468				c = COST_MAX
469			}
470			cost[i][j] = c
471		}
472	}
473
474	for j, patchB := range b {
475		creationCost := (patchB.DiffSize * creationFactor) / 100
476		if patchB.Matching >= 0 {
477			creationCost = math.MaxInt32
478		}
479		for i := len(a); i < n; i++ {
480			cost[i][j] = creationCost
481		}
482	}
483
484	for i := len(a); i < n; i++ {
485		for j := len(b); j < n; j++ {
486			cost[i][j] = 0
487		}
488	}
489
490	assignment, _ := ha.Solve(cost)
491	for i := range a {
492		j := assignment[i]
493		if j >= 0 && j < len(b) {
494			a[i].Matching = j
495			b[j].Matching = i
496		}
497	}
498}