repos / git-pr

a self-hosted git collaboration server
git clone https://github.com/picosh/git-pr.git

jolheiser  ·  2025-08-21

range_diff.go

  1package git
  2
  3import (
  4	"fmt"
  5	"math"
  6	"sort"
  7	"strings"
  8
  9	"github.com/bluekeyes/go-gitdiff/gitdiff"
 10	ha "github.com/oddg/hungarian-algorithm"
 11	"github.com/sergi/go-diff/diffmatchpatch"
 12)
 13
 14var (
 15	COST_MAX                           = 65536
 16	RANGE_DIFF_CREATION_FACTOR_DEFAULT = 60
 17)
 18
 19type PatchRange struct {
 20	*Patch
 21	Matching int
 22	Diff     string
 23	DiffSize int
 24	Shown    bool
 25}
 26
 27func NewPatchRange(patch *Patch) *PatchRange {
 28	diff := patch.CalcDiff()
 29	return &PatchRange{
 30		Patch:    patch,
 31		Matching: -1,
 32		Diff:     diff,
 33		DiffSize: len(diff),
 34		Shown:    false,
 35	}
 36}
 37
 38type RangeDiffOutput struct {
 39	Header *RangeDiffHeader
 40	Order  int
 41	Files  []*RangeDiffFile
 42	Type   string
 43}
 44
 45func output(a []*PatchRange, b []*PatchRange) []*RangeDiffOutput {
 46	outputs := []*RangeDiffOutput{}
 47	for i, patchA := range a {
 48		if patchA.Matching == -1 {
 49			hdr := NewRangeDiffHeader(patchA, nil, i+1, -1)
 50			outputs = append(
 51				outputs,
 52				&RangeDiffOutput{
 53					Header: hdr,
 54					Type:   "rm",
 55					Order:  i + 1,
 56				},
 57			)
 58		}
 59	}
 60
 61	for j, patchB := range b {
 62		if patchB.Matching == -1 {
 63			hdr := NewRangeDiffHeader(nil, patchB, -1, j+1)
 64			outputs = append(
 65				outputs,
 66				&RangeDiffOutput{
 67					Header: hdr,
 68					Type:   "add",
 69					Order:  j + 1,
 70				},
 71			)
 72			continue
 73		}
 74		patchA := a[patchB.Matching]
 75		if patchB.ContentSha == patchA.ContentSha {
 76			hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
 77			outputs = append(
 78				outputs,
 79				&RangeDiffOutput{
 80					Header: hdr,
 81					Type:   "equal",
 82					Order:  patchA.Matching + 1,
 83				},
 84			)
 85		} else {
 86			hdr := NewRangeDiffHeader(patchA, patchB, patchB.Matching+1, patchA.Matching+1)
 87			diff := outputDiff(patchA, patchB)
 88			outputs = append(
 89				outputs,
 90				&RangeDiffOutput{
 91					Order:  patchA.Matching + 1,
 92					Header: hdr,
 93					Files:  diff,
 94					Type:   "diff",
 95				},
 96			)
 97		}
 98	}
 99	sort.Slice(outputs, func(i, j int) bool {
100		return outputs[i].Order < outputs[j].Order
101	})
102	return outputs
103}
104
105type RangeDiffDiff struct {
106	OuterType string
107	InnerType string
108	Text      string
109}
110
111func toRangeDiffDiff(diff []diffmatchpatch.Diff) []RangeDiffDiff {
112	result := []RangeDiffDiff{}
113
114	for _, line := range diff {
115		outer := "equal"
116		switch line.Type {
117		case diffmatchpatch.DiffInsert:
118			outer = "insert"
119		case diffmatchpatch.DiffDelete:
120			outer = "delete"
121		}
122
123		fmtLine := strings.Split(line.Text, "\n")
124		for idx, ln := range fmtLine {
125			text := ln
126			if idx < len(fmtLine)-1 {
127				text = ln + "\n"
128			}
129			st := RangeDiffDiff{
130				Text:      text,
131				OuterType: outer,
132				InnerType: "equal",
133			}
134
135			if strings.HasPrefix(text, "+") {
136				st.InnerType = "insert"
137			} else if strings.HasPrefix(text, "-") {
138				st.InnerType = "delete"
139			}
140
141			result = append(result, st)
142		}
143	}
144
145	return result
146}
147
148func DoDiff(src, dst string) []RangeDiffDiff {
149	dmp := diffmatchpatch.New()
150	wSrc, wDst, warray := dmp.DiffLinesToChars(src, dst)
151	diffs := dmp.DiffMain(wSrc, wDst, false)
152	diffs = dmp.DiffCharsToLines(diffs, warray)
153	return toRangeDiffDiff(diffs)
154}
155
156type RangeDiffFile struct {
157	OldFile *gitdiff.File
158	NewFile *gitdiff.File
159	Diff    []RangeDiffDiff
160}
161
162func outputDiff(patchA, patchB *PatchRange) []*RangeDiffFile {
163	diffs := []*RangeDiffFile{}
164
165	for _, fileA := range patchA.Files {
166		found := false
167		for _, fileB := range patchB.Files {
168			if fileA.NewName == fileB.NewName {
169				found = true
170				// this means both files have been deleted so we should skip
171				if fileA.NewName == "" {
172					continue
173				}
174				strA := ""
175				for _, frag := range fileA.TextFragments {
176					for _, line := range frag.Lines {
177						strA += line.String()
178					}
179				}
180				strB := ""
181				for _, frag := range fileB.TextFragments {
182					for _, line := range frag.Lines {
183						strB += line.String()
184					}
185				}
186				curDiff := DoDiff(strA, strB)
187				hasDiff := false
188				for _, dd := range curDiff {
189					if dd.OuterType != "equal" {
190						hasDiff = true
191						break
192					}
193				}
194				if !hasDiff {
195					continue
196				}
197				fp := &RangeDiffFile{
198					OldFile: fileA,
199					NewFile: fileB,
200					Diff:    curDiff,
201				}
202				diffs = append(diffs, fp)
203			}
204		}
205
206		// find files in patchA but not in patchB
207		if !found {
208			strA := ""
209			for _, frag := range fileA.TextFragments {
210				for _, line := range frag.Lines {
211					strA += line.String()
212				}
213			}
214			fp := &RangeDiffFile{
215				OldFile: fileA,
216				NewFile: nil,
217				Diff:    DoDiff(strA, ""),
218			}
219			diffs = append(diffs, fp)
220		}
221	}
222
223	// find files in patchB not in patchA
224	for _, fileB := range patchB.Files {
225		found := false
226		for _, fileA := range patchA.Files {
227			if fileA.NewName == fileB.NewName {
228				found = true
229				break
230			}
231		}
232
233		if !found {
234			strB := ""
235			for _, frag := range fileB.TextFragments {
236				for _, line := range frag.Lines {
237					strB += line.String()
238				}
239			}
240			fp := &RangeDiffFile{
241				OldFile: nil,
242				NewFile: fileB,
243				Diff:    DoDiff("", strB),
244			}
245			diffs = append(diffs, fp)
246		}
247	}
248
249	return diffs
250}
251
252// RangeDiffHeader is a header combining old and new change pairs.
253type RangeDiffHeader struct {
254	OldIdx       int
255	OldSha       string
256	NewIdx       int
257	NewSha       string
258	Title        string
259	ContentEqual bool
260}
261
262func NewRangeDiffHeader(a *PatchRange, b *PatchRange, aIndex, bIndex int) *RangeDiffHeader {
263	hdr := &RangeDiffHeader{}
264	if a == nil {
265		hdr.NewIdx = bIndex
266		hdr.NewSha = b.CommitSha
267		hdr.Title = b.Title
268		return hdr
269	}
270	if b == nil {
271		hdr.OldIdx = aIndex
272		hdr.OldSha = a.CommitSha
273		hdr.Title = a.Title
274		return hdr
275	}
276
277	hdr.OldIdx = aIndex
278	hdr.NewIdx = bIndex
279	hdr.OldSha = a.CommitSha
280	hdr.NewSha = b.CommitSha
281
282	if a.ContentSha == b.ContentSha {
283		hdr.Title = a.Title
284		hdr.ContentEqual = true
285	} else {
286		hdr.Title = b.Title
287	}
288
289	return hdr
290}
291
292func (hdr *RangeDiffHeader) String() string {
293	if hdr.OldIdx == 0 {
294		return fmt.Sprintf("-:  ------- > %d:  %s %s\n", hdr.NewIdx, truncateSha(hdr.NewSha), hdr.Title)
295	}
296	if hdr.NewIdx == 0 {
297		return fmt.Sprintf("%d:  %s < -:  ------- %s\n", hdr.OldIdx, truncateSha(hdr.OldSha), hdr.Title)
298	}
299	if hdr.ContentEqual {
300		return fmt.Sprintf(
301			"%d:  %s = %d:  %s %s\n",
302			hdr.OldIdx, truncateSha(hdr.OldSha),
303			hdr.NewIdx, truncateSha(hdr.NewSha),
304			hdr.Title,
305		)
306	}
307	return fmt.Sprintf(
308		"%d:  %s ! %d:  %s %s",
309		hdr.OldIdx, truncateSha(hdr.OldSha),
310		hdr.NewIdx, truncateSha(hdr.NewSha),
311		hdr.Title,
312	)
313}
314
315func RangeDiff(a []*Patch, b []*Patch) []*RangeDiffOutput {
316	aPatches := []*PatchRange{}
317	for _, patch := range a {
318		aPatches = append(aPatches, NewPatchRange(patch))
319	}
320	bPatches := []*PatchRange{}
321	for _, patch := range b {
322		bPatches = append(bPatches, NewPatchRange(patch))
323	}
324	findExactMatches(aPatches, bPatches)
325	getCorrespondences(aPatches, bPatches, RANGE_DIFF_CREATION_FACTOR_DEFAULT)
326	return output(aPatches, bPatches)
327}
328
329func RangeDiffToStr(diffs []*RangeDiffOutput) string {
330	output := ""
331	for _, diff := range diffs {
332		output += diff.Header.String()
333		for _, f := range diff.Files {
334			output += fmt.Sprintf("\n@@ %s\n", f.NewFile.NewName)
335			for _, d := range f.Diff {
336				switch d.OuterType {
337				case "equal":
338					output += d.Text
339				case "insert":
340					output += d.Text
341				case "delete":
342					output += d.Text
343				}
344			}
345		}
346	}
347	return output
348}
349
350func findExactMatches(a []*PatchRange, b []*PatchRange) {
351	for i, patchA := range a {
352		for j, patchB := range b {
353			if patchA.ContentSha == patchB.ContentSha {
354				patchA.Matching = j
355				patchB.Matching = i
356			}
357		}
358	}
359}
360
361func createMatrix(rows, cols int) [][]int {
362	mat := make([][]int, rows)
363	for i := range mat {
364		mat[i] = make([]int, cols)
365	}
366	return mat
367}
368
369func diffsize(a *PatchRange, b *PatchRange) int {
370	dmp := diffmatchpatch.New()
371	diffs := dmp.DiffMain(a.Diff, b.Diff, false)
372	return len(diffs)
373}
374
375func getCorrespondences(a []*PatchRange, b []*PatchRange, creationFactor int) {
376	n := len(a) + len(b)
377	cost := createMatrix(n, n)
378
379	for i, patchA := range a {
380		var c int
381		for j, patchB := range b {
382			if patchA.Matching == j {
383				c = 0
384			} else if patchA.Matching == -1 && patchB.Matching == -1 {
385				c = diffsize(patchA, patchB)
386			} else {
387				c = COST_MAX
388			}
389			cost[i][j] = c
390		}
391	}
392
393	for j, patchB := range b {
394		creationCost := (patchB.DiffSize * creationFactor) / 100
395		if patchB.Matching >= 0 {
396			creationCost = math.MaxInt32
397		}
398		for i := len(a); i < n; i++ {
399			cost[i][j] = creationCost
400		}
401	}
402
403	for i := len(a); i < n; i++ {
404		for j := len(b); j < n; j++ {
405			cost[i][j] = 0
406		}
407	}
408
409	assignment, _ := ha.Solve(cost)
410	for i := range a {
411		j := assignment[i]
412		if j >= 0 && j < len(b) {
413			a[i].Matching = j
414			b[j].Matching = i
415		}
416	}
417}