// Copyright 2014 The Gogs Authors. All rights reserved. // Copyright 2019 The Gitea Authors. All rights reserved. // Use of this source code is governed by a MIT-style // license that can be found in the LICENSE file. package gitdiff import ( "bufio" "bytes" "context" "fmt" "html" "html/template" "io" "net/url" "os" "regexp" "sort" "strings" "time" "code.gitea.io/gitea/models/db" git_model "code.gitea.io/gitea/models/git" issues_model "code.gitea.io/gitea/models/issues" pull_model "code.gitea.io/gitea/models/pull" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/lfs" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "github.com/sergi/go-diff/diffmatchpatch" stdcharset "golang.org/x/net/html/charset" "golang.org/x/text/encoding" "golang.org/x/text/transform" ) // DiffLineType represents the type of a DiffLine. type DiffLineType uint8 // DiffLineType possible values. const ( DiffLinePlain DiffLineType = iota + 1 DiffLineAdd DiffLineDel DiffLineSection ) // DiffFileType represents the type of a DiffFile. type DiffFileType uint8 // DiffFileType possible values. const ( DiffFileAdd DiffFileType = iota + 1 DiffFileChange DiffFileDel DiffFileRename DiffFileCopy ) // DiffLineExpandDirection represents the DiffLineSection expand direction type DiffLineExpandDirection uint8 // DiffLineExpandDirection possible values. const ( DiffLineExpandNone DiffLineExpandDirection = iota + 1 DiffLineExpandSingle DiffLineExpandUpDown DiffLineExpandUp DiffLineExpandDown ) // DiffLine represents a line difference in a DiffSection. type DiffLine struct { LeftIdx int RightIdx int Match int Type DiffLineType Content string Comments []*issues_model.Comment SectionInfo *DiffLineSectionInfo } // DiffLineSectionInfo represents diff line section meta data type DiffLineSectionInfo struct { Path string LastLeftIdx int LastRightIdx int LeftIdx int RightIdx int LeftHunkSize int RightHunkSize int } // BlobExcerptChunkSize represent max lines of excerpt const BlobExcerptChunkSize = 20 // GetType returns the type of a DiffLine. func (d *DiffLine) GetType() int { return int(d.Type) } // CanComment returns whether or not a line can get commented func (d *DiffLine) CanComment() bool { return len(d.Comments) == 0 && d.Type != DiffLineSection } // GetCommentSide returns the comment side of the first comment, if not set returns empty string func (d *DiffLine) GetCommentSide() string { if len(d.Comments) == 0 { return "" } return d.Comments[0].DiffSide() } // GetLineTypeMarker returns the line type marker func (d *DiffLine) GetLineTypeMarker() string { if strings.IndexByte(" +-", d.Content[0]) > -1 { return d.Content[0:1] } return "" } // GetBlobExcerptQuery builds query string to get blob excerpt func (d *DiffLine) GetBlobExcerptQuery() string { query := fmt.Sprintf( "last_left=%d&last_right=%d&"+ "left=%d&right=%d&"+ "left_hunk_size=%d&right_hunk_size=%d&"+ "path=%s", d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx, d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx, d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize, url.QueryEscape(d.SectionInfo.Path)) return query } // GetExpandDirection gets DiffLineExpandDirection func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection { if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 { return DiffLineExpandNone } if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 { return DiffLineExpandUp } else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 { return DiffLineExpandUpDown } else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 { return DiffLineExpandDown } return DiffLineExpandSingle } func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo { leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line) return &DiffLineSectionInfo{ Path: treePath, LastLeftIdx: lastLeftIdx, LastRightIdx: lastRightIdx, LeftIdx: leftLine, RightIdx: rightLine, LeftHunkSize: leftHunk, RightHunkSize: righHunk, } } // escape a line's content or return
needed for copy/paste purposes func getLineContent(content string) DiffInline { if len(content) > 0 { return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content))) } return DiffInline{Content: "
"} } // DiffSection represents a section of a DiffFile. type DiffSection struct { file *DiffFile FileName string Name string Lines []*DiffLine } var ( addedCodePrefix = []byte(``) removedCodePrefix = []byte(``) codeTagSuffix = []byte(``) ) var ( unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`) trailingSpanRegex = regexp.MustCompile(`]?$`) entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`) ) // shouldWriteInline represents combinations where we manually write inline changes func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool { if true && diff.Type == diffmatchpatch.DiffEqual || diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd || diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel { return true } return false } func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff { // Create a new array to store our fixed up blocks fixedup := make([]diffmatchpatch.Diff, 0, len(diffs)) // semantically label some numbers const insert, delete, equal = 0, 1, 2 // record the positions of the last type of each block in the fixedup blocks last := []int{-1, -1, -1} operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual} // create a writer for insert and deletes toWrite := []strings.Builder{ {}, {}, } // make some flags for insert and delete unfinishedTag := []bool{false, false} unfinishedEnt := []bool{false, false} // store stores the provided text in the writer for the typ store := func(text string, typ int) { (&(toWrite[typ])).WriteString(text) } // hasStored returns true if there is stored content hasStored := func(typ int) bool { return (&toWrite[typ]).Len() > 0 } // stored will return that content stored := func(typ int) string { return (&toWrite[typ]).String() } // empty will empty the stored content empty := func(typ int) { (&toWrite[typ]).Reset() } // pop will remove the stored content appending to a diff block for that typ pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff { if hasStored(typ) { if last[typ] > last[equal] { fixedup[last[typ]].Text += stored(typ) } else { fixedup = append(fixedup, diffmatchpatch.Diff{ Type: operation[typ], Text: stored(typ), }) } empty(typ) } return fixedup } // Now we walk the provided diffs and check the type of each block in turn for _, diff := range diffs { typ := delete // flag for handling insert or delete typs switch diff.Type { case diffmatchpatch.DiffEqual: // First check if there is anything stored if hasStored(insert) || hasStored(delete) { // There are two reasons for storing content: // 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag if unfinishedEnt[insert] || unfinishedEnt[delete] { // we look for a ';' to finish an entity idx := strings.IndexRune(diff.Text, ';') if idx >= 0 { // if we find a ';' store the preceding content to both insert and delete store(diff.Text[:idx+1], insert) store(diff.Text[:idx+1], delete) // and remove it from this block diff.Text = diff.Text[idx+1:] // reset the ent flags unfinishedEnt[insert] = false unfinishedEnt[delete] = false } else { // otherwise store it all on insert and delete store(diff.Text, insert) store(diff.Text, delete) // and empty this block diff.Text = "" } } // 2. Unfinished Tag if unfinishedTag[insert] || unfinishedTag[delete] { // we look for a '>' to finish a tag idx := strings.IndexRune(diff.Text, '>') if idx >= 0 { store(diff.Text[:idx+1], insert) store(diff.Text[:idx+1], delete) diff.Text = diff.Text[idx+1:] unfinishedTag[insert] = false unfinishedTag[delete] = false } else { store(diff.Text, insert) store(diff.Text, delete) diff.Text = "" } } // If we've completed the required tag/entities if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) { // pop off the stack fixedup = pop(insert, fixedup) fixedup = pop(delete, fixedup) } // If that has left this diff block empty then shortcut if len(diff.Text) == 0 { continue } } // check if this block ends in an unfinished tag? idx := unfinishedtagRegex.FindStringIndex(diff.Text) if idx != nil { unfinishedTag[insert] = true unfinishedTag[delete] = true } else { // otherwise does it end in an unfinished entity? idx = entityRegex.FindStringIndex(diff.Text) if idx != nil { unfinishedEnt[insert] = true unfinishedEnt[delete] = true } } // If there is an unfinished component if idx != nil { // Store the fragment store(diff.Text[idx[0]:], insert) store(diff.Text[idx[0]:], delete) // and remove it from this block diff.Text = diff.Text[:idx[0]] } // If that hasn't left the block empty if len(diff.Text) > 0 { // store the position of the last equal block and store it in our diffs last[equal] = len(fixedup) fixedup = append(fixedup, diff) } continue case diffmatchpatch.DiffInsert: typ = insert fallthrough case diffmatchpatch.DiffDelete: // First check if there is anything stored for this type if hasStored(typ) { // if there is prepend it to this block, empty the storage and reset our flags diff.Text = stored(typ) + diff.Text empty(typ) unfinishedEnt[typ] = false unfinishedTag[typ] = false } // check if this block ends in an unfinished tag idx := unfinishedtagRegex.FindStringIndex(diff.Text) if idx != nil { unfinishedTag[typ] = true } else { // otherwise does it end in an unfinished entity idx = entityRegex.FindStringIndex(diff.Text) if idx != nil { unfinishedEnt[typ] = true } } // If there is an unfinished component if idx != nil { // Store the fragment store(diff.Text[idx[0]:], typ) // and remove it from this block diff.Text = diff.Text[:idx[0]] } // If that hasn't left the block empty if len(diff.Text) > 0 { // if the last block of this type was after the last equal block if last[typ] > last[equal] { // store this blocks content on that block fixedup[last[typ]].Text += diff.Text } else { // otherwise store the position of the last block of this type and store the block last[typ] = len(fixedup) fixedup = append(fixedup, diff) } } continue } } // pop off any remaining stored content fixedup = pop(insert, fixedup) fixedup = pop(delete, fixedup) return fixedup } func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline { buf := bytes.NewBuffer(nil) match := "" diffs = fixupBrokenSpans(diffs) for _, diff := range diffs { if shouldWriteInline(diff, lineType) { if len(match) > 0 { diff.Text = match + diff.Text match = "" } // Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency. // Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together // before writing so we don't try insert added/removed code spans in the middle of one of those // and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of // a line ending with an incomplete HTML entity or partial/opening . // EX: // diffs[{Type: dmp.DiffDelete, Text: "language}] // After first iteration // diffs[{Type: dmp.DiffDelete, Text: "language"}, //write out // {Type: dmp.DiffEqual, Text: ",}] // After second iteration // {Type: dmp.DiffEqual, Text: ""}, // write out // {Type: dmp.DiffDelete, Text: ",}] // Final // {Type: dmp.DiffDelete, Text: ",}] // end up writing , // Instead of lass="p", m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text) if m != nil { match = diff.Text[m[0]:m[1]] diff.Text = strings.TrimSuffix(diff.Text, match) } m = entityRegex.FindStringSubmatchIndex(diff.Text) if m != nil { match = diff.Text[m[0]:m[1]] diff.Text = strings.TrimSuffix(diff.Text, match) } // Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it if strings.HasPrefix(diff.Text, "") { buf.WriteString("") diff.Text = strings.TrimPrefix(diff.Text, "") } // If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below // The previous/next diff section will contain the rest of the tag that is missing here if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") { buf.WriteString(diff.Text) continue } } switch { case diff.Type == diffmatchpatch.DiffEqual: buf.WriteString(diff.Text) case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd: buf.Write(addedCodePrefix) buf.WriteString(diff.Text) buf.Write(codeTagSuffix) case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel: buf.Write(removedCodePrefix) buf.WriteString(diff.Text) buf.Write(codeTagSuffix) } } return DiffInlineWithUnicodeEscape(template.HTML(buf.String())) } // GetLine gets a specific line by type (add or del) and file line number func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine { var ( difference = 0 addCount = 0 delCount = 0 matchDiffLine *DiffLine ) LOOP: for _, diffLine := range diffSection.Lines { switch diffLine.Type { case DiffLineAdd: addCount++ case DiffLineDel: delCount++ default: if matchDiffLine != nil { break LOOP } difference = diffLine.RightIdx - diffLine.LeftIdx addCount = 0 delCount = 0 } switch lineType { case DiffLineDel: if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference { matchDiffLine = diffLine } case DiffLineAdd: if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference { matchDiffLine = diffLine } } } if addCount == delCount { return matchDiffLine } return nil } var diffMatchPatch = diffmatchpatch.New() func init() { diffMatchPatch.DiffEditCost = 100 } // DiffInline is a struct that has a content and escape status type DiffInline struct { EscapeStatus charset.EscapeStatus Content template.HTML } // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline { status, content := charset.EscapeControlString(string(s)) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline { status, content := charset.EscapeControlString(highlight.Code(fileName, language, code)) return DiffInline{EscapeStatus: status, Content: template.HTML(content)} } // GetComputedInlineDiffFor computes inline diff for the given line. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline { if setting.Git.DisableDiffHighlight { return getLineContent(diffLine.Content[1:]) } var ( compareDiffLine *DiffLine diff1 string diff2 string ) language := "" if diffSection.file != nil { language = diffSection.file.Language } // try to find equivalent diff line. ignore, otherwise switch diffLine.Type { case DiffLineSection: return getLineContent(diffLine.Content[1:]) case DiffLineAdd: compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx) if compareDiffLine == nil { return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) } diff1 = compareDiffLine.Content diff2 = diffLine.Content case DiffLineDel: compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx) if compareDiffLine == nil { return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) } diff1 = diffLine.Content diff2 = compareDiffLine.Content default: if strings.IndexByte(" +-", diffLine.Content[0]) > -1 { return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:]) } return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content) } diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true) diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord) return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type) } // DiffFile represents a file diff. type DiffFile struct { Name string NameHash string OldName string Index int Addition, Deletion int Type DiffFileType IsCreated bool IsDeleted bool IsBin bool IsLFSFile bool IsRenamed bool IsAmbiguous bool IsSubmodule bool Sections []*DiffSection IsIncomplete bool IsIncompleteLineTooLong bool IsProtected bool IsGenerated bool IsVendored bool IsViewed bool // User specific HasChangedSinceLastReview bool // User specific Language string } // GetType returns type of diff file. func (diffFile *DiffFile) GetType() int { return int(diffFile.Type) } // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection { if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile { return nil } leftCommit, err := gitRepo.GetCommit(leftCommitID) if err != nil { return nil } rightCommit, err := gitRepo.GetCommit(rightCommitID) if err != nil { return nil } lastSection := diffFile.Sections[len(diffFile.Sections)-1] lastLine := lastSection.Lines[len(lastSection.Lines)-1] leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name) rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name) if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx { return nil } tailDiffLine := &DiffLine{ Type: DiffLineSection, Content: " ", SectionInfo: &DiffLineSectionInfo{ Path: diffFile.Name, LastLeftIdx: lastLine.LeftIdx, LastRightIdx: lastLine.RightIdx, LeftIdx: leftLineCount, RightIdx: rightLineCount, }, } tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}} return tailSection } // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted func (diffFile *DiffFile) GetDiffFileName() string { if diffFile.Name == "" { return diffFile.OldName } return diffFile.Name } func (diffFile *DiffFile) ShouldBeHidden() bool { return diffFile.IsGenerated || diffFile.IsViewed } func getCommitFileLineCount(commit *git.Commit, filePath string) int { blob, err := commit.GetBlobByPath(filePath) if err != nil { return 0 } lineCount, err := blob.GetBlobLineCount() if err != nil { return 0 } return lineCount } // Diff represents a difference between two git trees. type Diff struct { Start, End string NumFiles int TotalAddition, TotalDeletion int Files []*DiffFile IsIncomplete bool NumViewedFiles int // user-specific } // LoadComments loads comments into each line func (diff *Diff) LoadComments(ctx context.Context, issue *issues_model.Issue, currentUser *user_model.User) error { allComments, err := issues_model.FetchCodeComments(ctx, issue, currentUser) if err != nil { return err } for _, file := range diff.Files { if lineCommits, ok := allComments[file.Name]; ok { for _, section := range file.Sections { for _, line := range section.Lines { if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok { line.Comments = append(line.Comments, comments...) } if comments, ok := lineCommits[int64(line.RightIdx)]; ok { line.Comments = append(line.Comments, comments...) } sort.SliceStable(line.Comments, func(i, j int) bool { return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix }) } } } } return nil } const cmdDiffHead = "diff --git " // ParsePatch builds a Diff object from a io.Reader and some parameters. func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) { log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile) var curFile *DiffFile skipping := skipToFile != "" diff := &Diff{Files: make([]*DiffFile, 0)} sb := strings.Builder{} // OK let's set a reasonable buffer size. // This should be let's say at least the size of maxLineCharacters or 4096 whichever is larger. readerSize := maxLineCharacters if readerSize < 4096 { readerSize = 4096 } input := bufio.NewReaderSize(reader, readerSize) line, err := input.ReadString('\n') if err != nil { if err == io.EOF { return diff, nil } return diff, err } parsingLoop: for { // 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted) // if it does not we have bad input! if !strings.HasPrefix(line, cmdDiffHead) { return diff, fmt.Errorf("invalid first file line: %s", line) } if maxFiles > -1 && len(diff.Files) >= maxFiles { lastFile := createDiffFile(diff, line) diff.End = lastFile.Name diff.IsIncomplete = true _, err := io.Copy(io.Discard, reader) if err != nil { // By the definition of io.Copy this never returns io.EOF return diff, fmt.Errorf("error during io.Copy: %w", err) } break parsingLoop } curFile = createDiffFile(diff, line) if skipping { if curFile.Name != skipToFile { line, err = skipToNextDiffHead(input) if err != nil { if err == io.EOF { return diff, nil } return diff, err } continue } skipping = false } diff.Files = append(diff.Files, curFile) // 2. It is followed by one or more extended header lines: // // old mode // new mode // deleted file mode // new file mode // copy from // copy to // rename from // rename to // similarity index // dissimilarity index // index .. // // * 6-digit octal numbers including the file type and file permission bits. // * does not include the a/ and b/ prefixes // * percentage of unchanged lines for similarity, percentage of changed // lines dissimilarity as integer rounded down with terminal %. 100% => equal files. // * The index line includes the blob object names before and after the change. // The is included if the file mode does not change; otherwise, separate // lines indicate the old and the new mode. // 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...) // // --- a/ // +++ b/ // // With multiple hunks // // @@ @@ // +added line // -removed line // unchanged line // // 4. Binary files get: // // Binary files a/ and b/ differ // // but one of a/ and b/ could be /dev/null. curFileLoop: for { line, err = input.ReadString('\n') if err != nil { if err != io.EOF { return diff, err } break parsingLoop } switch { case strings.HasPrefix(line, cmdDiffHead): break curFileLoop case strings.HasPrefix(line, "old mode ") || strings.HasPrefix(line, "new mode "): if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } case strings.HasPrefix(line, "rename from "): curFile.IsRenamed = true curFile.Type = DiffFileRename if curFile.IsAmbiguous { curFile.OldName = line[len("rename from ") : len(line)-1] } case strings.HasPrefix(line, "rename to "): curFile.IsRenamed = true curFile.Type = DiffFileRename if curFile.IsAmbiguous { curFile.Name = line[len("rename to ") : len(line)-1] curFile.IsAmbiguous = false } case strings.HasPrefix(line, "copy from "): curFile.IsRenamed = true curFile.Type = DiffFileCopy if curFile.IsAmbiguous { curFile.OldName = line[len("copy from ") : len(line)-1] } case strings.HasPrefix(line, "copy to "): curFile.IsRenamed = true curFile.Type = DiffFileCopy if curFile.IsAmbiguous { curFile.Name = line[len("copy to ") : len(line)-1] curFile.IsAmbiguous = false } case strings.HasPrefix(line, "new file"): curFile.Type = DiffFileAdd curFile.IsCreated = true if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } case strings.HasPrefix(line, "deleted"): curFile.Type = DiffFileDel curFile.IsDeleted = true if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } case strings.HasPrefix(line, "index"): if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } case strings.HasPrefix(line, "similarity index 100%"): curFile.Type = DiffFileRename case strings.HasPrefix(line, "Binary"): curFile.IsBin = true case strings.HasPrefix(line, "--- "): // Handle ambiguous filenames if curFile.IsAmbiguous { // The shortest string that can end up here is: // "--- a\t\n" without the quotes. // This line has a len() of 7 but doesn't contain a oldName. // So the amount that the line need is at least 8 or more. // The code will otherwise panic for a out-of-bounds. if len(line) > 7 && line[4] == 'a' { curFile.OldName = line[6 : len(line)-1] if line[len(line)-2] == '\t' { curFile.OldName = curFile.OldName[:len(curFile.OldName)-1] } } else { curFile.OldName = "" } } // Otherwise do nothing with this line case strings.HasPrefix(line, "+++ "): // Handle ambiguous filenames if curFile.IsAmbiguous { if len(line) > 6 && line[4] == 'b' { curFile.Name = line[6 : len(line)-1] if line[len(line)-2] == '\t' { curFile.Name = curFile.Name[:len(curFile.Name)-1] } if curFile.OldName == "" { curFile.OldName = curFile.Name } } else { curFile.Name = curFile.OldName } curFile.IsAmbiguous = false } // Otherwise do nothing with this line, but now switch to parsing hunks lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input) diff.TotalAddition += curFile.Addition diff.TotalDeletion += curFile.Deletion if err != nil { if err != io.EOF { return diff, err } break parsingLoop } sb.Reset() _, _ = sb.Write(lineBytes) for isFragment { lineBytes, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF return diff, fmt.Errorf("unable to ReadLine: %w", err) } _, _ = sb.Write(lineBytes) } line = sb.String() sb.Reset() break curFileLoop } } } // TODO: There are numerous issues with this: // - we might want to consider detecting encoding while parsing but... // - we're likely to fail to get the correct encoding here anyway as we won't have enough information diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3) diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3) diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer) diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer) diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer) for _, f := range diff.Files { f.NameHash = base.EncodeSha1(f.Name) for _, buffer := range diffLineTypeBuffers { buffer.Reset() } for _, sec := range f.Sections { for _, l := range sec.Lines { if l.Type == DiffLineSection { continue } diffLineTypeBuffers[l.Type].WriteString(l.Content[1:]) diffLineTypeBuffers[l.Type].WriteString("\n") } } for lineType, buffer := range diffLineTypeBuffers { diffLineTypeDecoders[lineType] = nil if buffer.Len() == 0 { continue } charsetLabel, err := charset.DetectEncoding(buffer.Bytes()) if charsetLabel != "UTF-8" && err == nil { encoding, _ := stdcharset.Lookup(charsetLabel) if encoding != nil { diffLineTypeDecoders[lineType] = encoding.NewDecoder() } } } for _, sec := range f.Sections { for _, l := range sec.Lines { decoder := diffLineTypeDecoders[l.Type] if decoder != nil { if c, _, err := transform.String(decoder, l.Content[1:]); err == nil { l.Content = l.Content[0:1] + c } } } } } diff.NumFiles = len(diff.Files) return diff, nil } func skipToNextDiffHead(input *bufio.Reader) (line string, err error) { // need to skip until the next cmdDiffHead isFragment, wasFragment := false, false var lineBytes []byte for { lineBytes, isFragment, err = input.ReadLine() if err != nil { return } if wasFragment { wasFragment = isFragment continue } if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) { break } wasFragment = isFragment } line = string(lineBytes) if isFragment { var tail string tail, err = input.ReadString('\n') if err != nil { return } line += tail } return } func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) { sb := strings.Builder{} var ( curSection *DiffSection curFileLinesCount int curFileLFSPrefix bool ) lastLeftIdx := -1 leftLine, rightLine := 1, 1 for { for isFragment { curFile.IsIncomplete = true curFile.IsIncompleteLineTooLong = true _, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF err = fmt.Errorf("unable to ReadLine: %w", err) return } } sb.Reset() lineBytes, isFragment, err = input.ReadLine() if err != nil { if err == io.EOF { return } err = fmt.Errorf("unable to ReadLine: %w", err) return } if lineBytes[0] == 'd' { // End of hunks return } switch lineBytes[0] { case '@': if maxLines > -1 && curFileLinesCount >= maxLines { curFile.IsIncomplete = true continue } _, _ = sb.Write(lineBytes) for isFragment { // This is very odd indeed - we're in a section header and the line is too long // This really shouldn't happen... lineBytes, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF err = fmt.Errorf("unable to ReadLine: %w", err) return } _, _ = sb.Write(lineBytes) } line := sb.String() // Create a new section to represent this hunk curSection = &DiffSection{file: curFile} lastLeftIdx = -1 curFile.Sections = append(curFile.Sections, curSection) lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1) diffLine := &DiffLine{ Type: DiffLineSection, Content: line, SectionInfo: lineSectionInfo, } curSection.Lines = append(curSection.Lines, diffLine) curSection.FileName = curFile.Name // update line number. leftLine = lineSectionInfo.LeftIdx rightLine = lineSectionInfo.RightIdx continue case '\\': if maxLines > -1 && curFileLinesCount >= maxLines { curFile.IsIncomplete = true continue } // This is used only to indicate that the current file does not have a terminal newline if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) { err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes)) return } // Technically this should be the end the file! // FIXME: we should be putting a marker at the end of the file if there is no terminal new line continue case '+': curFileLinesCount++ curFile.Addition++ if maxLines > -1 && curFileLinesCount >= maxLines { curFile.IsIncomplete = true continue } diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1} rightLine++ if curSection == nil { // Create a new section to represent this hunk curSection = &DiffSection{file: curFile} curFile.Sections = append(curFile.Sections, curSection) lastLeftIdx = -1 } if lastLeftIdx > -1 { diffLine.Match = lastLeftIdx curSection.Lines[lastLeftIdx].Match = len(curSection.Lines) lastLeftIdx++ if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel { lastLeftIdx = -1 } } curSection.Lines = append(curSection.Lines, diffLine) case '-': curFileLinesCount++ curFile.Deletion++ if maxLines > -1 && curFileLinesCount >= maxLines { curFile.IsIncomplete = true continue } diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1} if leftLine > 0 { leftLine++ } if curSection == nil { // Create a new section to represent this hunk curSection = &DiffSection{file: curFile} curFile.Sections = append(curFile.Sections, curSection) lastLeftIdx = -1 } if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel { lastLeftIdx = len(curSection.Lines) } curSection.Lines = append(curSection.Lines, diffLine) case ' ': curFileLinesCount++ if maxLines > -1 && curFileLinesCount >= maxLines { curFile.IsIncomplete = true continue } diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine} leftLine++ rightLine++ lastLeftIdx = -1 if curSection == nil { // Create a new section to represent this hunk curSection = &DiffSection{file: curFile} curFile.Sections = append(curFile.Sections, curSection) } curSection.Lines = append(curSection.Lines, diffLine) default: // This is unexpected err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes)) return } line := string(lineBytes) if isFragment { curFile.IsIncomplete = true curFile.IsIncompleteLineTooLong = true for isFragment { lineBytes, isFragment, err = input.ReadLine() if err != nil { // Now by the definition of ReadLine this cannot be io.EOF err = fmt.Errorf("unable to ReadLine: %w", err) return } } } if len(line) > maxLineCharacters { curFile.IsIncomplete = true curFile.IsIncompleteLineTooLong = true line = line[:maxLineCharacters] } curSection.Lines[len(curSection.Lines)-1].Content = line // handle LFS if line[1:] == lfs.MetaFileIdentifier { curFileLFSPrefix = true } else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) { oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix) if len(oid) == 64 { m := &git_model.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}} count, err := db.CountByBean(db.DefaultContext, m) if err == nil && count > 0 { curFile.IsBin = true curFile.IsLFSFile = true curSection.Lines = nil lastLeftIdx = -1 } } } } } func createDiffFile(diff *Diff, line string) *DiffFile { // The a/ and b/ filenames are the same unless rename/copy is involved. // Especially, even for a creation or a deletion, /dev/null is not used // in place of the a/ or b/ filenames. // // When rename/copy is involved, file1 and file2 show the name of the // source file of the rename/copy and the name of the file that rename/copy // produces, respectively. // // Path names are quoted if necessary. // // This means that you should always be able to determine the file name even when there // there is potential ambiguity... // // but we can be simpler with our heuristics by just forcing git to prefix things nicely curFile := &DiffFile{ Index: len(diff.Files) + 1, Type: DiffFileChange, Sections: make([]*DiffSection, 0, 10), } rd := strings.NewReader(line[len(cmdDiffHead):] + " ") curFile.Type = DiffFileChange oldNameAmbiguity := false newNameAmbiguity := false curFile.OldName, oldNameAmbiguity = readFileName(rd) curFile.Name, newNameAmbiguity = readFileName(rd) if oldNameAmbiguity && newNameAmbiguity { curFile.IsAmbiguous = true // OK we should bet that the oldName and the newName are the same if they can be made to be same // So we need to start again ... if (len(line)-len(cmdDiffHead)-1)%2 == 0 { // diff --git a/b b/b b/b b/b b/b b/b // midpoint := (len(line) + len(cmdDiffHead) - 1) / 2 new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:] if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] { curFile.OldName = old[2:] curFile.Name = old[2:] } } } curFile.IsRenamed = curFile.Name != curFile.OldName return curFile } func readFileName(rd *strings.Reader) (string, bool) { ambiguity := false var name string char, _ := rd.ReadByte() _ = rd.UnreadByte() if char == '"' { fmt.Fscanf(rd, "%q ", &name) if len(name) == 0 { log.Error("Reader has no file name: %v", rd) return "", true } if name[0] == '\\' { name = name[1:] } } else { // This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone ambiguity = true fmt.Fscanf(rd, "%s ", &name) char, _ := rd.ReadByte() _ = rd.UnreadByte() for !(char == 0 || char == '"' || char == 'b') { var suffix string fmt.Fscanf(rd, "%s ", &suffix) name += " " + suffix char, _ = rd.ReadByte() _ = rd.UnreadByte() } } if len(name) < 2 { log.Error("Unable to determine name from reader: %v", rd) return "", true } return name[2:], ambiguity } // DiffOptions represents the options for a DiffRange type DiffOptions struct { BeforeCommitID string AfterCommitID string SkipTo string MaxLines int MaxLineCharacters int MaxFiles int WhitespaceBehavior string DirectComparison bool } // GetDiff builds a Diff between two commits of a repository. // Passing the empty string as beforeCommitID returns a diff from the parent commit. // The whitespaceBehavior is either an empty string or a git flag func GetDiff(gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { repoPath := gitRepo.Path commit, err := gitRepo.GetCommit(opts.AfterCommitID) if err != nil { return nil, err } argsLength := 6 if len(opts.WhitespaceBehavior) > 0 { argsLength++ } if len(opts.SkipTo) > 0 { argsLength++ } if len(files) > 0 { argsLength += len(files) + 1 } diffArgs := make([]string, 0, argsLength) if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA) && commit.ParentCount() == 0 { diffArgs = append(diffArgs, "diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M") if len(opts.WhitespaceBehavior) != 0 { diffArgs = append(diffArgs, opts.WhitespaceBehavior) } // append empty tree ref diffArgs = append(diffArgs, "4b825dc642cb6eb9a060e54bf8d69288fbee4904") diffArgs = append(diffArgs, opts.AfterCommitID) } else { actualBeforeCommitID := opts.BeforeCommitID if len(actualBeforeCommitID) == 0 { parentCommit, _ := commit.Parent(0) actualBeforeCommitID = parentCommit.ID.String() } diffArgs = append(diffArgs, "diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M") if len(opts.WhitespaceBehavior) != 0 { diffArgs = append(diffArgs, opts.WhitespaceBehavior) } diffArgs = append(diffArgs, actualBeforeCommitID) diffArgs = append(diffArgs, opts.AfterCommitID) opts.BeforeCommitID = actualBeforeCommitID } // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file // so if we are using at least this version of git we don't have to tell ParsePatch to do // the skipping for us parsePatchSkipToFile := opts.SkipTo if opts.SkipTo != "" && git.CheckGitVersionAtLeast("2.31") == nil { diffArgs = append(diffArgs, "--skip-to="+opts.SkipTo) parsePatchSkipToFile = "" } if len(files) > 0 { diffArgs = append(diffArgs, "--") diffArgs = append(diffArgs, files...) } reader, writer := io.Pipe() defer func() { _ = reader.Close() _ = writer.Close() }() go func(ctx context.Context, diffArgs []string, repoPath string, writer *io.PipeWriter) { cmd := git.NewCommand(ctx, diffArgs...) cmd.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath)) if err := cmd.Run(&git.RunOpts{ Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second, Dir: repoPath, Stderr: os.Stderr, Stdout: writer, }); err != nil { log.Error("error during RunWithContext: %w", err) } _ = writer.Close() }(gitRepo.Ctx, diffArgs, repoPath, writer) diff, err := ParsePatch(opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile) if err != nil { return nil, fmt.Errorf("unable to ParsePatch: %w", err) } diff.Start = opts.SkipTo checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID) defer deferable() for _, diffFile := range diff.Files { gotVendor := false gotGenerated := false if checker != nil { attrs, err := checker.CheckPath(diffFile.Name) if err == nil { if vendored, has := attrs["linguist-vendored"]; has { if vendored == "set" || vendored == "true" { diffFile.IsVendored = true gotVendor = true } else { gotVendor = vendored == "false" } } if generated, has := attrs["linguist-generated"]; has { if generated == "set" || generated == "true" { diffFile.IsGenerated = true gotGenerated = true } else { gotGenerated = generated == "false" } } if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" { diffFile.Language = language } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { diffFile.Language = language } } } if !gotVendor { diffFile.IsVendored = analyze.IsVendor(diffFile.Name) } if !gotGenerated { diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name) } tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID) if tailSection != nil { diffFile.Sections = append(diffFile.Sections, tailSection) } } separator := "..." if opts.DirectComparison { separator = ".." } shortstatArgs := []string{opts.BeforeCommitID + separator + opts.AfterCommitID} if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA { shortstatArgs = []string{git.EmptyTreeSHA, opts.AfterCommitID} } diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, shortstatArgs...) if err != nil && strings.Contains(err.Error(), "no merge base") { // git >= 2.28 now returns an error if base and head have become unrelated. // previously it would return the results of git diff --shortstat base head so let's try that... shortstatArgs = []string{opts.BeforeCommitID, opts.AfterCommitID} diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, shortstatArgs...) } if err != nil { return nil, err } return diff, nil } // SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set // Additionally, the database asynchronously is updated if files have changed since the last review func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) { diff, err := GetDiff(gitRepo, opts, files...) if err != nil { return nil, err } review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID) if err != nil || review == nil || review.UpdatedFiles == nil { return diff, err } latestCommit := opts.AfterCommitID if latestCommit == "" { latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't } changedFiles, err := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit) if err != nil { return diff, err } filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState) outer: for _, diffFile := range diff.Files { fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()] // Check whether it was previously detected that the file has changed since the last review if fileViewedState == pull_model.HasChanged { diffFile.HasChangedSinceLastReview = true continue } filename := diffFile.GetDiffFileName() // Check explicitly whether the file has changed since the last review for _, changedFile := range changedFiles { diffFile.HasChangedSinceLastReview = filename == changedFile if diffFile.HasChangedSinceLastReview { filesChangedSinceLastDiff[filename] = pull_model.HasChanged continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted } } // Check whether the file has already been viewed if fileViewedState == pull_model.Viewed { diffFile.IsViewed = true diff.NumViewedFiles++ } } // Explicitly store files that have changed in the database, if any is present at all. // This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed. // On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed. if len(filesChangedSinceLastDiff) > 0 { err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff) if err != nil { log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err) return nil, err } } return diff, err } // CommentAsDiff returns c.Patch as *Diff func CommentAsDiff(c *issues_model.Comment) (*Diff, error) { diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "") if err != nil { log.Error("Unable to parse patch: %v", err) return nil, err } if len(diff.Files) == 0 { return nil, fmt.Errorf("no file found for comment ID: %d", c.ID) } secs := diff.Files[0].Sections if len(secs) == 0 { return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID) } return diff, nil } // CommentMustAsDiff executes AsDiff and logs the error instead of returning func CommentMustAsDiff(c *issues_model.Comment) *Diff { if c == nil { return nil } defer func() { if err := recover(); err != nil { log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2)) } }() diff, err := CommentAsDiff(c) if err != nil { log.Warn("CommentMustAsDiff: %v", err) } return diff } // GetWhitespaceFlag returns git diff flag for treating whitespaces func GetWhitespaceFlag(whitespaceBehavior string) string { whitespaceFlags := map[string]string{ "ignore-all": "-w", "ignore-change": "-b", "ignore-eol": "--ignore-space-at-eol", "show-all": "", } if flag, ok := whitespaceFlags[whitespaceBehavior]; ok { return flag } log.Warn("unknown whitespace behavior: %q, default to 'show-all'", whitespaceBehavior) return "" }