Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Revert "Scan commit metadata" #2747

Merged
merged 1 commit into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 21 additions & 116 deletions pkg/gitparse/gitparse.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import (

const (
// defaultDateFormat is the standard date format for git.
defaultDateFormat = "Mon Jan 2 15:04:05 2006 -0700"
defaultDateFormat = "Mon Jan 02 15:04:05 2006 -0700"

// defaultMaxDiffSize is the maximum size for a diff. Larger diffs will be cut off.
defaultMaxDiffSize = 2 * 1024 * 1024 * 1024 // 2GB
Expand Down Expand Up @@ -106,12 +106,11 @@ func (d *Diff) finalize() error {

// Commit contains commit header info and diffs.
type Commit struct {
Hash string
Author string
Committer string
Date time.Time
Message strings.Builder
Size int // in bytes
Hash string
Author string
Date time.Time
Message strings.Builder
Size int // in bytes

hasDiffs bool
}
Expand All @@ -132,15 +131,10 @@ const (
CommitLine
MergeLine
AuthorLine
AuthorDateLine
CommitterLine
CommitterDateLine
DateLine
MessageStartLine
MessageLine
MessageEndLine
NotesStartLine
NotesLine
NotesEndLine
DiffLine
ModeLine
IndexLine
Expand All @@ -158,15 +152,10 @@ func (state ParseState) String() string {
"CommitLine",
"MergeLine",
"AuthorLine",
"AuthorDateLine",
"CommitterLine",
"CommitterDateLine",
"DateLine",
"MessageStartLine",
"MessageLine",
"MessageEndLine",
"NotesStartLine",
"NotesLine",
"NotesEndLine",
"DiffLine",
"ModeLine",
"IndexLine",
Expand Down Expand Up @@ -220,15 +209,7 @@ func NewParser(options ...Option) *Parser {
// RepoPath parses the output of the `git log` command for the `source` path.
// The Diff chan will return diffs in the order they are parsed from the log.
func (c *Parser) RepoPath(ctx context.Context, source string, head string, abbreviatedLog bool, excludedGlobs []string, isBare bool) (chan *Diff, error) {
args := []string{
"-C", source,
"log",
"--patch", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---patch
"--full-history",
"--date=format:%a %b %d %H:%M:%S %Y %z",
"--pretty=fuller", // https://git-scm.com/docs/git-log#_pretty_formats
"--notes", // https://git-scm.com/docs/git-log#Documentation/git-log.txt---notesltrefgt
}
args := []string{"-C", source, "log", "-p", "--full-history", "--date=format:%a %b %d %H:%M:%S %Y %z"}
if abbreviatedLog {
args = append(args, "--diff-filter=AM")
}
Expand Down Expand Up @@ -392,23 +373,16 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
latestState = MergeLine
case isAuthorLine(isStaged, latestState, line):
latestState = AuthorLine
currentCommit.Author = strings.TrimSpace(string(line[8:]))
case isAuthorDateLine(isStaged, latestState, line):
latestState = AuthorDateLine
currentCommit.Author = strings.TrimRight(string(line[8:]), "\n")

case isDateLine(isStaged, latestState, line):
latestState = DateLine

date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[12:])))
date, err := time.Parse(c.dateFormat, strings.TrimSpace(string(line[6:])))
if err != nil {
ctx.Logger().Error(err, "failed to parse commit date", "commit", currentCommit.Hash, "latestState", latestState.String())
latestState = ParseFailure
continue
ctx.Logger().V(2).Info("Could not parse date from git stream.", "error", err)
}
currentCommit.Date = date
case isCommitterLine(isStaged, latestState, line):
latestState = CommitterLine
currentCommit.Committer = strings.TrimSpace(string(line[8:]))
case isCommitterDateLine(isStaged, latestState, line):
latestState = CommitterDateLine
// NoOp
case isMessageStartLine(isStaged, latestState, line):
latestState = MessageStartLine
// NoOp
Expand All @@ -419,17 +393,6 @@ func (c *Parser) FromReader(ctx context.Context, stdOut io.Reader, diffChan chan
case isMessageEndLine(isStaged, latestState, line):
latestState = MessageEndLine
// NoOp
case isNotesStartLine(isStaged, latestState, line):
latestState = NotesStartLine

currentCommit.Message.WriteString("\n")
currentCommit.Message.Write(line)
case isNotesLine(isStaged, latestState, line):
latestState = NotesLine
currentCommit.Message.Write(line[4:]) // Notes are indented by 4 spaces.
case isNotesEndLine(isStaged, latestState, line):
latestState = NotesEndLine
// NoOp
case isDiffLine(isStaged, latestState, line):
latestState = DiffLine

Expand Down Expand Up @@ -614,42 +577,20 @@ func isAuthorLine(isStaged bool, latestState ParseState, line []byte) bool {
return false
}

// AuthorDate: Tue Aug 10 15:20:40 2021 +0100
func isAuthorDateLine(isStaged bool, latestState ParseState, line []byte) bool {
// Date: Tue Aug 10 15:20:40 2021 +0100
func isDateLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != AuthorLine {
return false
}
if len(line) > 10 && bytes.Equal(line[:11], []byte("AuthorDate:")) {
return true
}
return false
}

// Commit: Bill Rich <bill.rich@trufflesec.com>
func isCommitterLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != AuthorDateLine {
return false
}
if len(line) > 8 && bytes.Equal(line[:7], []byte("Commit:")) {
if len(line) > 7 && bytes.Equal(line[:5], []byte("Date:")) {
return true
}
return false
}

// CommitDate: Wed Apr 17 19:59:28 2024 -0400
func isCommitterDateLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != CommitterLine {
return false
}
if len(line) > 10 && bytes.Equal(line[:11], []byte("CommitDate:")) {
return true
}
return false
}

// Line directly after CommitterDate with only a newline.
// Line directly after Date with only a newline.
func isMessageStartLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != CommitterDateLine {
if isStaged || latestState != DateLine {
return false
}
// TODO: Improve the implementation of this and isMessageEndLine
Expand Down Expand Up @@ -681,51 +622,15 @@ func isMessageEndLine(isStaged bool, latestState ParseState, line []byte) bool {
return false
}

// `Notes:` or `Notes (context):`
// See https://tylercipriani.com/blog/2022/11/19/git-notes-gits-coolest-most-unloved-feature/
func isNotesStartLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != MessageEndLine {
return false
}
if len(line) > 5 && bytes.Equal(line[:5], []byte("Notes")) {
return true
}
return false
}

// Line after NotesStartLine that starts with 4 spaces
func isNotesLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || !(latestState == NotesStartLine || latestState == NotesLine) {
return false
}
if len(line) > 4 && bytes.Equal(line[:4], []byte(" ")) {
return true
}
return false
}

// Line directly after NotesLine with only a newline.
func isNotesEndLine(isStaged bool, latestState ParseState, line []byte) bool {
if isStaged || latestState != NotesLine {
return false
}
if len(strings.TrimRight(string(line[:]), "\r\n")) == 0 {
return true
}
return false
}

// diff --git a/internal/addrs/move_endpoint_module.go b/internal/addrs/move_endpoint_module.go
func isDiffLine(isStaged bool, latestState ParseState, line []byte) bool {
if !(latestState == MessageStartLine || // Empty commit messages can go from MessageStart->Diff
latestState == MessageEndLine ||
latestState == NotesEndLine ||
latestState == BinaryFileLine ||
latestState == ModeLine ||
latestState == IndexLine ||
latestState == HunkContentLine ||
latestState == ParseFailure) {
if !(isStaged && latestState == Initial) {
if latestState == Initial && !isStaged {
return false
}
}
Expand Down
Loading
Loading