diff --git a/private/buf/bufsync/bufsync.go b/private/buf/bufsync/bufsync.go index 2a9da8e276..fb2faa9791 100644 --- a/private/buf/bufsync/bufsync.go +++ b/private/buf/bufsync/bufsync.go @@ -158,6 +158,15 @@ func SyncerWithModuleDefaultBranchGetter(getter ModuleDefaultBranchGetter) Synce } } +// SyncerWithAllBranches sets the syncer to sync all branches. Be default the syncer only processes +// commits in the current checked out branch. +func SyncerWithAllBranches() SyncerOption { + return func(s *syncer) error { + s.allBranches = true + return nil + } +} + // SyncFunc is invoked by Syncer to process a sync point. If an error is returned, // sync will abort. type SyncFunc func(ctx context.Context, commit ModuleCommit) error diff --git a/private/buf/bufsync/new_repo_test.go b/private/buf/bufsync/new_repo_test.go index c145ffe1d5..f6bf099cf3 100644 --- a/private/buf/bufsync/new_repo_test.go +++ b/private/buf/bufsync/new_repo_test.go @@ -38,6 +38,7 @@ func scaffoldGitRepository(t *testing.T) git.Repository { dir := scaffoldGitRepositoryDir(t, runner) dotGitPath := path.Join(dir, git.DotGitDir) repo, err := git.OpenRepository( + context.Background(), dotGitPath, runner, ) diff --git a/private/buf/bufsync/syncer.go b/private/buf/bufsync/syncer.go index ad77f0eef0..e2ba78c3b8 100644 --- a/private/buf/bufsync/syncer.go +++ b/private/buf/bufsync/syncer.go @@ -38,10 +38,11 @@ type syncer struct { syncPointResolver SyncPointResolver syncedGitCommitChecker SyncedGitCommitChecker moduleDefaultBranchGetter ModuleDefaultBranchGetter + allBranches bool // scanned information from the repo on sync start tagsByCommitHash map[string][]string - remoteBranches map[string]struct{} + branchesToSync map[string]struct{} } func newSyncer( @@ -122,26 +123,28 @@ func (s *syncer) Sync(ctx context.Context, syncFunc SyncFunc) error { if err := s.validateDefaultBranches(ctx); err != nil { return err } - allBranchesSyncPoints := make(map[string]map[Module]git.Hash) - for branch := range s.remoteBranches { + branchesSyncPoints := make(map[string]map[Module]git.Hash) + for branch := range s.branchesToSync { syncPoints, err := s.resolveSyncPoints(ctx, branch) if err != nil { return fmt.Errorf("resolve sync points for branch %q: %w", branch, err) } - allBranchesSyncPoints[branch] = syncPoints + branchesSyncPoints[branch] = syncPoints } - // first, default branch - baseBranch := s.repo.BaseBranch() - if err := s.syncBranch(ctx, baseBranch, allBranchesSyncPoints[baseBranch], syncFunc); err != nil { - return fmt.Errorf("sync base branch %q: %w", baseBranch, err) + // first, default branch, if present + defaultBranch := s.repo.DefaultBranch() + if _, shouldSyncDefaultBranch := s.branchesToSync[defaultBranch]; shouldSyncDefaultBranch { + if err := s.syncBranch(ctx, defaultBranch, branchesSyncPoints[defaultBranch], syncFunc); err != nil { + return fmt.Errorf("sync default branch %q: %w", defaultBranch, err) + } } // then the rest of the branches, in a deterministic order - remoteBranches := stringutil.MapToSortedSlice(s.remoteBranches) - for _, branch := range remoteBranches { - if branch == baseBranch { + sortedBranchesToSync := stringutil.MapToSortedSlice(s.branchesToSync) + for _, branch := range sortedBranchesToSync { + if branch == defaultBranch { continue // default branch already synced } - if err := s.syncBranch(ctx, branch, allBranchesSyncPoints[branch], syncFunc); err != nil { + if err := s.syncBranch(ctx, branch, branchesSyncPoints[branch], syncFunc); err != nil { return fmt.Errorf("sync branch %q: %w", branch, err) } } @@ -151,7 +154,7 @@ func (s *syncer) Sync(ctx context.Context, syncFunc SyncFunc) error { // validateDefaultBranches checks that all modules to sync, are being synced to BSR repositories // that have the same default git branch as this repo. func (s *syncer) validateDefaultBranches(ctx context.Context) error { - expectedDefaultGitBranch := s.repo.BaseBranch() + expectedDefaultGitBranch := s.repo.DefaultBranch() if s.moduleDefaultBranchGetter == nil { s.logger.Warn( "default branch validation skipped for all modules", @@ -268,7 +271,7 @@ func (s *syncer) commitsToSync( continue } if commitHash != expectedSyncPoint.Hex() { - if s.repo.BaseBranch() == branch { + if s.repo.DefaultBranch() == branch { // TODO: add details to error message saying: "run again with --force-branch-sync " when we support a flag like that. return fmt.Errorf( @@ -335,9 +338,7 @@ func (s *syncer) isGitCommitSynced(ctx context.Context, module Module, commitHas return synced, nil } -// scanRepo gathers repo information and stores it in the syncer: gathers all tags, all -// remote/origin branches, and visits each commit on them, starting from the base branch, to assign -// it to a unique branch and sort them by aithor timestamp. +// scanRepo gathers repo information and stores it in the syncer, like tags and branches to sync. func (s *syncer) scanRepo() error { s.tagsByCommitHash = make(map[string][]string) if err := s.repo.ForEachTag(func(tag string, commitHash git.Hash) error { @@ -346,16 +347,28 @@ func (s *syncer) scanRepo() error { }); err != nil { return fmt.Errorf("load tags: %w", err) } - s.remoteBranches = make(map[string]struct{}) + remoteBranches := make(map[string]struct{}) if err := s.repo.ForEachBranch(func(branch string, _ git.Hash) error { - s.remoteBranches[branch] = struct{}{} + remoteBranches[branch] = struct{}{} return nil }); err != nil { - return fmt.Errorf("looping over repo branches: %w", err) - } - baseBranch := s.repo.BaseBranch() - if _, baseBranchPushedInRemote := s.remoteBranches[baseBranch]; !baseBranchPushedInRemote { - return fmt.Errorf(`repo base branch %q is not present in "origin" remote`, baseBranch) + return fmt.Errorf("looping over repo remote branches: %w", err) + } + if s.allBranches { + s.branchesToSync = remoteBranches + // make sure the default branch is present in the branches to sync + defaultBranch := s.repo.DefaultBranch() + if _, isDefaultBranchPushedInRemote := remoteBranches[defaultBranch]; !isDefaultBranchPushedInRemote { + return fmt.Errorf(`default branch %q is not present in "origin" remote`, defaultBranch) + } + } else { + // only sync current branch, make sure it's present in remote + currentBranch := s.repo.CurrentBranch() + if _, isCurrentBranchPushedInRemote := remoteBranches[currentBranch]; !isCurrentBranchPushedInRemote { + return fmt.Errorf(`current branch %q is not present in "origin" remote`, currentBranch) + } + s.branchesToSync = map[string]struct{}{currentBranch: {}} + s.logger.Debug("current branch", zap.String("name", currentBranch)) } return nil } diff --git a/private/buf/cmd/buf/command/alpha/repo/reposync/reposync.go b/private/buf/cmd/buf/command/alpha/repo/reposync/reposync.go index 24aec224b2..ffa5d47057 100644 --- a/private/buf/cmd/buf/command/alpha/repo/reposync/reposync.go +++ b/private/buf/cmd/buf/command/alpha/repo/reposync/reposync.go @@ -49,6 +49,7 @@ const ( moduleFlagName = "module" createFlagName = "create" createVisibilityFlagName = "create-visibility" + allBranchesFlagName = "all-branches" ) // NewCommand returns a new Command. @@ -61,8 +62,9 @@ func NewCommand( Use: name, Short: "Sync a Git repository to a registry", Long: "Sync a Git repository's commits to a registry in topological order. " + - "Only commits belonging to the 'origin' remote are processed, which means that " + - "commits must be pushed to a remote. " + + "Only commits in the current branch that are pushed to the 'origin' remote are processed. " + + "Syncing all branches is possible using '--all-branches' flag." + + // TODO rephrase in favor of a default module behavior. "Only modules specified via '--module' are synced.", Args: cobra.NoArgs, Run: builder.NewRunFunc( @@ -80,6 +82,7 @@ type flags struct { Modules []string Create bool CreateVisibility string + AllBranches bool } func newFlags() *flags { @@ -96,7 +99,7 @@ func (f *flags) Bind(flagSet *pflag.FlagSet) { stringutil.SliceToString(bufanalysis.AllFormatStrings), ), ) - // TODO: before we take this to beta, and as part of mult-module support, re-evaluate the UX of this flag + // TODO: rework in favor of a default module behavior. flagSet.StringSliceVar( &f.Modules, moduleFlagName, @@ -113,6 +116,16 @@ func (f *flags) Bind(flagSet *pflag.FlagSet) { false, fmt.Sprintf("Create the repository if it does not exist. Must set a visibility using --%s", createVisibilityFlagName), ) + flagSet.BoolVar( + &f.AllBranches, + allBranchesFlagName, + false, + "Sync all git repository branches and not only the checked out one. "+ + "Only commits pushed to the 'origin' remote are processed. "+ + "Order of sync for git branches is as follows: First, it syncs the default branch read "+ + "from 'refs/remotes/origin/HEAD', and then all the rest of the branches present in "+ + "'refs/remotes/origin/*' in a lexicographical order.", + ) } func run( @@ -141,6 +154,7 @@ func run( flags.Modules, // No need to pass `flags.Create`, this is not empty iff `flags.Create` flags.CreateVisibility, + flags.AllBranches, ) } @@ -149,6 +163,7 @@ func sync( container appflag.Container, modules []string, createWithVisibility string, + allBranches bool, ) error { if len(modules) == 0 { container.Logger().Info("no modules to sync") @@ -156,7 +171,7 @@ func sync( } // Assume that this command is run from the repository root. If not, `OpenRepository` will return // a dir not found error. - repo, err := git.OpenRepository(git.DotGitDir, command.NewRunner()) + repo, err := git.OpenRepository(ctx, git.DotGitDir, command.NewRunner()) if err != nil { return fmt.Errorf("open repository: %w", err) } @@ -174,6 +189,9 @@ func sync( bufsync.SyncerWithGitCommitChecker(syncGitCommitChecker(clientConfig)), bufsync.SyncerWithModuleDefaultBranchGetter(defaultBranchGetter(clientConfig)), } + if allBranches { + syncerOptions = append(syncerOptions, bufsync.SyncerWithAllBranches()) + } for _, module := range modules { var moduleIdentityOverride bufmoduleref.ModuleIdentity colon := strings.IndexRune(module, ':') diff --git a/private/pkg/git/git.go b/private/pkg/git/git.go index 9b2912c757..675475726c 100644 --- a/private/pkg/git/git.go +++ b/private/pkg/git/git.go @@ -266,11 +266,13 @@ type TreeNode interface { // Repository is a git repository that is backed by a `.git` directory. type Repository interface { - // BaseBranch is the base branch of the repository. This is either configured - // via the `OpenRepositoryWithBaseBranch` option, or discovered via the remote - // named `origin`. Therefore, discovery requires that the repository is pushed - // to the remote. - BaseBranch() string + // DefaultBranch is the default branch of the repository. This is either configured via the + // `OpenRepositoryWithDefaultBranch` option, or discovered from the value in + // `.git/refs/remotes/origin/HEAD`. Therefore, discovery requires that the repository is pushed to + // a remote named `origin`. + DefaultBranch() string + // CurrentBranch is the current checked out branch. + CurrentBranch() string // ForEachBranch ranges over branches in the repository in an undefined order. // // Only branches pushed to a remote named "origin" are visited. @@ -288,6 +290,8 @@ type Repository interface { // ForEachTag ranges over tags in the repository in an undefined order. // // All tags are ranged, including local (unpushed) tags. + // + // TODO: only loop over remote tags, or inform the callback if the tag is local/remote. ForEachTag(func(tag string, commitHash Hash) error) error // Objects exposes the underlying object reader to read objects directly from the // `.git` directory. @@ -296,29 +300,29 @@ type Repository interface { Close() error } -// OpenRepository opens a new Repository from a `.git` directory. The provided path to the -// `.git` dir need not be normalized or cleaned. +// OpenRepository opens a new Repository from a `.git` directory. The provided path to the `.git` +// dir need not be normalized or cleaned. // -// Internally, OpenRepository will spawns a new process to communicate with `git-cat-file`, -// so the caller must close the repository to clean up resources. +// Internally, OpenRepository will spawns a new process to communicate with `git-cat-file`, so the +// caller must close the repository to clean up resources. // -// By default, OpenRepository will attempt to detect the base branch if the repository -// has been pushed. This may fail if the repository is not pushed. In this case, use the -// `OpenRepositoryWithBaseBranch` option. -func OpenRepository(gitDirPath string, runner command.Runner, options ...OpenRepositoryOption) (Repository, error) { - return openGitRepository(gitDirPath, runner, options...) +// By default, OpenRepository will attempt to detect the default branch if the repository has been +// pushed. This may fail if the repository is not pushed. In this case, use the +// `OpenRepositoryWithDefaultBranch` option. +func OpenRepository(ctx context.Context, gitDirPath string, runner command.Runner, options ...OpenRepositoryOption) (Repository, error) { + return openGitRepository(ctx, gitDirPath, runner, options...) } // OpenRepositoryOption configures the opening of a repository. type OpenRepositoryOption func(*openRepositoryOpts) error -// CommitIteratorWithBaseBranch configures the base branch for this iterator. -func OpenRepositoryWithBaseBranch(name string) OpenRepositoryOption { +// OpenRepositoryWithDefaultBranch configures the default branch for this repository. +func OpenRepositoryWithDefaultBranch(name string) OpenRepositoryOption { return func(r *openRepositoryOpts) error { if name == "" { - return errors.New("base branch cannot be empty") + return errors.New("default branch cannot be empty") } - r.baseBranch = name + r.defaultBranch = name return nil } } diff --git a/private/pkg/git/gittest/gittest.go b/private/pkg/git/gittest/gittest.go index 6a89ea5bb7..7f178db31f 100644 --- a/private/pkg/git/gittest/gittest.go +++ b/private/pkg/git/gittest/gittest.go @@ -36,9 +36,10 @@ func ScaffoldGitRepository(t *testing.T) git.Repository { dir := scaffoldGitRepository(t, runner) dotGitPath := path.Join(dir, git.DotGitDir) repo, err := git.OpenRepository( + context.Background(), dotGitPath, runner, - git.OpenRepositoryWithBaseBranch(DefaultBranch), + git.OpenRepositoryWithDefaultBranch(DefaultBranch), ) require.NoError(t, err) t.Cleanup(func() { diff --git a/private/pkg/git/repository.go b/private/pkg/git/repository.go index b1b00248cd..e4cfbc6ad1 100644 --- a/private/pkg/git/repository.go +++ b/private/pkg/git/repository.go @@ -16,8 +16,10 @@ package git import ( "bytes" + "context" "errors" "fmt" + "io" "io/fs" "os" "path" @@ -31,16 +33,17 @@ import ( const defaultRemoteName = "origin" -var baseBranchRefPrefix = []byte("ref: refs/remotes/" + defaultRemoteName + "/") +var defaultBranchRefPrefix = []byte("ref: refs/remotes/" + defaultRemoteName + "/") type openRepositoryOpts struct { - baseBranch string + defaultBranch string } type repository struct { - gitDirPath string - baseBranch string - objectReader *objectReader + gitDirPath string + defaultBranch string + checkedOutBranch string + objectReader *objectReader // packedOnce controls the fields below related to reading the `packed-refs` file packedOnce sync.Once @@ -50,6 +53,7 @@ type repository struct { } func openGitRepository( + ctx context.Context, gitDirPath string, runner command.Runner, options ...OpenRepositoryOption, @@ -72,16 +76,21 @@ func openGitRepository( if err != nil { return nil, err } - if opts.baseBranch == "" { - opts.baseBranch, err = detectBaseBranch(gitDirPath) + if opts.defaultBranch == "" { + opts.defaultBranch, err = detectDefaultBranch(gitDirPath) if err != nil { - return nil, fmt.Errorf("automatically determine base branch: %w", err) + return nil, fmt.Errorf("automatically determine default branch: %w", err) } } + checkedOutBranch, err := detectCheckedOutBranch(ctx, gitDirPath, runner) + if err != nil { + return nil, fmt.Errorf("automatically determine checked out branch: %w", err) + } return &repository{ - gitDirPath: gitDirPath, - baseBranch: opts.baseBranch, - objectReader: reader, + gitDirPath: gitDirPath, + defaultBranch: opts.defaultBranch, + checkedOutBranch: checkedOutBranch, + objectReader: reader, }, nil } @@ -136,8 +145,13 @@ func (r *repository) ForEachBranch(f func(string, Hash) error) error { } return nil } -func (r *repository) BaseBranch() string { - return r.baseBranch + +func (r *repository) DefaultBranch() string { + return r.defaultBranch +} + +func (r *repository) CurrentBranch() string { + return r.checkedOutBranch } func (r *repository) ForEachCommit(branch string, f func(Commit) error) error { @@ -166,11 +180,6 @@ func (r *repository) ForEachCommit(branch string, f func(Commit) error) error { } } -func (r *repository) HEADCommit(branch string) (Commit, error) { - branch = normalpath.Unnormalize(branch) - return r.resolveBranch(branch) -} - func (r *repository) ForEachTag(f func(string, Hash) error) error { seen := map[string]struct{}{} // Read unpacked tag refs. @@ -237,8 +246,8 @@ func (r *repository) ForEachTag(f func(string, Hash) error) error { return nil } -// resolveBranch resolves a commit from branch name if its present in the "origin" remote. -func (r *repository) resolveBranch(branch string) (Commit, error) { +// HEADCommit resolves the HEAD commit from branch name if its present in the "origin" remote. +func (r *repository) HEADCommit(branch string) (Commit, error) { commitBytes, err := os.ReadFile(path.Join(r.gitDirPath, "refs", "remotes", defaultRemoteName, branch)) if errors.Is(err, fs.ErrNotExist) { // it may be that the branch ref is packed; let's read the packed refs @@ -291,20 +300,57 @@ func (r *repository) readPackedRefs() error { return r.packedReadError } -func detectBaseBranch(gitDirPath string) (string, error) { +func detectDefaultBranch(gitDirPath string) (string, error) { path := path.Join(gitDirPath, "refs", "remotes", defaultRemoteName, "HEAD") data, err := os.ReadFile(path) if err != nil { return "", err } - if !bytes.HasPrefix(data, baseBranchRefPrefix) { + if !bytes.HasPrefix(data, defaultBranchRefPrefix) { return "", errors.New("invalid contents in " + path) } - data = bytes.TrimPrefix(data, baseBranchRefPrefix) + data = bytes.TrimPrefix(data, defaultBranchRefPrefix) data = bytes.TrimSuffix(data, []byte("\n")) return string(data), nil } +func detectCheckedOutBranch(ctx context.Context, gitDirPath string, runner command.Runner) (string, error) { + var ( + stdOutBuffer = bytes.NewBuffer(nil) + stdErrBuffer = bytes.NewBuffer(nil) + ) + if err := runner.Run( + ctx, + "git", + command.RunWithArgs( + "rev-parse", + "--abbrev-ref", + "HEAD", + ), + command.RunWithStdout(stdOutBuffer), + command.RunWithStderr(stdErrBuffer), + command.RunWithDir(gitDirPath), // exec command at the root of the git repo + ); err != nil { + stdErrMsg, err := io.ReadAll(stdErrBuffer) + if err != nil { + stdErrMsg = []byte(fmt.Sprintf("read stderr: %s", err.Error())) + } + return "", fmt.Errorf("git rev-parse: %w (%s)", err, string(stdErrMsg)) + } + stdOut, err := io.ReadAll(stdOutBuffer) + if err != nil { + return "", fmt.Errorf("read current branch: %w", err) + } + currentBranch := string(bytes.TrimSuffix(stdOut, []byte("\n"))) + if currentBranch == "" { + return "", errors.New("empty current branch") + } + if currentBranch == "HEAD" { + return "", errors.New("no current branch, git HEAD is detached") + } + return currentBranch, nil +} + // validateDirPathExists returns a non-nil error if the given dirPath // is not a valid directory path. func validateDirPathExists(dirPath string) error { diff --git a/private/pkg/git/repository_test.go b/private/pkg/git/repository_test.go index 4de40e5b5d..02df5148f7 100644 --- a/private/pkg/git/repository_test.go +++ b/private/pkg/git/repository_test.go @@ -85,10 +85,16 @@ func TestBranches(t *testing.T) { t.Parallel() repo := gittest.ScaffoldGitRepository(t) + assert.Equal(t, gittest.DefaultBranch, repo.CurrentBranch()) + var branches []string err := repo.ForEachBranch(func(branch string, headHash git.Hash) error { branches = append(branches, branch) + headCommit, err := repo.HEADCommit(branch) + require.NoError(t, err) + assert.Equal(t, headHash, headCommit.Hash()) + commit, err := repo.Objects().Commit(headHash) require.NoError(t, err) switch branch { diff --git a/private/pkg/storage/storagegit/storagegit_test.go b/private/pkg/storage/storagegit/storagegit_test.go index 5d7069fb44..e1328c108b 100644 --- a/private/pkg/storage/storagegit/storagegit_test.go +++ b/private/pkg/storage/storagegit/storagegit_test.go @@ -27,7 +27,7 @@ func TestNewBucketAtTreeHash(t *testing.T) { repo := gittest.ScaffoldGitRepository(t) provider := NewProvider(repo.Objects()) - headCommit, err := repo.HEADCommit(repo.BaseBranch()) + headCommit, err := repo.HEADCommit(repo.DefaultBranch()) require.NoError(t, err) require.NotNil(t, headCommit) bucket, err := provider.NewReadBucket(headCommit.Tree())