Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sync single branch by default #2329

Merged
merged 23 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
8d7bb89
Repo's default branch RPCs
unmultimedio Jul 27, 2023
3a28aad
Fix repo service mock
unmultimedio Jul 27, 2023
60f2551
Add audit event
unmultimedio Jul 27, 2023
90b29bc
Remove new RPCs in favor of embedding into repo
unmultimedio Jul 28, 2023
e00c1ea
Merge remote-tracking branch 'origin/main' into jfigueroa/default-bra…
unmultimedio Jul 28, 2023
95bcfba
Remove RPCs from mock
unmultimedio Jul 28, 2023
f47d647
Remove "git" prefix to default branch in repository
unmultimedio Jul 28, 2023
e6a71bb
Add default branch validation step
unmultimedio Jul 28, 2023
a048356
Improve default branch validation
unmultimedio Jul 28, 2023
766c06d
Fix linter
unmultimedio Jul 28, 2023
32adbca
Single branch default behavior
unmultimedio Jul 31, 2023
37b9139
s/baseBranch/defaultBranch/
unmultimedio Jul 31, 2023
3bc0737
Use the current branch in syncer
unmultimedio Jul 31, 2023
ab7017d
Get current branch
unmultimedio Jul 31, 2023
6d03ca8
Test current branch
unmultimedio Jul 31, 2023
0760c7f
Pass ctx to open repository
unmultimedio Jul 31, 2023
fbb1d3d
Error on detached head
unmultimedio Jul 31, 2023
648d6c8
Fix check remote presence
unmultimedio Jul 31, 2023
5c3bfdd
Remove unnecessary func
unmultimedio Jul 31, 2023
ec3b151
Merge branch 'main' into jfigueroa/make-sure-default-branches-are-in-…
unmultimedio Jul 31, 2023
d2e2796
Merge commit 'ec3b15163c9f707776f97c863d247dbe2fa63072' into jfiguero…
unmultimedio Jul 31, 2023
28cef8c
Merge remote-tracking branch 'origin/main' into jfigueroa/sync-single…
unmultimedio Aug 1, 2023
c05c734
Fix cmd description
unmultimedio Aug 1, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions private/buf/bufsync/bufsync.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package bufsync

import (
"context"
"errors"
"fmt"

"github.com/bufbuild/buf/private/bufpkg/bufmodule/bufmoduleref"
Expand All @@ -25,6 +26,9 @@ import (
"go.uber.org/zap"
)

// ErrModuleDoesNotExist is an error returned when looking for a remote module.
var ErrModuleDoesNotExist = errors.New("BSR module does not exist")

// ErrorHandler handles errors reported by the Syncer. If a non-nil
// error is returned by the handler, sync will abort in a partially-synced
// state.
Expand Down Expand Up @@ -144,6 +148,25 @@ func SyncerWithGitCommitChecker(checker SyncedGitCommitChecker) SyncerOption {
}
}

// SyncerWithModuleDefaultBranchGetter configures a getter for modules' default branch, to contrast
// a BSR repository default branch vs the local git repository branch. If left empty, the syncer
// skips this validation step.
func SyncerWithModuleDefaultBranchGetter(getter ModuleDefaultBranchGetter) SyncerOption {
return func(s *syncer) error {
s.moduleDefaultBranchGetter = getter
return nil
}
}

// SyncerWithAllBranches sets the syncer to sync all branches. Be default the syncer only processes
// commits in the current checked out branch.
func SyncerWithAllBranches() SyncerOption {
return func(s *syncer) error {
s.allBranches = true
return nil
}
}

// SyncFunc is invoked by Syncer to process a sync point. If an error is returned,
// sync will abort.
type SyncFunc func(ctx context.Context, commit ModuleCommit) error
Expand All @@ -166,6 +189,14 @@ type SyncedGitCommitChecker func(
commitHashes map[string]struct{},
) (map[string]struct{}, error)

// ModuleDefaultBranchGetter is invoked before syncing, to make sure all modules that are about to
// be synced have a BSR default branch that matches the local git repo. If the BSR remote module
// does not exist, the implementation should return `ModuleDoesNotExistErr` error.
type ModuleDefaultBranchGetter func(
ctx context.Context,
module bufmoduleref.ModuleIdentity,
) (string, error)

// ModuleCommit is a module at a particular commit.
type ModuleCommit interface {
// Identity is the identity of the module, accounting for any configured override.
Expand Down
1 change: 1 addition & 0 deletions private/buf/bufsync/new_repo_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ func scaffoldGitRepository(t *testing.T) git.Repository {
dir := scaffoldGitRepositoryDir(t, runner)
dotGitPath := path.Join(dir, git.DotGitDir)
repo, err := git.OpenRepository(
context.Background(),
dotGitPath,
runner,
)
Expand Down
116 changes: 87 additions & 29 deletions private/buf/bufsync/syncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,24 @@ import (
"github.com/bufbuild/buf/private/pkg/storage"
"github.com/bufbuild/buf/private/pkg/storage/storagegit"
"github.com/bufbuild/buf/private/pkg/stringutil"
"go.uber.org/multierr"
"go.uber.org/zap"
)

type syncer struct {
logger *zap.Logger
repo git.Repository
storageGitProvider storagegit.Provider
errorHandler ErrorHandler
modulesToSync []Module
syncPointResolver SyncPointResolver
syncedGitCommitChecker SyncedGitCommitChecker
logger *zap.Logger
repo git.Repository
storageGitProvider storagegit.Provider
errorHandler ErrorHandler
modulesToSync []Module
syncPointResolver SyncPointResolver
syncedGitCommitChecker SyncedGitCommitChecker
moduleDefaultBranchGetter ModuleDefaultBranchGetter
allBranches bool

// scanned information from the repo on sync start
tagsByCommitHash map[string][]string
remoteBranches map[string]struct{}
branchesToSync map[string]struct{}
}

func newSyncer(
Expand Down Expand Up @@ -117,32 +120,77 @@ func (s *syncer) Sync(ctx context.Context, syncFunc SyncFunc) error {
if err := s.scanRepo(); err != nil {
return fmt.Errorf("scan repo: %w", err)
}
allBranchesSyncPoints := make(map[string]map[Module]git.Hash)
for branch := range s.remoteBranches {
if err := s.validateDefaultBranches(ctx); err != nil {
return err
}
branchesSyncPoints := make(map[string]map[Module]git.Hash)
for branch := range s.branchesToSync {
syncPoints, err := s.resolveSyncPoints(ctx, branch)
if err != nil {
return fmt.Errorf("resolve sync points for branch %q: %w", branch, err)
}
allBranchesSyncPoints[branch] = syncPoints
branchesSyncPoints[branch] = syncPoints
}
// first, default branch
baseBranch := s.repo.BaseBranch()
if err := s.syncBranch(ctx, baseBranch, allBranchesSyncPoints[baseBranch], syncFunc); err != nil {
return fmt.Errorf("sync base branch %q: %w", baseBranch, err)
// first, default branch, if present
defaultBranch := s.repo.DefaultBranch()
if _, shouldSyncDefaultBranch := s.branchesToSync[defaultBranch]; shouldSyncDefaultBranch {
if err := s.syncBranch(ctx, defaultBranch, branchesSyncPoints[defaultBranch], syncFunc); err != nil {
return fmt.Errorf("sync default branch %q: %w", defaultBranch, err)
}
}
// then the rest of the branches, in a deterministic order
remoteBranches := stringutil.MapToSortedSlice(s.remoteBranches)
for _, branch := range remoteBranches {
if branch == baseBranch {
sortedBranchesToSync := stringutil.MapToSortedSlice(s.branchesToSync)
for _, branch := range sortedBranchesToSync {
if branch == defaultBranch {
continue // default branch already synced
}
if err := s.syncBranch(ctx, branch, allBranchesSyncPoints[branch], syncFunc); err != nil {
if err := s.syncBranch(ctx, branch, branchesSyncPoints[branch], syncFunc); err != nil {
return fmt.Errorf("sync branch %q: %w", branch, err)
}
}
return nil
}

// validateDefaultBranches checks that all modules to sync, are being synced to BSR repositories
// that have the same default git branch as this repo.
func (s *syncer) validateDefaultBranches(ctx context.Context) error {
expectedDefaultGitBranch := s.repo.DefaultBranch()
if s.moduleDefaultBranchGetter == nil {
s.logger.Warn(
"default branch validation skipped for all modules",
zap.String("expected_default_branch", expectedDefaultGitBranch),
)
return nil
}
var validationErr error
for _, module := range s.modulesToSync {
bsrDefaultBranch, err := s.moduleDefaultBranchGetter(ctx, module.RemoteIdentity())
if err != nil {
if errors.Is(err, ErrModuleDoesNotExist) {
s.logger.Warn(
"default branch validation skipped",
zap.String("expected_default_branch", expectedDefaultGitBranch),
zap.String("module", module.RemoteIdentity().IdentityString()),
zap.Error(err),
)
continue
}
validationErr = multierr.Append(validationErr, fmt.Errorf("getting bsr module %q default branch: %w", module.RemoteIdentity().IdentityString(), err))
continue
}
if bsrDefaultBranch != expectedDefaultGitBranch {
validationErr = multierr.Append(
validationErr,
fmt.Errorf(
"remote module %q with default branch %q does not match the git repository's default branch %q, aborting sync",
module.RemoteIdentity().IdentityString(), bsrDefaultBranch, expectedDefaultGitBranch,
),
)
}
}
return validationErr
}

// syncBranch syncs all modules in a branch.
func (s *syncer) syncBranch(
ctx context.Context,
Expand Down Expand Up @@ -223,7 +271,7 @@ func (s *syncer) commitsToSync(
continue
}
if commitHash != expectedSyncPoint.Hex() {
if s.repo.BaseBranch() == branch {
if s.repo.DefaultBranch() == branch {
// TODO: add details to error message saying: "run again with --force-branch-sync <branch
// name>" when we support a flag like that.
return fmt.Errorf(
Expand Down Expand Up @@ -290,9 +338,7 @@ func (s *syncer) isGitCommitSynced(ctx context.Context, module Module, commitHas
return synced, nil
}

// scanRepo gathers repo information and stores it in the syncer: gathers all tags, all
// remote/origin branches, and visits each commit on them, starting from the base branch, to assign
// it to a unique branch and sort them by aithor timestamp.
// scanRepo gathers repo information and stores it in the syncer, like tags and branches to sync.
func (s *syncer) scanRepo() error {
s.tagsByCommitHash = make(map[string][]string)
if err := s.repo.ForEachTag(func(tag string, commitHash git.Hash) error {
Expand All @@ -301,16 +347,28 @@ func (s *syncer) scanRepo() error {
}); err != nil {
return fmt.Errorf("load tags: %w", err)
}
s.remoteBranches = make(map[string]struct{})
remoteBranches := make(map[string]struct{})
if err := s.repo.ForEachBranch(func(branch string, _ git.Hash) error {
s.remoteBranches[branch] = struct{}{}
remoteBranches[branch] = struct{}{}
return nil
}); err != nil {
return fmt.Errorf("looping over repo branches: %w", err)
return fmt.Errorf("looping over repo remote branches: %w", err)
}
baseBranch := s.repo.BaseBranch()
if _, baseBranchPushedInRemote := s.remoteBranches[baseBranch]; !baseBranchPushedInRemote {
return fmt.Errorf(`repo base branch %q is not present in "origin" remote`, baseBranch)
if s.allBranches {
s.branchesToSync = remoteBranches
// make sure the default branch is present in the branches to sync
defaultBranch := s.repo.DefaultBranch()
if _, isDefaultBranchPushedInRemote := remoteBranches[defaultBranch]; !isDefaultBranchPushedInRemote {
return fmt.Errorf(`default branch %q is not present in "origin" remote`, defaultBranch)
}
} else {
// only sync current branch, make sure it's present in remote
currentBranch := s.repo.CurrentBranch()
if _, isCurrentBranchPushedInRemote := remoteBranches[currentBranch]; !isCurrentBranchPushedInRemote {
return fmt.Errorf(`current branch %q is not present in "origin" remote`, currentBranch)
}
s.branchesToSync = map[string]struct{}{currentBranch: {}}
s.logger.Debug("current branch", zap.String("name", currentBranch))
}
return nil
}
Expand Down
44 changes: 40 additions & 4 deletions private/buf/cmd/buf/command/alpha/repo/reposync/reposync.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ const (
moduleFlagName = "module"
createFlagName = "create"
createVisibilityFlagName = "create-visibility"
allBranchesFlagName = "all-branches"
)

// NewCommand returns a new Command.
Expand All @@ -61,8 +62,9 @@ func NewCommand(
Use: name,
Short: "Sync a Git repository to a registry",
Long: "Sync a Git repository's commits to a registry in topological order. " +
"Only commits belonging to the 'origin' remote are processed, which means that " +
"commits must be pushed to a remote. " +
"Only commits in the current branch that are pushed to the 'origin' remote are processed. " +
"Syncing all branches is possible using '--all-modules' flag." +
// TODO rephrase in favor of a default module behavior.
unmultimedio marked this conversation as resolved.
Show resolved Hide resolved
"Only modules specified via '--module' are synced.",
Args: cobra.NoArgs,
Run: builder.NewRunFunc(
Expand All @@ -80,6 +82,7 @@ type flags struct {
Modules []string
Create bool
CreateVisibility string
AllBranches bool
}

func newFlags() *flags {
Expand All @@ -96,7 +99,7 @@ func (f *flags) Bind(flagSet *pflag.FlagSet) {
stringutil.SliceToString(bufanalysis.AllFormatStrings),
),
)
// TODO: before we take this to beta, and as part of mult-module support, re-evaluate the UX of this flag
// TODO: rework in favor of a default module behavior.
flagSet.StringSliceVar(
&f.Modules,
moduleFlagName,
Expand All @@ -113,6 +116,16 @@ func (f *flags) Bind(flagSet *pflag.FlagSet) {
false,
fmt.Sprintf("Create the repository if it does not exist. Must set a visibility using --%s", createVisibilityFlagName),
)
flagSet.BoolVar(
&f.AllBranches,
allBranchesFlagName,
false,
"Sync all git repository branches and not only the checked out one. "+
"Only commits pushed to the 'origin' remote are processed. "+
"Order of sync for git branches is as follows: First, it syncs the default branch read "+
"from 'refs/remotes/origin/HEAD', and then all the rest of the branches present in "+
"'refs/remotes/origin/*' in a lexicographical order.",
)
}

func run(
Expand Down Expand Up @@ -141,6 +154,7 @@ func run(
flags.Modules,
// No need to pass `flags.Create`, this is not empty iff `flags.Create`
flags.CreateVisibility,
flags.AllBranches,
)
}

Expand All @@ -149,14 +163,15 @@ func sync(
container appflag.Container,
modules []string,
createWithVisibility string,
allBranches bool,
) error {
if len(modules) == 0 {
container.Logger().Info("no modules to sync")
return nil
}
// Assume that this command is run from the repository root. If not, `OpenRepository` will return
// a dir not found error.
repo, err := git.OpenRepository(git.DotGitDir, command.NewRunner())
repo, err := git.OpenRepository(ctx, git.DotGitDir, command.NewRunner())
if err != nil {
return fmt.Errorf("open repository: %w", err)
}
Expand All @@ -172,6 +187,10 @@ func sync(
syncerOptions := []bufsync.SyncerOption{
bufsync.SyncerWithResumption(syncPointResolver(clientConfig)),
bufsync.SyncerWithGitCommitChecker(syncGitCommitChecker(clientConfig)),
bufsync.SyncerWithModuleDefaultBranchGetter(defaultBranchGetter(clientConfig)),
}
if allBranches {
syncerOptions = append(syncerOptions, bufsync.SyncerWithAllBranches())
}
for _, module := range modules {
var moduleIdentityOverride bufmoduleref.ModuleIdentity
Expand Down Expand Up @@ -284,6 +303,23 @@ func syncGitCommitChecker(clientConfig *connectclient.Config) bufsync.SyncedGitC
}
}

func defaultBranchGetter(clientConfig *connectclient.Config) bufsync.ModuleDefaultBranchGetter {
return func(ctx context.Context, module bufmoduleref.ModuleIdentity) (string, error) {
service := connectclient.Make(clientConfig, module.Remote(), registryv1alpha1connect.NewRepositoryServiceClient)
res, err := service.GetRepositoryByFullName(ctx, connect.NewRequest(&registryv1alpha1.GetRepositoryByFullNameRequest{
FullName: module.Owner() + "/" + module.Repository(),
}))
if err != nil {
if connect.CodeOf(err) == connect.CodeNotFound {
// Repo is not created
return "", bufsync.ErrModuleDoesNotExist
}
return "", fmt.Errorf("get repository by full name %q: %w", module.IdentityString(), err)
}
return res.Msg.Repository.DefaultBranch, nil
}
}

type syncErrorHandler struct {
logger *zap.Logger
}
Expand Down
Loading
Loading