Skip to content

Commit

Permalink
PR comments
Browse files Browse the repository at this point in the history
- prefix flag instead of bool
- padded ids
- include tenant in id
- add tenant tests
  • Loading branch information
stefanhengl committed Nov 15, 2024
1 parent 5485cb1 commit b93cb3c
Show file tree
Hide file tree
Showing 15 changed files with 216 additions and 99 deletions.
2 changes: 1 addition & 1 deletion api.go
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ func (r *Repository) UnmarshalJSON(data []byte) error {
r.ID = uint32(id)
}

if v, ok := repo.RawConfig["tenantid"]; ok {
if v, ok := repo.RawConfig["tenantID"]; ok {
id, _ := strconv.ParseInt(v, 10, 64)
r.TenantID = int(id)
}
Expand Down
13 changes: 9 additions & 4 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ type Options struct {
// Note: heap checking is "best effort", and it's possible for the process to OOM without triggering the heap profile.
HeapProfileTriggerBytes uint64

// UseSourcegraphIDForName is true if we want to use the Sourcegraph ID as prefix for the shard name.
UseSourcegraphIDForName bool
// ShardPrefix is the prefix of the shard. If empty, the repository name is used.
ShardPrefix string
}

// HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building.
Expand Down Expand Up @@ -184,6 +184,7 @@ func (o *Options) Flags(fs *flag.FlagSet) {
fs.StringVar(&o.IndexDir, "index", x.IndexDir, "directory for search indices")
fs.BoolVar(&o.CTagsMustSucceed, "require_ctags", x.CTagsMustSucceed, "If set, ctags calls must succeed.")
fs.Var(largeFilesFlag{o}, "large_file", "A glob pattern where matching files are to be index regardless of their size. You can add multiple patterns by setting this more than once.")
fs.StringVar(&o.ShardPrefix, "shard_prefix", x.ShardPrefix, "the prefix of the shard. If empty, the repository name is used.")

// Sourcegraph specific
fs.BoolVar(&o.DisableCTags, "disable_ctags", x.DisableCTags, "If set, ctags will not be called.")
Expand Down Expand Up @@ -231,6 +232,10 @@ func (o *Options) Args() []string {
args = append(args, "-shard_merging")
}

if o.ShardPrefix != "" {
args = append(args, "-shard_prefix", o.ShardPrefix)
}

return args
}

Expand Down Expand Up @@ -341,8 +346,8 @@ func (o *Options) shardName(n int) string {

func (o *Options) shardNameVersion(version, n int) string {
var prefix string
if o.UseSourcegraphIDForName {
prefix = fmt.Sprintf("%d", o.RepositoryDescription.ID)
if o.ShardPrefix != "" {
prefix = o.ShardPrefix
} else {
prefix = url.QueryEscape(o.RepositoryDescription.Name)
}
Expand Down
87 changes: 87 additions & 0 deletions build/e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,19 @@ import (
"reflect"
"runtime"
"sort"
"strconv"
"strings"
"testing"
"time"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/grafana/regexp"
"github.com/stretchr/testify/require"

"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/internal/tenant"
"github.com/sourcegraph/zoekt/internal/tenant/tenanttest"
"github.com/sourcegraph/zoekt/query"
"github.com/sourcegraph/zoekt/shards"
)
Expand Down Expand Up @@ -153,6 +158,88 @@ func TestBasic(t *testing.T) {
})
}

func TestBasicTenant(t *testing.T) {
tenanttest.MockEnforce(t)

dir := t.TempDir()

ctx1 := tenanttest.NewTestContext()
tnt1, err := tenant.FromContext(ctx1)
require.NoError(t, err)

opts := Options{
IndexDir: dir,
ShardMax: 1024,
RepositoryDescription: zoekt.Repository{
Name: "repo",
RawConfig: map[string]string{"tenantID": strconv.Itoa(tnt1.ID())},
},
Parallelism: 2,
SizeMax: 1 << 20,
}

b, err := NewBuilder(opts)
if err != nil {
t.Fatalf("NewBuilder: %v", err)
}

for i := 0; i < 4; i++ {
s := fmt.Sprintf("%d", i)
if err := b.AddFile("F"+s, []byte(strings.Repeat(s, 1000))); err != nil {
t.Fatal(err)
}
}

if err := b.Finish(); err != nil {
t.Errorf("Finish: %v", err)
}

fs, _ := filepath.Glob(dir + "/*.zoekt")
if len(fs) <= 1 {
t.Fatalf("want multiple shards, got %v", fs)
}

_, md0, err := zoekt.ReadMetadataPath(fs[0])
if err != nil {
t.Fatal(err)
}
for _, f := range fs[1:] {
_, md, err := zoekt.ReadMetadataPath(f)
if err != nil {
t.Fatal(err)
}
if md.IndexTime != md0.IndexTime {
t.Fatalf("wanted identical time stamps but got %v!=%v", md.IndexTime, md0.IndexTime)
}
if md.ID != md0.ID {
t.Fatalf("wanted identical IDs but got %s!=%s", md.ID, md0.ID)
}
}

ss, err := shards.NewDirectorySearcher(dir)
if err != nil {
t.Fatalf("NewDirectorySearcher(%s): %v", dir, err)
}
defer ss.Close()

q, err := query.Parse("111")
if err != nil {
t.Fatalf("Parse(111): %v", err)
}

var sOpts zoekt.SearchOptions
// Tenant 1 has access to the repo
result, err := ss.Search(ctx1, q, &sOpts)
require.NoError(t, err)
require.Len(t, result.Files, 1)

// Tenant 2 does not have access to the repo
ctx2 := tenanttest.NewTestContext()
result, err = ss.Search(ctx2, q, &sOpts)
require.NoError(t, err)
require.Len(t, result.Files, 0)
}

// retryTest will retry f until min(t.Deadline(), time.Minute). It returns
// once f doesn't call fatalf.
func retryTest(t *testing.T, f func(fatalf func(format string, args ...interface{}))) {
Expand Down
3 changes: 0 additions & 3 deletions cmd/zoekt-git-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ func run() int {

cpuProfile := flag.String("cpuprofile", "", "write cpu profile to `file`")

useSourcegraphIDForName := flag.Bool("use_sourcegraph_id_for_name", false, "use the Sourcegraph ID for the shard name")

flag.Parse()

// Tune GOMAXPROCS to match Linux container CPU quota.
Expand Down Expand Up @@ -77,7 +75,6 @@ func run() int {

opts := cmd.OptionsFromFlags()
opts.IsDelta = *isDelta
opts.UseSourcegraphIDForName = *useSourcegraphIDForName

var branches []string
if *branchesStr != "" {
Expand Down
27 changes: 10 additions & 17 deletions cmd/zoekt-sourcegraph-indexserver/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,11 @@ import (
"strings"
"time"

sglog "github.com/sourcegraph/log"

"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/build"
"github.com/sourcegraph/zoekt/ctags"
"github.com/sourcegraph/zoekt/internal/tenant"

sglog "github.com/sourcegraph/log"
)

const defaultIndexingTimeout = 1*time.Hour + 30*time.Minute
Expand Down Expand Up @@ -100,18 +99,16 @@ type indexArgs struct {
// ShardMerging is true if we want zoekt-git-index to respect compound shards.
ShardMerging bool

// UseSourcegraphIDForName is true if we want to use the Sourcegraph ID as prefix for the shard name.
UseSourcegraphIDForName bool
// IdBasedNames is true if we want to use ID-based names as prefix for the shard name.
IdBasedNames bool
}

// BuildOptions returns a build.Options represented by indexArgs. Note: it
// doesn't set fields like repository/branch.
func (o *indexArgs) BuildOptions() *build.Options {

// Default to tenant 1 if no tenant is set.
tenantID := o.TenantID
if o.TenantID < 1 {
tenantID = 1
shardPrefix := ""
if o.IdBasedNames {
shardPrefix = fmt.Sprintf("%09d_%09d", o.TenantID, o.IndexOptions.RepoID)
}

return &build.Options{
Expand All @@ -132,7 +129,7 @@ func (o *indexArgs) BuildOptions() *build.Options {
"archived": marshalBool(o.Archived),
// Calculate repo rank based on the latest commit date.
"latestCommitDate": "1",
"tenantid": strconv.Itoa(tenantID),
"tenantID": strconv.Itoa(o.TenantID),
},
},
IndexDir: o.IndexDir,
Expand All @@ -147,7 +144,7 @@ func (o *indexArgs) BuildOptions() *build.Options {

ShardMerging: o.ShardMerging,

UseSourcegraphIDForName: o.UseSourcegraphIDForName,
ShardPrefix: shardPrefix,
}
}

Expand Down Expand Up @@ -263,7 +260,7 @@ func fetchRepo(ctx context.Context, gitDir string, o *indexArgs, c gitIndexConfi
"-C", gitDir,
"-c", "protocol.version=2",
"-c", "http.extraHeader=X-Sourcegraph-Actor-UID: internal",
"-c", "http.extraHeader=" + tenant.HttpExtraHeader(o.TenantID),
"-c", "http.extraHeader=X-Sourcegraph-Tenant-ID: " + strconv.Itoa(o.TenantID),
"fetch", "--depth=1", "--no-tags",
}

Expand Down Expand Up @@ -410,10 +407,6 @@ func indexRepo(ctx context.Context, gitDir string, sourcegraph Sourcegraph, o *i
args = append(args, "-delta_threshold", strconv.FormatUint(o.DeltaShardNumberFallbackThreshold, 10))
}

if o.UseSourcegraphIDForName {
args = append(args, "-use_sourcegraph_id_for_name")
}

if len(o.LanguageMap) > 0 {
var languageMap []string
for language, parser := range o.LanguageMap {
Expand Down
14 changes: 9 additions & 5 deletions cmd/zoekt-sourcegraph-indexserver/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -487,11 +487,12 @@ func TestIndex(t *testing.T) {
Name: "test/repo",
CloneURL: "http://api.test/.internal/git/test/repo",
Branches: []zoekt.RepositoryBranch{{Name: "HEAD", Version: "deadbeef"}},
TenantID: 42,
},
},
want: []string{
"git -c init.defaultBranch=nonExistentBranchBB0FOFCH32 init --bare $TMPDIR/test%2Frepo.git",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 1 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef",
"git -C $TMPDIR/test%2Frepo.git -c protocol.version=2 -c http.extraHeader=X-Sourcegraph-Actor-UID: internal -c http.extraHeader=X-Sourcegraph-Tenant-ID: 42 fetch --depth=1 --no-tags --filter=blob:limit=1m http://api.test/.internal/git/test/repo deadbeef",
"git -C $TMPDIR/test%2Frepo.git update-ref HEAD deadbeef",
"git -C $TMPDIR/test%2Frepo.git config zoekt.archived 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.fork 0",
Expand All @@ -500,7 +501,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantid 1",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 42",
"zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git",
},
}, {
Expand All @@ -511,6 +512,7 @@ func TestIndex(t *testing.T) {
CloneURL: "http://api.test/.internal/git/test/repo",
Branches: []zoekt.RepositoryBranch{{Name: "HEAD", Version: "deadbeef"}},
RepoID: 123,
TenantID: 1,
},
},
want: []string{
Expand All @@ -524,7 +526,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 123",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantid 1",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1",
"zoekt-git-index -submodules=false -branches HEAD -disable_ctags $TMPDIR/test%2Frepo.git",
},
}, {
Expand All @@ -543,6 +545,7 @@ func TestIndex(t *testing.T) {
{Name: "HEAD", Version: "deadbeef"},
{Name: "dev", Version: "feebdaed"}, // ignored for archive
},
TenantID: 1,
},
},
want: []string{
Expand All @@ -557,7 +560,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantid 1",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1",
"zoekt-git-index -submodules=false -incremental -branches HEAD,dev " +
"-file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " +
"$TMPDIR/test%2Frepo.git",
Expand All @@ -581,6 +584,7 @@ func TestIndex(t *testing.T) {
{Name: "dev", Version: "feebdaed"},
{Name: "release", Version: "12345678"},
},
TenantID: 1,
},
DeltaShardNumberFallbackThreshold: 22,
},
Expand All @@ -606,7 +610,7 @@ func TestIndex(t *testing.T) {
"git -C $TMPDIR/test%2Frepo.git config zoekt.priority 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.public 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.repoid 0",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantid 1",
"git -C $TMPDIR/test%2Frepo.git config zoekt.tenantID 1",
"zoekt-git-index -submodules=false -incremental -branches HEAD,dev,release " +
"-delta -delta_threshold 22 -file_limit 123 -parallelism 4 -index /data/index -require_ctags -large_file foo -large_file bar " +
"$TMPDIR/test%2Frepo.git",
Expand Down
Loading

0 comments on commit b93cb3c

Please sign in to comment.