Skip to content

Commit e42a922

Browse files
committed
feat(perf): cache vcs.GetFiles() to reduce redundant VCS API volume
Calls to `vcs.GetFiles()` only result in one or two API calls (paging aside). However the method is called each time a CEL expression references `"path".pathChanged()` or the `on-path-change` annotation is checked for matches. Because of this, because matching is evaluated several times in a row, and because some users have tens of PipelineRuns in their `.tekton` directory, in some cases a single push event may cause PaC to make hundreds of API requests listing the same files repeatedly.
1 parent 6a0d179 commit e42a922

File tree

9 files changed

+162
-145
lines changed

9 files changed

+162
-145
lines changed

pkg/provider/bitbucketdatacenter/bitbucketdatacenter.go

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ type Provider struct {
4242
projectKey string
4343
repo *v1alpha1.Repository
4444
triggerEvent string
45+
cachedChangedFiles *changedfiles.ChangedFiles
4546
}
4647

4748
func (v Provider) Client() *scm.Client {
@@ -370,13 +371,28 @@ func (v *Provider) GetConfig() *info.ProviderConfig {
370371
}
371372
}
372373

374+
// GetFiles gets and caches the list of files changed by a given event.
373375
func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
374-
OrgAndRepo := fmt.Sprintf("%s/%s", runevent.Organization, runevent.Repository)
375-
if runevent.TriggerTarget == triggertype.PullRequest {
376+
if v.cachedChangedFiles == nil {
377+
changes, err := v.fetchChangedFiles(ctx, runevent)
378+
if err != nil {
379+
return changedfiles.ChangedFiles{}, err
380+
}
381+
v.cachedChangedFiles = &changes
382+
}
383+
return *v.cachedChangedFiles, nil
384+
}
385+
386+
func (v *Provider) fetchChangedFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
387+
changedFiles := changedfiles.ChangedFiles{}
388+
389+
orgAndRepo := fmt.Sprintf("%s/%s", runevent.Organization, runevent.Repository)
390+
391+
switch runevent.TriggerTarget {
392+
case triggertype.PullRequest:
376393
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
377-
changedFiles := changedfiles.ChangedFiles{}
378394
for {
379-
changes, _, err := v.Client().PullRequests.ListChanges(ctx, OrgAndRepo, runevent.PullRequestNumber, opts)
395+
changes, _, err := v.Client().PullRequests.ListChanges(ctx, orgAndRepo, runevent.PullRequestNumber, opts)
380396
if err != nil {
381397
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for pull request: %w", err)
382398
}
@@ -407,14 +423,10 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
407423

408424
opts.Page++
409425
}
410-
return changedFiles, nil
411-
}
412-
413-
if runevent.TriggerTarget == triggertype.Push {
426+
case triggertype.Push:
414427
opts := &scm.ListOptions{Page: 1, Size: apiResponseLimit}
415-
changedFiles := changedfiles.ChangedFiles{}
416428
for {
417-
changes, _, err := v.Client().Git.ListChanges(ctx, OrgAndRepo, runevent.SHA, opts)
429+
changes, _, err := v.Client().Git.ListChanges(ctx, orgAndRepo, runevent.SHA, opts)
418430
if err != nil {
419431
return changedfiles.ChangedFiles{}, fmt.Errorf("failed to list changes for commit %s: %w", runevent.SHA, err)
420432
}
@@ -441,9 +453,10 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
441453

442454
opts.Page++
443455
}
444-
return changedFiles, nil
456+
default:
457+
// No action necessary
445458
}
446-
return changedfiles.ChangedFiles{}, nil
459+
return changedFiles, nil
447460
}
448461

449462
func (v *Provider) CreateToken(_ context.Context, _ []string, _ *info.Event) (string, error) {

pkg/provider/bitbucketdatacenter/bitbucketdatacenter_test.go

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,14 @@ import (
2222
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/triggertype"
2323
"github.com/openshift-pipelines/pipelines-as-code/pkg/provider"
2424
bbtest "github.com/openshift-pipelines/pipelines-as-code/pkg/provider/bitbucketdatacenter/test"
25+
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/metrics"
2526

2627
"github.com/jenkins-x/go-scm/scm"
2728
"go.uber.org/zap"
2829
zapobserver "go.uber.org/zap/zaptest/observer"
2930
"gotest.tools/v3/assert"
31+
"knative.dev/pkg/metrics/metricstest"
32+
_ "knative.dev/pkg/metrics/testing"
3033
rtesting "knative.dev/pkg/reconciler/testing"
3134
)
3235

@@ -796,6 +799,7 @@ func TestGetFiles(t *testing.T) {
796799
ctx, _ := rtesting.SetupFakeContext(t)
797800
client, mux, tearDown, tURL := bbtest.SetupBBDataCenterClient()
798801
defer tearDown()
802+
metrics.ResetMetrics()
799803

800804
stats := &bbtest.DiffStats{
801805
Values: tt.changeFiles,
@@ -821,13 +825,17 @@ func TestGetFiles(t *testing.T) {
821825
}
822826
})
823827
}
824-
v := &Provider{client: client, baseURL: tURL}
828+
829+
metricsTags := map[string]string{"provider": "bitbucket-datacenter", "event-type": string(tt.event.TriggerTarget)}
830+
metricstest.CheckStatsNotReported(t, "pipelines_as_code_git_provider_api_request_count")
831+
832+
v := &Provider{client: client, baseURL: tURL, triggerEvent: string(tt.event.TriggerTarget)}
825833
changedFiles, err := v.GetFiles(ctx, tt.event)
826834
if tt.wantError {
827835
assert.Equal(t, err.Error(), tt.errMsg)
828-
return
836+
} else {
837+
assert.NilError(t, err, nil)
829838
}
830-
assert.NilError(t, err, nil)
831839
assert.Equal(t, tt.wantAddedFilesCount, len(changedFiles.Added))
832840
assert.Equal(t, tt.wantDeletedFilesCount, len(changedFiles.Deleted))
833841
assert.Equal(t, tt.wantModifiedFilesCount, len(changedFiles.Modified))
@@ -844,6 +852,17 @@ func TestGetFiles(t *testing.T) {
844852
assert.Equal(t, tt.changeFiles[i].Path.ToString, changedFiles.All[i])
845853
}
846854
}
855+
856+
// Check caching
857+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
858+
_, _ = v.GetFiles(ctx, tt.event)
859+
if tt.wantError {
860+
// No caching on error
861+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 2)
862+
} else {
863+
// Cache API results on success
864+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, 1)
865+
}
847866
})
848867
}
849868
}

pkg/provider/github/github.go

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ type Provider struct {
5555
PaginedNumber int
5656
userType string // The type of user i.e bot or not
5757
skippedRun
58-
triggerEvent string
58+
triggerEvent string
59+
cachedChangedFiles *changedfiles.ChangedFiles
5960
}
6061

6162
type skippedRun struct {
@@ -517,11 +518,24 @@ func (v *Provider) getPullRequest(ctx context.Context, runevent *info.Event) (*i
517518
return runevent, nil
518519
}
519520

520-
// GetFiles get a files from pull request.
521+
// GetFiles gets and caches the list of files changed by a given event.
521522
func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
522-
if runevent.TriggerTarget == triggertype.PullRequest {
523+
if v.cachedChangedFiles == nil {
524+
changes, err := v.fetchChangedFiles(ctx, runevent)
525+
if err != nil {
526+
return changedfiles.ChangedFiles{}, err
527+
}
528+
v.cachedChangedFiles = &changes
529+
}
530+
return *v.cachedChangedFiles, nil
531+
}
532+
533+
func (v *Provider) fetchChangedFiles(ctx context.Context, runevent *info.Event) (changedfiles.ChangedFiles, error) {
534+
changedFiles := changedfiles.ChangedFiles{}
535+
536+
switch runevent.TriggerTarget {
537+
case triggertype.PullRequest:
523538
opt := &github.ListOptions{PerPage: v.PaginedNumber}
524-
changedFiles := changedfiles.ChangedFiles{}
525539
for {
526540
repoCommit, resp, err := wrapAPI(v, "list_pull_request_files", func() ([]*github.CommitFile, *github.Response, error) {
527541
return v.Client().PullRequests.ListFiles(ctx, runevent.Organization, runevent.Repository, runevent.PullRequestNumber, opt)
@@ -549,11 +563,7 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
549563
}
550564
opt.Page = resp.NextPage
551565
}
552-
return changedFiles, nil
553-
}
554-
555-
if runevent.TriggerTarget == "push" {
556-
changedFiles := changedfiles.ChangedFiles{}
566+
case triggertype.Push:
557567
rC, _, err := wrapAPI(v, "get_commit_files", func() (*github.RepositoryCommit, *github.Response, error) {
558568
return v.Client().Repositories.GetCommit(ctx, runevent.Organization, runevent.Repository, runevent.SHA, &github.ListOptions{})
559569
})
@@ -575,9 +585,10 @@ func (v *Provider) GetFiles(ctx context.Context, runevent *info.Event) (changedf
575585
changedFiles.Renamed = append(changedFiles.Renamed, *rC.Files[i].Filename)
576586
}
577587
}
578-
return changedFiles, nil
588+
default:
589+
// No action necessary
579590
}
580-
return changedfiles.ChangedFiles{}, nil
591+
return changedFiles, nil
581592
}
582593

583594
// getObject Get an object from a repository.

pkg/provider/github/github_test.go

Lines changed: 23 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ import (
2020
"github.com/jonboulle/clockwork"
2121
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/keys"
2222
"github.com/openshift-pipelines/pipelines-as-code/pkg/apis/pipelinesascode/v1alpha1"
23-
"github.com/openshift-pipelines/pipelines-as-code/pkg/metrics"
2423
"github.com/openshift-pipelines/pipelines-as-code/pkg/params"
2524
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/clients"
2625
"github.com/openshift-pipelines/pipelines-as-code/pkg/params/info"
@@ -29,6 +28,8 @@ import (
2928
testclient "github.com/openshift-pipelines/pipelines-as-code/pkg/test/clients"
3029
ghtesthelper "github.com/openshift-pipelines/pipelines-as-code/pkg/test/github"
3130
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/logger"
31+
"github.com/openshift-pipelines/pipelines-as-code/pkg/test/metrics"
32+
3233
"go.uber.org/zap"
3334
zapobserver "go.uber.org/zap/zaptest/observer"
3435
"gotest.tools/v3/assert"
@@ -347,7 +348,7 @@ func TestGetTektonDir(t *testing.T) {
347348
}
348349
for _, tt := range testGetTektonDir {
349350
t.Run(tt.name, func(t *testing.T) {
350-
resetMetrics()
351+
metrics.ResetMetrics()
351352
observer, exporter := zapobserver.New(zap.InfoLevel)
352353
fakelogger := zap.New(observer).Sugar()
353354
ctx, _ := rtesting.SetupFakeContext(t)
@@ -892,6 +893,7 @@ func TestGetFiles(t *testing.T) {
892893
wantDeletedFilesCount int
893894
wantModifiedFilesCount int
894895
wantRenamedFilesCount int
896+
wantAPIRequestCount int64
895897
}{
896898
{
897899
name: "pull-request",
@@ -920,6 +922,7 @@ func TestGetFiles(t *testing.T) {
920922
wantDeletedFilesCount: 1,
921923
wantModifiedFilesCount: 1,
922924
wantRenamedFilesCount: 1,
925+
wantAPIRequestCount: 2,
923926
},
924927
{
925928
name: "push",
@@ -950,51 +953,23 @@ func TestGetFiles(t *testing.T) {
950953
wantDeletedFilesCount: 1,
951954
wantModifiedFilesCount: 1,
952955
wantRenamedFilesCount: 1,
956+
wantAPIRequestCount: 1,
953957
},
954958
}
955959
for _, tt := range tests {
956960
t.Run(tt.name, func(t *testing.T) {
957961
fakeclient, mux, _, teardown := ghtesthelper.SetupGH()
958962
defer teardown()
959-
prCommitFiles := []*github.CommitFile{
960-
{
961-
Filename: ptr.String("modified.yaml"),
962-
Status: ptr.String("modified"),
963-
}, {
964-
Filename: ptr.String("added.doc"),
965-
Status: ptr.String("added"),
966-
}, {
967-
Filename: ptr.String("removed.yaml"),
968-
Status: ptr.String("removed"),
969-
}, {
970-
Filename: ptr.String("renamed.doc"),
971-
Status: ptr.String("renamed"),
972-
},
973-
}
963+
metrics.ResetMetrics()
974964

975-
pushCommitFiles := []*github.CommitFile{
976-
{
977-
Filename: ptr.String("modified.yaml"),
978-
Status: ptr.String("modified"),
979-
}, {
980-
Filename: ptr.String("added.doc"),
981-
Status: ptr.String("added"),
982-
}, {
983-
Filename: ptr.String("removed.yaml"),
984-
Status: ptr.String("removed"),
985-
}, {
986-
Filename: ptr.String("renamed.doc"),
987-
Status: ptr.String("renamed"),
988-
},
989-
}
990965
if tt.event.TriggerTarget == "pull_request" {
991966
mux.HandleFunc(fmt.Sprintf("/repos/%s/%s/pulls/%d/files",
992967
tt.event.Organization, tt.event.Repository, tt.event.PullRequestNumber), func(rw http.ResponseWriter, r *http.Request) {
993968
if r.URL.Query().Get("page") == "" || r.URL.Query().Get("page") == "1" {
994969
rw.Header().Add("Link", fmt.Sprintf("<https://api.github.com/repos/%s/%s/pulls/%d/files?page=2>; rel=\"next\"", tt.event.Organization, tt.event.Repository, tt.event.PullRequestNumber))
995970
fmt.Fprint(rw, "[]")
996971
} else {
997-
b, _ := json.Marshal(prCommitFiles)
972+
b, _ := json.Marshal(tt.commitFiles)
998973
fmt.Fprint(rw, string(b))
999974
}
1000975
})
@@ -1003,17 +978,26 @@ func TestGetFiles(t *testing.T) {
1003978
mux.HandleFunc(fmt.Sprintf("/repos/%s/%s/commits/%s",
1004979
tt.event.Organization, tt.event.Repository, tt.event.SHA), func(rw http.ResponseWriter, _ *http.Request) {
1005980
c := &github.RepositoryCommit{
1006-
Files: pushCommitFiles,
981+
Files: tt.commit.Files,
1007982
}
1008983
b, _ := json.Marshal(c)
1009984
fmt.Fprint(rw, string(b))
1010985
})
1011986
}
1012987

988+
metricsTags := map[string]string{"provider": "github", "event-type": string(tt.event.TriggerTarget)}
989+
metricstest.CheckStatsNotReported(t, "pipelines_as_code_git_provider_api_request_count")
990+
991+
log, _ := logger.GetLogger()
1013992
ctx, _ := rtesting.SetupFakeContext(t)
1014993
provider := &Provider{
1015994
ghClient: fakeclient,
1016995
PaginedNumber: 1,
996+
997+
// necessary for metrics
998+
providerName: "github",
999+
triggerEvent: string(tt.event.TriggerTarget),
1000+
Logger: log,
10171001
}
10181002
changedFiles, err := provider.GetFiles(ctx, tt.event)
10191003
assert.NilError(t, err, nil)
@@ -1032,6 +1016,11 @@ func TestGetFiles(t *testing.T) {
10321016
assert.Equal(t, *tt.commit.Files[i].Filename, changedFiles.All[i])
10331017
}
10341018
}
1019+
1020+
// Check caching
1021+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.wantAPIRequestCount)
1022+
_, _ = provider.GetFiles(ctx, tt.event)
1023+
metricstest.CheckCountData(t, "pipelines_as_code_git_provider_api_request_count", metricsTags, tt.wantAPIRequestCount)
10351024
})
10361025
}
10371026
}
@@ -1382,18 +1371,6 @@ func TestIsHeadCommitOfBranch(t *testing.T) {
13821371
}
13831372
}
13841373

1385-
func resetMetrics() {
1386-
metricstest.Unregister(
1387-
"pipelines_as_code_pipelinerun_count",
1388-
"pipelines_as_code_pipelinerun_duration_seconds_sum",
1389-
"pipelines_as_code_running_pipelineruns_count",
1390-
"pipelines_as_code_git_provider_api_request_count",
1391-
)
1392-
1393-
// have to reset sync.Once to allow recreation of Recorder.
1394-
metrics.ResetRecorder()
1395-
}
1396-
13971374
func TestCreateComment(t *testing.T) {
13981375
tests := []struct {
13991376
name string

0 commit comments

Comments
 (0)