Skip to content

Commit ada53fc

Browse files
Multi branch / tag support (#58)
1 parent 5992ac5 commit ada53fc

30 files changed

+544
-47
lines changed
655 KB
Loading
878 KB
Loading

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## [Unreleased]
99

10+
### Added
11+
12+
- Added support for indexing and searching repositories across multiple revisions (tag or branch). ([#58](https://github.com/sourcebot-dev/sourcebot/pull/58))
13+
1014
## [2.3.0] - 2024-11-01
1115

1216
### Added

README.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,46 @@ docker run -e <b>GITEA_TOKEN=my-secret-token</b> /* additional args */ ghcr.io/s
267267

268268
If you're using a self-hosted GitLab or GitHub instance with a custom domain, you can specify the domain in your config file. See [configs/self-hosted.json](configs/self-hosted.json) for examples.
269269

270+
## Searching multiple branches
271+
272+
By default, Sourcebot indexes only the default branch of each repository. To index additional branches or tags, use the `revisions` field:
273+
274+
```jsonc
275+
{
276+
"$schema": "https://raw.githubusercontent.com/sourcebot-dev/sourcebot/main/schemas/v2/index.json",
277+
"repos": [
278+
{
279+
"type": "github",
280+
"revisions": {
281+
// Index the `main` branch and any branches matching the `releases/*` glob pattern.
282+
"branches": [
283+
"main",
284+
"releases/*"
285+
],
286+
// Index the `latest` tag and any tags matching the `v*.*.*` glob pattern.
287+
"tags": [
288+
"latest",
289+
"v*.*.*"
290+
]
291+
},
292+
"repos": [
293+
"my_org/repo_a",
294+
"my_org/repo_b"
295+
]
296+
}
297+
]
298+
}
299+
```
300+
301+
For each repository (in this case, `repo_a` and `repo_b`), Sourcebot will index all branches and tags matching the `branches` and `tags` patterns provided. Any branches or tags that don't match the patterns will be ignored and not indexed.
302+
303+
To search a specific revision, use the `revision` filter in the search bar:
304+
305+
<picture>
306+
<source media="(prefers-color-scheme: dark)" srcset=".github/images/revisions_filter_dark.png">
307+
<img style="max-width:700px;width:100%" src=".github/images/revisions_filter_light.png">
308+
</picture>
309+
270310
## Searching a local directory
271311

272312
Local directories can be searched by using the `local` type in your config file:

configs/multi-branch.json

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"$schema": "../schemas/v2/index.json",
3+
"repos": [
4+
{
5+
"type": "github",
6+
"revisions": {
7+
// Specify branches to index...
8+
"branches": [
9+
"main",
10+
"release/*"
11+
],
12+
// ... and/or specify tags to index (branches and tags can be combined)
13+
"tags": [
14+
"v*.*.*"
15+
]
16+
},
17+
// For each repo (repoa, repob), Sourcebot will index all branches and tags in the repo
18+
// matching the `branches` and `tags` patterns above. Any branches or tags that don't
19+
// match the patterns will be ignored and not indexed.
20+
"repos": [
21+
"org/repoa",
22+
"org/repob"
23+
]
24+
}
25+
]
26+
}

packages/backend/package.json

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
},
1313
"devDependencies": {
1414
"@types/argparse": "^2.0.16",
15+
"@types/micromatch": "^4.0.9",
1516
"@types/node": "^22.7.5",
1617
"json-schema-to-typescript": "^15.0.2",
1718
"tsc-watch": "^6.2.0",
@@ -25,6 +26,7 @@
2526
"cross-fetch": "^4.0.0",
2627
"gitea-js": "^1.22.0",
2728
"lowdb": "^7.0.1",
29+
"micromatch": "^4.0.8",
2830
"simple-git": "^3.27.0",
2931
"strip-json-comments": "^5.0.1",
3032
"winston": "^3.15.0"

packages/backend/src/gitea.ts

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import { AppContext, GitRepository } from './types.js';
55
import fetch from 'cross-fetch';
66
import { createLogger } from './logger.js';
77
import path from 'path';
8+
import micromatch from 'micromatch';
89

910
const logger = createLogger('Gitea');
1011

@@ -60,7 +61,9 @@ export const getGiteaReposFromConfig = async (config: GiteaConfig, ctx: AppConte
6061
'zoekt.archived': marshalBool(repo.archived),
6162
'zoekt.fork': marshalBool(repo.fork!),
6263
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
63-
}
64+
},
65+
branches: [],
66+
tags: []
6467
} satisfies GitRepository;
6568
});
6669

@@ -77,10 +80,68 @@ export const getGiteaReposFromConfig = async (config: GiteaConfig, ctx: AppConte
7780
repos = excludeReposByName(repos, config.exclude.repos, logger);
7881
}
7982
}
83+
84+
logger.debug(`Found ${repos.length} total repositories.`);
85+
86+
if (config.revisions) {
87+
if (config.revisions.branches) {
88+
const branchGlobs = config.revisions.branches;
89+
repos = await Promise.all(
90+
repos.map(async (repo) => {
91+
const [owner, name] = repo.name.split('/');
92+
let branches = (await getBranchesForRepo(owner, name, api)).map(branch => branch.name!);
93+
branches = micromatch.match(branches, branchGlobs);
94+
95+
return {
96+
...repo,
97+
branches,
98+
};
99+
})
100+
)
101+
}
102+
103+
if (config.revisions.tags) {
104+
const tagGlobs = config.revisions.tags;
105+
repos = await Promise.all(
106+
repos.map(async (repo) => {
107+
const [owner, name] = repo.name.split('/');
108+
let tags = (await getTagsForRepo(owner, name, api)).map(tag => tag.name!);
109+
tags = micromatch.match(tags, tagGlobs);
110+
111+
return {
112+
...repo,
113+
tags,
114+
};
115+
})
116+
)
117+
}
118+
}
80119

81120
return repos;
82121
}
83122

123+
/**
 * Fetches the tags of the Gitea repository `owner/repo`.
 *
 * Requests go through `api.repos.repoListTags`, driven page by page by the
 * `paginate` helper, and the whole operation is timed with `measure` so the
 * duration can be written to the debug log.
 * NOTE(review): `paginate` and `measure` are defined outside this view;
 * presumably `paginate` keeps requesting pages until none remain — confirm.
 *
 * @param owner - Owner segment of the repository, passed to the API as-is.
 * @param repo - Repository name, passed to the API as-is.
 * @param api - Gitea API client used to issue the requests.
 * @returns The tag objects collected across all requested pages.
 */
const getTagsForRepo = async <T>(owner: string, repo: string, api: Api<T>) => {
124+
logger.debug(`Fetching tags for repo ${owner}/${repo}...`);
125+
const { durationMs, data: tags } = await measure(() =>
126+
paginate((page) => api.repos.repoListTags(owner, repo, {
127+
page
128+
}))
129+
);
130+
logger.debug(`Found ${tags.length} tags in repo ${owner}/${repo} in ${durationMs}ms.`);
131+
return tags;
132+
}
133+
134+
/**
 * Fetches the branches of the Gitea repository `owner/repo`.
 *
 * Requests go through `api.repos.repoListBranches`, driven page by page by
 * the `paginate` helper, and the whole operation is timed with `measure` so
 * the duration can be written to the debug log.
 * NOTE(review): `paginate` and `measure` are defined outside this view;
 * presumably `paginate` keeps requesting pages until none remain — confirm.
 *
 * @param owner - Owner segment of the repository, passed to the API as-is.
 * @param repo - Repository name, passed to the API as-is.
 * @param api - Gitea API client used to issue the requests.
 * @returns The branch objects collected across all requested pages.
 */
const getBranchesForRepo = async <T>(owner: string, repo: string, api: Api<T>) => {
135+
logger.debug(`Fetching branches for repo ${owner}/${repo}...`);
136+
const { durationMs, data: branches } = await measure(() =>
137+
paginate((page) => api.repos.repoListBranches(owner, repo, {
138+
page
139+
}))
140+
);
141+
logger.debug(`Found ${branches.length} branches in repo ${owner}/${repo} in ${durationMs}ms.`);
142+
return branches;
143+
}
144+
84145
const getReposOwnedByUsers = async <T>(users: string[], api: Api<T>) => {
85146
const repos = (await Promise.all(users.map(async (user) => {
86147
logger.debug(`Fetching repos for user ${user}...`);

packages/backend/src/github.ts

Lines changed: 72 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@ import { GitHubConfig } from "./schemas/v2.js";
33
import { createLogger } from "./logger.js";
44
import { AppContext, GitRepository } from "./types.js";
55
import path from 'path';
6-
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool } from "./utils.js";
6+
import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenFromConfig, marshalBool, measure } from "./utils.js";
7+
import micromatch from "micromatch";
78

89
const logger = createLogger("GitHub");
910

1011
type OctokitRepository = {
1112
name: string,
13+
id: number,
1214
full_name: string,
1315
fork: boolean,
1416
private: boolean,
@@ -88,7 +90,9 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
8890
'zoekt.archived': marshalBool(repo.archived),
8991
'zoekt.fork': marshalBool(repo.fork),
9092
'zoekt.public': marshalBool(repo.private === false)
91-
}
93+
},
94+
branches: [],
95+
tags: [],
9296
} satisfies GitRepository;
9397
});
9498

@@ -107,10 +111,75 @@ export const getGitHubReposFromConfig = async (config: GitHubConfig, signal: Abo
107111
}
108112

109113
logger.debug(`Found ${repos.length} total repositories.`);
110-
114+
115+
if (config.revisions) {
116+
if (config.revisions.branches) {
117+
const branchGlobs = config.revisions.branches;
118+
repos = await Promise.all(
119+
repos.map(async (repo) => {
120+
const [owner, name] = repo.name.split('/');
121+
let branches = (await getBranchesForRepo(owner, name, octokit, signal)).map(branch => branch.name);
122+
branches = micromatch.match(branches, branchGlobs);
123+
124+
return {
125+
...repo,
126+
branches,
127+
};
128+
})
129+
)
130+
}
131+
132+
if (config.revisions.tags) {
133+
const tagGlobs = config.revisions.tags;
134+
repos = await Promise.all(
135+
repos.map(async (repo) => {
136+
const [owner, name] = repo.name.split('/');
137+
let tags = (await getTagsForRepo(owner, name, octokit, signal)).map(tag => tag.name);
138+
tags = micromatch.match(tags, tagGlobs);
139+
140+
return {
141+
...repo,
142+
tags,
143+
};
144+
})
145+
)
146+
}
147+
}
148+
111149
return repos;
112150
}
113151

152+
/**
 * Fetches the tags of the GitHub repository `owner/repo`.
 *
 * Uses `octokit.paginate` over `octokit.repos.listTags`, requesting 100
 * entries per page and forwarding `signal` in the request options. The call
 * is timed with `measure` so the duration can be written to the debug log.
 *
 * @param owner - Owner (user or organization) segment of the repository.
 * @param repo - Repository name.
 * @param octokit - Octokit client used to issue the requests.
 * @param signal - Abort signal forwarded to each underlying request.
 * @returns The tag objects returned by the paginated listing.
 */
const getTagsForRepo = async (owner: string, repo: string, octokit: Octokit, signal: AbortSignal) => {
153+
logger.debug(`Fetching tags for repo ${owner}/${repo}...`);
154+
155+
const { durationMs, data: tags } = await measure(() => octokit.paginate(octokit.repos.listTags, {
156+
owner,
157+
repo,
158+
per_page: 100,
159+
request: {
160+
signal
161+
}
162+
}));
163+
164+
logger.debug(`Found ${tags.length} tags for repo ${owner}/${repo} in ${durationMs}ms`);
165+
return tags;
166+
}
167+
168+
/**
 * Fetches the branches of the GitHub repository `owner/repo`.
 *
 * Uses `octokit.paginate` over `octokit.repos.listBranches`, requesting 100
 * entries per page and forwarding `signal` in the request options. The call
 * is timed with `measure` so the duration can be written to the debug log.
 *
 * @param owner - Owner (user or organization) segment of the repository.
 * @param repo - Repository name.
 * @param octokit - Octokit client used to issue the requests.
 * @param signal - Abort signal forwarded to each underlying request.
 * @returns The branch objects returned by the paginated listing.
 */
const getBranchesForRepo = async (owner: string, repo: string, octokit: Octokit, signal: AbortSignal) => {
169+
logger.debug(`Fetching branches for repo ${owner}/${repo}...`);
170+
const { durationMs, data: branches } = await measure(() => octokit.paginate(octokit.repos.listBranches, {
171+
owner,
172+
repo,
173+
per_page: 100,
174+
request: {
175+
signal
176+
}
177+
}));
178+
logger.debug(`Found ${branches.length} branches for repo ${owner}/${repo} in ${durationMs}ms`);
179+
return branches;
180+
}
181+
182+
114183
const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, octokit: Octokit, signal: AbortSignal) => {
115184
// @todo : error handling
116185
const repos = (await Promise.all(users.map(async (user) => {
@@ -149,7 +218,6 @@ const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, o
149218
}
150219

151220
const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSignal) => {
152-
// @todo : error handling
153221
const repos = (await Promise.all(orgs.map(async (org) => {
154222
logger.debug(`Fetching repository info for org ${org}...`);
155223
const start = Date.now();
@@ -172,7 +240,6 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi
172240
}
173241

174242
const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSignal) => {
175-
// @todo : error handling
176243
const repos = await Promise.all(repoList.map(async (repo) => {
177244
logger.debug(`Fetching repository info for ${repo}...`);
178245
const start = Date.now();

packages/backend/src/gitlab.ts

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { excludeArchivedRepos, excludeForkedRepos, excludeReposByName, getTokenF
44
import { createLogger } from "./logger.js";
55
import { AppContext, GitRepository } from "./types.js";
66
import path from 'path';
7+
import micromatch from "micromatch";
78

89
const logger = createLogger("GitLab");
910

@@ -90,7 +91,9 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
9091
'zoekt.archived': marshalBool(project.archived),
9192
'zoekt.fork': marshalBool(isFork),
9293
'zoekt.public': marshalBool(project.visibility === 'public'),
93-
}
94+
},
95+
branches: [],
96+
tags: [],
9497
} satisfies GitRepository;
9598
});
9699

@@ -110,5 +113,41 @@ export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppCon
110113

111114
logger.debug(`Found ${repos.length} total repositories.`);
112115

116+
if (config.revisions) {
117+
if (config.revisions.branches) {
118+
const branchGlobs = config.revisions.branches;
119+
repos = await Promise.all(repos.map(async (repo) => {
120+
logger.debug(`Fetching branches for repo ${repo.name}...`);
121+
let { durationMs, data } = await measure(() => api.Branches.all(repo.name));
122+
logger.debug(`Found ${data.length} branches in repo ${repo.name} in ${durationMs}ms.`);
123+
124+
let branches = data.map((branch) => branch.name);
125+
branches = micromatch.match(branches, branchGlobs);
126+
127+
return {
128+
...repo,
129+
branches,
130+
};
131+
}));
132+
}
133+
134+
if (config.revisions.tags) {
135+
const tagGlobs = config.revisions.tags;
136+
repos = await Promise.all(repos.map(async (repo) => {
137+
logger.debug(`Fetching tags for repo ${repo.name}...`);
138+
let { durationMs, data } = await measure(() => api.Tags.all(repo.name));
139+
logger.debug(`Found ${data.length} tags in repo ${repo.name} in ${durationMs}ms.`);
140+
141+
let tags = data.map((tag) => tag.name);
142+
tags = micromatch.match(tags, tagGlobs);
143+
144+
return {
145+
...repo,
146+
tags,
147+
};
148+
}));
149+
}
150+
}
151+
113152
return repos;
114153
}

0 commit comments

Comments
 (0)