Skip to content

Commit 46be044

Browse files
Add back revision support (#215)
1 parent 3d6c7dc commit 46be044

21 files changed

+191
-177
lines changed

packages/backend/src/git.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,3 +39,20 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
3939
]
4040
);
4141
}
42+
43+
/**
 * Lists the branches of the git repository located at `path`.
 *
 * Creates a fresh simple-git instance, points it at `path` via `cwd`, runs
 * `branch()` and returns the `all` array of the resulting summary. The
 * indexer prefixes each returned entry with `refs/heads/` after glob
 * filtering, so entries are expected to be plain branch names.
 *
 * NOTE(review): `branch()` is called without arguments; depending on how the
 * repo was cloned the summary may also contain remote-tracking entries
 * (e.g. `remotes/origin/...`) — confirm against simple-git's defaults.
 *
 * @param path - Directory of the repository to inspect.
 * @returns The `all` list from simple-git's branch summary.
 */
export const getBranches = async (path: string) => {
44+
const git = simpleGit();
45+
const branches = await git.cwd({
46+
path,
47+
}).branch();
48+
49+
return branches.all;
50+
}
51+
52+
/**
 * Lists the tags of the git repository located at `path`.
 *
 * Creates a fresh simple-git instance, points it at `path` via `cwd`, runs
 * `tags()` and returns the `all` array of the resulting summary. The indexer
 * prefixes each returned entry with `refs/tags/` after glob filtering.
 *
 * @param path - Directory of the repository to inspect.
 * @returns The `all` list from simple-git's tag summary.
 */
export const getTags = async (path: string) => {
53+
const git = simpleGit();
54+
const tags = await git.cwd({
55+
path,
56+
}).tags();
57+
return tags.all;
58+
}

packages/backend/src/gitea.ts

Lines changed: 1 addition & 76 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { Api, giteaApi, HttpResponse, Repository as GiteaRepository } from 'gitea-js';
22
import { GiteaConnectionConfig } from '@sourcebot/schemas/v3/gitea.type';
3-
import { getTokenFromConfig, measure, fetchWithRetry } from './utils.js';
3+
import { getTokenFromConfig, measure } from './utils.js';
44
import fetch from 'cross-fetch';
55
import { createLogger } from './logger.js';
66
import micromatch from 'micromatch';
@@ -56,49 +56,6 @@ export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, org
5656
return true;
5757
});
5858

59-
60-
if (config.revisions) {
61-
if (config.revisions.branches) {
62-
const branchGlobs = config.revisions.branches;
63-
allRepos = await Promise.all(
64-
allRepos.map(async (repo) => {
65-
const [owner, name] = repo.full_name!.split('/');
66-
let branches = (await fetchWithRetry(
67-
() => getBranchesForRepo(owner, name, api),
68-
`branches for ${owner}/${name}`,
69-
logger
70-
)).map(branch => branch.name!);
71-
branches = micromatch.match(branches, branchGlobs);
72-
73-
return {
74-
...repo,
75-
branches,
76-
};
77-
})
78-
)
79-
}
80-
81-
if (config.revisions.tags) {
82-
const tagGlobs = config.revisions.tags;
83-
allRepos = await Promise.all(
84-
allRepos.map(async (allRepos) => {
85-
const [owner, name] = allRepos.full_name!.split('/');
86-
let tags = (await fetchWithRetry(
87-
() => getTagsForRepo(owner, name, api),
88-
`tags for ${owner}/${name}`,
89-
logger
90-
)).map(tag => tag.name!);
91-
tags = micromatch.match(tags, tagGlobs);
92-
93-
return {
94-
...allRepos,
95-
tags,
96-
};
97-
})
98-
)
99-
}
100-
}
101-
10259
let repos = allRepos
10360
.filter((repo) => {
10461
const isExcluded = shouldExcludeRepo({
@@ -158,38 +115,6 @@ const shouldExcludeRepo = ({
158115
return shouldExclude;
159116
}
160117

161-
const getTagsForRepo = async <T>(owner: string, repo: string, api: Api<T>) => {
162-
try {
163-
logger.debug(`Fetching tags for repo ${owner}/${repo}...`);
164-
const { durationMs, data: tags } = await measure(() =>
165-
paginate((page) => api.repos.repoListTags(owner, repo, {
166-
page
167-
}))
168-
);
169-
logger.debug(`Found ${tags.length} tags in repo ${owner}/${repo} in ${durationMs}ms.`);
170-
return tags;
171-
} catch (e) {
172-
logger.error(`Failed to fetch tags for repo ${owner}/${repo}.`, e);
173-
throw e;
174-
}
175-
}
176-
177-
const getBranchesForRepo = async <T>(owner: string, repo: string, api: Api<T>) => {
178-
try {
179-
logger.debug(`Fetching branches for repo ${owner}/${repo}...`);
180-
const { durationMs, data: branches } = await measure(() =>
181-
paginate((page) => api.repos.repoListBranches(owner, repo, {
182-
page
183-
}))
184-
);
185-
logger.debug(`Found ${branches.length} branches in repo ${owner}/${repo} in ${durationMs}ms.`);
186-
return branches;
187-
} catch (e) {
188-
logger.error(`Failed to fetch branches for repo ${owner}/${repo}.`, e);
189-
throw e;
190-
}
191-
}
192-
193118
const getReposOwnedByUsers = async <T>(users: string[], api: Api<T>) => {
194119
const results = await Promise.allSettled(users.map(async (user) => {
195120
try {

packages/backend/src/posthogEvents.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ export type PosthogEventMap = {
1818
connectionId: number,
1919
repoCount: number,
2020
},
21+
revisions_truncated: {},
2122
//////////////////////////////////////////////////////////////////
2223
}
2324

packages/backend/src/promClient.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import express, { Request, Response } from 'express';
2-
import client, { Registry, Counter, Gauge, Histogram } from 'prom-client';
2+
import client, { Registry, Counter, Gauge } from 'prom-client';
33

44
export class PromClient {
55
private registry: Registry;

packages/backend/src/repoCompileUtils.ts

Lines changed: 49 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import { Prisma, PrismaClient } from '@sourcebot/db';
77
import { WithRequired } from "./types.js"
88
import { marshalBool } from "./utils.js";
99
import { GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
10+
import { RepoMetadata } from './types.js';
1011

1112
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
1213

@@ -54,17 +55,21 @@ export const compileGithubConfig = async (
5455
}
5556
},
5657
metadata: {
57-
'zoekt.web-url-type': 'github',
58-
'zoekt.web-url': repo.html_url,
59-
'zoekt.name': repoName,
60-
'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
61-
'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
62-
'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
63-
'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
64-
'zoekt.archived': marshalBool(repo.archived),
65-
'zoekt.fork': marshalBool(repo.fork),
66-
'zoekt.public': marshalBool(repo.private === false)
67-
},
58+
gitConfig: {
59+
'zoekt.web-url-type': 'github',
60+
'zoekt.web-url': repo.html_url,
61+
'zoekt.name': repoName,
62+
'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
63+
'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
64+
'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
65+
'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
66+
'zoekt.archived': marshalBool(repo.archived),
67+
'zoekt.fork': marshalBool(repo.fork),
68+
'zoekt.public': marshalBool(repo.private === false),
69+
},
70+
branches: config.revisions?.branches ?? undefined,
71+
tags: config.revisions?.tags ?? undefined,
72+
} satisfies RepoMetadata,
6873
};
6974

7075
return record;
@@ -113,15 +118,19 @@ export const compileGitlabConfig = async (
113118
}
114119
},
115120
metadata: {
116-
'zoekt.web-url-type': 'gitlab',
117-
'zoekt.web-url': projectUrl,
118-
'zoekt.name': project.path_with_namespace,
119-
'zoekt.gitlab-stars': (project.stargazers_count ?? 0).toString(),
120-
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
121-
'zoekt.archived': marshalBool(project.archived),
122-
'zoekt.fork': marshalBool(isFork),
123-
'zoekt.public': marshalBool(project.private === false)
124-
},
121+
gitConfig: {
122+
'zoekt.web-url-type': 'gitlab',
123+
'zoekt.web-url': projectUrl,
124+
'zoekt.name': project.path_with_namespace,
125+
'zoekt.gitlab-stars': (project.stargazers_count ?? 0).toString(),
126+
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
127+
'zoekt.archived': marshalBool(project.archived),
128+
'zoekt.fork': marshalBool(isFork),
129+
'zoekt.public': marshalBool(project.private === false)
130+
},
131+
branches: config.revisions?.branches ?? undefined,
132+
tags: config.revisions?.tags ?? undefined,
133+
} satisfies RepoMetadata,
125134
};
126135

127136
return record;
@@ -168,13 +177,17 @@ export const compileGiteaConfig = async (
168177
}
169178
},
170179
metadata: {
171-
'zoekt.web-url-type': 'gitea',
172-
'zoekt.web-url': repo.html_url!,
173-
'zoekt.name': repo.full_name!,
174-
'zoekt.archived': marshalBool(repo.archived),
175-
'zoekt.fork': marshalBool(repo.fork!),
176-
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
177-
},
180+
gitConfig: {
181+
'zoekt.web-url-type': 'gitea',
182+
'zoekt.web-url': repo.html_url!,
183+
'zoekt.name': repo.full_name!,
184+
'zoekt.archived': marshalBool(repo.archived),
185+
'zoekt.fork': marshalBool(repo.fork!),
186+
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
187+
},
188+
branches: config.revisions?.branches ?? undefined,
189+
tags: config.revisions?.tags ?? undefined,
190+
} satisfies RepoMetadata,
178191
};
179192

180193
return record;
@@ -227,13 +240,15 @@ export const compileGerritConfig = async (
227240
}
228241
},
229242
metadata: {
230-
'zoekt.web-url-type': 'gitiles',
231-
'zoekt.web-url': webUrl,
232-
'zoekt.name': repoId,
233-
'zoekt.archived': marshalBool(false),
234-
'zoekt.fork': marshalBool(false),
235-
'zoekt.public': marshalBool(true),
236-
},
243+
gitConfig: {
244+
'zoekt.web-url-type': 'gitiles',
245+
'zoekt.web-url': webUrl,
246+
'zoekt.name': repoId,
247+
'zoekt.archived': marshalBool(false),
248+
'zoekt.fork': marshalBool(false),
249+
'zoekt.public': marshalBool(true),
250+
},
251+
} satisfies RepoMetadata,
237252
};
238253

239254
return record;

packages/backend/src/repoManager.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import { Redis } from 'ioredis';
33
import { createLogger } from "./logger.js";
44
import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
55
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
6-
import { AppContext, Settings } from "./types.js";
6+
import { AppContext, Settings, RepoMetadata } from "./types.js";
77
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
88
import { cloneRepository, fetchRepository } from "./git.js";
99
import { existsSync, rmSync, readdirSync, rm } from 'fs';
@@ -187,7 +187,7 @@ export class RepoManager implements IRepoManager {
187187
let cloneDuration_s: number | undefined = undefined;
188188

189189
const repoPath = getRepoPath(repo, this.ctx);
190-
const metadata = repo.metadata as Record<string, string>;
190+
const metadata = repo.metadata as RepoMetadata;
191191

192192
// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
193193
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
@@ -223,7 +223,7 @@ export class RepoManager implements IRepoManager {
223223
cloneUrl = url.toString();
224224
}
225225

226-
const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
226+
const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata.gitConfig, ({ method, stage, progress }) => {
227227
//this.logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
228228
}));
229229
cloneDuration_s = durationMs / 1000;

packages/backend/src/types.ts

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,29 @@ export type Settings = {
4949
gcGracePeriodMs: number;
5050
}
5151

52+
/**
53+
* Structure of the `metadata` field in the `Repo` table.
54+
*
* The field is stored untyped and cast to this shape at the point of use
* (`repo.metadata as RepoMetadata`), so every property is optional and
* nothing is validated at runtime.
*/
55+
export type RepoMetadata = {
56+
/**
57+
* A set of key-value pairs that will be used as git config
58+
* variables when cloning the repo.
59+
* @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
60+
*
* The connection compilers populate this with `zoekt.*` keys
* (e.g. `zoekt.web-url`, `zoekt.name`, `zoekt.archived`).
*/
61+
gitConfig?: Record<string, string>;
62+
63+
/**
64+
* A list of branches to index. Glob patterns are supported.
65+
*
* Matched with micromatch against the branches found in the cloned repo
* at index time. `HEAD` is always indexed in addition to any matches, and
* the combined list of revisions (HEAD, branches, tags) is truncated to 64.
*/
66+
branches?: string[];
67+
68+
/**
69+
* A list of tags to index. Glob patterns are supported.
70+
*
* Matched with micromatch against the tags found in the cloned repo at
* index time. Matching tags are appended after HEAD and the matching
* branches, so they are the first to be dropped by the 64-revision cap.
*/
71+
tags?: string[];
72+
}
73+
74+
5275
// @see : https://stackoverflow.com/a/61132308
5376
export type DeepPartial<T> = T extends object ? {
5477
[P in keyof T]?: DeepPartial<T[P]>;

packages/backend/src/zoekt.ts

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,63 @@
11
import { exec } from "child_process";
2-
import { AppContext } from "./types.js";
2+
import { AppContext, RepoMetadata } from "./types.js";
33
import { Repo } from "@sourcebot/db";
44
import { getRepoPath } from "./utils.js";
55
import { DEFAULT_SETTINGS } from "./constants.js";
66
import { getShardPrefix } from "./utils.js";
7+
import { getBranches, getTags } from "./git.js";
8+
import micromatch from "micromatch";
9+
import { createLogger } from "./logger.js";
10+
import { captureEvent } from "./posthog.js";
11+
12+
const logger = createLogger('zoekt');
713

814
export const indexGitRepository = async (repo: Repo, ctx: AppContext) => {
9-
const revisions = [
15+
let revisions = [
1016
'HEAD'
1117
];
12-
13-
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
18+
1419
const repoPath = getRepoPath(repo, ctx);
20+
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
21+
const metadata = repo.metadata as RepoMetadata;
22+
23+
if (metadata.branches) {
24+
const branchGlobs = metadata.branches
25+
const allBranches = await getBranches(repoPath);
26+
const matchingBranches =
27+
allBranches
28+
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
29+
.map((branch) => `refs/heads/${branch}`);
30+
31+
revisions = [
32+
...revisions,
33+
...matchingBranches
34+
];
35+
}
36+
37+
if (metadata.tags) {
38+
const tagGlobs = metadata.tags;
39+
const allTags = await getTags(repoPath);
40+
const matchingTags =
41+
allTags
42+
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
43+
.map((tag) => `refs/tags/${tag}`);
44+
45+
revisions = [
46+
...revisions,
47+
...matchingTags
48+
];
49+
}
50+
51+
// zoekt has a limit of 64 branches/tags to index.
52+
if (revisions.length > 64) {
53+
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
54+
captureEvent('backend_revisions_truncated', {
55+
repoId: repo.id,
56+
revisionCount: revisions.length,
57+
});
58+
revisions = revisions.slice(0, 64);
59+
}
60+
1561
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;
1662

1763
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {

packages/schemas/src/v3/connection.schema.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -154,11 +154,11 @@ const schema = {
154154
},
155155
"revisions": {
156156
"type": "object",
157-
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed.",
157+
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
158158
"properties": {
159159
"branches": {
160160
"type": "array",
161-
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported.",
161+
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
162162
"items": {
163163
"type": "string"
164164
},
@@ -175,7 +175,7 @@ const schema = {
175175
},
176176
"tags": {
177177
"type": "array",
178-
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported.",
178+
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
179179
"items": {
180180
"type": "string"
181181
},

0 commit comments

Comments
 (0)