Skip to content

Commit bbd8b22

Browse files
Display name improvements (#259)
1 parent d55bf83 commit bbd8b22

File tree

14 files changed

+140
-63
lines changed

14 files changed

+140
-63
lines changed

.vscode/sourcebot.code-workspace

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"folders": [
3+
{
4+
"path": ".."
5+
},
6+
{
7+
"path": "../vendor/zoekt"
8+
}
9+
],
10+
"settings": {
11+
"files.associations": {
12+
"*.json": "jsonc",
13+
"index.json": "json"
14+
}
15+
}
16+
}

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Fixes
1111
- Change connection manager upsert timeout to 5 minutes
12+
- Fix issue with repo display names being poorly formatted, especially for gerrit. ([#259](https://github.com/sourcebot-dev/sourcebot/pull/259))
1213

1314
## [3.0.1] - 2025-04-01
1415

packages/backend/src/git.ts

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,15 @@
11
import { simpleGit, SimpleGitProgressEvent } from 'simple-git';
22

3-
export const cloneRepository = async (cloneURL: string, path: string, gitConfig?: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
3+
export const cloneRepository = async (cloneURL: string, path: string, onProgress?: (event: SimpleGitProgressEvent) => void) => {
44
const git = simpleGit({
55
progress: onProgress,
66
});
7-
8-
const configParams = Object.entries(gitConfig ?? {}).flatMap(
9-
([key, value]) => ['--config', `${key}=${value}`]
10-
);
11-
127
try {
138
await git.clone(
149
cloneURL,
1510
path,
1611
[
1712
"--bare",
18-
...configParams
1913
]
2014
);
2115

@@ -48,6 +42,26 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
4842
}
4943
}
5044

45+
/**
46+
* Applies the gitConfig to the repo at the given path. Note that this will
47+
* override the values for any existing keys, and append new values for keys
48+
* that do not exist yet. It will _not_ remove any existing keys that are not
49+
* present in gitConfig.
50+
*/
51+
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: (event: SimpleGitProgressEvent) => void) => {
52+
const git = simpleGit({
53+
progress: onProgress,
54+
}).cwd(path);
55+
56+
try {
57+
for (const [key, value] of Object.entries(gitConfig)) {
58+
await git.addConfig(key, value);
59+
}
60+
} catch (error) {
61+
throw new Error(`Failed to set git config ${path}`);
62+
}
63+
}
64+
5165
export const getBranches = async (path: string) => {
5266
const git = simpleGit();
5367
const branches = await git.cwd({

packages/backend/src/repoCompileUtils.ts

Lines changed: 50 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import { WithRequired } from "./types.js"
88
import { marshalBool } from "./utils.js";
99
import { GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
1010
import { RepoMetadata } from './types.js';
11+
import path from 'path';
1112

1213
export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;
1314

@@ -29,10 +30,13 @@ export const compileGithubConfig = async (
2930
const notFound = gitHubReposResult.notFound;
3031

3132
const hostUrl = config.url ?? 'https://github.com';
32-
const hostname = new URL(hostUrl).hostname;
33+
const repoNameRoot = new URL(hostUrl)
34+
.toString()
35+
.replace(/^https?:\/\//, '');
3336

3437
const repos = gitHubRepos.map((repo) => {
35-
const repoName = `${hostname}/${repo.full_name}`;
38+
const repoDisplayName = repo.full_name;
39+
const repoName = path.join(repoNameRoot, repoDisplayName);
3640
const cloneUrl = new URL(repo.clone_url!);
3741

3842
const record: RepoData = {
@@ -42,6 +46,7 @@ export const compileGithubConfig = async (
4246
cloneUrl: cloneUrl.toString(),
4347
webUrl: repo.html_url,
4448
name: repoName,
49+
displayName: repoDisplayName,
4550
imageUrl: repo.owner.avatar_url,
4651
isFork: repo.fork,
4752
isArchived: !!repo.archived,
@@ -67,6 +72,7 @@ export const compileGithubConfig = async (
6772
'zoekt.archived': marshalBool(repo.archived),
6873
'zoekt.fork': marshalBool(repo.fork),
6974
'zoekt.public': marshalBool(repo.private === false),
75+
'zoekt.display-name': repoDisplayName,
7076
},
7177
branches: config.revisions?.branches ?? undefined,
7278
tags: config.revisions?.tags ?? undefined,
@@ -93,13 +99,16 @@ export const compileGitlabConfig = async (
9399
const notFound = gitlabReposResult.notFound;
94100

95101
const hostUrl = config.url ?? 'https://gitlab.com';
96-
const hostname = new URL(hostUrl).hostname;
97-
102+
const repoNameRoot = new URL(hostUrl)
103+
.toString()
104+
.replace(/^https?:\/\//, '');
105+
98106
const repos = gitlabRepos.map((project) => {
99107
const projectUrl = `${hostUrl}/${project.path_with_namespace}`;
100108
const cloneUrl = new URL(project.http_url_to_repo);
101109
const isFork = project.forked_from_project !== undefined;
102-
const repoName = `${hostname}/${project.path_with_namespace}`;
110+
const repoDisplayName = project.path_with_namespace;
111+
const repoName = path.join(repoNameRoot, repoDisplayName);
103112

104113
const record: RepoData = {
105114
external_id: project.id.toString(),
@@ -108,6 +117,7 @@ export const compileGitlabConfig = async (
108117
cloneUrl: cloneUrl.toString(),
109118
webUrl: projectUrl,
110119
name: repoName,
120+
displayName: repoDisplayName,
111121
imageUrl: project.avatar_url,
112122
isFork: isFork,
113123
isArchived: !!project.archived,
@@ -130,7 +140,8 @@ export const compileGitlabConfig = async (
130140
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
131141
'zoekt.archived': marshalBool(project.archived),
132142
'zoekt.fork': marshalBool(isFork),
133-
'zoekt.public': marshalBool(project.private === false)
143+
'zoekt.public': marshalBool(project.private === false),
144+
'zoekt.display-name': repoDisplayName,
134145
},
135146
branches: config.revisions?.branches ?? undefined,
136147
tags: config.revisions?.tags ?? undefined,
@@ -157,11 +168,14 @@ export const compileGiteaConfig = async (
157168
const notFound = giteaReposResult.notFound;
158169

159170
const hostUrl = config.url ?? 'https://gitea.com';
160-
const hostname = new URL(hostUrl).hostname;
171+
const repoNameRoot = new URL(hostUrl)
172+
.toString()
173+
.replace(/^https?:\/\//, '');
161174

162175
const repos = giteaRepos.map((repo) => {
163176
const cloneUrl = new URL(repo.clone_url!);
164-
const repoName = `${hostname}/${repo.full_name!}`;
177+
const repoDisplayName = repo.full_name!;
178+
const repoName = path.join(repoNameRoot, repoDisplayName);
165179

166180
const record: RepoData = {
167181
external_id: repo.id!.toString(),
@@ -170,6 +184,7 @@ export const compileGiteaConfig = async (
170184
cloneUrl: cloneUrl.toString(),
171185
webUrl: repo.html_url,
172186
name: repoName,
187+
displayName: repoDisplayName,
173188
imageUrl: repo.owner?.avatar_url,
174189
isFork: repo.fork!,
175190
isArchived: !!repo.archived,
@@ -191,6 +206,7 @@ export const compileGiteaConfig = async (
191206
'zoekt.archived': marshalBool(repo.archived),
192207
'zoekt.fork': marshalBool(repo.fork!),
193208
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
209+
'zoekt.display-name': repoDisplayName,
194210
},
195211
branches: config.revisions?.branches ?? undefined,
196212
tags: config.revisions?.tags ?? undefined,
@@ -212,35 +228,41 @@ export const compileGerritConfig = async (
212228
orgId: number) => {
213229

214230
const gerritRepos = await getGerritReposFromConfig(config);
215-
const hostUrl = (config.url ?? 'https://gerritcodereview.com').replace(/\/$/, ''); // Remove trailing slash
216-
const hostname = new URL(hostUrl).hostname;
231+
const hostUrl = config.url;
232+
const repoNameRoot = new URL(hostUrl)
233+
.toString()
234+
.replace(/^https?:\/\//, '');
217235

218236
const repos = gerritRepos.map((project) => {
219-
const repoId = `${hostname}/${project.name}`;
220-
const cloneUrl = new URL(`${config.url}/${encodeURIComponent(project.name)}`);
237+
const cloneUrl = new URL(path.join(hostUrl, encodeURIComponent(project.name)));
238+
const repoDisplayName = project.name;
239+
const repoName = path.join(repoNameRoot, repoDisplayName);
221240

222-
let webUrl = "https://www.gerritcodereview.com/";
223-
// Gerrit projects can have multiple web links; use the first one
224-
if (project.web_links) {
225-
const webLink = project.web_links[0];
226-
if (webLink) {
227-
webUrl = webLink.url;
241+
const webUrl = (() => {
242+
if (!project.web_links || project.web_links.length === 0) {
243+
return null;
228244
}
229-
}
230245

231-
// Handle case where webUrl is just a gitiles path
232-
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
233-
if (webUrl.startsWith('/plugins/gitiles/')) {
234-
webUrl = `${hostUrl}${webUrl}`;
235-
}
246+
const webLink = project.web_links[0];
247+
const webUrl = webLink.url;
248+
249+
// Handle case where webUrl is just a gitiles path
250+
// https://github.com/GerritCodeReview/plugins_gitiles/blob/5ee7f57/src/main/java/com/googlesource/gerrit/plugins/gitiles/GitilesWeblinks.java#L50
251+
if (webUrl.startsWith('/plugins/gitiles/')) {
252+
return path.join(hostUrl, webUrl);
253+
} else {
254+
return webUrl;
255+
}
256+
})();
236257

237258
const record: RepoData = {
238259
external_id: project.id.toString(),
239260
external_codeHostType: 'gerrit',
240261
external_codeHostUrl: hostUrl,
241262
cloneUrl: cloneUrl.toString(),
242263
webUrl: webUrl,
243-
name: project.name,
264+
name: repoName,
265+
displayName: repoDisplayName,
244266
isFork: false,
245267
isArchived: false,
246268
org: {
@@ -256,11 +278,12 @@ export const compileGerritConfig = async (
256278
metadata: {
257279
gitConfig: {
258280
'zoekt.web-url-type': 'gitiles',
259-
'zoekt.web-url': webUrl,
260-
'zoekt.name': repoId,
281+
'zoekt.web-url': webUrl ?? '',
282+
'zoekt.name': repoName,
261283
'zoekt.archived': marshalBool(false),
262284
'zoekt.fork': marshalBool(false),
263285
'zoekt.public': marshalBool(true),
286+
'zoekt.display-name': repoDisplayName,
264287
},
265288
} satisfies RepoMetadata,
266289
};

packages/backend/src/repoManager.ts

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,9 @@ import { Redis } from 'ioredis';
33
import { createLogger } from "./logger.js";
44
import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
55
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
6-
import { AppContext, Settings, RepoMetadata } from "./types.js";
6+
import { AppContext, Settings, repoMetadataSchema } from "./types.js";
77
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
8-
import { cloneRepository, fetchRepository } from "./git.js";
8+
import { cloneRepository, fetchRepository, upsertGitConfig } from "./git.js";
99
import { existsSync, readdirSync, promises } from 'fs';
1010
import { indexGitRepository } from "./zoekt.js";
1111
import { PromClient } from './promClient.js';
@@ -200,8 +200,7 @@ export class RepoManager implements IRepoManager {
200200
let cloneDuration_s: number | undefined = undefined;
201201

202202
const repoPath = getRepoPath(repo, this.ctx);
203-
const metadata = repo.metadata as RepoMetadata;
204-
203+
const metadata = repoMetadataSchema.parse(repo.metadata);
205204

206205
// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
207206
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
@@ -240,7 +239,7 @@ export class RepoManager implements IRepoManager {
240239
}
241240
}
242241

243-
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, metadata.gitConfig, ({ method, stage, progress }) => {
242+
const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, ({ method, stage, progress }) => {
244243
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
245244
}));
246245
cloneDuration_s = durationMs / 1000;
@@ -249,6 +248,13 @@ export class RepoManager implements IRepoManager {
249248
this.logger.info(`Cloned ${repo.id} in ${cloneDuration_s}s`);
250249
}
251250

251+
// Regardless of clone or fetch, always upsert the git config for the repo.
252+
// This ensures that the git config is always up to date for whatever we
253+
// have in the DB.
254+
if (metadata.gitConfig) {
255+
await upsertGitConfig(repoPath, metadata.gitConfig);
256+
}
257+
252258
this.logger.info(`Indexing ${repo.id}...`);
253259
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
254260
const indexDuration_s = durationMs / 1000;

packages/backend/src/types.ts

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
2+
import { z } from "zod";
23

34
export type AppContext = {
45
/**
@@ -16,28 +17,32 @@ export type AppContext = {
1617

1718
export type Settings = Required<SettingsSchema>;
1819

19-
/**
20-
* Structure of the `metadata` field in the `Repo` table.
21-
*/
22-
export type RepoMetadata = {
20+
// Structure of the `metadata` field in the `Repo` table.
21+
//
22+
// @WARNING: If you modify this schema, please make sure it is backwards
23+
// compatible with any prior versions of the schema!!
24+
// @NOTE: If you move this schema, please update the comment in schema.prisma
25+
// to point to the new location.
26+
export const repoMetadataSchema = z.object({
2327
/**
2428
* A set of key-value pairs that will be used as git config
2529
* variables when cloning the repo.
2630
* @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
2731
*/
28-
gitConfig?: Record<string, string>;
32+
gitConfig: z.record(z.string(), z.string()).optional(),
2933

3034
/**
3135
* A list of branches to index. Glob patterns are supported.
3236
*/
33-
branches?: string[];
37+
branches: z.array(z.string()).optional(),
3438

3539
/**
3640
* A list of tags to index. Glob patterns are supported.
3741
*/
38-
tags?: string[];
39-
}
42+
tags: z.array(z.string()).optional(),
43+
});
4044

45+
export type RepoMetadata = z.infer<typeof repoMetadataSchema>;
4146

4247
// @see : https://stackoverflow.com/a/61132308
4348
export type DeepPartial<T> = T extends object ? {

packages/backend/src/zoekt.ts

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { exec } from "child_process";
2-
import { AppContext, RepoMetadata, Settings } from "./types.js";
2+
import { AppContext, repoMetadataSchema, Settings } from "./types.js";
33
import { Repo } from "@sourcebot/db";
44
import { getRepoPath } from "./utils.js";
55
import { getShardPrefix } from "./utils.js";
@@ -17,7 +17,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
1717

1818
const repoPath = getRepoPath(repo, ctx);
1919
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
20-
const metadata = repo.metadata as RepoMetadata;
20+
const metadata = repoMetadataSchema.parse(repo.metadata);
2121

2222
if (metadata.branches) {
2323
const branchGlobs = metadata.branches
@@ -57,7 +57,17 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: Ap
5757
revisions = revisions.slice(0, 64);
5858
}
5959

60-
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -max_trigram_count ${settings.maxTrigramCount} -file_limit ${settings.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;
60+
const command = [
61+
'zoekt-git-index',
62+
'-allow_missing_branches',
63+
`-index ${ctx.indexPath}`,
64+
`-max_trigram_count ${settings.maxTrigramCount}`,
65+
`-file_limit ${settings.maxFileSize}`,
66+
`-branches ${revisions.join(',')}`,
67+
`-tenant_id ${repo.orgId}`,
68+
`-shard_prefix ${shardPrefix}`,
69+
repoPath
70+
].join(' ');
6171

6272
return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
6373
exec(command, (error, stdout, stderr) => {
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- AlterTable
2+
ALTER TABLE "Repo" ADD COLUMN "displayName" TEXT;

0 commit comments

Comments
 (0)