Skip to content

Commit 5fe554e

Browse files
renames + add abortSignal
1 parent d315292 commit 5fe554e

File tree

6 files changed

+118
-606
lines changed

6 files changed

+118
-606
lines changed

packages/backend/src/git.ts

Lines changed: 50 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ type onProgressFn = (event: SimpleGitProgressEvent) => void;
1010
* Creates a simple-git client that has its working directory
1111
* set to the given path.
1212
*/
13-
const createGitClientForPath = (path: string, onProgress?: onProgressFn) => {
13+
const createGitClientForPath = (path: string, onProgress?: onProgressFn, signal?: AbortSignal) => {
1414
if (!existsSync(path)) {
1515
throw new Error(`Path ${path} does not exist`);
1616
}
@@ -19,6 +19,7 @@ const createGitClientForPath = (path: string, onProgress?: onProgressFn) => {
1919

2020
const git = simpleGit({
2121
progress: onProgress,
22+
abort: signal,
2223
})
2324
.env({
2425
...process.env,
@@ -48,17 +49,19 @@ export const cloneRepository = async (
4849
authHeader,
4950
path,
5051
onProgress,
52+
signal,
5153
}: {
5254
cloneUrl: string,
5355
authHeader?: string,
5456
path: string,
5557
onProgress?: onProgressFn
58+
signal?: AbortSignal
5659
}
5760
) => {
5861
try {
5962
await mkdir(path, { recursive: true });
6063

61-
const git = createGitClientForPath(path, onProgress);
64+
const git = createGitClientForPath(path, onProgress, signal);
6265

6366
const cloneArgs = [
6467
"--bare",
@@ -67,7 +70,11 @@ export const cloneRepository = async (
6770

6871
await git.clone(cloneUrl, path, cloneArgs);
6972

70-
await unsetGitConfig(path, ["remote.origin.url"]);
73+
await unsetGitConfig({
74+
path,
75+
keys: ["remote.origin.url"],
76+
signal,
77+
});
7178
} catch (error: unknown) {
7279
const baseLog = `Failed to clone repository: ${path}`;
7380

@@ -88,15 +95,17 @@ export const fetchRepository = async (
8895
authHeader,
8996
path,
9097
onProgress,
98+
signal,
9199
}: {
92100
cloneUrl: string,
93101
authHeader?: string,
94102
path: string,
95-
onProgress?: onProgressFn
103+
onProgress?: onProgressFn,
104+
signal?: AbortSignal
96105
}
97106
) => {
98107
try {
99-
const git = createGitClientForPath(path, onProgress);
108+
const git = createGitClientForPath(path, onProgress, signal);
100109

101110
if (authHeader) {
102111
await git.addConfig("http.extraHeader", authHeader);
@@ -137,8 +146,19 @@ export const fetchRepository = async (
137146
* that do not exist yet. It will _not_ remove any existing keys that are not
138147
* present in gitConfig.
139148
*/
140-
export const upsertGitConfig = async (path: string, gitConfig: Record<string, string>, onProgress?: onProgressFn) => {
141-
const git = createGitClientForPath(path, onProgress);
149+
export const upsertGitConfig = async (
150+
{
151+
path,
152+
gitConfig,
153+
onProgress,
154+
signal,
155+
}: {
156+
path: string,
157+
gitConfig: Record<string, string>,
158+
onProgress?: onProgressFn,
159+
signal?: AbortSignal
160+
}) => {
161+
const git = createGitClientForPath(path, onProgress, signal);
142162

143163
try {
144164
for (const [key, value] of Object.entries(gitConfig)) {
@@ -157,8 +177,19 @@ export const upsertGitConfig = async (path: string, gitConfig: Record<string, st
157177
* Unsets the specified keys in the git config for the repo at the given path.
158178
* If a key is not set, this is a no-op.
159179
*/
160-
export const unsetGitConfig = async (path: string, keys: string[], onProgress?: onProgressFn) => {
161-
const git = createGitClientForPath(path, onProgress);
180+
export const unsetGitConfig = async (
181+
{
182+
path,
183+
keys,
184+
onProgress,
185+
signal,
186+
}: {
187+
path: string,
188+
keys: string[],
189+
onProgress?: onProgressFn,
190+
signal?: AbortSignal
191+
}) => {
192+
const git = createGitClientForPath(path, onProgress, signal);
162193

163194
try {
164195
const configList = await git.listConfig();
@@ -181,8 +212,16 @@ export const unsetGitConfig = async (path: string, keys: string[], onProgress?:
181212
/**
182213
* Returns true if `path` is the _root_ of a git repository.
183214
*/
184-
export const isPathAValidGitRepoRoot = async (path: string, onProgress?: onProgressFn) => {
185-
const git = createGitClientForPath(path, onProgress);
215+
export const isPathAValidGitRepoRoot = async ({
216+
path,
217+
onProgress,
218+
signal,
219+
}: {
220+
path: string,
221+
onProgress?: onProgressFn,
222+
signal?: AbortSignal
223+
}) => {
224+
const git = createGitClientForPath(path, onProgress, signal);
186225

187226
try {
188227
return git.checkIsRepo(CheckRepoActions.IS_REPO_ROOT);

packages/backend/src/index.ts

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,8 @@ import { DEFAULT_SETTINGS, INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.
1111
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
1212
import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js";
1313
import { env } from "./env.js";
14-
import { IndexSyncer } from "./indexSyncer.js";
14+
import { RepoIndexManager } from "./repoIndexManager.js";
1515
import { PromClient } from './promClient.js';
16-
import { RepoManager } from './repoManager.js';
1716

1817

1918
const logger = createLogger('backend-entrypoint');
@@ -60,16 +59,12 @@ const promClient = new PromClient();
6059
const settings = await getSettings(env.CONFIG_PATH);
6160

6261
const connectionManager = new ConnectionManager(prisma, settings, redis);
63-
const repoManager = new RepoManager(prisma, settings, redis, promClient);
6462
const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis);
6563
const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis);
66-
const indexSyncer = new IndexSyncer(prisma, settings, redis);
67-
68-
// await repoManager.validateIndexedReposHaveShards();
64+
const repoIndexManager = new RepoIndexManager(prisma, settings, redis);
6965

7066
connectionManager.startScheduler();
71-
// repoManager.startScheduler();
72-
indexSyncer.startScheduler();
67+
repoIndexManager.startScheduler();
7368

7469
if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) {
7570
logger.error('Permission syncing is not supported in current plan. Please contact team@sourcebot.dev for assistance.');
@@ -88,8 +83,7 @@ const cleanup = async (signal: string) => {
8883
try {
8984
await Promise.race([
9085
Promise.all([
91-
indexSyncer.dispose(),
92-
repoManager.dispose(),
86+
repoIndexManager.dispose(),
9387
connectionManager.dispose(),
9488
repoPermissionSyncer.dispose(),
9589
userPermissionSyncer.dispose(),

packages/backend/src/repoCompileUtils.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -497,7 +497,9 @@ export const compileGenericGitHostConfig_file = async (
497497
};
498498

499499
await Promise.all(repoPaths.map(async (repoPath) => {
500-
const isGitRepo = await isPathAValidGitRepoRoot(repoPath);
500+
const isGitRepo = await isPathAValidGitRepoRoot({
501+
path: repoPath,
502+
});
501503
if (!isGitRepo) {
502504
logger.warn(`Skipping ${repoPath} - not a git repository.`);
503505
notFound.repos.push(repoPath);
Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
1212
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js';
1313
import { indexGitRepository } from './zoekt.js';
1414

15-
const LOG_TAG = 'index-syncer';
15+
const LOG_TAG = 'repo-index-manager';
1616
const logger = createLogger(LOG_TAG);
1717
const createJobLogger = (jobId: string) => createLogger(`${LOG_TAG}:job:${jobId}`);
1818

@@ -25,7 +25,18 @@ type JobPayload = {
2525

2626
const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 6; // 6 hour indexing timeout
2727

28-
export class IndexSyncer {
28+
/**
29+
* Manages the lifecycle of repository data on disk, including git working copies
30+
* and search index shards. Handles both indexing operations (cloning/fetching repos
31+
* and building search indexes) and cleanup operations (removing orphaned repos and
32+
* their associated data).
33+
*
34+
* Uses a job queue system to process indexing and cleanup tasks asynchronously,
35+
* with configurable concurrency limits and retry logic. Automatically schedules
36+
* re-indexing of repos based on configured intervals and manages garbage collection
37+
* of repos that are no longer connected to any source.
38+
*/
39+
export class RepoIndexManager {
2940
private interval?: NodeJS.Timeout;
3041
private queue: Queue<JobPayload>;
3142
private worker: Worker<JobPayload>;
@@ -37,7 +48,7 @@ export class IndexSyncer {
3748
) {
3849
this.queue = new Queue<JobPayload>({
3950
redis,
40-
namespace: 'index-sync-queue',
51+
namespace: 'repo-index-queue',
4152
jobTimeoutMs: JOB_TIMEOUT_MS,
4253
maxAttempts: 3,
4354
logger: env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true',
@@ -210,6 +221,7 @@ export class IndexSyncer {
210221
const logger = createJobLogger(id);
211222
logger.info(`Running ${job.data.type} job ${id} for repo ${job.data.repoName} (id: ${job.data.repoId}) (attempt ${job.attempts + 1} / ${job.maxAttempts})`);
212223

224+
213225
const { repo, type: jobType } = await this.db.repoJob.update({
214226
where: {
215227
id,
@@ -231,14 +243,28 @@ export class IndexSyncer {
231243
}
232244
});
233245

234-
if (jobType === RepoJobType.INDEX) {
235-
await this.indexRepository(repo, logger);
236-
} else if (jobType === RepoJobType.CLEANUP) {
237-
await this.cleanupRepository(repo, logger);
246+
const abortController = new AbortController();
247+
const signalHandler = () => {
248+
logger.info(`Received shutdown signal, aborting...`);
249+
abortController.abort(); // This cancels all operations
250+
};
251+
252+
process.on('SIGTERM', signalHandler);
253+
process.on('SIGINT', signalHandler);
254+
255+
try {
256+
if (jobType === RepoJobType.INDEX) {
257+
await this.indexRepository(repo, logger, abortController.signal);
258+
} else if (jobType === RepoJobType.CLEANUP) {
259+
await this.cleanupRepository(repo, logger);
260+
}
261+
} finally {
262+
process.off('SIGTERM', signalHandler);
263+
process.off('SIGINT', signalHandler);
238264
}
239265
}
240266

241-
private async indexRepository(repo: RepoWithConnections, logger: Logger) {
267+
private async indexRepository(repo: RepoWithConnections, logger: Logger, signal: AbortSignal) {
242268
const { path: repoPath, isReadOnly } = getRepoPath(repo);
243269

244270
const metadata = repoMetadataSchema.parse(repo.metadata);
@@ -250,9 +276,16 @@ export class IndexSyncer {
250276
// If the repo path exists but it is not a valid git repository root, this indicates
251277
// that the repository is in a bad state. To fix, we remove the directory and perform
252278
// a fresh clone.
253-
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot(repoPath)) && !isReadOnly) {
254-
logger.warn(`${repoPath} is not a valid git repository root. Deleting directory and performing fresh clone.`);
255-
await rm(repoPath, { recursive: true, force: true });
279+
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot( { path: repoPath } ))) {
280+
const isValidGitRepo = await isPathAValidGitRepoRoot({
281+
path: repoPath,
282+
signal,
283+
});
284+
285+
if (!isValidGitRepo && !isReadOnly) {
286+
logger.warn(`${repoPath} is not a valid git repository root. Deleting directory and performing fresh clone.`);
287+
await rm(repoPath, { recursive: true, force: true });
288+
}
256289
}
257290

258291
if (existsSync(repoPath) && !isReadOnly) {
@@ -262,7 +295,11 @@ export class IndexSyncer {
262295
// to unset this key since it is no longer needed, hence this line.
263296
// This will no-op if the key is already unset.
264297
// @see: https://github.com/sourcebot-dev/sourcebot/pull/483
265-
await unsetGitConfig(repoPath, ["remote.origin.url"]);
298+
await unsetGitConfig({
299+
path: repoPath,
300+
keys: ["remote.origin.url"],
301+
signal,
302+
});
266303

267304
logger.info(`Fetching ${repo.name} (id: ${repo.id})...`);
268305
const { durationMs } = await measure(() => fetchRepository({
@@ -271,7 +308,8 @@ export class IndexSyncer {
271308
path: repoPath,
272309
onProgress: ({ method, stage, progress }) => {
273310
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.name} (id: ${repo.id})`)
274-
}
311+
},
312+
signal,
275313
}));
276314
const fetchDuration_s = durationMs / 1000;
277315

@@ -287,7 +325,8 @@ export class IndexSyncer {
287325
path: repoPath,
288326
onProgress: ({ method, stage, progress }) => {
289327
logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.name} (id: ${repo.id})`)
290-
}
328+
},
329+
signal
291330
}));
292331
const cloneDuration_s = durationMs / 1000;
293332

@@ -299,11 +338,15 @@ export class IndexSyncer {
299338
// This ensures that the git config is always up to date for whatever we
300339
// have in the DB.
301340
if (metadata.gitConfig && !isReadOnly) {
302-
await upsertGitConfig(repoPath, metadata.gitConfig);
341+
await upsertGitConfig({
342+
path: repoPath,
343+
gitConfig: metadata.gitConfig,
344+
signal,
345+
});
303346
}
304347

305348
logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
306-
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings));
349+
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, signal));
307350
const indexDuration_s = durationMs / 1000;
308351
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);
309352
}

0 commit comments

Comments
 (0)