@@ -12,7 +12,7 @@ import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
1212import { getAuthCredentialsForRepo , getRepoPath , getShardPrefix , groupmqLifecycleExceptionWrapper , measure } from './utils.js' ;
1313import { indexGitRepository } from './zoekt.js' ;
1414
15- const LOG_TAG = 'index-syncer ' ;
15+ const LOG_TAG = 'repo- index-manager ' ;
1616const logger = createLogger ( LOG_TAG ) ;
1717const createJobLogger = ( jobId : string ) => createLogger ( `${ LOG_TAG } :job:${ jobId } ` ) ;
1818
@@ -25,7 +25,18 @@ type JobPayload = {
2525
2626const JOB_TIMEOUT_MS = 1000 * 60 * 60 * 6 ; // 6 hour indexing timeout
2727
28- export class IndexSyncer {
28+ /**
29+ * Manages the lifecycle of repository data on disk, including git working copies
30+ * and search index shards. Handles both indexing operations (cloning/fetching repos
31+ * and building search indexes) and cleanup operations (removing orphaned repos and
32+ * their associated data).
33+ *
34+ * Uses a job queue system to process indexing and cleanup tasks asynchronously,
35+ * with configurable concurrency limits and retry logic. Automatically schedules
36+ * re-indexing of repos based on configured intervals and manages garbage collection
37+ * of repos that are no longer connected to any source.
38+ */
39+ export class RepoIndexManager {
2940 private interval ?: NodeJS . Timeout ;
3041 private queue : Queue < JobPayload > ;
3142 private worker : Worker < JobPayload > ;
@@ -37,7 +48,7 @@ export class IndexSyncer {
3748 ) {
3849 this . queue = new Queue < JobPayload > ( {
3950 redis,
40- namespace : 'index-sync -queue' ,
51+ namespace : 'repo-index -queue' ,
4152 jobTimeoutMs : JOB_TIMEOUT_MS ,
4253 maxAttempts : 3 ,
4354 logger : env . DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ,
@@ -210,6 +221,7 @@ export class IndexSyncer {
210221 const logger = createJobLogger ( id ) ;
211222 logger . info ( `Running ${ job . data . type } job ${ id } for repo ${ job . data . repoName } (id: ${ job . data . repoId } ) (attempt ${ job . attempts + 1 } / ${ job . maxAttempts } )` ) ;
212223
224+
213225 const { repo, type : jobType } = await this . db . repoJob . update ( {
214226 where : {
215227 id,
@@ -231,14 +243,28 @@ export class IndexSyncer {
231243 }
232244 } ) ;
233245
234- if ( jobType === RepoJobType . INDEX ) {
235- await this . indexRepository ( repo , logger ) ;
236- } else if ( jobType === RepoJobType . CLEANUP ) {
237- await this . cleanupRepository ( repo , logger ) ;
246+ const abortController = new AbortController ( ) ;
247+ const signalHandler = ( ) => {
248+ logger . info ( `Received shutdown signal, aborting...` ) ;
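249+ abortController . abort ( ) ; // Aborts every operation that was handed abortController.signal (only the INDEX path below receives it; CLEANUP is not cancelled)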
250+ } ;
251+
252+ process . on ( 'SIGTERM' , signalHandler ) ;
253+ process . on ( 'SIGINT' , signalHandler ) ;
254+
255+ try {
256+ if ( jobType === RepoJobType . INDEX ) {
257+ await this . indexRepository ( repo , logger , abortController . signal ) ;
258+ } else if ( jobType === RepoJobType . CLEANUP ) {
259+ await this . cleanupRepository ( repo , logger ) ;
260+ }
261+ } finally {
262+ process . off ( 'SIGTERM' , signalHandler ) ;
263+ process . off ( 'SIGINT' , signalHandler ) ;
238264 }
239265 }
240266
241- private async indexRepository ( repo : RepoWithConnections , logger : Logger ) {
267+ private async indexRepository ( repo : RepoWithConnections , logger : Logger , signal : AbortSignal ) {
242268 const { path : repoPath , isReadOnly } = getRepoPath ( repo ) ;
243269
244270 const metadata = repoMetadataSchema . parse ( repo . metadata ) ;
@@ -250,9 +276,16 @@ export class IndexSyncer {
250276 // If the repo path exists but it is not a valid git repository root, this indicates
251277 // that the repository is in a bad state. To fix, we remove the directory and perform
252278 // a fresh clone.
253- if ( existsSync ( repoPath ) && ! ( await isPathAValidGitRepoRoot ( repoPath ) ) && ! isReadOnly ) {
254- logger . warn ( `${ repoPath } is not a valid git repository root. Deleting directory and performing fresh clone.` ) ;
255- await rm ( repoPath , { recursive : true , force : true } ) ;
279+ if ( existsSync ( repoPath ) ) {
280+ const isValidGitRepo = await isPathAValidGitRepoRoot ( {
281+ path : repoPath ,
282+ signal,
283+ } ) ;
284+
285+ if ( ! isValidGitRepo && ! isReadOnly && ! signal . aborted ) { // single abortable validity probe (was run twice, once without the signal); never delete once shutdown was requested
286+ logger . warn ( `${ repoPath } is not a valid git repository root. Deleting directory and performing fresh clone.` ) ;
287+ await rm ( repoPath , { recursive : true , force : true } ) ;
288+ }
256289 }
257290
258291 if ( existsSync ( repoPath ) && ! isReadOnly ) {
@@ -262,7 +295,11 @@ export class IndexSyncer {
262295 // to unset this key since it is no longer needed, hence this line.
263296 // This will no-op if the key is already unset.
264297 // @see : https://github.com/sourcebot-dev/sourcebot/pull/483
265- await unsetGitConfig ( repoPath , [ "remote.origin.url" ] ) ;
298+ await unsetGitConfig ( {
299+ path : repoPath ,
300+ keys : [ "remote.origin.url" ] ,
301+ signal,
302+ } ) ;
266303
267304 logger . info ( `Fetching ${ repo . name } (id: ${ repo . id } )...` ) ;
268305 const { durationMs } = await measure ( ( ) => fetchRepository ( {
@@ -271,7 +308,8 @@ export class IndexSyncer {
271308 path : repoPath ,
272309 onProgress : ( { method, stage, progress } ) => {
273310 logger . debug ( `git.${ method } ${ stage } stage ${ progress } % complete for ${ repo . name } (id: ${ repo . id } )` )
274- }
311+ } ,
312+ signal,
275313 } ) ) ;
276314 const fetchDuration_s = durationMs / 1000 ;
277315
@@ -287,7 +325,8 @@ export class IndexSyncer {
287325 path : repoPath ,
288326 onProgress : ( { method, stage, progress } ) => {
289327 logger . debug ( `git.${ method } ${ stage } stage ${ progress } % complete for ${ repo . name } (id: ${ repo . id } )` )
290- }
328+ } ,
329+ signal
291330 } ) ) ;
292331 const cloneDuration_s = durationMs / 1000 ;
293332
@@ -299,11 +338,15 @@ export class IndexSyncer {
299338 // This ensures that the git config is always up to date for whatever we
300339 // have in the DB.
301340 if ( metadata . gitConfig && ! isReadOnly ) {
302- await upsertGitConfig ( repoPath , metadata . gitConfig ) ;
341+ await upsertGitConfig ( {
342+ path : repoPath ,
343+ gitConfig : metadata . gitConfig ,
344+ signal,
345+ } ) ;
303346 }
304347
305348 logger . info ( `Indexing ${ repo . name } (id: ${ repo . id } )...` ) ;
306- const { durationMs } = await measure ( ( ) => indexGitRepository ( repo , this . settings ) ) ;
349+ const { durationMs } = await measure ( ( ) => indexGitRepository ( repo , this . settings , signal ) ) ;
307350 const indexDuration_s = durationMs / 1000 ;
308351 logger . info ( `Indexed ${ repo . name } (id: ${ repo . id } ) in ${ indexDuration_s } s` ) ;
309352 }
0 commit comments