Skip to content

Commit 27a1066

Browse files
Declarative connection cleanup + improvements (#245)
1 parent 691c593 commit 27a1066

File tree

15 files changed

+110
-59
lines changed

15 files changed

+110
-59
lines changed

Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,8 +115,9 @@ ENV NEXT_TELEMETRY_DISABLED=1
115115
ENV DATA_DIR=/data
116116
ENV DATA_CACHE_DIR=$DATA_DIR/.sourcebot
117117
ENV DB_DATA_DIR=$DATA_CACHE_DIR/db
118+
ENV REDIS_DATA_DIR=$DATA_CACHE_DIR/redis
118119
ENV DB_NAME=sourcebot
119-
ENV DATABASE_URL="postgresql://postgres@localhost:5432/sourcebot"
120+
ENV DATABASE_URL="postgresql://postgres@localhost:5432/$DB_NAME"
120121
ENV REDIS_URL="redis://localhost:6379"
121122
ENV SRC_TENANT_ENFORCEMENT_MODE=strict
122123

entrypoint.sh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,11 @@ if [ ! -d "$DB_DATA_DIR" ]; then
3636
su postgres -c "initdb -D $DB_DATA_DIR"
3737
fi
3838

39+
# Create the redis data directory if it doesn't exist
40+
if [ ! -d "$REDIS_DATA_DIR" ]; then
41+
mkdir -p $REDIS_DATA_DIR
42+
fi
43+
3944
if [ -z "$SOURCEBOT_ENCRYPTION_KEY" ]; then
4045
echo -e "\e[33m[Warning] SOURCEBOT_ENCRYPTION_KEY is not set.\e[0m"
4146

packages/backend/src/connectionManager.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
import { Connection, ConnectionSyncStatus, PrismaClient, Prisma, RepoIndexingStatus } from "@sourcebot/db";
1+
import { Connection, ConnectionSyncStatus, PrismaClient, Prisma } from "@sourcebot/db";
22
import { Job, Queue, Worker } from 'bullmq';
33
import { Settings } from "./types.js";
44
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
55
import { createLogger } from "./logger.js";
6-
import os from 'os';
76
import { Redis } from 'ioredis';
87
import { RepoData, compileGithubConfig, compileGitlabConfig, compileGiteaConfig, compileGerritConfig } from "./repoCompileUtils.js";
98
import { BackendError, BackendException } from "@sourcebot/error";
@@ -42,10 +41,9 @@ export class ConnectionManager implements IConnectionManager {
4241
this.queue = new Queue<JobPayload>(QUEUE_NAME, {
4342
connection: redis,
4443
});
45-
const numCores = os.cpus().length;
4644
this.worker = new Worker(QUEUE_NAME, this.runSyncJob.bind(this), {
4745
connection: redis,
48-
concurrency: numCores * this.settings.configSyncConcurrencyMultiple,
46+
concurrency: this.settings.maxConnectionSyncJobConcurrency,
4947
});
5048
this.worker.on('completed', this.onSyncJobCompleted.bind(this));
5149
this.worker.on('failed', this.onSyncJobFailed.bind(this));
@@ -262,11 +260,11 @@ export class ConnectionManager implements IConnectionManager {
262260
});
263261
}
264262

265-
private async onSyncJobFailed(job: Job | undefined, err: unknown) {
263+
private async onSyncJobFailed(job: Job<JobPayload> | undefined, err: unknown) {
266264
this.logger.info(`Connection sync job failed with error: ${err}`);
267265
Sentry.captureException(err, {
268266
tags: {
269-
repoId: job?.data.repo.id,
267+
connectionid: job?.data.connectionId,
270268
jobId: job?.id,
271269
queue: QUEUE_NAME,
272270
}

packages/backend/src/constants.ts

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,13 @@ import { Settings } from "./types.js";
55
*/
66
export const DEFAULT_SETTINGS: Settings = {
77
maxFileSize: 2 * 1024 * 1024, // 2MB in bytes
8+
maxTrigramCount: 20000,
89
reindexIntervalMs: 1000 * 60 * 60, // 1 hour
9-
resyncConnectionPollingIntervalMs: 1000,
10-
reindexRepoPollingIntervalMs: 1000,
11-
indexConcurrencyMultiple: 3,
12-
configSyncConcurrencyMultiple: 3,
13-
gcConcurrencyMultiple: 1,
14-
gcGracePeriodMs: 10 * 1000, // 10 seconds
10+
resyncConnectionPollingIntervalMs: 1000 * 1, // 1 second
11+
reindexRepoPollingIntervalMs: 1000 * 1, // 1 second
12+
maxConnectionSyncJobConcurrency: 8,
13+
maxRepoIndexingJobConcurrency: 8,
14+
maxRepoGarbageCollectionJobConcurrency: 8,
15+
repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds
1516
repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours
16-
maxTrigramCount: 20000,
1717
}

packages/backend/src/github.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ const getReposOwnedByUsers = async (users: string[], isAuthenticated: boolean, o
257257
};
258258
} catch (error) {
259259
Sentry.captureException(error);
260+
logger.error(`Failed to fetch repositories for user ${user}.`, error);
260261

261262
if (isHttpError(error, 404)) {
262263
logger.error(`User ${user} not found or no access`);
@@ -302,6 +303,7 @@ const getReposForOrgs = async (orgs: string[], octokit: Octokit, signal: AbortSi
302303
};
303304
} catch (error) {
304305
Sentry.captureException(error);
306+
logger.error(`Failed to fetch repositories for org ${org}.`, error);
305307

306308
if (isHttpError(error, 404)) {
307309
logger.error(`Organization ${org} not found or no access`);
@@ -349,6 +351,7 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna
349351

350352
} catch (error) {
351353
Sentry.captureException(error);
354+
logger.error(`Failed to fetch repository ${repo}.`, error);
352355

353356
if (isHttpError(error, 404)) {
354357
logger.error(`Repository ${repo} not found or no access`);

packages/backend/src/gitlab.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
8282
};
8383
} catch (e: any) {
8484
Sentry.captureException(e);
85+
logger.error(`Failed to fetch projects for group ${group}.`, e);
8586

8687
const status = e?.cause?.response?.status;
8788
if (status === 404) {
@@ -118,6 +119,7 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
118119
};
119120
} catch (e: any) {
120121
Sentry.captureException(e);
122+
logger.error(`Failed to fetch projects for user ${user}.`, e);
121123

122124
const status = e?.cause?.response?.status;
123125
if (status === 404) {
@@ -152,6 +154,7 @@ export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, o
152154
};
153155
} catch (e: any) {
154156
Sentry.captureException(e);
157+
logger.error(`Failed to fetch project ${project}.`, e);
155158

156159
const status = e?.cause?.response?.status;
157160

packages/backend/src/repoManager.ts

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@ import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./util
88
import { cloneRepository, fetchRepository } from "./git.js";
99
import { existsSync, readdirSync, promises } from 'fs';
1010
import { indexGitRepository } from "./zoekt.js";
11-
import os from 'os';
1211
import { PromClient } from './promClient.js';
1312
import * as Sentry from "@sentry/node";
1413

@@ -43,15 +42,13 @@ export class RepoManager implements IRepoManager {
4342
private promClient: PromClient,
4443
private ctx: AppContext,
4544
) {
46-
const numCores = os.cpus().length;
47-
4845
// Repo indexing
4946
this.indexQueue = new Queue<RepoIndexingPayload>(REPO_INDEXING_QUEUE, {
5047
connection: redis,
5148
});
5249
this.indexWorker = new Worker(REPO_INDEXING_QUEUE, this.runIndexJob.bind(this), {
5350
connection: redis,
54-
concurrency: numCores * this.settings.indexConcurrencyMultiple,
51+
concurrency: this.settings.maxRepoIndexingJobConcurrency,
5552
});
5653
this.indexWorker.on('completed', this.onIndexJobCompleted.bind(this));
5754
this.indexWorker.on('failed', this.onIndexJobFailed.bind(this));
@@ -62,7 +59,7 @@ export class RepoManager implements IRepoManager {
6259
});
6360
this.gcWorker = new Worker(REPO_GC_QUEUE, this.runGarbageCollectionJob.bind(this), {
6461
connection: redis,
65-
concurrency: numCores * this.settings.gcConcurrencyMultiple,
62+
concurrency: this.settings.maxRepoGarbageCollectionJobConcurrency,
6663
});
6764
this.gcWorker.on('completed', this.onGarbageCollectionJobCompleted.bind(this));
6865
this.gcWorker.on('failed', this.onGarbageCollectionJobFailed.bind(this));
@@ -396,7 +393,7 @@ export class RepoManager implements IRepoManager {
396393
////////////////////////////////////
397394

398395

399-
const thresholdDate = new Date(Date.now() - this.settings.gcGracePeriodMs);
396+
const thresholdDate = new Date(Date.now() - this.settings.repoGarbageCollectionGracePeriodMs);
400397
const reposWithNoConnections = await this.db.repo.findMany({
401398
where: {
402399
repoIndexingStatus: {
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- AlterTable
2+
ALTER TABLE "Connection" ADD COLUMN "isDeclarative" BOOLEAN NOT NULL DEFAULT false;

packages/db/prisma/schema.prisma

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,7 @@ model Connection {
6767
id Int @id @default(autoincrement())
6868
name String
6969
config Json
70+
isDeclarative Boolean @default(false)
7071
createdAt DateTime @default(now())
7172
updatedAt DateTime @updatedAt
7273
syncedAt DateTime?

packages/schemas/src/v3/index.schema.ts

Lines changed: 20 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -10,37 +10,45 @@ const schema = {
1010
"properties": {
1111
"maxFileSize": {
1212
"type": "number",
13-
"description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed."
13+
"description": "The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.",
14+
"minimum": 1
1415
},
1516
"maxTrigramCount": {
1617
"type": "number",
17-
"description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed."
18+
"description": "The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Default to 20000.",
19+
"minimum": 1
1820
},
1921
"reindexIntervalMs": {
2022
"type": "number",
21-
"description": "The interval (in milliseconds) at which the indexer should re-index all repositories."
23+
"description": "The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.",
24+
"minimum": 1
2225
},
2326
"resyncConnectionPollingIntervalMs": {
2427
"type": "number",
25-
"description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced."
28+
"description": "The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 5 seconds.",
29+
"minimum": 1
2630
},
2731
"reindexRepoPollingIntervalMs": {
2832
"type": "number",
29-
"description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed."
33+
"description": "The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 5 seconds.",
34+
"minimum": 1
3035
},
31-
"indexConcurrencyMultiple": {
36+
"maxConnectionSyncJobConcurrency": {
3237
"type": "number",
33-
"description": "The multiple of the number of CPUs to use for indexing."
38+
"description": "The number of connection sync jobs to run concurrently. Defaults to 8.",
39+
"minimum": 1
3440
},
35-
"configSyncConcurrencyMultiple": {
41+
"maxRepoIndexingJobConcurrency": {
3642
"type": "number",
37-
"description": "The multiple of the number of CPUs to use for syncing the configuration."
43+
"description": "The number of repo indexing jobs to run concurrently. Defaults to 8.",
44+
"minimum": 1
3845
},
39-
"gcConcurrencyMultiple": {
46+
"maxRepoGarbageCollectionJobConcurrency": {
4047
"type": "number",
41-
"description": "The multiple of the number of CPUs to use for garbage collection."
48+
"description": "The number of repo GC jobs to run concurrently. Defaults to 8.",
49+
"minimum": 1
4250
},
43-
"gcGracePeriodMs": {
51+
"repoGarbageCollectionGracePeriodMs": {
4452
"type": "number",
4553
"description": "The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded."
4654
},

packages/schemas/src/v3/index.type.ts

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,41 +28,41 @@ export interface SourcebotConfig {
2828
*/
2929
export interface Settings {
3030
/**
31-
* The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed.
31+
* The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed. Defaults to 2MB.
3232
*/
3333
maxFileSize?: number;
3434
/**
35-
* The maximum number of trigrams per document. Files that exceed this maximum will not be indexed.
35+
* The maximum number of trigrams per document. Files that exceed this maximum will not be indexed. Defaults to 20000.
3636
*/
3737
maxTrigramCount?: number;
3838
/**
39-
* The interval (in milliseconds) at which the indexer should re-index all repositories.
39+
* The interval (in milliseconds) at which the indexer should re-index all repositories. Defaults to 1 hour.
4040
*/
4141
reindexIntervalMs?: number;
4242
/**
43-
* The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced.
43+
* The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced. Defaults to 1 second.
4444
*/
4545
resyncConnectionPollingIntervalMs?: number;
4646
/**
47-
* The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed.
47+
* The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed. Defaults to 1 second.
4848
*/
4949
reindexRepoPollingIntervalMs?: number;
5050
/**
51-
* The multiple of the number of CPUs to use for indexing.
51+
* The number of connection sync jobs to run concurrently. Defaults to 8.
5252
*/
53-
indexConcurrencyMultiple?: number;
53+
maxConnectionSyncJobConcurrency?: number;
5454
/**
55-
* The multiple of the number of CPUs to use for syncing the configuration.
55+
* The number of repo indexing jobs to run concurrently. Defaults to 8.
5656
*/
57-
configSyncConcurrencyMultiple?: number;
57+
maxRepoIndexingJobConcurrency?: number;
5858
/**
59-
* The multiple of the number of CPUs to use for garbage collection.
59+
* The number of repo GC jobs to run concurrently. Defaults to 8.
6060
*/
61-
gcConcurrencyMultiple?: number;
61+
maxRepoGarbageCollectionJobConcurrency?: number;
6262
/**
6363
* The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded.
6464
*/
65-
gcGracePeriodMs?: number;
65+
repoGarbageCollectionGracePeriodMs?: number;
6666
/**
6767
* The timeout (in milliseconds) for a repo indexing to timeout.
6868
*/

packages/web/src/app/error.tsx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import { SourcebotLogo } from './components/sourcebotLogo';
1313
export default function Error({ error, reset }: { error: Error & { digest?: string }, reset: () => void }) {
1414
useEffect(() => {
1515
Sentry.captureException(error);
16+
console.error(error);
1617
}, [error]);
1718

1819
const { message, errorCode, statusCode } = useMemo(() => {

packages/web/src/initialize.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,11 +130,13 @@ const initSingleTenancy = async () => {
130130
update: {
131131
config: newConnectionConfig as unknown as Prisma.InputJsonValue,
132132
syncStatus: syncNeededOnUpdate ? ConnectionSyncStatus.SYNC_NEEDED : undefined,
133+
isDeclarative: true,
133134
},
134135
create: {
135136
name: key,
136137
connectionType: newConnectionConfig.type,
137138
config: newConnectionConfig as unknown as Prisma.InputJsonValue,
139+
isDeclarative: true,
138140
org: {
139141
connect: {
140142
id: SINGLE_TENANT_ORG_ID,
@@ -160,6 +162,25 @@ const initSingleTenancy = async () => {
160162
})
161163
}
162164
}
165+
166+
const deletedConnections = await prisma.connection.findMany({
167+
where: {
168+
isDeclarative: true,
169+
name: {
170+
notIn: Object.keys(config.connections),
171+
},
172+
orgId: SINGLE_TENANT_ORG_ID,
173+
}
174+
});
175+
176+
for (const connection of deletedConnections) {
177+
console.log(`Deleting connection with name '${connection.name}'. Connection ID: ${connection.id}`);
178+
await prisma.connection.delete({
179+
where: {
180+
id: connection.id,
181+
}
182+
})
183+
}
163184
}
164185
}
165186
}

0 commit comments

Comments
 (0)