Skip to content

Declarative connection configuration #235

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Mar 21, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions .env.development
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ SRC_TENANT_ENFORCEMENT_MODE=strict
# You can generate a new secret with:
# openssl rand -base64 33
# @see: https://authjs.dev/getting-started/deployment#auth_secret
AUTH_SECRET="secret"
AUTH_SECRET="00000000000000000000000000000000000000000000"
AUTH_URL="http://localhost:3000"
# AUTH_CREDENTIALS_LOGIN_ENABLED=true
# AUTH_GITHUB_CLIENT_ID=""
Expand Down Expand Up @@ -59,7 +59,7 @@ REDIS_URL="redis://localhost:6379"

# Generated using:
# openssl rand -base64 24
SOURCEBOT_ENCRYPTION_KEY="secret"
SOURCEBOT_ENCRYPTION_KEY="00000000000000000000000000000000"

SOURCEBOT_LOG_LEVEL="debug" # valid values: info, debug, warn, error
SOURCEBOT_TELEMETRY_DISABLED=true # Disables telemetry collection
Expand All @@ -79,6 +79,5 @@ SOURCEBOT_TELEMETRY_DISABLED=true # Disables telemetry collection
# NEXT_PUBLIC_SOURCEBOT_VERSION=

# CONFIG_MAX_REPOS_NO_TOKEN=
# SOURCEBOT_ROOT_DOMAIN=
# NODE_ENV=
# SOURCEBOT_TENANCY_MODE=mutli
# SOURCEBOT_TENANCY_MODE=single
4 changes: 2 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,13 @@
"build": "yarn workspaces run build",
"test": "yarn workspaces run test",

"dev": "yarn dev:prisma:migrate && npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web",
"dev": "yarn dev:prisma:migrate:dev && npm-run-all --print-label --parallel dev:zoekt dev:backend dev:web",
"with-env": "cross-env PATH=\"$PWD/bin:$PATH\" dotenv -e .env.development -c --",
"dev:zoekt": "yarn with-env zoekt-webserver -index .sourcebot/index -rpc",
"dev:backend": "yarn with-env yarn workspace @sourcebot/backend dev:watch",
"dev:web": "yarn with-env yarn workspace @sourcebot/web dev",

"dev:prisma:migrate": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:dev",
"dev:prisma:migrate:dev": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:dev",
"dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio",
"dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset"
},
Expand Down
1 change: 1 addition & 0 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"@sourcebot/schemas": "^0.1.0",
"@t3-oss/env-core": "^0.12.0",
"@types/express": "^5.0.0",
"ajv": "^8.17.1",
"argparse": "^2.0.1",
"bullmq": "^5.34.10",
"cross-fetch": "^4.0.0",
Expand Down
4 changes: 2 additions & 2 deletions packages/backend/src/env.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ export const env = createEnv({
LOGTAIL_TOKEN: z.string().optional(),
LOGTAIL_HOST: z.string().url().optional(),

INDEX_CONCURRENCY_MULTIPLE: numberSchema.optional(),
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres")
DATABASE_URL: z.string().url().default("postgresql://postgres:postgres@localhost:5432/postgres"),
CONFIG_PATH: z.string().optional(),
},
runtimeEnv: process.env,
emptyStringAsUndefined: true,
Expand Down
3 changes: 1 addition & 2 deletions packages/backend/src/gitea.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ import { env } from './env.js';
const logger = createLogger('Gitea');

export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, orgId: number, db: PrismaClient) => {
const tokenResult = config.token ? await getTokenFromConfig(config.token, orgId, db) : undefined;
const token = tokenResult?.token ?? env.FALLBACK_GITEA_TOKEN;
const token = config.token ? await getTokenFromConfig(config.token, orgId, db, logger) : env.FALLBACK_GITEA_TOKEN;

const api = giteaApi(config.url ?? 'https://gitea.com', {
token: token,
Expand Down
10 changes: 5 additions & 5 deletions packages/backend/src/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,10 @@ const isHttpError = (error: unknown, status: number): boolean => {
}

export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => {
const tokenResult = config.token ? await getTokenFromConfig(config.token, orgId, db) : undefined;
const token = tokenResult?.token;
const secretKey = tokenResult?.secretKey;
const token = config.token ? await getTokenFromConfig(config.token, orgId, db, logger) : env.FALLBACK_GITHUB_TOKEN;

const octokit = new Octokit({
auth: token ?? env.FALLBACK_GITHUB_TOKEN,
auth: token,
...(config.url ? {
baseUrl: `${config.url}/api/v3`
} : {}),
Expand All @@ -59,7 +57,9 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o

if (isHttpError(error, 401)) {
const e = new BackendException(BackendError.CONNECTION_SYNC_INVALID_TOKEN, {
secretKey,
...(config.token && 'secret' in config.token ? {
secretKey: config.token.secret,
} : {}),
});
Sentry.captureException(e);
throw e;
Expand Down
3 changes: 1 addition & 2 deletions packages/backend/src/gitlab.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@ const logger = createLogger("GitLab");
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";

export const getGitLabReposFromConfig = async (config: GitlabConnectionConfig, orgId: number, db: PrismaClient) => {
const tokenResult = config.token ? await getTokenFromConfig(config.token, orgId, db) : undefined;
const token = tokenResult?.token ?? env.FALLBACK_GITLAB_TOKEN;
const token = config.token ? await getTokenFromConfig(config.token, orgId, db, logger) : env.FALLBACK_GITLAB_TOKEN;

const api = new Gitlab({
...(token ? {
Expand Down
2 changes: 0 additions & 2 deletions packages/backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ const parser = new ArgumentParser({
});

type Arguments = {
configPath: string;
cacheDir: string;
}

Expand All @@ -67,7 +66,6 @@ const context: AppContext = {
indexPath,
reposPath,
cachePath: cacheDir,
configPath: args.configPath,
}

const prisma = new PrismaClient();
Expand Down
43 changes: 39 additions & 4 deletions packages/backend/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,46 @@ import { ConnectionManager } from './connectionManager.js';
import { RepoManager } from './repoManager.js';
import { env } from './env.js';
import { PromClient } from './promClient.js';
import { isRemotePath } from './utils.js';
import { readFile } from 'fs/promises';
import stripJsonComments from 'strip-json-comments';
import { SourcebotConfig } from '@sourcebot/schemas/v3/index.type';
import { indexSchema } from '@sourcebot/schemas/v3/index.schema';
import { Ajv } from "ajv";

const logger = createLogger('main');
const ajv = new Ajv({
validateFormats: false,
});

/**
 * Resolves the effective backend settings.
 *
 * When `configPath` is omitted, a copy of `DEFAULT_SETTINGS` is returned.
 * Otherwise the config is loaded (fetched when `isRemotePath` reports a URL,
 * read from the local filesystem otherwise), passed through
 * `stripJsonComments`, parsed as JSON and validated against `indexSchema`.
 * Its `settings` section is then layered on top of `DEFAULT_SETTINGS`.
 *
 * @param configPath - Optional URL or filesystem path of the config file.
 * @returns A fresh settings object; never the shared `DEFAULT_SETTINGS` instance.
 * @throws Error when the remote fetch responds with a non-OK status, when the
 *   content is not valid JSON, or when it fails schema validation. Errors from
 *   `readFile` / `fetch` themselves propagate unchanged.
 */
const getSettings = async (configPath?: string) => {
    if (!configPath) {
        // Hand out a copy so callers cannot mutate the shared defaults.
        return { ...DEFAULT_SETTINGS };
    }

    let configContent: string;
    if (isRemotePath(configPath)) {
        const response = await fetch(configPath);
        if (!response.ok) {
            throw new Error(`Failed to fetch config file ${configPath}: ${response.statusText}`);
        }
        configContent = await response.text();
    } else {
        configContent = await readFile(configPath, { encoding: 'utf-8' });
    }

    // Keep the parsed value as `unknown` until the schema check has passed.
    let parsed: unknown;
    try {
        parsed = JSON.parse(stripJsonComments(configContent));
    } catch (error) {
        // Surface which file was malformed; a bare SyntaxError gives no context.
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Config file '${configPath}' is not valid JSON: ${reason}`);
    }

    if (!ajv.validate(indexSchema, parsed)) {
        throw new Error(`Config file '${configPath}' is invalid: ${ajv.errorsText(ajv.errors)}`);
    }

    // Safe to assert now: the value has been validated against the schema.
    const config = parsed as SourcebotConfig;

    return {
        ...DEFAULT_SETTINGS,
        ...config.settings,
    };
}

export const main = async (db: PrismaClient, context: AppContext) => {
const redis = new Redis(env.REDIS_URL, {
Expand All @@ -22,10 +60,7 @@ export const main = async (db: PrismaClient, context: AppContext) => {
process.exit(1);
});

const settings = DEFAULT_SETTINGS;
if (env.INDEX_CONCURRENCY_MULTIPLE) {
settings.indexConcurrencyMultiple = env.INDEX_CONCURRENCY_MULTIPLE;
}
const settings = await getSettings(env.CONFIG_PATH);

const promClient = new PromClient();

Expand Down
10 changes: 5 additions & 5 deletions packages/backend/src/repoManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { indexGitRepository } from "./zoekt.js";
import os from 'os';
import { PromClient } from './promClient.js';
import * as Sentry from "@sentry/node";

interface IRepoManager {
blockingPollLoop: () => void;
dispose: () => void;
Expand Down Expand Up @@ -177,8 +178,7 @@ export class RepoManager implements IRepoManager {

const config = connection.config as unknown as GithubConnectionConfig | GitlabConnectionConfig | GiteaConnectionConfig;
if (config.token) {
const tokenResult = await getTokenFromConfig(config.token, connection.orgId, db);
token = tokenResult?.token;
token = await getTokenFromConfig(config.token, connection.orgId, db, this.logger);
if (token) {
break;
}
Expand Down Expand Up @@ -207,7 +207,7 @@ export class RepoManager implements IRepoManager {
this.logger.info(`Fetching ${repo.id}...`);

const { durationMs } = await measure(() => fetchRepository(repoPath, ({ method, stage, progress }) => {
//this.logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
fetchDuration_s = durationMs / 1000;

Expand All @@ -234,7 +234,7 @@ export class RepoManager implements IRepoManager {
}

const { durationMs } = await measure(() => cloneRepository(cloneUrl.toString(), repoPath, metadata.gitConfig, ({ method, stage, progress }) => {
//this.logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
this.logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
cloneDuration_s = durationMs / 1000;

Expand All @@ -243,7 +243,7 @@ export class RepoManager implements IRepoManager {
}

this.logger.info(`Indexing ${repo.id}...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.ctx));
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, this.ctx));
const indexDuration_s = durationMs / 1000;
this.logger.info(`Indexed ${repo.id} in ${indexDuration_s}s`);

Expand Down
47 changes: 3 additions & 44 deletions packages/backend/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";

export type AppContext = {
/**
* Path to the repos cache directory.
Expand All @@ -10,52 +12,9 @@ export type AppContext = {
indexPath: string;

cachePath: string;

configPath: string;
}

export type Settings = {
/**
* The maximum size of a file (in bytes) to be indexed. Files that exceed this maximum will not be indexed.
*/
maxFileSize: number;
/**
* The maximum number of trigrams per document. Files that exceed this maximum will not be indexed.
*/
maxTrigramCount: number;
/**
* The interval (in milliseconds) at which the indexer should re-index all repositories.
*/
reindexIntervalMs: number;
/**
* The polling rate (in milliseconds) at which the db should be checked for connections that need to be re-synced.
*/
resyncConnectionPollingIntervalMs: number;
/**
* The polling rate (in milliseconds) at which the db should be checked for repos that should be re-indexed.
*/
reindexRepoPollingIntervalMs: number;
/**
* The multiple of the number of CPUs to use for indexing.
*/
indexConcurrencyMultiple: number;
/**
* The multiple of the number of CPUs to use for syncing the configuration.
*/
configSyncConcurrencyMultiple: number;
/**
* The multiple of the number of CPUs to use for garbage collection.
*/
gcConcurrencyMultiple: number;
/**
* The grace period (in milliseconds) for garbage collection. Used to prevent deleting shards while they're being loaded.
*/
gcGracePeriodMs: number;
/**
* The timeout (in milliseconds) for a repo indexing to timeout.
*/
repoIndexTimeoutMs: number;
}
export type Settings = Required<SettingsSchema>;

/**
* Structure of the `metadata` field in the `Repo` table.
Expand Down
65 changes: 35 additions & 30 deletions packages/backend/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,43 +21,48 @@ export const marshalBool = (value?: boolean) => {
return !!value ? '1' : '0';
}

export const getTokenFromConfig = async (token: Token, orgId: number, db?: PrismaClient) => {
if (!db) {
const e = new BackendException(BackendError.CONNECTION_SYNC_SYSTEM_ERROR, {
message: `No database connection provided.`,
});
Sentry.captureException(e);
throw e;
}
/**
 * Reports whether `path` is an HTTP(S) URL rather than a local filesystem path.
 *
 * The scheme is matched case-insensitively because URI schemes are
 * case-insensitive (RFC 3986, section 3.1), so `HTTPS://host/config.json`
 * counts as remote.
 *
 * @param path - A URL or filesystem path.
 * @returns `true` when `path` begins with `http://` or `https://` in any letter case.
 */
export const isRemotePath = (path: string) => {
    return /^https?:\/\//i.test(path);
}

const secretKey = token.secret;
const secret = await db.secret.findUnique({
where: {
orgId_key: {
key: secretKey,
orgId
export const getTokenFromConfig = async (token: Token, orgId: number, db: PrismaClient, logger?: Logger) => {
if ('secret' in token) {
const secretKey = token.secret;
const secret = await db.secret.findUnique({
where: {
orgId_key: {
key: secretKey,
orgId
}
}
});

if (!secret) {
const e = new BackendException(BackendError.CONNECTION_SYNC_SECRET_DNE, {
message: `Secret with key ${secretKey} not found for org ${orgId}`,
});
Sentry.captureException(e);
logger?.error(e.metadata.message);
throw e;
}
});

if (!secret) {
const e = new BackendException(BackendError.CONNECTION_SYNC_SECRET_DNE, {
message: `Secret with key ${secretKey} not found for org ${orgId}`,
});
Sentry.captureException(e);
throw e;
}
const decryptedToken = decrypt(secret.iv, secret.encryptedValue);
return decryptedToken;
} else {
const envToken = process.env[token.env];
if (!envToken) {
const e = new BackendException(BackendError.CONNECTION_SYNC_SECRET_DNE, {
message: `Environment variable ${token.env} not found.`,
});
Sentry.captureException(e);
logger?.error(e.metadata.message);
throw e;
}

const decryptedSecret = decrypt(secret.iv, secret.encryptedValue);
return {
token: decryptedSecret,
secretKey,
};
return envToken;
}
}

/**
 * Reports whether `path` points at a remote resource.
 *
 * @param path - A URL or filesystem path.
 * @returns `true` when `path` starts with `https://` or `http://` (exact, lowercase match).
 */
export const isRemotePath = (path: string) => {
    const remotePrefixes = ['https://', 'http://'];
    return remotePrefixes.some((prefix) => path.startsWith(prefix));
}

export const resolvePathRelativeToConfig = (localPath: string, configPath: string) => {
let absolutePath = localPath;
Expand Down
7 changes: 3 additions & 4 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import { exec } from "child_process";
import { AppContext, RepoMetadata } from "./types.js";
import { AppContext, RepoMetadata, Settings } from "./types.js";
import { Repo } from "@sourcebot/db";
import { getRepoPath } from "./utils.js";
import { DEFAULT_SETTINGS } from "./constants.js";
import { getShardPrefix } from "./utils.js";
import { getBranches, getTags } from "./git.js";
import micromatch from "micromatch";
Expand All @@ -11,7 +10,7 @@ import { captureEvent } from "./posthog.js";

const logger = createLogger('zoekt');

export const indexGitRepository = async (repo: Repo, ctx: AppContext) => {
export const indexGitRepository = async (repo: Repo, settings: Settings, ctx: AppContext) => {
let revisions = [
'HEAD'
];
Expand Down Expand Up @@ -58,7 +57,7 @@ export const indexGitRepository = async (repo: Repo, ctx: AppContext) => {
revisions = revisions.slice(0, 64);
}

const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -max_trigram_count ${DEFAULT_SETTINGS.maxTrigramCount} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;
const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -max_trigram_count ${settings.maxTrigramCount} -file_limit ${settings.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;

return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
exec(command, (error, stdout, stderr) => {
Expand Down
Loading