Skip to content

Commit 31114a9

Browse files
add concept of secrets (#180)
* add @sourcebot/schemas package * migrate things to use the schemas package * Dockerfile support * add secret table to schema * Add concept of connection manager * Rename Config->Connection * Handle job failures * Add join table between repo and connection * nits * create first version of crypto package * add crypto package as deps to others * forgot to add package changes * add server action for adding and listing secrets, create test page for it * add secrets page to nav menu * add secret to config and support fetching it in backend * reset secret form on successful submission * add toast feedback for secrets form * add instructions for adding encryption key to dev instructions * add encryption key support in docker file * add delete secret button * fix nits from pr review --------- Co-authored-by: bkellam <bshizzle1234@gmail.com>
1 parent dd8ff6e commit 31114a9

31 files changed

+699
-31
lines changed

Dockerfile

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,10 @@ WORKDIR /app
1717
COPY package.json yarn.lock* ./
1818
COPY ./packages/db ./packages/db
1919
COPY ./packages/schemas ./packages/schemas
20+
COPY ./packages/crypto ./packages/crypto
2021
RUN yarn workspace @sourcebot/db install --frozen-lockfile
2122
RUN yarn workspace @sourcebot/schemas install --frozen-lockfile
23+
RUN yarn workspace @sourcebot/crypto install --frozen-lockfile
2224

2325
# ------ Build Web ------
2426
FROM node-alpine AS web-builder
@@ -30,6 +32,7 @@ COPY ./packages/web ./packages/web
3032
COPY --from=shared-libs-builder /app/node_modules ./node_modules
3133
COPY --from=shared-libs-builder /app/packages/db ./packages/db
3234
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
35+
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
3336

3437
# Fixes arm64 timeouts
3538
RUN yarn config set registry https://registry.npmjs.org/
@@ -60,6 +63,7 @@ COPY ./packages/backend ./packages/backend
6063
COPY --from=shared-libs-builder /app/node_modules ./node_modules
6164
COPY --from=shared-libs-builder /app/packages/db ./packages/db
6265
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
66+
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
6367
RUN yarn workspace @sourcebot/backend install --frozen-lockfile
6468
RUN yarn workspace @sourcebot/backend build
6569

@@ -100,7 +104,7 @@ ENV POSTHOG_PAPIK=$POSTHOG_PAPIK
100104
# ENV SOURCEBOT_TELEMETRY_DISABLED=1
101105

102106
# Configure dependencies
103-
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib
107+
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl
104108

105109
# Configure zoekt
106110
COPY vendor/zoekt/install-ctags-alpine.sh .
@@ -129,6 +133,7 @@ COPY --from=backend-builder /app/packages/backend ./packages/backend
129133
COPY --from=shared-libs-builder /app/node_modules ./node_modules
130134
COPY --from=shared-libs-builder /app/packages/db ./packages/db
131135
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
136+
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
132137

133138
# Configure the database
134139
RUN mkdir -p /run/postgresql && \
@@ -143,6 +148,8 @@ RUN chmod +x ./entrypoint.sh
143148

144149
COPY default-config.json .
145150

151+
ENV SOURCEBOT_ENCRYPTION_KEY=""
152+
146153
EXPOSE 3000
147154
ENV PORT=3000
148155
ENV HOSTNAME="0.0.0.0"

README.md

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -374,14 +374,20 @@ docker run <b>-v /path/to/my-repo:/repos/my-repo</b> /* additional args */ ghcr.
374374

375375
5. Create a `config.json` file at the repository root. See [Configuring Sourcebot](#configuring-sourcebot) for more information.
376376

377-
6. Start Sourcebot with the command:
377+
6. Create `.env.local` files in the `packages/backend` and `packages/web` directories with the following contents:
378+
```sh
379+
# You can use https://acte.ltd/utils/randomkeygen to generate a key ("Encryption key 256")
380+
SOURCEBOT_ENCRYPTION_KEY="32-byte-secret-key"
381+
```
382+
383+
7. Start Sourcebot with the command:
378384
```sh
379385
yarn dev
380386
```
381387

382388
A `.sourcebot` directory will be created and zoekt will begin to index the repositories found given `config.json`.
383389

384-
7. Start searching at `http://localhost:3000`.
390+
8. Start searching at `http://localhost:3000`.
385391

386392
## Telemetry
387393

entrypoint.sh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,22 @@ if [ ! -d "$DB_DATA_DIR" ]; then
2626
su postgres -c "initdb -D $DB_DATA_DIR"
2727
fi
2828

29+
if [ -z "$SOURCEBOT_ENCRYPTION_KEY" ]; then
30+
echo -e "\e[31m[Error] SOURCEBOT_ENCRYPTION_KEY is not set.\e[0m"
31+
32+
if [ -f "$DATA_CACHE_DIR/.secret" ]; then
33+
echo -e "\e[34m[Info] Loading environment variables from $DATA_CACHE_DIR/.secret\e[0m"
34+
else
35+
echo -e "\e[34m[Info] Generating a new encryption key...\e[0m"
36+
SOURCEBOT_ENCRYPTION_KEY=$(openssl rand -base64 24)
37+
echo "SOURCEBOT_ENCRYPTION_KEY=\"$SOURCEBOT_ENCRYPTION_KEY\"" >> "$DATA_CACHE_DIR/.secret"
38+
fi
39+
40+
set -a
41+
. "$DATA_CACHE_DIR/.secret"
42+
set +a
43+
fi
44+
2945
# In order to detect if this is the first run, we create a `.installedv2` file in
3046
# the cache directory.
3147
FIRST_RUN_FILE="$DATA_CACHE_DIR/.installedv2"

packages/backend/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
"lowdb": "^7.0.1",
3232
"micromatch": "^4.0.8",
3333
"posthog-node": "^4.2.1",
34+
"@sourcebot/crypto": "^0.1.0",
3435
"@sourcebot/db": "^0.1.0",
3536
"@sourcebot/schemas": "^0.1.0",
3637
"simple-git": "^3.27.0",

packages/backend/src/connectionManager.ts

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
55
import { createLogger } from "./logger.js";
66
import os from 'os';
77
import { Redis } from 'ioredis';
8-
import { getTokenFromConfig, marshalBool } from "./utils.js";
8+
import { marshalBool } from "./utils.js";
99
import { getGitHubReposFromConfig } from "./github.js";
1010

1111
interface IConnectionManager {
@@ -70,17 +70,13 @@ export class ConnectionManager implements IConnectionManager {
7070
const repoData: RepoData[] = await (async () => {
7171
switch (config.type) {
7272
case 'github': {
73-
const token = config.token ? getTokenFromConfig(config.token, this.context) : undefined;
74-
const gitHubRepos = await getGitHubReposFromConfig(config, abortController.signal, this.context);
73+
const gitHubRepos = await getGitHubReposFromConfig(config, orgId, this.db, abortController.signal);
7574
const hostUrl = config.url ?? 'https://github.com';
7675
const hostname = config.url ? new URL(config.url).hostname : 'github.com';
77-
76+
7877
return gitHubRepos.map((repo) => {
7978
const repoName = `${hostname}/${repo.full_name}`;
8079
const cloneUrl = new URL(repo.clone_url!);
81-
if (token) {
82-
cloneUrl.username = token;
83-
}
8480

8581
const record: RepoData = {
8682
external_id: repo.id.toString(),

packages/backend/src/gitea.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ import micromatch from 'micromatch';
99

1010
const logger = createLogger('Gitea');
1111

12-
export const getGiteaReposFromConfig = async (config: GiteaConfig, ctx: AppContext) => {
13-
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
12+
export const getGiteaReposFromConfig = async (config: GiteaConfig, orgId: number, ctx: AppContext) => {
13+
// TODO: pass in DB here to fetch secret properly
14+
const token = config.token ? await getTokenFromConfig(config.token, orgId) : undefined;
1415

1516
const api = giteaApi(config.url ?? 'https://gitea.com', {
1617
token,

packages/backend/src/github.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { createLogger } from "./logger.js";
44
import { AppContext } from "./types.js";
55
import { getTokenFromConfig, measure } from "./utils.js";
66
import micromatch from "micromatch";
7+
import { PrismaClient } from "@sourcebot/db";
78

89
const logger = createLogger("GitHub");
910

@@ -25,8 +26,8 @@ export type OctokitRepository = {
2526
size?: number,
2627
}
2728

28-
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, signal: AbortSignal, ctx: AppContext) => {
29-
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
29+
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => {
30+
const token = config.token ? await getTokenFromConfig(config.token, orgId, db) : undefined;
3031

3132
const octokit = new Octokit({
3233
auth: token,

packages/backend/src/gitlab.ts

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,9 @@ import { getTokenFromConfig, measure } from "./utils.js";
88
const logger = createLogger("GitLab");
99
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";
1010

11-
export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppContext) => {
12-
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
11+
export const getGitLabReposFromConfig = async (config: GitLabConfig, orgId: number, ctx: AppContext) => {
12+
// TODO: pass in DB here to fetch secret properly
13+
const token = config.token ? await getTokenFromConfig(config.token, orgId) : undefined;
1314
const api = new Gitlab({
1415
...(config.token ? {
1516
token,

packages/backend/src/main.ts

Lines changed: 54 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,50 @@
1-
import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus } from '@sourcebot/db';
1+
import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus, RepoToConnection, Connection } from '@sourcebot/db';
22
import { existsSync } from 'fs';
33
import { cloneRepository, fetchRepository } from "./git.js";
44
import { createLogger } from "./logger.js";
55
import { captureEvent } from "./posthog.js";
66
import { AppContext } from "./types.js";
7-
import { getRepoPath, measure } from "./utils.js";
7+
import { getRepoPath, getTokenFromConfig, measure } from "./utils.js";
88
import { indexGitRepository } from "./zoekt.js";
99
import { DEFAULT_SETTINGS } from './constants.js';
1010
import { Queue, Worker, Job } from 'bullmq';
1111
import { Redis } from 'ioredis';
1212
import * as os from 'os';
1313
import { ConnectionManager } from './connectionManager.js';
14+
import { ConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
1415

1516
const logger = createLogger('main');
1617

17-
const syncGitRepository = async (repo: Repo, ctx: AppContext) => {
18+
type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection})[] };
19+
20+
// TODO: do this better? ex: try using the tokens from all the connections
21+
// We can no longer use repo.cloneUrl directly since it doesn't contain the token for security reasons. As a result, we need to
22+
// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each
23+
may have their own token. This method will just pick the first connection that has a token (if one exists) and use that. This
24+
may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing.
25+
const getTokenForRepo = async (repo: RepoWithConnections, db: PrismaClient) => {
26+
const repoConnections = repo.connections;
27+
if (repoConnections.length === 0) {
28+
logger.error(`Repo ${repo.id} has no connections`);
29+
return;
30+
}
31+
32+
let token: string | undefined;
33+
for (const repoConnection of repoConnections) {
34+
const connection = repoConnection.connection;
35+
const config = connection.config as unknown as ConnectionConfig;
36+
if (config.token) {
37+
token = await getTokenFromConfig(config.token, connection.orgId, db);
38+
if (token) {
39+
break;
40+
}
41+
}
42+
}
43+
44+
return token;
45+
}
46+
47+
const syncGitRepository = async (repo: RepoWithConnections, ctx: AppContext, db: PrismaClient) => {
1848
let fetchDuration_s: number | undefined = undefined;
1949
let cloneDuration_s: number | undefined = undefined;
2050

@@ -35,7 +65,15 @@ const syncGitRepository = async (repo: Repo, ctx: AppContext) => {
3565
} else {
3666
logger.info(`Cloning ${repo.id}...`);
3767

38-
const { durationMs } = await measure(() => cloneRepository(repo.cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
68+
const token = await getTokenForRepo(repo, db);
69+
let cloneUrl = repo.cloneUrl;
70+
if (token) {
71+
const url = new URL(cloneUrl);
72+
url.username = token;
73+
cloneUrl = url.toString();
74+
}
75+
76+
const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
3977
logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
4078
}));
4179
cloneDuration_s = durationMs / 1000;
@@ -92,13 +130,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
92130

93131
const connectionManager = new ConnectionManager(db, DEFAULT_SETTINGS, redis, context);
94132
setInterval(async () => {
95-
const configs = await db.connection.findMany({
133+
const connections = await db.connection.findMany({
96134
where: {
97135
syncStatus: ConnectionSyncStatus.SYNC_NEEDED,
98136
}
99137
});
100-
for (const config of configs) {
101-
await connectionManager.scheduleConnectionSync(config);
138+
for (const connection of connections) {
139+
await connectionManager.scheduleConnectionSync(connection);
102140
}
103141
}, DEFAULT_SETTINGS.resyncConnectionPollingIntervalMs);
104142

@@ -111,13 +149,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
111149
const numWorkers = numCores * DEFAULT_SETTINGS.indexConcurrencyMultiple;
112150
logger.info(`Detected ${numCores} cores. Setting repo index max concurrency to ${numWorkers}`);
113151
const worker = new Worker('indexQueue', async (job: Job) => {
114-
const repo = job.data as Repo;
152+
const repo = job.data as RepoWithConnections;
115153

116154
let indexDuration_s: number | undefined;
117155
let fetchDuration_s: number | undefined;
118156
let cloneDuration_s: number | undefined;
119157

120-
const stats = await syncGitRepository(repo, context);
158+
const stats = await syncGitRepository(repo, context, db);
121159
indexDuration_s = stats.indexDuration_s;
122160
fetchDuration_s = stats.fetchDuration_s;
123161
cloneDuration_s = stats.cloneDuration_s;
@@ -171,6 +209,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
171209
{ indexedAt: { lt: thresholdDate } },
172210
{ repoIndexingStatus: RepoIndexingStatus.NEW }
173211
]
212+
},
213+
include: {
214+
connections: {
215+
include: {
216+
connection: true
217+
}
218+
}
174219
}
175220
});
176221
addReposToQueue(db, indexQueue, repos);

packages/backend/src/utils.ts

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,9 @@ import { Logger } from "winston";
22
import { AppContext, Repository } from "./types.js";
33
import path from 'path';
44
import micromatch from "micromatch";
5-
import { Repo } from "@sourcebot/db";
5+
import { PrismaClient, Repo } from "@sourcebot/db";
6+
import { decrypt } from "@sourcebot/crypto";
7+
import { Token } from "@sourcebot/schemas/v3/shared.type";
68

79
export const measure = async <T>(cb : () => Promise<T>) => {
810
const start = Date.now();
@@ -86,15 +88,39 @@ export const excludeReposByTopic = <T extends Repository>(repos: T[], excludedRe
8688
});
8789
}
8890

89-
export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => {
91+
export const getTokenFromConfig = async (token: Token, orgId: number, db?: PrismaClient) => {
9092
if (typeof token === 'string') {
9193
return token;
9294
}
93-
const tokenValue = process.env[token.env];
94-
if (!tokenValue) {
95-
throw new Error(`The environment variable '${token.env}' was referenced in ${ctx.configPath}, but was not set.`);
95+
if ('env' in token) {
96+
const tokenValue = process.env[token.env];
97+
if (!tokenValue) {
98+
throw new Error(`The environment variable '${token.env}' was referenced in the config but was not set.`);
99+
}
100+
return tokenValue;
101+
} else if ('secret' in token) {
102+
if (!db) {
103+
throw new Error(`Database connection required to retrieve secret`);
104+
}
105+
106+
const secretKey = token.secret;
107+
const secret = await db.secret.findUnique({
108+
where: {
109+
orgId_key: {
110+
key: secretKey,
111+
orgId
112+
}
113+
}
114+
});
115+
116+
if (!secret) {
117+
throw new Error(`Secret with key ${secretKey} not found for org ${orgId}`);
118+
}
119+
120+
const decryptedSecret = decrypt(secret.iv, secret.encryptedValue);
121+
return decryptedSecret;
96122
}
97-
return tokenValue;
123+
throw new Error(`Invalid token configuration in config`);
98124
}
99125

100126
export const isRemotePath = (path: string) => {

packages/crypto/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.env.local

packages/crypto/package.json

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"name": "@sourcebot/crypto",
3+
"main": "dist/index.js",
4+
"version": "0.1.0",
5+
"scripts": {
6+
"build": "tsc",
7+
"postinstall": "yarn build"
8+
},
9+
"dependencies": {
10+
"dotenv": "^16.4.5"
11+
},
12+
"devDependencies": {
13+
"@types/node": "^22.7.5",
14+
"typescript": "^5.7.3"
15+
}
16+
}

packages/crypto/src/environment.ts

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import dotenv from 'dotenv';
2+
3+
export const getEnv = (env: string | undefined, defaultValue?: string, required?: boolean) => {
4+
if (required && !env && !defaultValue) {
5+
throw new Error(`Missing required environment variable`);
6+
}
7+
8+
return env ?? defaultValue;
9+
}
10+
11+
dotenv.config({
12+
path: './.env.local',
13+
override: true
14+
});
15+
16+
// @note: You can use https://generate-random.org/encryption-key-generator to create a new 32-byte key
17+
export const SOURCEBOT_ENCRYPTION_KEY = getEnv(process.env.SOURCEBOT_ENCRYPTION_KEY, undefined, true)!;

0 commit comments

Comments
 (0)