Skip to content

add concept of secrets #180

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
Jan 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,10 @@ WORKDIR /app
COPY package.json yarn.lock* ./
COPY ./packages/db ./packages/db
COPY ./packages/schemas ./packages/schemas
COPY ./packages/crypto ./packages/crypto
RUN yarn workspace @sourcebot/db install --frozen-lockfile
RUN yarn workspace @sourcebot/schemas install --frozen-lockfile
RUN yarn workspace @sourcebot/crypto install --frozen-lockfile

# ------ Build Web ------
FROM node-alpine AS web-builder
Expand All @@ -30,6 +32,7 @@ COPY ./packages/web ./packages/web
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto

# Fixes arm64 timeouts
RUN yarn config set registry https://registry.npmjs.org/
Expand Down Expand Up @@ -60,6 +63,7 @@ COPY ./packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto
RUN yarn workspace @sourcebot/backend install --frozen-lockfile
RUN yarn workspace @sourcebot/backend build

Expand Down Expand Up @@ -100,7 +104,7 @@ ENV POSTHOG_PAPIK=$POSTHOG_PAPIK
# ENV SOURCEBOT_TELEMETRY_DISABLED=1

# Configure dependencies
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib
RUN apk add --no-cache git ca-certificates bind-tools tini jansson wget supervisor uuidgen curl perl jq redis postgresql postgresql-contrib openssl

# Configure zoekt
COPY vendor/zoekt/install-ctags-alpine.sh .
Expand Down Expand Up @@ -129,6 +133,7 @@ COPY --from=backend-builder /app/packages/backend ./packages/backend
COPY --from=shared-libs-builder /app/node_modules ./node_modules
COPY --from=shared-libs-builder /app/packages/db ./packages/db
COPY --from=shared-libs-builder /app/packages/schemas ./packages/schemas
COPY --from=shared-libs-builder /app/packages/crypto ./packages/crypto

# Configure the database
RUN mkdir -p /run/postgresql && \
Expand All @@ -143,6 +148,8 @@ RUN chmod +x ./entrypoint.sh

COPY default-config.json .

ENV SOURCEBOT_ENCRYPTION_KEY=""

EXPOSE 3000
ENV PORT=3000
ENV HOSTNAME="0.0.0.0"
Expand Down
10 changes: 8 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -374,14 +374,20 @@ docker run <b>-v /path/to/my-repo:/repos/my-repo</b> /* additional args */ ghcr.

5. Create a `config.json` file at the repository root. See [Configuring Sourcebot](#configuring-sourcebot) for more information.

6. Start Sourcebot with the command:
6. Create `.env.local` files in the `packages/backend` and `packages/web` directories with the following contents:
```sh
# You can use https://acte.ltd/utils/randomkeygen to generate a key ("Encryption key 256")
SOURCEBOT_ENCRYPTION_KEY="32-byte-secret-key"
```

7. Start Sourcebot with the command:
```sh
yarn dev
```

A `.sourcebot` directory will be created and zoekt will begin indexing the repositories specified in `config.json`.

7. Start searching at `http://localhost:3000`.
8. Start searching at `http://localhost:3000`.

## Telemetry

Expand Down
16 changes: 16 additions & 0 deletions entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,22 @@ if [ ! -d "$DB_DATA_DIR" ]; then
su postgres -c "initdb -D $DB_DATA_DIR"
fi

# Make sure SOURCEBOT_ENCRYPTION_KEY is available to the processes started below.
# When the caller did not provide one, fall back to a key persisted in
# "$DATA_CACHE_DIR/.secret", generating and persisting a new key on first use.
if [ -z "$SOURCEBOT_ENCRYPTION_KEY" ]; then
    echo -e "\e[31m[Error] SOURCEBOT_ENCRYPTION_KEY is not set.\e[0m"

    if [ -f "$DATA_CACHE_DIR/.secret" ]; then
        echo -e "\e[34m[Info] Loading environment variables from $DATA_CACHE_DIR/.secret\e[0m"
    else
        echo -e "\e[34m[Info] Generating a new encryption key...\e[0m"
        # 24 random bytes base64-encode to a 32-character string.
        SOURCEBOT_ENCRYPTION_KEY=$(openssl rand -base64 24)
        # Create the file under a restrictive umask (in a subshell, so the rest of
        # the script is unaffected) so the key is only readable by its owner rather
        # than by every user in the container.
        (umask 077 && echo "SOURCEBOT_ENCRYPTION_KEY=\"$SOURCEBOT_ENCRYPTION_KEY\"" >> "$DATA_CACHE_DIR/.secret")
    fi

    # `set -a` auto-exports every variable assigned while sourcing the file.
    set -a
    . "$DATA_CACHE_DIR/.secret"
    set +a
fi

# In order to detect if this is the first run, we create a `.installed` file in
# the cache directory.
FIRST_RUN_FILE="$DATA_CACHE_DIR/.installedv2"
Expand Down
1 change: 1 addition & 0 deletions packages/backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
"lowdb": "^7.0.1",
"micromatch": "^4.0.8",
"posthog-node": "^4.2.1",
"@sourcebot/crypto": "^0.1.0",
"@sourcebot/db": "^0.1.0",
"@sourcebot/schemas": "^0.1.0",
"simple-git": "^3.27.0",
Expand Down
10 changes: 3 additions & 7 deletions packages/backend/src/connectionManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type";
import { createLogger } from "./logger.js";
import os from 'os';
import { Redis } from 'ioredis';
import { getTokenFromConfig, marshalBool } from "./utils.js";
import { marshalBool } from "./utils.js";
import { getGitHubReposFromConfig } from "./github.js";

interface IConnectionManager {
Expand Down Expand Up @@ -70,17 +70,13 @@ export class ConnectionManager implements IConnectionManager {
const repoData: RepoData[] = await (async () => {
switch (config.type) {
case 'github': {
const token = config.token ? getTokenFromConfig(config.token, this.context) : undefined;
const gitHubRepos = await getGitHubReposFromConfig(config, abortController.signal, this.context);
const gitHubRepos = await getGitHubReposFromConfig(config, orgId, this.db, abortController.signal);
const hostUrl = config.url ?? 'https://github.com';
const hostname = config.url ? new URL(config.url).hostname : 'github.com';

return gitHubRepos.map((repo) => {
const repoName = `${hostname}/${repo.full_name}`;
const cloneUrl = new URL(repo.clone_url!);
if (token) {
cloneUrl.username = token;
}

const record: RepoData = {
external_id: repo.id.toString(),
Expand Down
5 changes: 3 additions & 2 deletions packages/backend/src/gitea.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,9 @@ import micromatch from 'micromatch';

const logger = createLogger('Gitea');

export const getGiteaReposFromConfig = async (config: GiteaConfig, ctx: AppContext) => {
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
export const getGiteaReposFromConfig = async (config: GiteaConfig, orgId: number, ctx: AppContext) => {
// TODO: pass in DB here to fetch secret properly
const token = config.token ? await getTokenFromConfig(config.token, orgId) : undefined;

const api = giteaApi(config.url ?? 'https://gitea.com', {
token,
Expand Down
5 changes: 3 additions & 2 deletions packages/backend/src/github.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import { createLogger } from "./logger.js";
import { AppContext } from "./types.js";
import { getTokenFromConfig, measure } from "./utils.js";
import micromatch from "micromatch";
import { PrismaClient } from "@sourcebot/db";

const logger = createLogger("GitHub");

Expand All @@ -25,8 +26,8 @@ export type OctokitRepository = {
size?: number,
}

export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, signal: AbortSignal, ctx: AppContext) => {
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => {
const token = config.token ? await getTokenFromConfig(config.token, orgId, db) : undefined;

const octokit = new Octokit({
auth: token,
Expand Down
5 changes: 3 additions & 2 deletions packages/backend/src/gitlab.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ import { getTokenFromConfig, measure } from "./utils.js";
const logger = createLogger("GitLab");
export const GITLAB_CLOUD_HOSTNAME = "gitlab.com";

export const getGitLabReposFromConfig = async (config: GitLabConfig, ctx: AppContext) => {
const token = config.token ? getTokenFromConfig(config.token, ctx) : undefined;
export const getGitLabReposFromConfig = async (config: GitLabConfig, orgId: number, ctx: AppContext) => {
// TODO: pass in DB here to fetch secret properly
const token = config.token ? await getTokenFromConfig(config.token, orgId) : undefined;
const api = new Gitlab({
...(config.token ? {
token,
Expand Down
63 changes: 54 additions & 9 deletions packages/backend/src/main.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,50 @@
import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus } from '@sourcebot/db';
import { ConnectionSyncStatus, PrismaClient, Repo, RepoIndexingStatus, RepoToConnection, Connection } from '@sourcebot/db';
import { existsSync } from 'fs';
import { cloneRepository, fetchRepository } from "./git.js";
import { createLogger } from "./logger.js";
import { captureEvent } from "./posthog.js";
import { AppContext } from "./types.js";
import { getRepoPath, measure } from "./utils.js";
import { getRepoPath, getTokenFromConfig, measure } from "./utils.js";
import { indexGitRepository } from "./zoekt.js";
import { DEFAULT_SETTINGS } from './constants.js';
import { Queue, Worker, Job } from 'bullmq';
import { Redis } from 'ioredis';
import * as os from 'os';
import { ConnectionManager } from './connectionManager.js';
import { ConnectionConfig } from '@sourcebot/schemas/v3/connection.type';

const logger = createLogger('main');

const syncGitRepository = async (repo: Repo, ctx: AppContext) => {
type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection})[] };

// TODO: do this better? ex: try using the tokens from all the connections
// We can no longer use repo.cloneUrl directly since it doesn't contain the token for security reasons. As a result, we need to
// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each
// may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This
// may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing.
/**
 * Resolves an access token for `repo` by walking its connections in order and
 * returning the first non-empty token produced by `getTokenFromConfig`.
 *
 * @param repo - Repo record with its connections (and each connection's config) loaded.
 * @param db - Prisma client, forwarded to `getTokenFromConfig`.
 * @returns The first resolved token, or `undefined` when the repo has no
 * connections or none of them yields a token.
 */
const getTokenForRepo = async (repo: RepoWithConnections, db: PrismaClient) => {
    if (repo.connections.length === 0) {
        logger.error(`Repo ${repo.id} has no connections`);
        return;
    }

    for (const { connection } of repo.connections) {
        const config = connection.config as unknown as ConnectionConfig;
        // Connections without a token entry cannot contribute credentials.
        if (!config.token) {
            continue;
        }

        const resolved = await getTokenFromConfig(config.token, connection.orgId, db);
        if (resolved) {
            // First usable token wins; remaining connections are not consulted.
            return resolved;
        }
    }

    return undefined;
}

const syncGitRepository = async (repo: RepoWithConnections, ctx: AppContext, db: PrismaClient) => {
let fetchDuration_s: number | undefined = undefined;
let cloneDuration_s: number | undefined = undefined;

Expand All @@ -35,7 +65,15 @@ const syncGitRepository = async (repo: Repo, ctx: AppContext) => {
} else {
logger.info(`Cloning ${repo.id}...`);

const { durationMs } = await measure(() => cloneRepository(repo.cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
const token = await getTokenForRepo(repo, db);
let cloneUrl = repo.cloneUrl;
if (token) {
const url = new URL(cloneUrl);
url.username = token;
cloneUrl = url.toString();
}

const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
cloneDuration_s = durationMs / 1000;
Expand Down Expand Up @@ -92,13 +130,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {

const connectionManager = new ConnectionManager(db, DEFAULT_SETTINGS, redis, context);
setInterval(async () => {
const configs = await db.connection.findMany({
const connections = await db.connection.findMany({
where: {
syncStatus: ConnectionSyncStatus.SYNC_NEEDED,
}
});
for (const config of configs) {
await connectionManager.scheduleConnectionSync(config);
for (const connection of connections) {
await connectionManager.scheduleConnectionSync(connection);
}
}, DEFAULT_SETTINGS.resyncConnectionPollingIntervalMs);

Expand All @@ -111,13 +149,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
const numWorkers = numCores * DEFAULT_SETTINGS.indexConcurrencyMultiple;
logger.info(`Detected ${numCores} cores. Setting repo index max concurrency to ${numWorkers}`);
const worker = new Worker('indexQueue', async (job: Job) => {
const repo = job.data as Repo;
const repo = job.data as RepoWithConnections;

let indexDuration_s: number | undefined;
let fetchDuration_s: number | undefined;
let cloneDuration_s: number | undefined;

const stats = await syncGitRepository(repo, context);
const stats = await syncGitRepository(repo, context, db);
indexDuration_s = stats.indexDuration_s;
fetchDuration_s = stats.fetchDuration_s;
cloneDuration_s = stats.cloneDuration_s;
Expand Down Expand Up @@ -171,6 +209,13 @@ export const main = async (db: PrismaClient, context: AppContext) => {
{ indexedAt: { lt: thresholdDate } },
{ repoIndexingStatus: RepoIndexingStatus.NEW }
]
},
include: {
connections: {
include: {
connection: true
}
}
}
});
addReposToQueue(db, indexQueue, repos);
Expand Down
38 changes: 32 additions & 6 deletions packages/backend/src/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ import { Logger } from "winston";
import { AppContext, Repository } from "./types.js";
import path from 'path';
import micromatch from "micromatch";
import { Repo } from "@sourcebot/db";
import { PrismaClient, Repo } from "@sourcebot/db";
import { decrypt } from "@sourcebot/crypto";
import { Token } from "@sourcebot/schemas/v3/shared.type";

export const measure = async <T>(cb : () => Promise<T>) => {
const start = Date.now();
Expand Down Expand Up @@ -86,15 +88,39 @@ export const excludeReposByTopic = <T extends Repository>(repos: T[], excludedRe
});
}

export const getTokenFromConfig = (token: string | { env: string }, ctx: AppContext) => {
export const getTokenFromConfig = async (token: Token, orgId: number, db?: PrismaClient) => {
if (typeof token === 'string') {
return token;
}
const tokenValue = process.env[token.env];
if (!tokenValue) {
throw new Error(`The environment variable '${token.env}' was referenced in ${ctx.configPath}, but was not set.`);
if ('env' in token) {
const tokenValue = process.env[token.env];
if (!tokenValue) {
throw new Error(`The environment variable '${token.env}' was referenced in the config but was not set.`);
}
return tokenValue;
} else if ('secret' in token) {
if (!db) {
throw new Error(`Database connection required to retrieve secret`);
}

const secretKey = token.secret;
const secret = await db.secret.findUnique({
where: {
orgId_key: {
key: secretKey,
orgId
}
}
});

if (!secret) {
throw new Error(`Secret with key ${secretKey} not found for org ${orgId}`);
}

const decryptedSecret = decrypt(secret.iv, secret.encryptedValue);
return decryptedSecret;
}
return tokenValue;
throw new Error(`Invalid token configuration in config`);
}

export const isRemotePath = (path: string) => {
Expand Down
1 change: 1 addition & 0 deletions packages/crypto/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
.env.local
16 changes: 16 additions & 0 deletions packages/crypto/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"name": "@sourcebot/crypto",
"main": "dist/index.js",
"version": "0.1.0",
"scripts": {
"build": "tsc",
"postinstall": "yarn build"
},
"dependencies": {
"dotenv": "^16.4.5"
},
"devDependencies": {
"@types/node": "^22.7.5",
"typescript": "^5.7.3"
}
}
17 changes: 17 additions & 0 deletions packages/crypto/src/environment.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import dotenv from 'dotenv';

/**
 * Resolves the effective value of an environment variable.
 *
 * @param env - The raw value read from the environment (e.g. `process.env.FOO`).
 * @param defaultValue - Fallback used when `env` is not set.
 * @param required - When true, an unset or empty `env` must be covered by a
 * non-empty `defaultValue`, otherwise an error is thrown.
 * @returns `env` when set, otherwise `defaultValue` (possibly `undefined` when
 * the variable is not required).
 * @throws Error when `required` is true and neither `env` nor `defaultValue`
 * holds a non-empty value.
 */
export const getEnv = (env: string | undefined, defaultValue?: string, required?: boolean) => {
    if (required && !env) {
        // For required variables an empty string counts as "not set". Falling through
        // to `env ?? defaultValue` would hand back "" here, because `??` only
        // replaces null/undefined, and silently ignore the provided default.
        if (!defaultValue) {
            throw new Error(`Missing required environment variable`);
        }
        return defaultValue;
    }

    return env ?? defaultValue;
}

// Load variables from a local `.env.local` file before any of them are read below.
// `override: true` is dotenv's option for letting values from the file replace
// ones already present in the environment.
// NOTE(review): the path is relative, so it presumably resolves against the
// process's working directory rather than this package's directory — confirm.
dotenv.config({
path: './.env.local',
override: true
});

// @note: You can use https://generate-random.org/encryption-key-generator to create a new 32 byte key
// Resolved as a required variable with no default, so getEnv throws while this
// module is being loaded if the key is missing.
export const SOURCEBOT_ENCRYPTION_KEY = getEnv(process.env.SOURCEBOT_ENCRYPTION_KEY, undefined, true)!;
Loading