Skip to content

Add back revision support #215

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 27, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions packages/backend/src/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,20 @@ export const fetchRepository = async (path: string, onProgress?: (event: SimpleG
]
);
}

/**
 * Lists the branch names reported by git for the repository at `path`.
 *
 * @param path - Directory of the git repository to inspect.
 * @returns The `all` list from the branch summary produced by `branch()`.
 */
export const getBranches = async (path: string) => {
    // The working directory is set on the same chain that runs `branch()`,
    // so the listing is executed against `path`.
    // NOTE(review): presumably `simpleGit` comes from the `simple-git` package — confirm against the file's imports.
    const { all } = await simpleGit().cwd({ path }).branch();
    return all;
}

/**
 * Lists the tag names reported by git for the repository at `path`.
 *
 * @param path - Directory of the git repository to inspect.
 * @returns The `all` list from the tag summary produced by `tags()`.
 */
export const getTags = async (path: string) => {
    // The working directory is set on the same chain that runs `tags()`,
    // so the listing is executed against `path`.
    const { all } = await simpleGit().cwd({ path }).tags();
    return all;
}
77 changes: 1 addition & 76 deletions packages/backend/src/gitea.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Api, giteaApi, HttpResponse, Repository as GiteaRepository } from 'gitea-js';
import { GiteaConnectionConfig } from '@sourcebot/schemas/v3/gitea.type';
import { getTokenFromConfig, measure, fetchWithRetry } from './utils.js';
import { getTokenFromConfig, measure } from './utils.js';
import fetch from 'cross-fetch';
import { createLogger } from './logger.js';
import micromatch from 'micromatch';
Expand Down Expand Up @@ -56,49 +56,6 @@ export const getGiteaReposFromConfig = async (config: GiteaConnectionConfig, org
return true;
});


if (config.revisions) {
if (config.revisions.branches) {
const branchGlobs = config.revisions.branches;
allRepos = await Promise.all(
allRepos.map(async (repo) => {
const [owner, name] = repo.full_name!.split('/');
let branches = (await fetchWithRetry(
() => getBranchesForRepo(owner, name, api),
`branches for ${owner}/${name}`,
logger
)).map(branch => branch.name!);
branches = micromatch.match(branches, branchGlobs);

return {
...repo,
branches,
};
})
)
}

if (config.revisions.tags) {
const tagGlobs = config.revisions.tags;
allRepos = await Promise.all(
allRepos.map(async (allRepos) => {
const [owner, name] = allRepos.full_name!.split('/');
let tags = (await fetchWithRetry(
() => getTagsForRepo(owner, name, api),
`tags for ${owner}/${name}`,
logger
)).map(tag => tag.name!);
tags = micromatch.match(tags, tagGlobs);

return {
...allRepos,
tags,
};
})
)
}
}

let repos = allRepos
.filter((repo) => {
const isExcluded = shouldExcludeRepo({
Expand Down Expand Up @@ -158,38 +115,6 @@ const shouldExcludeRepo = ({
return shouldExclude;
}

/**
 * Fetches every tag of `owner/repo` by paging through `api.repos.repoListTags`.
 *
 * Logs a debug line before and after the fetch (the latter with the tag count
 * and elapsed milliseconds). On failure the error is logged and rethrown.
 *
 * @param owner - Owner segment of the repository's full name.
 * @param repo - Name segment of the repository's full name.
 * @param api - Gitea API client used to issue the requests.
 * @returns The collected tags returned by `paginate`.
 */
const getTagsForRepo = async <T>(owner: string, repo: string, api: Api<T>) => {
    // One page request; `paginate` drives this with successive page numbers.
    const listAllTags = () => paginate((page) => api.repos.repoListTags(owner, repo, { page }));

    try {
        logger.debug(`Fetching tags for repo ${owner}/${repo}...`);
        const timed = await measure(listAllTags);
        const tags = timed.data;
        logger.debug(`Found ${tags.length} tags in repo ${owner}/${repo} in ${timed.durationMs}ms.`);
        return tags;
    } catch (e) {
        // Log with context, then propagate so the caller can decide how to react.
        logger.error(`Failed to fetch tags for repo ${owner}/${repo}.`, e);
        throw e;
    }
}

/**
 * Fetches every branch of `owner/repo` by paging through `api.repos.repoListBranches`.
 *
 * Logs a debug line before and after the fetch (the latter with the branch count
 * and elapsed milliseconds). On failure the error is logged and rethrown.
 *
 * @param owner - Owner segment of the repository's full name.
 * @param repo - Name segment of the repository's full name.
 * @param api - Gitea API client used to issue the requests.
 * @returns The collected branches returned by `paginate`.
 */
const getBranchesForRepo = async <T>(owner: string, repo: string, api: Api<T>) => {
    // One page request; `paginate` drives this with successive page numbers.
    const listAllBranches = () => paginate((page) => api.repos.repoListBranches(owner, repo, { page }));

    try {
        logger.debug(`Fetching branches for repo ${owner}/${repo}...`);
        const timed = await measure(listAllBranches);
        const branches = timed.data;
        logger.debug(`Found ${branches.length} branches in repo ${owner}/${repo} in ${timed.durationMs}ms.`);
        return branches;
    } catch (e) {
        // Log with context, then propagate so the caller can decide how to react.
        logger.error(`Failed to fetch branches for repo ${owner}/${repo}.`, e);
        throw e;
    }
}

const getReposOwnedByUsers = async <T>(users: string[], api: Api<T>) => {
const results = await Promise.allSettled(users.map(async (user) => {
try {
Expand Down
1 change: 1 addition & 0 deletions packages/backend/src/posthogEvents.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ export type PosthogEventMap = {
connectionId: number,
repoCount: number,
},
revisions_truncated: {},
//////////////////////////////////////////////////////////////////
}

Expand Down
2 changes: 1 addition & 1 deletion packages/backend/src/promClient.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import express, { Request, Response } from 'express';
import client, { Registry, Counter, Gauge, Histogram } from 'prom-client';
import client, { Registry, Counter, Gauge } from 'prom-client';

export class PromClient {
private registry: Registry;
Expand Down
83 changes: 49 additions & 34 deletions packages/backend/src/repoCompileUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { Prisma, PrismaClient } from '@sourcebot/db';
import { WithRequired } from "./types.js"
import { marshalBool } from "./utils.js";
import { GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { RepoMetadata } from './types.js';

export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;

Expand Down Expand Up @@ -54,17 +55,21 @@ export const compileGithubConfig = async (
}
},
metadata: {
'zoekt.web-url-type': 'github',
'zoekt.web-url': repo.html_url,
'zoekt.name': repoName,
'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork),
'zoekt.public': marshalBool(repo.private === false)
},
gitConfig: {
'zoekt.web-url-type': 'github',
'zoekt.web-url': repo.html_url,
'zoekt.name': repoName,
'zoekt.github-stars': (repo.stargazers_count ?? 0).toString(),
'zoekt.github-watchers': (repo.watchers_count ?? 0).toString(),
'zoekt.github-subscribers': (repo.subscribers_count ?? 0).toString(),
'zoekt.github-forks': (repo.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork),
'zoekt.public': marshalBool(repo.private === false),
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
} satisfies RepoMetadata,
};

return record;
Expand Down Expand Up @@ -113,15 +118,19 @@ export const compileGitlabConfig = async (
}
},
metadata: {
'zoekt.web-url-type': 'gitlab',
'zoekt.web-url': projectUrl,
'zoekt.name': project.path_with_namespace,
'zoekt.gitlab-stars': (project.stargazers_count ?? 0).toString(),
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(project.archived),
'zoekt.fork': marshalBool(isFork),
'zoekt.public': marshalBool(project.private === false)
},
gitConfig: {
'zoekt.web-url-type': 'gitlab',
'zoekt.web-url': projectUrl,
'zoekt.name': project.path_with_namespace,
'zoekt.gitlab-stars': (project.stargazers_count ?? 0).toString(),
'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(),
'zoekt.archived': marshalBool(project.archived),
'zoekt.fork': marshalBool(isFork),
'zoekt.public': marshalBool(project.private === false)
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
} satisfies RepoMetadata,
};

return record;
Expand Down Expand Up @@ -168,13 +177,17 @@ export const compileGiteaConfig = async (
}
},
metadata: {
'zoekt.web-url-type': 'gitea',
'zoekt.web-url': repo.html_url!,
'zoekt.name': repo.full_name!,
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork!),
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
},
gitConfig: {
'zoekt.web-url-type': 'gitea',
'zoekt.web-url': repo.html_url!,
'zoekt.name': repo.full_name!,
'zoekt.archived': marshalBool(repo.archived),
'zoekt.fork': marshalBool(repo.fork!),
'zoekt.public': marshalBool(repo.internal === false && repo.private === false),
},
branches: config.revisions?.branches ?? undefined,
tags: config.revisions?.tags ?? undefined,
} satisfies RepoMetadata,
};

return record;
Expand Down Expand Up @@ -227,13 +240,15 @@ export const compileGerritConfig = async (
}
},
metadata: {
'zoekt.web-url-type': 'gitiles',
'zoekt.web-url': webUrl,
'zoekt.name': repoId,
'zoekt.archived': marshalBool(false),
'zoekt.fork': marshalBool(false),
'zoekt.public': marshalBool(true),
},
gitConfig: {
'zoekt.web-url-type': 'gitiles',
'zoekt.web-url': webUrl,
'zoekt.name': repoId,
'zoekt.archived': marshalBool(false),
'zoekt.fork': marshalBool(false),
'zoekt.public': marshalBool(true),
},
} satisfies RepoMetadata,
};

return record;
Expand Down
6 changes: 3 additions & 3 deletions packages/backend/src/repoManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { Redis } from 'ioredis';
import { createLogger } from "./logger.js";
import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db";
import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { AppContext, Settings } from "./types.js";
import { AppContext, Settings, RepoMetadata } from "./types.js";
import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js";
import { cloneRepository, fetchRepository } from "./git.js";
import { existsSync, rmSync, readdirSync, rm } from 'fs';
Expand Down Expand Up @@ -187,7 +187,7 @@ export class RepoManager implements IRepoManager {
let cloneDuration_s: number | undefined = undefined;

const repoPath = getRepoPath(repo, this.ctx);
const metadata = repo.metadata as Record<string, string>;
const metadata = repo.metadata as RepoMetadata;

// If the repo was already in the indexing state, this job was likely killed and picked up again. As a result,
// to ensure the repo state is valid, we delete the repo if it exists so we get a fresh clone
Expand Down Expand Up @@ -223,7 +223,7 @@ export class RepoManager implements IRepoManager {
cloneUrl = url.toString();
}

const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata, ({ method, stage, progress }) => {
const { durationMs } = await measure(() => cloneRepository(cloneUrl, repoPath, metadata.gitConfig, ({ method, stage, progress }) => {
//this.logger.info(`git.${method} ${stage} stage ${progress}% complete for ${repo.id}`)
}));
cloneDuration_s = durationMs / 1000;
Expand Down
23 changes: 23 additions & 0 deletions packages/backend/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,29 @@ export type Settings = {
gcGracePeriodMs: number;
}

/**
 * Shape of the value stored in the `metadata` field of the `Repo` table.
 * Every property is optional.
 */
export type RepoMetadata = {
    /**
     * Key-value pairs that are passed as git config variables
     * when the repo is cloned.
     * @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
     */
    gitConfig?: Record<string, string>;

    /**
     * Branches to index. Entries may be glob patterns.
     */
    branches?: Array<string>;

    /**
     * Tags to index. Entries may be glob patterns.
     */
    tags?: Array<string>;
}


// @see : https://stackoverflow.com/a/61132308
export type DeepPartial<T> = T extends object ? {
[P in keyof T]?: DeepPartial<T[P]>;
Expand Down
54 changes: 50 additions & 4 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,63 @@
import { exec } from "child_process";
import { AppContext } from "./types.js";
import { AppContext, RepoMetadata } from "./types.js";
import { Repo } from "@sourcebot/db";
import { getRepoPath } from "./utils.js";
import { DEFAULT_SETTINGS } from "./constants.js";
import { getShardPrefix } from "./utils.js";
import { getBranches, getTags } from "./git.js";
import micromatch from "micromatch";
import { createLogger } from "./logger.js";
import { captureEvent } from "./posthog.js";

const logger = createLogger('zoekt');

export const indexGitRepository = async (repo: Repo, ctx: AppContext) => {
const revisions = [
let revisions = [
'HEAD'
];

const shardPrefix = getShardPrefix(repo.orgId, repo.id);

const repoPath = getRepoPath(repo, ctx);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const metadata = repo.metadata as RepoMetadata;

if (metadata.branches) {
const branchGlobs = metadata.branches
const allBranches = await getBranches(repoPath);
const matchingBranches =
allBranches
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
.map((branch) => `refs/heads/${branch}`);

revisions = [
...revisions,
...matchingBranches
];
}

if (metadata.tags) {
const tagGlobs = metadata.tags;
const allTags = await getTags(repoPath);
const matchingTags =
allTags
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
.map((tag) => `refs/tags/${tag}`);

revisions = [
...revisions,
...matchingTags
];
}

// zoekt has a limit of 64 branches/tags to index.
if (revisions.length > 64) {
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
captureEvent('backend_revisions_truncated', {
repoId: repo.id,
revisionCount: revisions.length,
});
revisions = revisions.slice(0, 64);
}

const command = `zoekt-git-index -allow_missing_branches -index ${ctx.indexPath} -file_limit ${DEFAULT_SETTINGS.maxFileSize} -branches ${revisions.join(',')} -tenant_id ${repo.orgId} -shard_prefix ${shardPrefix} ${repoPath}`;

return new Promise<{ stdout: string, stderr: string }>((resolve, reject) => {
Expand Down
6 changes: 3 additions & 3 deletions packages/schemas/src/v3/connection.schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,11 @@ const schema = {
},
"revisions": {
"type": "object",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed.",
"description": "The revisions (branches, tags) that should be included when indexing. The default branch (HEAD) is always indexed. A maximum of 64 revisions can be indexed, with any additional revisions being ignored.",
"properties": {
"branches": {
"type": "array",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported.",
"description": "List of branches to include when indexing. For a given repo, only the branches that exist on the repo's remote *and* match at least one of the provided `branches` will be indexed. The default branch (HEAD) is always indexed. Glob patterns are supported. A maximum of 64 branches can be indexed, with any additional branches being ignored.",
"items": {
"type": "string"
},
Expand All @@ -175,7 +175,7 @@ const schema = {
},
"tags": {
"type": "array",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported.",
"description": "List of tags to include when indexing. For a given repo, only the tags that exist on the repo's remote *and* match at least one of the provided `tags` will be indexed. Glob patterns are supported. A maximum of 64 tags can be indexed, with any additional tags being ignored.",
"items": {
"type": "string"
},
Expand Down
Loading