Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion .oxlintrc.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,16 @@
"packages/auth",
"apps/web/.source",
"**/*.d.ts",
"**/*.gen.ts"
"**/*.gen.ts",
".agent",
".agents",
".codex",
".cursor",
".docs",
".gemini",
".opencode",
".windsurf",
".claude/skills"
],
"rules": {
"unicorn/no-abusive-eslint-disable": "off",
Expand Down
1 change: 1 addition & 0 deletions apps/server/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
"worker": "bun run src/workers/job.worker.ts"
},
"dependencies": {
"@ai-sdk/google-vertex": "^4.0.73",
"@ai-sdk/openai": "^3.0.26",
"@aws-sdk/client-s3": "^3.985.0",
"@aws-sdk/s3-request-presigner": "^3.985.0",
Expand Down
2 changes: 2 additions & 0 deletions apps/server/src/lib/job-status.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ export interface CompleteJobResult {
pageCount: number;
tokenCount?: number;
llmModel?: string;
llmProvider?: string;
llmUsage?: LlmUsage;
processingTimeMs: number;
}
Expand Down Expand Up @@ -68,6 +69,7 @@ export const completeJob = async (
completedAt,
jsonResult: result.jsonResult,
llmModel: result.llmModel,
llmProvider: result.llmProvider,
markdownResult: result.markdownResult,
pageCount: result.pageCount,
processingTimeMs: result.processingTimeMs,
Expand Down
101 changes: 86 additions & 15 deletions apps/server/src/services/llm.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,68 @@
import type { LanguageModel } from "ai";

import { createVertex } from "@ai-sdk/google-vertex";
import { createVertexAnthropic } from "@ai-sdk/google-vertex/anthropic";
import { createOpenAI } from "@ai-sdk/openai";
import { env } from "@ocrbase/env/server";
import { generateText } from "ai";

const openrouter = createOpenAI({
apiKey: env.OPENROUTER_API_KEY ?? "",
baseURL: "https://openrouter.ai/api/v1",
});
/** Resolve the configured LLM provider name from the server environment. */
const getProvider = () => {
  return env.LLM_PROVIDER;
};

/**
 * Determine the model identifier to use for LLM calls.
 *
 * An explicit LLM_MODEL env value always wins. Otherwise fall back to
 * Gemini 2.5 Flash in the provider's expected format: Vertex takes a bare
 * model name, OpenRouter a vendor-namespaced one.
 */
const getModelId = (): string => {
  const explicitModel = env.LLM_MODEL;
  if (explicitModel) {
    return explicitModel;
  }
  if (getProvider() === "vertex") {
    return "gemini-2.5-flash";
  }
  return "google/gemini-2.5-flash";
};

/**
 * Construct the language model for the configured provider.
 *
 * Vertex routing: model ids containing "claude" go through the
 * Anthropic-on-Vertex provider, everything else through the standard Vertex
 * provider. Any other provider value falls back to OpenRouter via its
 * OpenAI-compatible endpoint.
 *
 * Fix: the Vertex connection settings object was duplicated verbatim in both
 * Vertex branches; it is now built once and shared.
 */
const createLlmModel = (): LanguageModel => {
  const provider = getProvider();
  const modelId = getModelId();

  if (provider === "vertex") {
    // Shared Vertex connection settings. The empty-string project fallback is
    // only a type-level placeholder — validateLlmConfig() rejects a missing
    // GOOGLE_VERTEX_PROJECT before any request is issued.
    const vertexSettings = {
      location: env.GOOGLE_VERTEX_LOCATION,
      project: env.GOOGLE_VERTEX_PROJECT ?? "",
    };
    if (modelId.includes("claude")) {
      return createVertexAnthropic(vertexSettings)(modelId);
    }
    return createVertex(vertexSettings)(modelId);
  }

  const openrouter = createOpenAI({
    apiKey: env.OPENROUTER_API_KEY ?? "",
    baseURL: "https://openrouter.ai/api/v1",
  });
  return openrouter(modelId);
};

/** Lazily created, module-wide model instance shared by all LLM calls. */
let cachedModel: LanguageModel | null = null;

/** Return the shared model, constructing it on first use. */
const getModel = (): LanguageModel => {
  cachedModel ??= createLlmModel();
  return cachedModel;
};

const validateLlmConfig = () => {
const provider = getProvider();
if (provider === "openrouter" && !env.OPENROUTER_API_KEY) {
throw new Error("OPENROUTER_API_KEY is not configured");
}
if (provider === "vertex" && !env.GOOGLE_VERTEX_PROJECT) {
throw new Error(
"GOOGLE_VERTEX_PROJECT is required when LLM_PROVIDER=vertex"
);
}
};

// NOTE(review): DEFAULT_MODEL looks orphaned by this refactor — every visible
// call site now goes through getModel()/getModelId(). Confirm no remaining
// references elsewhere in the file before removing.
const DEFAULT_MODEL = "google/gemini-2.5-flash";
// Presumably caps how much of a malformed response is fed back into the
// JSON-repair prompt — usage not visible in this hunk; verify at call site.
const JSON_REPAIR_INPUT_LIMIT = 40_000;
// Presumably truncates response text in error/diagnostic previews — usage not
// visible in this hunk; verify at call site.
const RESPONSE_PREVIEW_LIMIT = 240;

Expand All @@ -26,6 +81,7 @@ interface ExtractionResult {
data: Record<string, unknown>;
usage: LlmUsage;
model: string;
provider: string;
}

interface GenerateSchemaOptions {
Expand Down Expand Up @@ -303,7 +359,7 @@ const parseJsonWithRepair = async <T>({
};
} catch {
const repairResult = await generateText({
model: openrouter(DEFAULT_MODEL),
model: getModel(),
prompt: buildJsonRepairPrompt(responseText, expectedShape, schema),
system:
"You are a JSON repair tool. Output valid JSON only, with no markdown fences.",
Expand All @@ -324,6 +380,24 @@ const parseJsonWithRepair = async <T>({
};

export const checkLlmHealth = async (): Promise<boolean> => {
const provider = getProvider();

if (provider === "vertex") {
if (!env.GOOGLE_VERTEX_PROJECT) {
return true;
}
try {
await generateText({
maxOutputTokens: 5,
model: getModel(),
prompt: "ping",
});
return true;
} catch {
return false;
}
}

if (!env.OPENROUTER_API_KEY) {
return true;
}
Expand All @@ -345,9 +419,7 @@ export const llmService = {
markdown,
hints,
}: GenerateSchemaOptions): Promise<GeneratedSchema> {
if (!env.OPENROUTER_API_KEY) {
throw new Error("OPENROUTER_API_KEY is not configured");
}
validateLlmConfig();

let systemPrompt = `You are a JSON schema generator. Analyze the provided document and generate a JSON schema that can be used to extract structured data from similar documents.

Expand All @@ -365,7 +437,7 @@ Do not include any markdown formatting or explanation. Just the JSON object.`;
}

const result = await generateText({
model: openrouter(DEFAULT_MODEL),
model: getModel(),
prompt: markdown,
system: systemPrompt,
});
Expand All @@ -385,9 +457,7 @@ Do not include any markdown formatting or explanation. Just the JSON object.`;
schema,
hints,
}: ProcessExtractionOptions): Promise<ExtractionResult> {
if (!env.OPENROUTER_API_KEY) {
throw new Error("OPENROUTER_API_KEY is not configured");
}
validateLlmConfig();

let systemPrompt =
"You are a data extraction assistant. Extract structured data from the provided markdown content. Return ONLY valid JSON, no markdown formatting or explanation.";
Expand All @@ -401,7 +471,7 @@ Do not include any markdown formatting or explanation. Just the JSON object.`;
}

const result = await generateText({
model: openrouter(DEFAULT_MODEL),
model: getModel(),
prompt: markdown,
system: systemPrompt,
});
Expand All @@ -419,7 +489,8 @@ Do not include any markdown formatting or explanation. Just the JSON object.`;

return {
data: parsedResult.data,
model: DEFAULT_MODEL,
model: getModelId(),
provider: getProvider(),
usage: {
completionTokens:
primaryUsage.completionTokens +
Expand Down
3 changes: 2 additions & 1 deletion apps/server/src/workers/job.worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ const UNRECOVERABLE_CODE_PREFIX = "[cause=";
const RETRYABLE_MESSAGE_PATTERN =
/(timed out|timeout|connection error|econnreset|econnrefused|enotfound|eai_again|429|502|503|504)/i;
const NON_RETRYABLE_MESSAGE_PATTERN =
/(openrouter_api_key is not configured|job not found|no file or url provided|generated schema response is missing required fields|extraction response must be a json object)/i;
/(openrouter_api_key is not configured|google_vertex_project is required|job not found|no file or url provided|generated schema response is missing required fields|extraction response must be a json object)/i;

const toErrorContext = (
error: unknown
Expand Down Expand Up @@ -161,6 +161,7 @@ const runExtraction = async (
await completeJob(jobId, {
jsonResult: extractionResult.data,
llmModel: extractionResult.model,
llmProvider: extractionResult.provider,
llmUsage: extractionResult.usage,
markdownResult: markdown,
pageCount,
Expand Down
Loading