Skip to content

Commit 686668d

Browse files
committed
Run 5 prep: naming hints extraction, prompt rewrite, ALLCAPS ban
- Add extractNamingHints() to codegen.ts: walks schema.json to produce naming_hints.json with correct PascalCase error/kind names, written into the agent workspace so it has authoritative naming data
- Complete rewrite of prompts.ts: naming_hints.json featured prominently, scaffolding scripts explicitly banned, mandatory TS reading phase, previous-failures section with concrete bad patterns from runs 1-4
- Add ALLCAPS class name check to verify-script.ts: regex bans class names with 4+ consecutive uppercase letters (e.g. NOTFOUNDError)
1 parent 19b2dde commit 686668d

File tree

3 files changed

+447
-445
lines changed

3 files changed

+447
-445
lines changed

codegen-llm/src/codegen.ts

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,10 +162,161 @@ function validatePrerequisites(opts: CodegenOptions): void {
162162
}
163163
}
164164

165+
// ---------------------------------------------------------------------------
166+
// Naming hints extraction
167+
// ---------------------------------------------------------------------------
168+
169+
/**
 * Naming data pre-computed from schema.json and serialised to
 * naming_hints.json in the agent workspace.
 */
interface NamingHints {
  /** Maps error code literals (e.g. "NOT_FOUND") to PascalCase class names (e.g. "NotFoundError"). */
  errorCodeToClassName: Record<string, string>;
  /** The four standard River error codes, expected to appear in virtually every procedure. */
  standardErrorCodes: string[];
  /** Maps $kind literal values to suggested variant class name prefixes. */
  kindValueToClassName: Record<string, string>;
}
177+
178+
/**
 * Walk schema.json and extract naming hints the agent should use.
 *
 * This lets us compute correct PascalCase names for error codes and $kind
 * variants mechanically, so the agent doesn't need to implement its own
 * (broken) conversion.
 *
 * Services and procedures that are not JSON objects are skipped rather than
 * dereferenced, so one malformed entry does not abort the whole extraction.
 *
 * @param schemaPath - Path of the serialised schema JSON file to read.
 * @returns Error-code and $kind name maps plus the list of standard error codes.
 * @throws If the file cannot be read or parsed (errors from `fs.readFileSync` /
 *   `JSON.parse`), or a `TypeError` if the document root is not a JSON object.
 */
function extractNamingHints(schemaPath: string): NamingHints {
  const isObject = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null;

  // JSON.parse returns arbitrary data: keep it `unknown` and narrow explicitly.
  const schema: unknown = JSON.parse(fs.readFileSync(schemaPath, "utf8"));
  if (!isObject(schema)) {
    throw new TypeError(`Schema at ${schemaPath} must be a JSON object`);
  }

  const errorCodes = new Set<string>();
  const kindValues = new Set<string>();

  // Walk all procedures to find error codes and $kind values
  const services = isObject(schema.services) ? schema.services : {};
  for (const service of Object.values(services)) {
    if (!isObject(service) || !isObject(service.procedures)) continue;
    for (const proc of Object.values(service.procedures)) {
      if (!isObject(proc)) continue;
      // Collect error codes
      collectErrorCodes(proc.errors, errorCodes);
      // Collect $kind values from input, output, init
      for (const facet of ["input", "output", "init"]) {
        collectKindValues(proc[facet], kindValues);
      }
    }
  }

  // Build error code → class name map (Set iteration keeps discovery order)
  const errorCodeToClassName: Record<string, string> = Object.fromEntries(
    [...errorCodes].map((code) => [code, errorCodeToPascal(code)]),
  );

  // Build $kind value → class name map
  const kindValueToClassName: Record<string, string> = Object.fromEntries(
    [...kindValues].map((kind) => [kind, kindValueToPascal(kind)]),
  );

  // Standard error codes (appear in virtually every procedure)
  const standardErrorCodes = [
    "UNCAUGHT_ERROR",
    "UNEXPECTED_DISCONNECT",
    "INVALID_REQUEST",
    "CANCEL",
  ];

  return { errorCodeToClassName, standardErrorCodes, kindValueToClassName };
}
225+
226+
/**
 * Recursively find all error code literals under `properties.code` in error schemas.
 *
 * A variant's `code` schema may be a single literal (`const`), an `enum` of
 * strings, or a union (`anyOf` / `oneOf`) of literals; every string literal
 * found is added. Variants are searched through `anyOf` / `oneOf` / `allOf`
 * only; other object properties are deliberately not descended into, so an
 * unrelated nested field that happens to be called `code` is not mistaken
 * for an error code.
 *
 * @param node - Error schema (or sub-schema); non-object values are ignored.
 * @param codes - Set that discovered codes are added to (mutated in place).
 */
function collectErrorCodes(node: unknown, codes: Set<string>): void {
  if (!node || typeof node !== "object") return;
  const obj = node as Record<string, unknown>;

  // Gather every string literal a `code` schema can evaluate to.
  const addCodeLiterals = (codeSchema: unknown): void => {
    if (!codeSchema || typeof codeSchema !== "object") return;
    const schema = codeSchema as Record<string, unknown>;
    if (typeof schema.const === "string") {
      codes.add(schema.const);
    }
    if (Array.isArray(schema.enum)) {
      for (const value of schema.enum as unknown[]) {
        if (typeof value === "string") codes.add(value);
      }
    }
    // Union of literals: each member is itself a code schema.
    for (const key of ["anyOf", "oneOf"]) {
      const members = schema[key];
      if (Array.isArray(members)) {
        for (const member of members as unknown[]) addCodeLiterals(member);
      }
    }
  };

  // If this is an error variant with properties.code
  const props = obj.properties;
  if (props && typeof props === "object") {
    addCodeLiterals((props as Record<string, unknown>).code);
  }

  // Recurse into anyOf / oneOf / allOf
  for (const key of ["anyOf", "oneOf", "allOf"]) {
    const branches = obj[key];
    if (Array.isArray(branches)) {
      for (const branch of branches as unknown[]) {
        collectErrorCodes(branch, codes);
      }
    }
  }
}
251+
252+
/**
 * Recursively find all `$kind` literal values in schemas.
 *
 * An object schema contributes the string `const` of its `$kind` property, if
 * any. The walk then continues into every place a sub-schema can live:
 * each property schema, `patternProperties`, `additionalProperties`, `items`
 * and the `anyOf` / `oneOf` / `allOf` combinators, so variants nested inside
 * object fields or arrays are found as well.
 *
 * @param node - Schema (or sub-schema); non-object values are ignored.
 * @param kinds - Set that discovered `$kind` values are added to (mutated in place).
 */
function collectKindValues(node: unknown, kinds: Set<string>): void {
  if (!node || typeof node !== "object") return;
  const obj = node as Record<string, unknown>;

  const props = obj.properties;
  if (props && typeof props === "object") {
    const propSchemas = props as Record<string, unknown>;
    const kindSchema = propSchemas["$kind"];
    if (kindSchema && typeof kindSchema === "object") {
      const literal = (kindSchema as Record<string, unknown>).const;
      if (typeof literal === "string") {
        kinds.add(literal);
      }
    }
    // Variants may be nested inside any field of this object.
    for (const child of Object.values(propSchemas)) {
      collectKindValues(child, kinds);
    }
  }

  // patternProperties is a map of pattern → schema, like `properties`.
  const patterns = obj.patternProperties;
  if (patterns && typeof patterns === "object") {
    for (const child of Object.values(patterns as Record<string, unknown>)) {
      collectKindValues(child, kinds);
    }
  }

  // Keywords holding either one schema or an array of schemas. Boolean values
  // (e.g. `additionalProperties: false`) fall through the non-object guard.
  for (const key of ["anyOf", "oneOf", "allOf", "items", "additionalProperties"]) {
    const val = obj[key];
    if (Array.isArray(val)) {
      for (const item of val as unknown[]) {
        collectKindValues(item, kinds);
      }
    } else {
      collectKindValues(val, kinds);
    }
  }
}
278+
279+
/**
 * Convert an error code to a PascalCase class name ending in "Error".
 * e.g. "NOT_FOUND" → "NotFoundError", "CGROUP_CLEANUP_ERROR" → "CgroupCleanupError"
 *
 * The code is split on any run of characters that are not letters or digits
 * (not just "_"), so codes such as "not-found" still yield a valid identifier.
 * A trailing "error" word is dropped case-insensitively before the suffix is
 * appended, to avoid a doubled "ErrorError"; a code consisting solely of that
 * word keeps it, so the result never collides with the built-in `Error`.
 *
 * @param code - Error code literal, conventionally UPPER_SNAKE_CASE.
 * @returns The PascalCase class name, always suffixed with "Error".
 */
function errorCodeToPascal(code: string): string {
  const words = code.split(/[^\p{L}\p{N}]+/u).filter((word) => word.length > 0);

  // If the code already ends with an "error" word, don't double-suffix
  if (words.length > 1 && words[words.length - 1]?.toLowerCase() === "error") {
    words.pop();
  }

  const pascal = words
    .map((word) => word.charAt(0).toUpperCase() + word.slice(1).toLowerCase())
    .join("");
  return `${pascal}Error`;
}
292+
293+
/**
 * Convert a $kind value to a PascalCase class name prefix.
 * e.g. "finished" → "Finished", "finalOutput" → "FinalOutput"
 *
 * The value is split on any run of characters that are not letters or digits,
 * and each segment gets its first letter capitalised, so "final_output" and
 * "tool-call" become "FinalOutput" and "ToolCall". Inner capitals of a
 * camelCase segment are preserved. A segment with no lowercase letters
 * (e.g. "DONE") is reduced to "Done" so the hint does not suggest an
 * all-caps class name.
 *
 * @param kind - The `$kind` literal value from the schema.
 * @returns The PascalCase prefix; an empty string if `kind` has no letters or digits.
 */
function kindValueToPascal(kind: string): string {
  return kind
    .split(/[^\p{L}\p{N}]+/u)
    .filter((segment) => segment.length > 0)
    .map((segment) => {
      const rest = segment.slice(1);
      // camelCase segments keep their inner capitals; shouting segments do not.
      const isAllCaps = segment === segment.toUpperCase();
      return segment.charAt(0).toUpperCase() + (isAllCaps ? rest.toLowerCase() : rest);
    })
    .join("");
}
301+
165302
function setupWorkspace(workDir: string, opts: CodegenOptions): void {
166303
// Copy the serialised schema
167304
fs.copyFileSync(opts.schemaPath, path.join(workDir, "schema.json"));
168305

306+
// Pre-compute naming hints from the schema so the agent doesn't need to
307+
// implement PascalCase conversion (which it keeps getting wrong).
308+
log(opts, "Extracting naming hints from schema...");
309+
const namingHints = extractNamingHints(opts.schemaPath);
310+
fs.writeFileSync(
311+
path.join(workDir, "naming_hints.json"),
312+
JSON.stringify(namingHints, null, 2),
313+
);
314+
log(
315+
opts,
316+
`Extracted ${Object.keys(namingHints.errorCodeToClassName).length} error names, ` +
317+
`${Object.keys(namingHints.kindValueToClassName).length} $kind variant names`,
318+
);
319+
169320
// Write the verification script OUTSIDE the workspace so the agent
170321
// cannot read its source (workspace-write sandbox restricts reads to
171322
// the workspace + additionalDirectories). We place a thin shell

0 commit comments

Comments
 (0)