Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

EPD-572 Setup CLI repo #1

Merged
merged 33 commits into from
Feb 19, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Runs are per-test
  • Loading branch information
Nicole White committed Feb 12, 2024
commit 452a34997c76f79b562e8a6f5041a337f41b3f13
10 changes: 6 additions & 4 deletions python-example.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ async def evaluate_output(
)

await client.post("/evals", json=dict(
testId=id,
testExternalId=id,
testCaseHash=test_case.hash,
evaluatorId=evaluator.id,
score=evaluation.score,
Expand All @@ -132,7 +132,7 @@ async def run_test_case(
output = await loop.run_in_executor(None, ctx.run, fn, test_case)

await client.post("/results", json=dict(
testId=test_id,
testExternalId=test_id,
testCaseHash=test_case.hash,
testCaseBody=dataclasses.asdict(test_case),
testCaseOutput=output,
Expand Down Expand Up @@ -170,6 +170,8 @@ async def run_test(
]
await asyncio.gather(*run_tasks)

await client.post("/end", json=dict(testExternalId=test_id))


# Sync entrypoint
def test(
Expand All @@ -186,11 +188,11 @@ def test(
fn=fn,
),
loop,
)
)
future.result()



# Example usage
if __name__ == "__main__":
import random

Expand Down
104 changes: 72 additions & 32 deletions src/handlers/testing/exec/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ import { z } from 'zod';
import { startInteractiveCLI, interactiveEmitter } from './interactive-cli';
import net from 'net';

/**
* Utils
*/
function findAvailablePort(startPort: number): Promise<number> {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nice

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ChatGPT wrote that for me

return new Promise((resolve, reject) => {
function tryListening(port: number) {
Expand All @@ -34,20 +37,29 @@ function findAvailablePort(startPort: number): Promise<number> {
}

/**
* Current run utils
* Globals
*/
let _currentRunId: string | undefined = undefined;
let _currentRunMessage: string | undefined = undefined;
let _isInteractive: boolean | undefined = undefined;

async function currentRunId(): Promise<string> {
if (!_currentRunId) {
const run = await startRun();
_currentRunId = run.runId;
// Map of test's external ID to its current run ID and its internal test ID
const testExternalIdToRun: Record<string, { runId: string; testId: string }> =
{};

async function currentRun(args: {
testExternalId: string;
}): Promise<{ runId: string; testId: string }> {
let run = testExternalIdToRun[args.testExternalId];
if (!run) {
run = await startRun({ testExternalId: args.testExternalId });
testExternalIdToRun[args.testExternalId] = run;
}
return _currentRunId;
return run;
}

/**
* Logger
*/
const logger = {
log: (...args: unknown[]) => {
if (_isInteractive) {
Expand All @@ -71,7 +83,7 @@ const logger = {
* Accumulate events for the duration of the run
*/
interface TestCaseEvent {
testId: string;
testExternalId: string;
testCaseHash: string;
message: string;
traceId: string;
Expand All @@ -84,7 +96,7 @@ const testCaseEvents: TestCaseEvent[] = [];
/**
* Keep a map of test case hashes to their result IDs
*
* runId -> testId -> testCaseHash -> testCaseResultId
* testExternalId -> testCaseHash -> testCaseResultId
*/
const testCaseHashToResultId: Record<string, Record<string, string>> = {};

Expand All @@ -111,43 +123,54 @@ function evaluationPassed(args: {
/**
* Public API stubs
*/
async function startRun(): Promise<{ runId: string }> {
logger.log('POST /api/testing/local/runs', { message: _currentRunMessage });
return { runId: crypto.randomUUID() };
async function startRun(args: {
testExternalId: string;
}): Promise<{ runId: string; testId: string }> {
logger.log('POST /api/testing/local/runs', {
testExternalId: args.testExternalId,
message: _currentRunMessage,
});
return { runId: crypto.randomUUID(), testId: crypto.randomUUID() };
}

async function endRun(): Promise<void> {
const runId = await currentRunId();
async function endRun(args: { testExternalId: string }): Promise<void> {
const { runId } = await currentRun(args);
logger.log(`POST /api/testing/local/runs/${runId}/end`);
interactiveEmitter.emit('end');
_currentRunId = undefined;
interactiveEmitter.emit('end', { testExternalId: args.testExternalId });
delete testExternalIdToRun[args.testExternalId];
}

async function postTestCaseResult(args: {
testId: string;
testExternalId: string;
testCaseHash: string;
testCaseBody?: unknown;
testCaseOutput?: unknown;
testCaseEvents: TestCaseEvent[];
}): Promise<{ testCaseResultId: string }> {
const runId = await currentRunId();
logger.log(`POST /api/testing/local/runs/${runId}/results`, args);
const { runId, testId } = await currentRun(args);
logger.log(`POST /api/testing/local/runs/${runId}/results`, {
...args,
testId,
});
return { testCaseResultId: crypto.randomUUID() };
}

async function postTestCaseEval(args: {
testId: string;
testExternalId: string;
testCaseResultId: string;
evaluatorId: string;
score: number;
passed: boolean | undefined;
thresholdOp?: '<' | '<=' | '>' | '>=';
thresholdValue?: number;
}): Promise<void> {
const runId = await currentRunId();
const { runId, testId } = await currentRun(args);
// TODO: use enums, zod schemas for passing this data to the interactive CLI
interactiveEmitter.emit('eval', { ...args, runId });
logger.log(`POST /api/testing/local/runs/${runId}/evals`, args);
interactiveEmitter.emit('eval', args);
logger.log(`POST /api/testing/local/runs/${runId}/evals`, {
...args,
testId,
});
}

/**
Expand All @@ -160,7 +183,7 @@ app.post(
zValidator(
'json',
z.object({
testId: z.string(),
testExternalId: z.string(),
testCaseHash: z.string(),
message: z.string(),
traceId: z.string(),
Expand All @@ -180,7 +203,7 @@ app.post(
zValidator(
'json',
z.object({
testId: z.string(),
testExternalId: z.string(),
testCaseHash: z.string(),
testCaseBody: z.unknown(),
testCaseOutput: z.unknown(),
Expand All @@ -190,18 +213,21 @@ app.post(
const data = c.req.valid('json');

const events = testCaseEvents.filter(
(e) => e.testId === data.testId && e.testCaseHash === data.testCaseHash,
(e) =>
e.testExternalId === data.testExternalId &&
e.testCaseHash === data.testCaseHash,
);
const { testCaseResultId } = await postTestCaseResult({
...data,
testCaseEvents: events,
});

if (!testCaseHashToResultId[data.testId]) {
testCaseHashToResultId[data.testId] = {};
if (!testCaseHashToResultId[data.testExternalId]) {
testCaseHashToResultId[data.testExternalId] = {};
}

testCaseHashToResultId[data.testId][data.testCaseHash] = testCaseResultId;
testCaseHashToResultId[data.testExternalId][data.testCaseHash] =
testCaseResultId;

return c.json('ok');
},
Expand All @@ -212,7 +238,7 @@ app.post(
zValidator(
'json',
z.object({
testId: z.string(),
testExternalId: z.string(),
testCaseHash: z.string(),
evaluatorId: z.string(),
score: z.number(),
Expand All @@ -233,7 +259,7 @@ app.post(
}

const testCaseResultId =
testCaseHashToResultId[data.testId]?.[data.testCaseHash];
testCaseHashToResultId[data.testExternalId]?.[data.testCaseHash];

if (!testCaseResultId) {
logger.warn(
Expand All @@ -252,6 +278,21 @@ app.post(
},
);

// POST /end: validates that the JSON body carries a string `testExternalId`
// and hands it to endRun, then replies with 'ok'.
app.post(
  '/end',
  zValidator('json', z.object({ testExternalId: z.string() })),
  async (c) => {
    const { testExternalId } = c.req.valid('json');
    await endRun({ testExternalId });
    return c.json('ok');
  },
);

/**
* Exec command while local server is running
*/
Expand Down Expand Up @@ -294,7 +335,6 @@ export async function exec(args: {
env,
silent: args.interactive,
}).finally(async () => {
await endRun();
server?.close();
});
},
Expand Down
93 changes: 63 additions & 30 deletions src/handlers/testing/exec/interactive-cli.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,69 @@ export const interactiveEmitter = new EventEmitter();

const Space = () => <Text> </Text>;

/**
 * Renders one row summarizing a single test.
 *
 * From left to right: a status indicator (a spinner while the run is still in
 * progress, a PASSED/FAILED badge once `runIsOver` is true), the test's
 * external ID in bold, one colored dot per recorded outcome, and a bordered
 * "Rerun" label pushed to the far side by a spacer.
 *
 * The badge reads PASSED only when every entry in `outcomes` is `true`.
 */
function TestOutcomes(props: {
  runIsOver: boolean;
  testExternalId: string;
  outcomes: boolean[];
}) {
  const { runIsOver, testExternalId, outcomes } = props;
  const allPassed = outcomes.every(Boolean);

  // Status badge once the run has ended, spinner until then.
  const status = runIsOver ? (
    <Text color="white" backgroundColor={allPassed ? 'green' : 'red'}>
      <Space />
      {allPassed ? 'PASSED' : 'FAILED'}
      <Space />
    </Text>
  ) : (
    <Spinner type="dots" />
  );

  // One dot per outcome: green for a pass, red for a failure.
  const dots = outcomes.map((outcome, index) => (
    <Text key={index} color={outcome ? 'green' : 'red'}>
      {'.'}
    </Text>
  ));

  return (
    <Box alignItems="center">
      {status}
      <Space />
      <Text bold={true}>{testExternalId}</Text>
      <Space />
      {dots}
      <Spacer />
      <Box borderStyle="single">
        <Text>Rerun</Text>
      </Box>
    </Box>
  );
}

const App = () => {
const [outcomes, setOutcomes] = useState<boolean[]>([]);
const [runIsOver, setRunIsOver] = useState<boolean>(false);
const [testIdToOutcomes, setTestIdToOutcomes] = useState<
Record<string, boolean[]>
>({});
const [testIdToRunIsOver, setTestIdToRunIsOver] = useState<
Record<string, boolean>
>({});

useInput((input, key) => {
// TODO: add interaction!
});

useEffect(() => {
const evalListener = (args: { passed: boolean }) => {
setOutcomes((prevOutcomes) => [...prevOutcomes, args.passed]);
const evalListener = (args: {
testExternalId: string;
passed: boolean;
}) => {
setTestIdToOutcomes((prevOutcomes) => {
const { testExternalId, passed } = args;
return {
...prevOutcomes,
[testExternalId]: [...(prevOutcomes[testExternalId] || []), passed],
};
});
};

const onEndListener = () => {
setRunIsOver(true);
const onEndListener = (args: { testExternalId: string }) => {
setTestIdToRunIsOver((prevRunIsOver) => {
return { ...prevRunIsOver, [args.testExternalId]: true };
});
};

interactiveEmitter.on('eval', evalListener);
Expand All @@ -36,34 +84,19 @@ const App = () => {
return (
<Box
paddingX={1}
flexDirection="column"
borderStyle="round"
borderColor="gray"
alignItems="center"
minHeight={12}
>
{runIsOver ? (
<Text
color="white"
backgroundColor={outcomes.every((x) => x) ? 'green' : 'red'}
>
<Space />
{outcomes.every((x) => x) ? 'PASSED' : 'FAILED'}
<Space />
</Text>
) : (
<Spinner type="dots" />
)}
<Space />
<Text bold={true}>acme-bot</Text>
<Space />
{outcomes.map((passed, i) => (
<Text key={i} color={passed ? 'green' : 'red'}>
{'.'}
</Text>
{Object.entries(testIdToOutcomes).map(([testId, outcomes]) => (
<TestOutcomes
key={testId}
runIsOver={testIdToRunIsOver[testId]}
testExternalId={testId}
outcomes={outcomes}
/>
))}
<Spacer />
<Box borderStyle="single">
<Text>Rerun</Text>
</Box>
</Box>
);
};
Expand Down