From 742aec3fe2474ece57ce3eae9624e688c27ffb5c Mon Sep 17 00:00:00 2001 From: Your Name Date: Mon, 13 Jan 2025 12:40:09 -0800 Subject: [PATCH] review pass adjustments --- src/base/htmlTracer.ts | 15 ++---- src/base/htmlTracerTypes.ts | 3 +- src/base/index.ts | 4 ++ src/base/jsonTypes.ts | 2 +- src/base/materialized.ts | 84 ++++++++++++--------------------- src/base/promptElements.tsx | 4 +- src/base/promptRenderer.ts | 55 +++++++++++---------- src/base/test/renderer.test.tsx | 14 +++--- src/base/tokenizer/tokenizer.ts | 11 ++++- 9 files changed, 82 insertions(+), 110 deletions(-) diff --git a/src/base/htmlTracer.ts b/src/base/htmlTracer.ts index 8db25df..b123b15 100644 --- a/src/base/htmlTracer.ts +++ b/src/base/htmlTracer.ts @@ -14,7 +14,7 @@ import { TraceMaterializedNodeType, } from './htmlTracerTypes'; import { - MaterializedChatMesageImage, + MaterializedChatMessageImage, MaterializedChatMessage, MaterializedChatMessageTextChunk, MaterializedContainer, @@ -225,23 +225,14 @@ async function serializeMaterialized( value: materialized.text, tokens: await materialized.upperBoundTokenCount(tokenizer), }; - } else if (materialized instanceof MaterializedChatMesageImage) { + } else if (materialized instanceof MaterializedChatMessageImage) { return { ...common, name: materialized.id.toString(), id: materialized.id, type: TraceMaterializedNodeType.Image, - value: materialized.imageUrl, + value: materialized.src, tokens: await materialized.upperBoundTokenCount(tokenizer), - children: await Promise.all( - materialized.children.map(c => - serializeMaterialized( - tokenizer, - c, - inChatMessage || materialized instanceof MaterializedChatMesageImage - ) - ) - ), } } else { const containerCommon = { diff --git a/src/base/htmlTracerTypes.ts b/src/base/htmlTracerTypes.ts index 068950f..e29fcea 100644 --- a/src/base/htmlTracerTypes.ts +++ b/src/base/htmlTracerTypes.ts @@ -67,7 +67,6 @@ export interface ITraceMaterializedChatMessageImage extends ITraceMaterializedCo name: string value: string; priority: number; - tokens: number; - children: ITraceMaterializedNode[]; + tokens: number, } diff --git a/src/base/index.ts b/src/base/index.ts index 09ccfd8..13c1d24 100644 --- a/src/base/index.ts +++ b/src/base/index.ts @@ -128,6 +128,10 @@ export async function renderPrompt

( 'countTokens' in tokenizerMetadata ? new AnyTokenizer((text, token) => tokenizerMetadata.countTokens(text, token)) : tokenizerMetadata; + + if (tokenizer instanceof AnyTokenizer && mode !== 'vscode') { + throw new Error('Tokenizer must be an instance of AnyTokenizer when not in vscode mode.'); + } const renderer = new PromptRenderer(endpoint, ctor, props, tokenizer); const renderResult = await renderer.render(progress, token); const { tokenCount, references, metadata } = renderResult; diff --git a/src/base/jsonTypes.ts b/src/base/jsonTypes.ts index b124ec0..956b9f8 100644 --- a/src/base/jsonTypes.ts +++ b/src/base/jsonTypes.ts @@ -55,7 +55,7 @@ export interface ImageChatMessagePieceJSON { children: PromptNodeJSON[]; references: PromptReferenceJSON[] | undefined; props: { - imageUrl: string; + src: string; detail?: "low" | "high"; }; } diff --git a/src/base/materialized.ts b/src/base/materialized.ts index 44a279f..97b7e0b 100644 --- a/src/base/materialized.ts +++ b/src/base/materialized.ts @@ -25,7 +25,7 @@ export type MaterializedNode = | MaterializedContainer | MaterializedChatMessage | MaterializedChatMessageTextChunk - | MaterializedChatMesageImage; + | MaterializedChatMessageImage; export const enum ContainerFlags { /** It's a {@link LegacyPrioritization} instance */ @@ -93,7 +93,7 @@ export class MaterializedContainer implements IMaterializedNode { /** * Finds a node in the tree by ID. */ - findById(nodeId: number): MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage | undefined { + findById(nodeId: number): MaterializedContainer | MaterializedChatMessage | undefined { return findNodeById(nodeId, this); } @@ -173,17 +173,13 @@ export class MaterializedChatMessage implements IMaterializedNode { } /** Gets the text this message contains */ - public get text(): (string | MaterializedChatMesageImage)[] { + public get text(): (string | MaterializedChatMessageImage)[] { return this._text(); } /** Gets whether the message is empty */ public get isEmpty() { - const content = this.text - .filter(element => typeof element === 'string') - .join('').trimEnd(); - - return !this.toolCalls?.length && !this.text.some(element => element instanceof MaterializedChatMesageImage || /\S/.test(content)); + return !this.toolCalls?.length && !this.text.some(element => element instanceof MaterializedChatMessageImage || /\S/.test(element)); } /** @@ -212,7 +208,7 @@ export class MaterializedChatMessage implements IMaterializedNode { /** * Finds a node in the tree by ID. */ - findById(nodeId: number): MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage | undefined { + findById(nodeId: number): MaterializedContainer | MaterializedChatMessage | MaterializedChatMessageImage | undefined { return findNodeById(nodeId, this); } @@ -235,13 +231,10 @@ export class MaterializedChatMessage implements IMaterializedNode { return tokenizer.countMessageTokens({ ...this.toChatMessage(), content: '' }); }); - private readonly _text = once((): (string | MaterializedChatMesageImage)[] => { - let result: (string | MaterializedChatMesageImage)[] = []; + private readonly _text = once((): (string | MaterializedChatMessageImage)[] => { + let result: (string | MaterializedChatMessageImage)[] = []; for (const { text, isTextSibling } of textChunks(this)) { - if (text instanceof MaterializedChatMesageImage) { - if (text.children.length > 0) { - throw new Error('Images cannot have children'); - } + if (text instanceof MaterializedChatMessageImage) { result.push(text); continue; } @@ -270,7 +263,7 @@ export class MaterializedChatMessage implements IMaterializedNode { .filter(element => typeof element === 'string') .join('').trim(); - if (this.text.some(element => element instanceof MaterializedChatMesageImage)) { + if (this.text.some(element => element instanceof MaterializedChatMessageImage)) { if (this.role !== ChatRole.User) { throw new Error('Only User messages can have images'); } @@ -278,10 +271,10 @@ export class MaterializedChatMessage implements IMaterializedNode { let prompts: ChatCompletionContentPart[] = this.text.map(element => { if (typeof element === 'string') { return { type: 'text', text: element }; - } else if (element instanceof MaterializedChatMesageImage) { + } else if (element instanceof MaterializedChatMessageImage) { return { type: 'image_url', - image_url: { url: getEncodedBase64(element.imageUrl), detail: element.detail }, + image_url: { url: getEncodedBase64(element.src), detail: element.detail }, }; } else { throw new Error('Unexpected element type'); @@ -329,15 +322,14 @@ export class MaterializedChatMessage implements IMaterializedNode { } } -export class MaterializedChatMesageImage implements IMaterializedNode { +export class MaterializedChatMessageImage implements IMaterializedNode { constructor( public readonly id: number, - public readonly role: ChatRole, - public readonly imageUrl: string, + // public readonly role: ChatRole, + public readonly src: string, public readonly priority: number, public readonly metadata: PromptMetadata[] = [], public readonly lineBreakBefore: LineBreakBefore, - public readonly children: MaterializedNode[], public readonly detail?: 'low' | 'high', ) { } upperBoundTokenCount(tokenizer: ITokenizer): Promise { @@ -351,50 +343,32 @@ export class MaterializedChatMesageImage implements IMaterializedNode { return 0; }); - removeLowestPriorityChild(): void { - removeLowestPriorityChild(this); - } - - /** - * Replaces a node in the tree with the given one, by its ID. - */ - replaceNode(nodeId: number, withNode: MaterializedNode): MaterializedNode | undefined { - return replaceNode(nodeId, this.children, withNode); - } - - /** - * Finds a node in the tree by ID. - */ - findById(nodeId: number): MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage | undefined { - return findNodeById(nodeId, this); - } - isEmpty: boolean = false; } function isContainerType( node: MaterializedNode ): node is MaterializedContainer | MaterializedChatMessage { - return !(node instanceof MaterializedChatMessageTextChunk); + return !(node instanceof MaterializedChatMessageTextChunk || node instanceof MaterializedChatMessageImage); } function assertContainerOrChatMessage( v: MaterializedNode -): asserts v is MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage { - if (!(v instanceof MaterializedContainer) && !(v instanceof MaterializedChatMessage) && !(v instanceof MaterializedChatMesageImage)) { +): asserts v is MaterializedContainer | MaterializedChatMessage | MaterializedChatMessageImage { + if (!(v instanceof MaterializedContainer) && !(v instanceof MaterializedChatMessage) && !(v instanceof MaterializedChatMessageImage)) { throw new Error(`Cannot have a text node outside a ChatMessage. Text: "${v.text}"`); } } function* textChunks( - node: MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage, + node: MaterializedContainer | MaterializedChatMessage, isTextSibling = false -): Generator<{ text: MaterializedChatMessageTextChunk | MaterializedChatMesageImage; isTextSibling: boolean }> { +): Generator<{ text: MaterializedChatMessageTextChunk | MaterializedChatMessageImage; isTextSibling: boolean }> { for (const child of node.children) { if (child instanceof MaterializedChatMessageTextChunk) { yield { text: child, isTextSibling }; isTextSibling = true; - } else if (child instanceof MaterializedChatMesageImage) { + } else if (child instanceof MaterializedChatMessageImage) { yield { text: child, isTextSibling: false }; } else { if (child) @@ -408,15 +382,15 @@ function removeLowestPriorityLegacy(root: MaterializedNode) { let lowest: | undefined | { - chain: (MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage)[]; - node: MaterializedChatMessageTextChunk; + chain: (MaterializedContainer | MaterializedChatMessage)[]; + node: MaterializedChatMessageTextChunk | MaterializedChatMessageImage; }; function findLowestInTree( node: MaterializedNode, - chain: (MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage)[] + chain: (MaterializedContainer | MaterializedChatMessage)[] ) { - if (node instanceof MaterializedChatMessageTextChunk) { + if (node instanceof MaterializedChatMessageTextChunk || node instanceof MaterializedChatMessageImage) { if (!lowest || node.priority < lowest.node.priority) { lowest = { chain: chain.slice(), node }; } @@ -458,11 +432,11 @@ function removeLowestPriorityLegacy(root: MaterializedNode) { } } -function removeLowestPriorityChild(node: MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage) { +function removeLowestPriorityChild(node: MaterializedContainer | MaterializedChatMessage) { let lowest: | undefined | { - chain: (MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage)[]; + chain: (MaterializedContainer | MaterializedChatMessage)[]; index: number; value: MaterializedNode; lowestNested?: number; @@ -498,7 +472,7 @@ function removeLowestPriorityChild(node: MaterializedContainer | MaterializedCha const containingList = lowest.chain[lowest.chain.length - 1].children; if ( - lowest.value instanceof MaterializedChatMessageTextChunk || + lowest.value instanceof MaterializedChatMessageTextChunk || lowest.value instanceof MaterializedChatMessageImage || (lowest.value instanceof MaterializedContainer && lowest.value.has(ContainerFlags.IsChunk)) || (isContainerType(lowest.value) && !lowest.value.children.length) ) { @@ -567,8 +541,8 @@ function replaceNode( function findNodeById( nodeId: number, - container: MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage -): MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage | undefined { + container: MaterializedContainer | MaterializedChatMessage +): MaterializedContainer | MaterializedChatMessage | undefined { if (container.id === nodeId) { return container; } diff --git a/src/base/promptElements.tsx b/src/base/promptElements.tsx index 76e14eb..f7eca78 100644 --- a/src/base/promptElements.tsx +++ b/src/base/promptElements.tsx @@ -134,9 +134,8 @@ export interface TextChunkProps extends BasePromptElementProps { } export interface ImageProps extends BasePromptElementProps { - imageUrl: string; + src: string; detail?: 'low' | 'high'; - role?: ChatRole.User; } /** @@ -229,7 +228,6 @@ async function getTextContentBelowBudget( export class BaseImageMessage extends BaseChatMessage { constructor(props: ImageProps) { - props.role = ChatRole.User; super(props); } } diff --git a/src/base/promptRenderer.ts b/src/base/promptRenderer.ts index 1b65d56..cbe50c5 100644 --- a/src/base/promptRenderer.ts +++ b/src/base/promptRenderer.ts @@ -8,7 +8,7 @@ import { PromptNodeType } from './jsonTypes'; import { ContainerFlags, LineBreakBefore, - MaterializedChatMesageImage, + MaterializedChatMessageImage, MaterializedChatMessage, MaterializedChatMessageTextChunk, MaterializedContainer, @@ -103,7 +103,7 @@ export class PromptRenderer

{ private readonly _ctor: PromptElementCtor, private readonly _props: P, private readonly _tokenizer: ITokenizer - ) {} + ) { } public getIgnoredFiles(): URI[] { return Array.from(new Set(this._ignoredFiles)); @@ -475,7 +475,7 @@ export class PromptRenderer

{ /** Grows all Expandable elements, returns if any changes were made. */ private async _grow( - tree: MaterializedContainer | MaterializedChatMessage | MaterializedChatMesageImage, + tree: MaterializedContainer | MaterializedChatMessage, tokensUsed: number, tokenBudget: number, token: CancellationToken | undefined @@ -538,7 +538,7 @@ export class PromptRenderer

{ progress: Progress | undefined, token: CancellationToken | undefined ) { - if (element.ctor === TextChunk || element.ctor === BaseImageMessage) { + if (element.ctor === TextChunk) { this._handleExtrinsicTextChunkChildren(element.node, element.node, element.props, pieces); return; } @@ -799,7 +799,7 @@ class IntrinsicPromptPiece { public readonly name: string, public readonly props: JSX.IntrinsicElements[K], public readonly children: PromptPieceChild[] - ) {} + ) { } } class ExtrinsicPromptPiece

{ @@ -815,7 +815,7 @@ class ExtrinsicPromptPiece

{ class LiteralPromptPiece { public readonly kind = 'literal'; - constructor(public readonly value: string, public readonly priority?: number) {} + constructor(public readonly value: string, public readonly priority?: number) { } } type ProcessedPromptPiece = @@ -833,7 +833,7 @@ type LeafPromptNode = PromptText; class PromptSizingContext { private _consumed = 0; - constructor(public readonly tokenBudget: number, public readonly endpoint: IChatEndpointInfo) {} + constructor(public readonly tokenBudget: number, public readonly endpoint: IChatEndpointInfo) { } public get consumed() { return this._consumed > this.tokenBudget ? this.tokenBudget : this._consumed; @@ -896,7 +896,7 @@ class PromptTreeElement { public readonly parent: PromptTreeElement | null = null, public readonly childIndex: number, public readonly id = PromptTreeElement._nextId++ - ) {} + ) { } public setObj(obj: PromptElement) { this._obj = obj; @@ -968,7 +968,7 @@ class PromptTreeElement { ...json, ctor: JSONT.PieceCtorKind.ImageChatMessage, props: { - imageUrl: this._obj.props.imageUrl, + src: this._obj.props.src, detail: this._obj.props.detail, }, } @@ -977,27 +977,26 @@ class PromptTreeElement { return json; } - public materialize(): MaterializedChatMessage | MaterializedContainer | MaterializedChatMesageImage { + public materialize(): MaterializedChatMessage | MaterializedContainer | MaterializedChatMessageImage { this._children.sort((a, b) => a.childIndex - b.childIndex); + + if (this._obj instanceof BaseImageMessage) { + // #region materialize baseimage + const parent = new MaterializedChatMessageImage( + 1, + this._obj.props.src, + this._obj.props.priority ?? Number.MAX_SAFE_INTEGER, + this._metadata, + LineBreakBefore.None, + this._obj.props.detail ?? undefined) + return parent; + } + if (this._obj instanceof BaseChatMessage) { if (!this._obj.props.role) { throw new Error(`Invalid ChatMessage!`); } - if (this._obj instanceof BaseImageMessage) { - // #region materialize baseimage - const parent = new MaterializedChatMesageImage( - 1, - ChatRole.User, - this._obj.props.imageUrl, - this._obj.props.priority ?? Number.MAX_SAFE_INTEGER, - this._metadata, - LineBreakBefore.None, - [], - this._obj.props.detail ?? undefined) - return parent; - } - const parent = new MaterializedChatMessage( this.id, this._obj.props.role, @@ -1065,7 +1064,7 @@ class PromptText { public readonly priority?: number, public readonly metadata?: PromptMetadata[], public readonly lineBreakBefore = false - ) {} + ) { } public collectLeafs(result: LeafPromptNode[]) { result.push(this); @@ -1075,8 +1074,8 @@ class PromptText { const lineBreak = this.lineBreakBefore ? LineBreakBefore.Always : this.childIndex === 0 - ? LineBreakBefore.IfNotTextSibling - : LineBreakBefore.None; + ? LineBreakBefore.IfNotTextSibling + : LineBreakBefore.None; return new MaterializedChatMessageTextChunk( this.text, this.priority ?? Number.MAX_SAFE_INTEGER, @@ -1111,7 +1110,7 @@ function isDefined(x: T | undefined): x is T { return x !== undefined; } -class InternalMetadata extends PromptMetadata {} +class InternalMetadata extends PromptMetadata { } class ReferenceMetadata extends InternalMetadata { constructor(public readonly reference: PromptReference) { diff --git a/src/base/test/renderer.test.tsx b/src/base/test/renderer.test.tsx index 2eef903..89904d4 100644 --- a/src/base/test/renderer.test.tsx +++ b/src/base/test/renderer.test.tsx @@ -2131,7 +2131,7 @@ suite('PromptRenderer', () => { render() { return ( - + ); } @@ -2157,7 +2157,7 @@ suite('PromptRenderer', () => { render() { return ( - + ); } @@ -2183,7 +2183,7 @@ suite('PromptRenderer', () => { render() { return ( - + ); } @@ -2209,7 +2209,7 @@ suite('PromptRenderer', () => { render() { return ( - + ); } @@ -2235,7 +2235,7 @@ suite('PromptRenderer', () => { render() { return ( - + Child in Base Image Message @@ -2264,7 +2264,7 @@ suite('PromptRenderer', () => { return ( some text in a text chunk - + {/* some text in a text chunk */} ); @@ -2292,7 +2292,7 @@ suite('PromptRenderer', () => { render() { return ( - + some text in a text chunk ); diff --git a/src/base/tokenizer/tokenizer.ts b/src/base/tokenizer/tokenizer.ts index 67cda30..2b64200 100644 --- a/src/base/tokenizer/tokenizer.ts +++ b/src/base/tokenizer/tokenizer.ts @@ -26,7 +26,7 @@ export class AnyTokenizer implements ITokenizer { text: string | LanguageModelChatMessage, token?: CancellationToken ) => Thenable - ) {} + ) { } async tokenLength(text: string, token?: CancellationToken): Promise { return this.countTokens(text, token); @@ -36,11 +36,18 @@ export class AnyTokenizer implements ITokenizer { const vscode = await import('vscode'); return this.countTokens({ role: this.toChatRole(message.role), - content: [new vscode.LanguageModelTextPart(message.content as string)], + content: [new vscode.LanguageModelTextPart(this.extractText(message))], name: 'name' in message ? message.name : undefined, }); } + extractText(message: ChatMessage): string { + if (message.content instanceof Array) { + return message.content.map(c => 'text' in c ? c.text : '').join(''); + } + return message.content; + } + private toChatRole(role: ChatRole) { switch (role) { case ChatRole.User: