Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 4 additions & 31 deletions entrypoints/inject-utils/document-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,29 +4,15 @@ import { SerializedElementInfo } from '@/types/tab'
import { nonNullable } from '@/utils/array'
import { waitUntilDocumentMaybeLoaded } from '@/utils/document'
import Logger from '@/utils/logger'
import { lazyInitialize } from '@/utils/memo'
import { sleep } from '@/utils/sleep'
import { serializeElement } from '@/utils/tab'

import { checkNodeType, getElementAttributes } from './helpers'
import { checkNodeType, cloneDocument, getElementAttributes } from './helpers'
import { highlightElement, removeHighlights } from './highlight'
import { PruningContentFilter } from './pruning-content-filter'

const logger = Logger.child('document-parser')

/**
 * Factory for the Trusted Types policy used when producing HTML strings.
 * Wrapped in `lazyInitialize` (presumably evaluated once on first call — confirm in '@/utils/memo').
 * Resolves to `undefined` when `window.trustedTypes` is falsy or when creating the policy throws.
 */
const htmlTrustedPolicy = lazyInitialize<{ createHTML: (str: string) => string } | undefined>(() => {
  // @ts-expect-error - no type support for trusted types yet
  const trustedTypes = window.trustedTypes
  if (!trustedTypes) {
    return undefined
  }
  try {
    // Pass-through policy: createHTML hands the input string back unchanged.
    return trustedTypes.createPolicy('NativeMindSafeHTML', { createHTML: (html: string) => html })
  }
  catch (err) {
    logger.error('Failed to create trusted types policy, falling back to unsafe HTML', { error: err })
    return undefined
  }
})

// Attribute name for a parser-internal element id (usage is outside this view — confirm at call sites).
const INTERNAL_ID_DATA_KEY = 'data-nativemind-parser-internal-id'
// Tag names to ignore — presumably skipped while parsing the page; verify where this list is consumed.
const IGNORE_TAGS: (keyof HTMLElementTagNameMap)[] = ['head', 'nav', 'style', 'link', 'meta', 'script', 'noscript', 'canvas', 'iframe', 'object', 'embed', 'footer', 'dialog']
// Class names to ignore — presumably matched against element classes; verify where this list is consumed.
const IGNORE_CLASSES: string[] = ['hidden', 'ignore', 'skip-link', 'sidenav', 'footer', 'blog-footer-bottom']
Expand Down Expand Up @@ -105,18 +91,6 @@ export function markInternalIdForInteractiveElements(elements: Element[], intern
return { cleanup, elements: items }
}

/**
 * Produces a copy of `doc`.
 *
 * When a trusted types policy is available, the document element's `outerHTML` is passed through
 * the policy and re-parsed with `DOMParser` as 'text/html'. Otherwise the document is deep-cloned
 * with `cloneNode(true)`.
 */
function cloneDocument(doc: Document) {
  const policy = htmlTrustedPolicy()
  if (!policy) {
    return doc.cloneNode(true) as Document
  }
  const markup = policy.createHTML(doc.documentElement.outerHTML)
  return new DOMParser().parseFromString(markup, 'text/html')
}

export async function getAccessibleMarkdown(options: GetAccessibleDomTreeOptions = {}) {
logger.debug('Getting accessible markdown', { options })

Expand All @@ -125,14 +99,13 @@ export async function getAccessibleMarkdown(options: GetAccessibleDomTreeOptions

const { elements: interactiveElements } = markInternalIdForInteractiveElements(getInteractiveElements(), options.internalIdPrefix ?? '')

const clonedDocument = cloneDocument(document)

let filteredDocument = clonedDocument
let filteredDocument: Document
let removedElements: Element[] = []
if (!noFilter) {
({ document: filteredDocument, removedElements } = new PruningContentFilter(4, 'fixed', contentFilterThreshold ?? 0.28).filterContent(clonedDocument))
({ document: filteredDocument, removedElements } = new PruningContentFilter(4, 'fixed', contentFilterThreshold ?? 0.28).filterContent(document))
}
else {
filteredDocument = cloneDocument(document)
logger.debug('Skipping content filtering in last attempt')
}

Expand Down
28 changes: 28 additions & 0 deletions entrypoints/inject-utils/helpers.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,35 @@
import logger from '@/utils/logger'
import { lazyInitialize } from '@/utils/memo'

/**
 * Collects the attributes of `el` into a plain object keyed by attribute name.
 *
 * @param el - Element whose `attributes` collection is read.
 * @returns An object mapping each attribute name to its value.
 */
export function getElementAttributes(el: Element): Record<string, string | undefined> {
  const pairs: [string, string][] = []
  for (const { name, value } of Array.from(el.attributes)) {
    pairs.push([name, value])
  }
  return Object.fromEntries(pairs)
}

/**
 * Type guard reporting whether `node` is an instance of the constructor `type`.
 *
 * @param type - One of the `Element`, `Text` or `ShadowRoot` constructors (or a subtype of them).
 * @param node - Node to test.
 * @returns `true` when `node instanceof type` holds, narrowing `node` to that instance type.
 */
export function checkNodeType<E extends (typeof Element | typeof Text | typeof ShadowRoot)>(type: E, node: Node): node is InstanceType<E> {
  const isInstance = node instanceof type
  return isInstance
}

/**
 * Factory for the Trusted Types policy used when producing HTML strings.
 * Wrapped in `lazyInitialize` (presumably evaluated once on first call — confirm in '@/utils/memo').
 * Resolves to `undefined` when `window.trustedTypes` is falsy or when creating the policy throws.
 */
const htmlTrustedPolicy = lazyInitialize<{ createHTML: (str: string) => string } | undefined>(() => {
  // @ts-expect-error - no type support for trusted types yet
  const trustedTypes = window.trustedTypes
  if (!trustedTypes) {
    return undefined
  }
  try {
    // Pass-through policy: createHTML hands the input string back unchanged.
    return trustedTypes.createPolicy('NativeMindSafeHTML', { createHTML: (html: string) => html })
  }
  catch (err) {
    logger.error('Failed to create trusted types policy, falling back to unsafe HTML', { error: err })
    return undefined
  }
})

/**
 * Produces a copy of `doc`.
 *
 * When a trusted types policy is available, the document element's `outerHTML` is passed through
 * the policy and re-parsed with `DOMParser` as 'text/html'. Otherwise the document is deep-cloned
 * with `cloneNode(true)`.
 */
export function cloneDocument(doc: Document) {
  const policy = htmlTrustedPolicy()
  if (!policy) {
    return doc.cloneNode(true) as Document
  }
  const markup = policy.createHTML(doc.documentElement.outerHTML)
  return new DOMParser().parseFromString(markup, 'text/html')
}
48 changes: 42 additions & 6 deletions entrypoints/inject-utils/pruning-content-filter.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import { cloneDocument } from './helpers'

interface MetricConfig {
textDensity: boolean
linkDensity: boolean
Expand Down Expand Up @@ -58,6 +60,10 @@ export class PruningContentFilter {
private includedTags: string[]
private negativePatterns: RegExp

// Names of the data attributes this filter writes onto DOM elements.
private static readonly dataKeys = {
// Set to 'true' by markIgnoredElement on elements that shouldIgnoreElement flags.
IGNORE_ELEMENT: 'data-nm-parser-ignore',
}

/**
* Initializes the PruningContentFilter class, if not provided, falls back to page metadata.
*
Expand Down Expand Up @@ -127,7 +133,7 @@ export class PruningContentFilter {

// Common excluded tags
this.excludedTags = [
'script', 'style', 'nav', 'header', 'footer', 'aside', 'menu',
'script', 'style', 'header', 'footer', 'aside', 'menu',
'noscript', 'meta', 'link', 'title', 'head',
'.hidden', '.ignore', '.skip-link', '.sidenav', '.footer', '.blog-footer-bottom',
'#side_nav', '#sidenav', '#blog-calendar', '#footer', '#page_end_html',
Expand All @@ -139,6 +145,32 @@ export class PruningContentFilter {
this.negativePatterns = /\b(ad|advertisement|banner|sidebar|navigation|menu|footer|header|comment|popup|modal|overlay)\b/i
}

/**
 * Walks every element under `doc.body` and sets the ignore attribute to 'true' on each one
 * for which `shouldIgnoreElement` returns true.
 *
 * @param doc - Document whose body is walked; matching elements are mutated in place.
 * @returns `cleanupMark`, which strips the ignore attribute from every element under
 * `doc.body` that carries it, and `markedSelectors`, a set holding the attribute selector
 * when at least one element was marked (empty otherwise).
 */
markIgnoredElement(doc: Document) {
  const ignoreAttr = PruningContentFilter.dataKeys.IGNORE_ELEMENT
  const ignoreSelector = `[${ignoreAttr}]`
  const markedSelectors = new Set<string>()
  // Build the walker from the document being walked instead of the global `document`,
  // so the method only depends on the `doc` it was given.
  const treeWalker = doc.createTreeWalker(doc.body, NodeFilter.SHOW_ELEMENT)
  while (treeWalker.nextNode()) {
    const node = treeWalker.currentNode as HTMLElement
    if (this.shouldIgnoreElement(node)) {
      node.setAttribute(ignoreAttr, 'true')
      markedSelectors.add(ignoreSelector)
    }
  }

  const cleanupMark = () => {
    // Query at cleanup time so every element still carrying the mark is cleared.
    doc.body.querySelectorAll(ignoreSelector).forEach((el) => el.removeAttribute(ignoreAttr))
  }
  return { cleanupMark, markedSelectors }
}

/**
 * Decides whether `element` should be marked as ignored.
 *
 * An element is ignored when both its `offsetHeight` and `offsetWidth` are 0 and its computed
 * `position` is not 'absolute'.
 *
 * @param element - Element to inspect; it must be attached to a rendered document for the
 * offset values to be meaningful.
 * @returns `true` when the element should be ignored, `false` otherwise.
 */
shouldIgnoreElement(element: HTMLElement) {
  // Cheap size check first: the computed style is only needed for zero-sized elements.
  if (element.offsetHeight !== 0 || element.offsetWidth !== 0) {
    return false
  }
  return getComputedStyle(element).position !== 'absolute'
}

/**
* Implements content filtering using pruning algorithm with dynamic threshold.
*
Expand All @@ -150,25 +182,29 @@ export class PruningContentFilter {
* @returns Array of filtered HTML content blocks.
*/
filterContent(doc: Document) {
const body = doc.body
// mark on original document to keep the layout info
const { cleanupMark, markedSelectors } = this.markIgnoredElement(doc)
const clonedDoc = cloneDocument(doc)
cleanupMark()
const body = clonedDoc.body

const elementRemover = this.createElementRemover()
this.removeUnwantedTags(body, elementRemover)
this.removeUnwantedTags(body, [...this.excludedTags, ...markedSelectors], elementRemover)

// Prune tree starting from body
this.pruneTree(body, elementRemover)

return {
document: doc,
document: clonedDoc,
removedElements: elementRemover.getRemoved(),
}
}

/**
* Removes unwanted tags from the element tree
*/
private removeUnwantedTags(element: Element, remover: ElementRemover): void {
const elements = element.querySelectorAll(this.excludedTags.join(', '))
private removeUnwantedTags(element: Element, selectors: string[], remover: ElementRemover): void {
const elements = element.querySelectorAll(selectors.join(', '))
elements.forEach((el) => remover.remove(el))
}

Expand Down
2 changes: 1 addition & 1 deletion entrypoints/settings/components/DebugSettings/index.vue
Original file line number Diff line number Diff line change
Expand Up @@ -626,7 +626,7 @@ import Selector from '@/components/Selector.vue'
import Switch from '@/components/Switch.vue'
import Button from '@/components/ui/Button.vue'
import UILanguageSelector from '@/components/UILanguageSelector.vue'
import { BrowserSession } from '@/entrypoints/sidepanel/utils/chat/tool-calls/browser-use/utils'
import { BrowserSession } from '@/entrypoints/sidepanel/utils/chat/tool-calls/utils/browser-use'
import { SettingsScrollTarget } from '@/types/scroll-targets'
import { ADVANCED_MODELS_FOR_AGENT, INVALID_URLS } from '@/utils/constants'
import { formatSize } from '@/utils/formatter'
Expand Down
17 changes: 10 additions & 7 deletions utils/llm/tools/prompt-based/helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,9 @@ export class PromptBasedTool<Name extends string, T extends PromptBasedToolParam
}

parseFromText(text: string): { params: InferredParams<T>, lastIndex: number, errors: string[] } | null {
const normalized = text.replace(new RegExp(`\`\`\`${this.toolName}(.*?)\`\`\``, 's'), `<${this.toolName}>$1</${this.toolName}>`)
const normalized = text
.replace(new RegExp(`\`\`\`${this.toolName}(.*?)\`\`\``, 's'), `<${this.toolName}>$1</${this.toolName}>`)
.replace(new RegExp(`<.{3,12}\\.${this.toolName}>(.*?)</.{3,12}\\.${this.toolName}>`, 's'), `<${this.toolName}>$1</${this.toolName}>`)
const regex = new RegExp(`<${this.toolName}>(.*?)</${this.toolName}>`, 's')
const match = normalized.match(regex)
// currently only parse the first match
Expand Down Expand Up @@ -213,12 +215,13 @@ export class PromptBasedTool<Name extends string, T extends PromptBasedToolParam
static createToolCallsStreamParser<Tools extends PromptBasedToolType[]>(tools: Tools) {
type ToolWithParams = ExtractToolWithParams<Tools[number]> & { tagText: string }
let accText = ''
const pairs = tools.map((tool) => ([
{ start: `<${tool.toolName}>`, end: `</${tool.toolName}>` },
{ start: `\`\`\`${tool.toolName}`, end: '```' },
{ start: `<tool_calls>\n<${tool.toolName}>`, end: `</tool_calls>` },
{ start: `<tool_calls><${tool.toolName}>`, end: `</tool_calls>` },
])).flat()
const pairs = [
...tools.map((tool) => ([
{ start: `<${tool.toolName}>`, end: `</${tool.toolName}>` },
{ start: `\`\`\`${tool.toolName}`, end: '```' },
])).flat(),
{ start: `<tool_calls>`, end: `</tool_calls>` },
]
const toolCallsWalkParser = new TagWalker(pairs)
return (text: string) => {
const errors: string[] = []
Expand Down
85 changes: 85 additions & 0 deletions utils/llm/tools/prompt-based/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,23 @@ another text
expect(text).toBe('start\n\n\nanother text\n')
})

it('should parse the tool calls', async () => {
  const response = `Test \n\n<tool_calls>\n<click>\n<element_id>85</element_id>\n</click>\n</tool_calls>`

  const extractor = PromptBasedTool.createToolCallsStreamParser(promptBasedTools)

  // Stream the response one character at a time and gather every tool call the parser emits.
  const calls = []
  for (const char of response) {
    calls.push(...extractor(char).toolCalls)
  }

  expect(calls.length).toBe(1)
  expect(calls[0].params).toEqual({ element_id: '85' })
})

it('should ignore the unrelated tags', async () => {
const tagWalker = new TagWalker([{ start: '<view_tab>', end: '</view_tab>' }])
const input = `<think>this is a test</think>`
Expand Down Expand Up @@ -310,6 +327,74 @@ test block response
<fetch_page>
<url>https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html</url>
</fetch_page>
</tool_calls>`,
tool: promptBasedTools[4],
params: {
url: 'https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html',
},
},
])
})

it('should handle weird response from gpt-oss', async () => {
  const response = `>

<tool_calls>
<browser.fetch_page>
<url>https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html</url>
</browser.fetch_page>
</tool_calls>`

  const extractor = PromptBasedTool.createToolCallsStreamParser(promptBasedTools)

  // Stream the response one character at a time and gather every tool call the parser emits.
  const calls = []
  for (const char of response) {
    calls.push(...extractor(char).toolCalls)
  }

  // The namespaced tag (`browser.fetch_page`) is expected to resolve to the fetch_page tool.
  expect(calls).toEqual([
    {
      tagText: `<tool_calls>
<browser.fetch_page>
<url>https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html</url>
</browser.fetch_page>
</tool_calls>`,
      tool: promptBasedTools[4],
      params: {
        url: 'https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html',
      },
    },
  ])
})

it('should handle weird response from gpt-oss (2)', async () => {
const response = `>

<tool_calls>
<browser_use.fetch_page>
<url>https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html</url>
</browser_use.fetch_page>
</tool_calls>`

const extractor = PromptBasedTool.createToolCallsStreamParser(promptBasedTools)

const calls = []
for (const char of response) {
const { toolCalls: currentCalls } = extractor(char)
for (const call of currentCalls) {
calls.push(call)
}
}

expect(calls).toEqual([
{
tagText: `<tool_calls>
<browser_use.fetch_page>
<url>https://weather.sz.gov.cn/qixiangfuwu/yubaofuwu/index.html</url>
</browser_use.fetch_page>
</tool_calls>`,
tool: promptBasedTools[4],
params: {
Expand Down
Loading