Skip to content

Commit

Permalink
fix(text selector): revert quoted match to match by text nodes only (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
dgozman authored Mar 3, 2021
1 parent 986286a commit d87522f
Show file tree
Hide file tree
Showing 5 changed files with 72 additions and 60 deletions.
6 changes: 3 additions & 3 deletions docs/src/selectors.md
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ page.click("text=Log in")

Text selector has a few variations:

- `text=Log in` - default matching is case-insensitive and searches for a substring. For example `text=Log` matches `<button>Log in</button>`.
- `text=Log in` - default matching is case-insensitive and searches for a substring. For example, `text=Log` matches `<button>Log in</button>`.

```js
await page.click('text=Log in');
Expand All @@ -184,7 +184,7 @@ Text selector has a few variations:
page.click("text=Log in")
```

- `text="Log in"` - text body can be escaped with single or double quotes for case-sensitive match. For example `text="Log"` does not match `<button>log in</button>` but instead matches `<span>Log in</span>`.
- `text="Log in"` - text body can be escaped with single or double quotes to search for a text node with exact content. For example, `text="Log"` does not match `<button>Log in</button>` because `<button>` contains a single text node `"Log in"` that is not equal to `"Log"`. However, `text="Log"` matches `<button>Log<span>in</span></button>`, because `<button>` contains a text node `"Log"`.

Quoted body follows the usual escaping rules, e.g. use `\"` to escape double quote in a double-quoted string: `text="foo\"bar"`.

Expand Down Expand Up @@ -276,7 +276,7 @@ Text selector has a few variations:
page.click("#nav-bar :text('Home')")
```

- `#nav-bar :text-is("Home")` - the `:text-is()` pseudo-class can be used inside a [css] selector, for case-sensitive match. This example is equivalent to `text="Home"` (note quotes), but inside the `#nav-bar` element.
- `#nav-bar :text-is("Home")` - the `:text-is()` pseudo-class can be used inside a [css] selector, for strict text node match. This example is equivalent to `text="Home"` (note quotes), but inside the `#nav-bar` element.

* `#nav-bar :text-matches("reg?ex", "i")` - the `:text-matches()` pseudo-class can be used inside a [css] selector, for regex-based match. This example is equivalent to `text=/reg?ex/i`, but inside the `#nav-bar` element.

Expand Down
27 changes: 9 additions & 18 deletions src/server/injected/injectedScript.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import { SelectorEngine, SelectorRoot } from './selectorEngine';
import { XPathEngine } from './xpathSelectorEngine';
import { ParsedSelector, ParsedSelectorPart, parseSelector } from '../common/selectorParser';
import { FatalDOMError } from '../common/domErrors';
import { SelectorEvaluatorImpl, isVisible, parentElementOrShadowHost, elementMatchesText } from './selectorEvaluator';
import { SelectorEvaluatorImpl, isVisible, parentElementOrShadowHost, elementMatchesText, TextMatcher, createRegexTextMatcher, createStrictTextMatcher, createLaxTextMatcher } from './selectorEvaluator';
import { CSSComplexSelectorList } from '../common/cssParser';

type Predicate<T> = (progress: InjectedScriptProgress, continuePolling: symbol) => T | symbol;
Expand Down Expand Up @@ -164,18 +164,18 @@ export class InjectedScript {

private _createTextEngine(shadow: boolean): SelectorEngine {
const queryList = (root: SelectorRoot, selector: string, single: boolean): Element[] => {
const { matcher, strict } = createTextMatcher(selector);
const { matcher, kind } = createTextMatcher(selector);
const result: Element[] = [];
let lastDidNotMatchSelf: Element | null = null;

const checkElement = (element: Element) => {
// TODO: replace contains() with something shadow-dom-aware?
if (!strict && lastDidNotMatchSelf && lastDidNotMatchSelf.contains(element))
if (kind === 'lax' && lastDidNotMatchSelf && lastDidNotMatchSelf.contains(element))
return false;
const matches = elementMatchesText(this._evaluator, element, matcher);
if (matches === 'none')
lastDidNotMatchSelf = element;
if (matches === 'self')
if (matches === 'self' || (matches === 'selfAndChildren' && kind === 'strict'))
result.push(element);
return single && result.length > 0;
};
Expand Down Expand Up @@ -759,12 +759,11 @@ function unescape(s: string): string {
return r.join('');
}

type Matcher = (text: string) => boolean;
function createTextMatcher(selector: string): { matcher: Matcher, strict: boolean } {
function createTextMatcher(selector: string): { matcher: TextMatcher, kind: 'regex' | 'strict' | 'lax' } {
if (selector[0] === '/' && selector.lastIndexOf('/') > 0) {
const lastSlash = selector.lastIndexOf('/');
const re = new RegExp(selector.substring(1, lastSlash), selector.substring(lastSlash + 1));
return { matcher: text => re.test(text), strict: true };
const matcher: TextMatcher = createRegexTextMatcher(selector.substring(1, lastSlash), selector.substring(lastSlash + 1));
return { matcher, kind: 'regex' };
}
let strict = false;
if (selector.length > 1 && selector[0] === '"' && selector[selector.length - 1] === '"') {
Expand All @@ -775,16 +774,8 @@ function createTextMatcher(selector: string): { matcher: Matcher, strict: boolea
selector = unescape(selector.substring(1, selector.length - 1));
strict = true;
}
selector = selector.trim().replace(/\s+/g, ' ');
if (!strict)
selector = selector.toLowerCase();
const matcher = (text: string) => {
text = text.trim().replace(/\s+/g, ' ');
if (!strict)
text = text.toLowerCase();
return text.includes(selector);
};
return { matcher, strict };
const matcher = strict ? createStrictTextMatcher(selector) : createLaxTextMatcher(selector);
return { matcher, kind: strict ? 'strict' : 'lax' };
}

export default InjectedScript;
71 changes: 46 additions & 25 deletions src/server/injected/selectorEvaluator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ export class SelectorEvaluatorImpl implements SelectorEvaluator {
private _cacheCallMatches: QueryCache = new Map();
private _cacheCallQuery: QueryCache = new Map();
private _cacheQuerySimple: QueryCache = new Map();
_cacheText = new Map<Element | ShadowRoot, string>();
_cacheText = new Map<Element | ShadowRoot, ElementText>();
private _scoreMap: Map<Element, number> | undefined;
private _retainCacheCounter = 0;

Expand Down Expand Up @@ -427,7 +427,7 @@ const textEngine: SelectorEngine = {
matches(element: Element, args: (string | number | Selector)[], context: QueryContext, evaluator: SelectorEvaluator): boolean {
if (args.length !== 1 || typeof args[0] !== 'string')
throw new Error(`"text" engine expects a single string`);
const matcher = textMatcher(args[0], true);
const matcher = createLaxTextMatcher(args[0]);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher) === 'self';
},
};
Expand All @@ -436,17 +436,16 @@ const textIsEngine: SelectorEngine = {
matches(element: Element, args: (string | number | Selector)[], context: QueryContext, evaluator: SelectorEvaluator): boolean {
if (args.length !== 1 || typeof args[0] !== 'string')
throw new Error(`"text-is" engine expects a single string`);
const matcher = textMatcher(args[0], false);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher) === 'self';
const matcher = createStrictTextMatcher(args[0]);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher) !== 'none';
},
};

const textMatchesEngine: SelectorEngine = {
matches(element: Element, args: (string | number | Selector)[], context: QueryContext, evaluator: SelectorEvaluator): boolean {
if (args.length === 0 || typeof args[0] !== 'string' || args.length > 2 || (args.length === 2 && typeof args[1] !== 'string'))
throw new Error(`"text-matches" engine expects a regexp body and optional regexp flags`);
const re = new RegExp(args[0], args.length === 2 ? args[1] : undefined);
const matcher = (s: string) => re.test(s);
const matcher = createRegexTextMatcher(args[0], args.length === 2 ? args[1] : undefined);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher) === 'self';
},
};
Expand All @@ -457,51 +456,73 @@ const hasTextEngine: SelectorEngine = {
throw new Error(`"has-text" engine expects a single string`);
if (shouldSkipForTextMatching(element))
return false;
const matcher = textMatcher(args[0], true);
const matcher = createLaxTextMatcher(args[0]);
return matcher(elementText(evaluator as SelectorEvaluatorImpl, element));
},
};

function textMatcher(text: string, caseInsensitive: boolean): (s: string) => boolean {
text = text.trim().replace(/\s+/g, ' ');
if (caseInsensitive)
text = text.toLowerCase();
return (s: string) => {
s = s.trim().replace(/\s+/g, ' ');
if (caseInsensitive)
s = s.toLowerCase();
export function createLaxTextMatcher(text: string): TextMatcher {
text = text.trim().replace(/\s+/g, ' ').toLowerCase();
return (elementText: ElementText) => {
const s = elementText.full.trim().replace(/\s+/g, ' ').toLowerCase();
return s.includes(text);
};
}

export function createStrictTextMatcher(text: string): TextMatcher {
text = text.trim().replace(/\s+/g, ' ');
return (elementText: ElementText) => {
return elementText.immediate.some(s => s.trim().replace(/\s+/g, ' ') === text);
};
}

export function createRegexTextMatcher(source: string, flags?: string): TextMatcher {
const re = new RegExp(source, flags);
return (elementText: ElementText) => {
return re.test(elementText.full);
};
}

function shouldSkipForTextMatching(element: Element | ShadowRoot) {
return element.nodeName === 'SCRIPT' || element.nodeName === 'STYLE' || document.head && document.head.contains(element);
}

export function elementText(evaluator: SelectorEvaluatorImpl, root: Element | ShadowRoot): string {
export type ElementText = { full: string, immediate: string[] };
export type TextMatcher = (text: ElementText) => boolean;

export function elementText(evaluator: SelectorEvaluatorImpl, root: Element | ShadowRoot): ElementText {
let value = evaluator._cacheText.get(root);
if (value === undefined) {
value = '';
value = { full: '', immediate: [] };
if (!shouldSkipForTextMatching(root)) {
let currentImmediate = '';
if ((root instanceof HTMLInputElement) && (root.type === 'submit' || root.type === 'button')) {
value = root.value;
value = { full: root.value, immediate: [root.value] };
} else {
for (let child = root.firstChild; child; child = child.nextSibling) {
if (child.nodeType === Node.ELEMENT_NODE)
value += elementText(evaluator, child as Element);
else if (child.nodeType === Node.TEXT_NODE)
value += child.nodeValue || '';
if (child.nodeType === Node.TEXT_NODE) {
value.full += child.nodeValue || '';
currentImmediate += child.nodeValue || '';
} else {
if (currentImmediate)
value.immediate.push(currentImmediate);
currentImmediate = '';
if (child.nodeType === Node.ELEMENT_NODE)
value.full += elementText(evaluator, child as Element).full;
}
}
if (currentImmediate)
value.immediate.push(currentImmediate);
if ((root as Element).shadowRoot)
value += elementText(evaluator, (root as Element).shadowRoot!);
value.full += elementText(evaluator, (root as Element).shadowRoot!).full;
}
}
evaluator._cacheText.set(root, value);
}
return value;
}

export function elementMatchesText(evaluator: SelectorEvaluatorImpl, element: Element, matcher: (s: string) => boolean): 'none' | 'self' | 'selfAndChildren' {
export function elementMatchesText(evaluator: SelectorEvaluatorImpl, element: Element, matcher: TextMatcher): 'none' | 'self' | 'selfAndChildren' {
if (shouldSkipForTextMatching(element))
return 'none';
if (!matcher(elementText(evaluator, element)))
Expand All @@ -510,7 +531,7 @@ export function elementMatchesText(evaluator: SelectorEvaluatorImpl, element: El
if (child.nodeType === Node.ELEMENT_NODE && matcher(elementText(evaluator, child as Element)))
return 'selfAndChildren';
}
if (element.shadowRoot && matcher(elementText(evaluator, element.shadowRoot)))
if (element.shadowRoot && matcher(elementText(evaluator, element.shadowRoot)))
return 'selfAndChildren';
return 'self';
}
Expand Down
2 changes: 1 addition & 1 deletion src/server/supplements/injected/selectorGenerator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ function buildCandidates(injectedScript: InjectedScript, element: Element): Sele
function buildTextCandidates(injectedScript: InjectedScript, element: Element, allowHasText: boolean): SelectorToken[] {
if (element.nodeName === 'SELECT')
return [];
const text = elementText(injectedScript._evaluator, element).trim().replace(/\s+/g, ' ').substring(0, 80);
const text = elementText(injectedScript._evaluator, element).full.trim().replace(/\s+/g, ' ').substring(0, 80);
if (!text)
return [];
const candidates: SelectorToken[] = [];
Expand Down
26 changes: 13 additions & 13 deletions test/selectors-text.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,8 @@ it('should work', async ({page}) => {
expect(await page.$eval(`text=yo>> text="ya"`, e => e.outerHTML)).toBe('<div>ya</div>');
expect(await page.$eval(`text=yo >>text='ya'`, e => e.outerHTML)).toBe('<div>ya</div>');
expect(await page.$eval(`text=yo >> text='ya'`, e => e.outerHTML)).toBe('<div>ya</div>');
expect(await page.$eval(`'yoyaheyhey'>>"ya"`, e => e.outerHTML)).toBe('<div>ya</div>');
expect(await page.$eval(`"yoyaheyhey" >> 'ya'`, e => e.outerHTML)).toBe('<div>ya</div>');
expect(await page.$eval(`'yo'>>"ya"`, e => e.outerHTML)).toBe('<div>ya</div>');
expect(await page.$eval(`"yo" >> 'ya'`, e => e.outerHTML)).toBe('<div>ya</div>');

await page.setContent(`<div>yo<span id="s1"></span></div><div>yo<span id="s2"></span><span id="s3"></span></div>`);
expect(await page.$$eval(`text=yo`, es => es.map(e => e.outerHTML).join('\n'))).toBe('<div>yo<span id="s1"></span></div>\n<div>yo<span id="s2"></span><span id="s3"></span></div>');
Expand Down Expand Up @@ -103,9 +103,9 @@ it('should work', async ({page}) => {
expect((await page.$$(`text="Sign in"`)).length).toBe(1);
expect(await page.$eval(`text=lo wo`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
expect(await page.$eval(`text="Hello world"`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
expect(await page.$eval(`text="lo wo"`, e => e.outerHTML)).toBe('<span>Hello\n \nworld</span>');
expect(await page.$(`text="lo wo"`)).toBe(null);
expect((await page.$$(`text=lo \nwo`)).length).toBe(1);
expect((await page.$$(`text="lo \nwo"`)).length).toBe(1);
expect((await page.$$(`text="lo \nwo"`)).length).toBe(0);
});

it('should work with :text', async ({page}) => {
Expand Down Expand Up @@ -144,11 +144,11 @@ it('should work across nodes', async ({page}) => {
expect(await page.$$eval(`text=world`, els => els.length)).toBe(1);
expect(await page.$(`text=hello world`)).toBe(null);

expect(await page.$eval(`:text-is("Hello, world!")`, e => e.id)).toBe('target1');
expect(await page.$(`:text-is("Hello, world!")`)).toBe(null);
expect(await page.$eval(`:text-is("Hello")`, e => e.id)).toBe('target1');
expect(await page.$eval(`:text-is("world")`, e => e.id)).toBe('target2');
expect(await page.$$eval(`:text-is("world")`, els => els.length)).toBe(1);
expect(await page.$eval(`text="Hello, world!"`, e => e.id)).toBe('target1');
expect(await page.$(`text="Hello, world!"`)).toBe(null);
expect(await page.$eval(`text="Hello"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="world"`, e => e.id)).toBe('target2');
expect(await page.$$eval(`text="world"`, els => els.length)).toBe(1);
Expand All @@ -167,11 +167,11 @@ it('should work with text nodes in quoted mode', async ({page}) => {
expect(await page.$eval(`text="Hello"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="Hi again"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="wo rld"`, e => e.id)).toBe('target2');
expect(await page.$eval(`text="Hellowo rld Hi again"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="Hellowo"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="Hellowo rld"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="wo rld Hi ag"`, e => e.id)).toBe('target1');
expect(await page.$eval(`text="again"`, e => e.id)).toBe('target1');
expect(await page.$(`text="Hellowo rld Hi again"`)).toBe(null);
expect(await page.$(`text="Hellowo"`)).toBe(null);
expect(await page.$(`text="Hellowo rld"`)).toBe(null);
expect(await page.$(`text="wo rld Hi ag"`)).toBe(null);
expect(await page.$(`text="again"`)).toBe(null);
expect(await page.$(`text="hi again"`)).toBe(null);
expect(await page.$eval(`text=hi again`, e => e.id)).toBe('target1');
});
Expand Down Expand Up @@ -291,10 +291,10 @@ it('should be case sensitive if quotes are specified', async ({page}) => {
expect(await page.$(`text="yA"`)).toBe(null);
});

it('should search for a substring', async ({page}) => {
it('should search for a substring without quotes', async ({page}) => {
await page.setContent(`<div>textwithsubstring</div>`);
expect(await page.$eval(`text=with`, e => e.outerHTML)).toBe('<div>textwithsubstring</div>');
expect(await page.$eval(`text="with"`, e => e.outerHTML)).toBe('<div>textwithsubstring</div>');
expect(await page.$(`text="with"`)).toBe(null);
});

it('should skip head, script and style', async ({page}) => {
Expand Down

0 comments on commit d87522f

Please sign in to comment.