Skip to content

Commit 2f1cf07

Browse files
authored
feat(autocapture): add maskTextRegex option to autocapture (#1259)
1 parent 5e986f8 commit 2f1cf07

File tree

15 files changed

+803
-524
lines changed

15 files changed

+803
-524
lines changed

packages/analytics-core/src/types/element-interactions.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,11 @@ export interface ElementInteractionsOptions {
9292
triggers: Trigger[];
9393
labeledEvents: Record<string, LabeledEvent>;
9494
};
95+
96+
/**
97+
* RegExp pattern list to allow custom patterns for text masking
98+
*/
99+
maskTextRegex?: (RegExp | { pattern: string; description: string })[];
95100
}
96101

97102
type MatchingCondition = {

packages/analytics-core/src/types/frustration-interactions.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,11 @@ export interface FrustrationInteractionsOptions {
5959
* Configuration for rage clicks tracking
6060
*/
6161
rageClicks?: RageClickOptions;
62+
63+
/**
64+
* RegExp pattern list to allow custom patterns for text masking
65+
*/
66+
maskTextRegex?: (RegExp | { pattern: string; description: string })[];
6267
}
6368

6469
const CLICKABLE_ELEMENT_SELECTORS = [

packages/plugin-autocapture-browser/src/autocapture-plugin.ts

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,7 @@ import { createRemoteConfigFetch } from '@amplitude/analytics-remote-config';
1212
import * as constants from './constants';
1313
import { fromEvent, map, type Observable, type Subscription, share } from 'rxjs';
1414
import {
15-
addAdditionalEventProperties,
1615
createShouldTrackEvent,
17-
getEventProperties,
1816
type ElementBasedTimestampedEvent,
1917
type TimestampedEvent,
2018
type NavigateEvent,
@@ -31,6 +29,7 @@ import {
3129
createTriggerEvaluator,
3230
groupLabeledEventIdsByEventType,
3331
} from './pageActions/triggers';
32+
import { DataExtractor } from './data-extractor';
3433

3534
declare global {
3635
interface Window {
@@ -79,12 +78,15 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
7978

8079
const subscriptions: Subscription[] = [];
8180

81+
// Create data extractor based on options
82+
const dataExtractor = new DataExtractor(options);
83+
8284
// Create observables on events on the window
8385
const createObservables = (): AllWindowObservables => {
8486
// Create Observables from direct user events
8587
const clickObservable = createClickObservable().pipe(
8688
map((click) =>
87-
addAdditionalEventProperties(
89+
dataExtractor.addAdditionalEventProperties(
8890
click,
8991
'click',
9092
(options as AutoCaptureOptionsWithDefaults).cssSelectorAllowlist,
@@ -95,7 +97,7 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
9597
);
9698
const changeObservable = fromEvent<Event>(document, 'change', { capture: true }).pipe(
9799
map((change) =>
98-
addAdditionalEventProperties(
100+
dataExtractor.addAdditionalEventProperties(
99101
change,
100102
'change',
101103
(options as AutoCaptureOptionsWithDefaults).cssSelectorAllowlist,
@@ -116,7 +118,7 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
116118
if (window.navigation) {
117119
navigateObservable = fromEvent<NavigateEvent>(window.navigation, 'navigate').pipe(
118120
map((navigate) =>
119-
addAdditionalEventProperties(
121+
dataExtractor.addAdditionalEventProperties(
120122
navigate,
121123
'navigate',
122124
(options as AutoCaptureOptionsWithDefaults).cssSelectorAllowlist,
@@ -130,7 +132,7 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
130132
// Track DOM Mutations using shared observable
131133
const mutationObservable = createMutationObservable().pipe(
132134
map((mutation) =>
133-
addAdditionalEventProperties(
135+
dataExtractor.addAdditionalEventProperties(
134136
mutation,
135137
'mutation',
136138
(options as AutoCaptureOptionsWithDefaults).cssSelectorAllowlist,
@@ -155,7 +157,12 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
155157
let labeledEventToTriggerMap = createLabeledEventToTriggerMap(options.pageActions?.triggers ?? []);
156158

157159
// Evaluate triggers for the given event by running the actions associated with the matching triggers
158-
const evaluateTriggers = createTriggerEvaluator(groupedLabeledEvents, labeledEventToTriggerMap, options);
160+
const evaluateTriggers = createTriggerEvaluator(
161+
groupedLabeledEvents,
162+
labeledEventToTriggerMap,
163+
dataExtractor,
164+
options,
165+
);
159166

160167
// Function to recalculate internal variables when remote config is updated
161168
const recomputePageActionsData = (remotePageActions: ElementInteractionsOptions['pageActions']) => {
@@ -229,7 +236,7 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
229236

230237
const changeSubscription = trackChange({
231238
allObservables,
232-
getEventProperties: (...args) => getEventProperties(...args, dataAttributePrefix),
239+
getEventProperties: (...args) => dataExtractor.getEventProperties(...args, dataAttributePrefix),
233240
amplitude,
234241
shouldTrackEvent: shouldTrackEvent,
235242
evaluateTriggers: evaluateTriggers.evaluate.bind(evaluateTriggers),
@@ -239,7 +246,7 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
239246
const actionClickSubscription = trackActionClick({
240247
allObservables,
241248
options: options as AutoCaptureOptionsWithDefaults,
242-
getEventProperties: (...args) => getEventProperties(...args, dataAttributePrefix),
249+
getEventProperties: (...args) => dataExtractor.getEventProperties(...args, dataAttributePrefix),
243250
amplitude,
244251
shouldTrackEvent,
245252
shouldTrackActionClick: shouldTrackActionClick,
@@ -256,6 +263,7 @@ export const autocapturePlugin = (options: ElementInteractionsOptions = {}): Bro
256263

257264
/* istanbul ignore next */
258265
visualTaggingOptions.messenger?.setup({
266+
dataExtractor: dataExtractor,
259267
logger: config?.loggerProvider,
260268
...(config?.serverZone && { endpoint: constants.AMPLITUDE_ORIGINS_MAP[config.serverZone] }),
261269
isElementSelectable: createShouldTrackEvent(options, [...allowlist, ...actionClickAllowlist]),

packages/plugin-autocapture-browser/src/constants.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,3 +38,5 @@ export const AMPLITUDE_VISUAL_TAGGING_SELECTOR_SCRIPT_URL =
3838
// This is the class name used by the visual tagging selector to highlight the selected element.
3939
// Should not use this class in the selector.
4040
export const AMPLITUDE_VISUAL_TAGGING_HIGHLIGHT_CLASS = 'amp-visual-tagging-selector-highlight';
41+
42+
// Upper bound on how many custom `maskTextRegex` patterns are compiled for text masking;
// the DataExtractor constructor stops collecting patterns once this many have been accepted.
export const MAX_MASK_TEXT_PATTERNS = 25;
Lines changed: 238 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,238 @@
1+
/* eslint-disable no-restricted-globals */
2+
import type { ElementInteractionsOptions, ActionType } from '@amplitude/analytics-core';
3+
import type { DataSource } from '@amplitude/analytics-core/lib/esm/types/element-interactions';
4+
import * as constants from './constants';
5+
import {
6+
isTextNode,
7+
removeEmptyProperties,
8+
isNonSensitiveElement,
9+
getAttributesWithPrefix,
10+
isElementPointerCursor,
11+
getClosestElement,
12+
isElementBasedEvent,
13+
} from './helpers';
14+
import type { BaseTimestampedEvent, ElementBasedTimestampedEvent, TimestampedEvent } from './helpers';
15+
import { getHierarchy } from './hierarchy';
16+
import type { JSONValue } from './helpers';
17+
import { getDataSource } from './pageActions/actions';
18+
19+
// Fully anchored pattern for credit-card-like digit strings: each alternative fixes a
// leading prefix and a total length (13 to 16 digits overall). Callers strip spaces and
// hyphens from the candidate text before testing against it.
const CC_REGEX =
20+
/^(?:(4[0-9]{12}(?:[0-9]{3})?)|(5[1-5][0-9]{14})|(6(?:011|5[0-9]{2})[0-9]{12})|(3[47][0-9]{13})|(3(?:0[0-5]|[68][0-9])[0-9]{11})|((?:2131|1800|35[0-9]{3})[0-9]{11}))$/;
21+
// Fully anchored pattern for social-security-number-like strings: 3, 2 and 4 digits,
// with each separating hyphen optional.
const SSN_REGEX = /(^\d{3}-?\d{2}-?\d{4}$)/;
22+
// Unanchored pattern for email-like text: matches anywhere in the string where a run of
// non-space, non-"@" characters is followed by "@", a dot-free label, ".", and more text.
const EMAIL_REGEX = /[^\s@]+@[^\s@.]+\.[^\s@]+/;
23+
24+
export class DataExtractor {
25+
private readonly additionalMaskTextPatterns: RegExp[];
26+
27+
constructor(options: ElementInteractionsOptions) {
28+
const rawPatterns = options.maskTextRegex ?? [];
29+
30+
const compiled: RegExp[] = [];
31+
for (const entry of rawPatterns) {
32+
if (compiled.length >= constants.MAX_MASK_TEXT_PATTERNS) {
33+
break;
34+
}
35+
if (entry instanceof RegExp) {
36+
compiled.push(entry);
37+
} else if ('pattern' in entry && typeof entry.pattern === 'string') {
38+
try {
39+
compiled.push(new RegExp(entry.pattern));
40+
} catch {
41+
// ignore invalid pattern strings
42+
}
43+
}
44+
}
45+
this.additionalMaskTextPatterns = compiled;
46+
}
47+
48+
isNonSensitiveString = (text: string | null): boolean => {
49+
if (typeof text !== 'string') {
50+
return true;
51+
}
52+
53+
// Check for credit card number
54+
if (CC_REGEX.test((text || '').replace(/[- ]/g, ''))) {
55+
return false;
56+
}
57+
58+
// Check for social security number or email
59+
if (SSN_REGEX.test(text) || EMAIL_REGEX.test(text)) {
60+
return false;
61+
}
62+
63+
// Check for additional mask text patterns
64+
for (const pattern of this.additionalMaskTextPatterns) {
65+
try {
66+
if (pattern.test(text)) {
67+
return false;
68+
}
69+
} catch {
70+
// ignore invalid pattern
71+
}
72+
}
73+
74+
return true;
75+
};
76+
77+
getNearestLabel = (element: Element): string => {
78+
const parent = element.parentElement;
79+
if (!parent) {
80+
return '';
81+
}
82+
let labelElement: Element | null;
83+
try {
84+
labelElement = parent.querySelector(':scope>span,h1,h2,h3,h4,h5,h6');
85+
} catch {
86+
/* istanbul ignore next */
87+
labelElement = null;
88+
}
89+
if (labelElement) {
90+
/* istanbul ignore next */
91+
const labelText = labelElement.textContent || '';
92+
return this.isNonSensitiveString(labelText) ? labelText : '';
93+
}
94+
return this.getNearestLabel(parent);
95+
};
96+
97+
// Returns the Amplitude event properties for the given element.
98+
getEventProperties = (actionType: ActionType, element: Element, dataAttributePrefix: string) => {
99+
/* istanbul ignore next */
100+
const tag = element?.tagName?.toLowerCase?.();
101+
/* istanbul ignore next */
102+
const rect =
103+
typeof element.getBoundingClientRect === 'function' ? element.getBoundingClientRect() : { left: null, top: null };
104+
const ariaLabel = element.getAttribute('aria-label');
105+
const attributes = getAttributesWithPrefix(element, dataAttributePrefix);
106+
const nearestLabel = this.getNearestLabel(element);
107+
/* istanbul ignore next */
108+
const properties: Record<string, unknown> = {
109+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_ID]: element.getAttribute('id') || '',
110+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_CLASS]: element.getAttribute('class'),
111+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_HIERARCHY]: getHierarchy(element),
112+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_TAG]: tag,
113+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_TEXT]: this.getText(element),
114+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_POSITION_LEFT]: rect.left == null ? null : Math.round(rect.left),
115+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_POSITION_TOP]: rect.top == null ? null : Math.round(rect.top),
116+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_ARIA_LABEL]: ariaLabel,
117+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_ATTRIBUTES]: attributes,
118+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_PARENT_LABEL]: nearestLabel,
119+
[constants.AMPLITUDE_EVENT_PROP_PAGE_URL]: window.location.href.split('?')[0],
120+
[constants.AMPLITUDE_EVENT_PROP_PAGE_TITLE]: (typeof document !== 'undefined' && document.title) || '',
121+
[constants.AMPLITUDE_EVENT_PROP_VIEWPORT_HEIGHT]: window.innerHeight,
122+
[constants.AMPLITUDE_EVENT_PROP_VIEWPORT_WIDTH]: window.innerWidth,
123+
};
124+
if (tag === 'a' && actionType === 'click' && element instanceof HTMLAnchorElement) {
125+
properties[constants.AMPLITUDE_EVENT_PROP_ELEMENT_HREF] = element.href;
126+
}
127+
return removeEmptyProperties(properties);
128+
};
129+
130+
addAdditionalEventProperties = <T>(
131+
event: T,
132+
type: TimestampedEvent<T>['type'],
133+
selectorAllowlist: string[],
134+
dataAttributePrefix: string,
135+
// capture the event if the cursor is a "pointer" when this element is clicked on
136+
// reason: a "pointer" cursor indicates that an element should be interactable
137+
// regardless of the element's tag name
138+
isCapturingCursorPointer = false,
139+
): TimestampedEvent<T> | ElementBasedTimestampedEvent<T> => {
140+
const baseEvent: BaseTimestampedEvent<T> | ElementBasedTimestampedEvent<T> = {
141+
event,
142+
timestamp: Date.now(),
143+
type,
144+
};
145+
146+
if (isElementBasedEvent(baseEvent) && baseEvent.event.target !== null) {
147+
if (isCapturingCursorPointer) {
148+
const isCursorPointer = isElementPointerCursor(baseEvent.event.target as Element, baseEvent.type);
149+
if (isCursorPointer) {
150+
baseEvent.closestTrackedAncestor = baseEvent.event.target as HTMLElement;
151+
baseEvent.targetElementProperties = this.getEventProperties(
152+
baseEvent.type,
153+
baseEvent.closestTrackedAncestor,
154+
dataAttributePrefix,
155+
);
156+
return baseEvent;
157+
}
158+
}
159+
// Retrieve additional event properties from the target element
160+
const closestTrackedAncestor = getClosestElement(baseEvent.event.target as HTMLElement, selectorAllowlist);
161+
if (closestTrackedAncestor) {
162+
baseEvent.closestTrackedAncestor = closestTrackedAncestor;
163+
baseEvent.targetElementProperties = this.getEventProperties(
164+
baseEvent.type,
165+
closestTrackedAncestor,
166+
dataAttributePrefix,
167+
);
168+
}
169+
return baseEvent;
170+
}
171+
172+
return baseEvent;
173+
};
174+
175+
extractDataFromDataSource = (dataSource: DataSource, contextElement: HTMLElement) => {
176+
// Extract from DOM Element
177+
if (dataSource.sourceType === 'DOM_ELEMENT') {
178+
const sourceElement = getDataSource(dataSource, contextElement);
179+
if (!sourceElement) {
180+
return undefined;
181+
}
182+
183+
if (dataSource.elementExtractType === 'TEXT') {
184+
return this.getText(sourceElement);
185+
} else if (dataSource.elementExtractType === 'ATTRIBUTE' && dataSource.attribute) {
186+
return sourceElement.getAttribute(dataSource.attribute);
187+
}
188+
return undefined;
189+
}
190+
191+
// TODO: Extract from other source types
192+
return undefined;
193+
};
194+
195+
combineText = (element: Element): string => {
196+
let text = '';
197+
if (isNonSensitiveElement(element) && element.childNodes && element.childNodes.length) {
198+
element.childNodes.forEach((child) => {
199+
let childText = '';
200+
if (isTextNode(child)) {
201+
if (child.textContent) {
202+
childText = child.textContent;
203+
}
204+
} else {
205+
childText = this.combineText(child as Element);
206+
}
207+
text += childText
208+
.split(/(\s+)/)
209+
.filter(this.isNonSensitiveString)
210+
.join('')
211+
.replace(/[\r\n]/g, ' ')
212+
.replace(/[ ]+/g, ' ')
213+
.substring(0, 255);
214+
});
215+
}
216+
return text;
217+
};
218+
219+
getText = (element: Element): string => {
220+
return this.combineText(element).trim();
221+
};
222+
223+
// Returns the element properties for the given element in Visual Labeling.
224+
getEventTagProps = (element: Element): Record<string, JSONValue> => {
225+
if (!element) {
226+
return {};
227+
}
228+
/* istanbul ignore next */
229+
const tag = element?.tagName?.toLowerCase?.();
230+
231+
const properties = {
232+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_TAG]: tag,
233+
[constants.AMPLITUDE_EVENT_PROP_ELEMENT_TEXT]: this.getText(element),
234+
[constants.AMPLITUDE_EVENT_PROP_PAGE_URL]: window.location.href.split('?')[0],
235+
};
236+
return removeEmptyProperties(properties) as Record<string, JSONValue>;
237+
};
238+
}

0 commit comments

Comments
 (0)