|
| 1 | +/* eslint-disable no-restricted-globals */ |
| 2 | +import type { ElementInteractionsOptions, ActionType } from '@amplitude/analytics-core'; |
| 3 | +import type { DataSource } from '@amplitude/analytics-core/lib/esm/types/element-interactions'; |
| 4 | +import * as constants from './constants'; |
| 5 | +import { |
| 6 | + isTextNode, |
| 7 | + removeEmptyProperties, |
| 8 | + isNonSensitiveElement, |
| 9 | + getAttributesWithPrefix, |
| 10 | + isElementPointerCursor, |
| 11 | + getClosestElement, |
| 12 | + isElementBasedEvent, |
| 13 | +} from './helpers'; |
| 14 | +import type { BaseTimestampedEvent, ElementBasedTimestampedEvent, TimestampedEvent } from './helpers'; |
| 15 | +import { getHierarchy } from './hierarchy'; |
| 16 | +import type { JSONValue } from './helpers'; |
| 17 | +import { getDataSource } from './pageActions/actions'; |
| 18 | + |
// Payment-card detector. Anchored at both ends, so the WHOLE candidate string
// must be a card number; callers strip spaces and dashes before testing.
// Alternatives, in order (prefix / total length): 4… (13 or 16 digits),
// 51–55… (16), 6011… or 65… (16), 34…/37… (15), 300–305…/36…/38… (14),
// 2131…/1800… (15) or 35… (16). These are the shapes commonly associated with
// Visa, Mastercard, Discover, American Express, Diners Club and JCB.
const CC_REGEX =
  /^(?:(4[0-9]{12}(?:[0-9]{3})?)|(5[1-5][0-9]{14})|(6(?:011|5[0-9]{2})[0-9]{12})|(3[47][0-9]{13})|(3(?:0[0-5]|[68][0-9])[0-9]{11})|((?:2131|1800|35[0-9]{3})[0-9]{11}))$/;

// US Social Security number shape: 3-2-4 digits with optional dashes.
// Anchored, so the whole string must match.
const SSN_REGEX = /(^\d{3}-?\d{2}-?\d{4}$)/;

// Loose e-mail detector: `local@domain.tld` anywhere inside the string
// (unanchored), with no whitespace or extra `@` in any part.
const EMAIL_REGEX = /[^\s@]+@[^\s@.]+\.[^\s@]+/;

| 23 | + |
/**
 * Extracts text and element properties from DOM elements for element
 * interaction events, filtering out strings that look sensitive.
 *
 * A string is treated as sensitive when it matches the built-in card-number,
 * SSN or e-mail patterns, or any of the extra patterns supplied through
 * `options.maskTextRegex`.
 */
export class DataExtractor {
  /** Caller-supplied patterns, compiled once in the constructor. */
  private readonly additionalMaskTextPatterns: RegExp[];

  /**
   * Compiles `options.maskTextRegex` into `RegExp` objects.
   *
   * Entries may be `RegExp` instances (used as-is) or objects with a string
   * `pattern` field (compiled with `new RegExp`). Entries of any other shape,
   * and pattern strings that fail to compile, are skipped. At most
   * `constants.MAX_MASK_TEXT_PATTERNS` patterns are kept; the rest are ignored.
   */
  constructor(options: ElementInteractionsOptions) {
    const rawPatterns = options.maskTextRegex ?? [];

    const compiled: RegExp[] = [];
    for (const entry of rawPatterns) {
      if (compiled.length >= constants.MAX_MASK_TEXT_PATTERNS) {
        break;
      }
      if (entry instanceof RegExp) {
        compiled.push(entry);
      } else if (
        // `in` throws a TypeError on primitives and null, so verify that the
        // entry is a real object before probing it; configuration arrives from
        // untyped callers and one bad entry must not break construction.
        typeof entry === 'object' &&
        entry !== null &&
        'pattern' in entry &&
        typeof entry.pattern === 'string'
      ) {
        try {
          compiled.push(new RegExp(entry.pattern));
        } catch {
          // ignore invalid pattern strings
        }
      }
    }
    this.additionalMaskTextPatterns = compiled;
  }

  /**
   * Reports whether `text` is safe to capture.
   *
   * @param text - candidate string; any non-string value (including `null`)
   *   is considered non-sensitive.
   * @returns `false` when the text (with spaces and dashes removed) is a card
   *   number, or when it matches the SSN pattern, the e-mail pattern, or any
   *   additional mask pattern; `true` otherwise.
   */
  isNonSensitiveString = (text: string | null): boolean => {
    if (typeof text !== 'string') {
      return true;
    }

    // Check for credit card number
    if (CC_REGEX.test(text.replace(/[- ]/g, ''))) {
      return false;
    }

    // Check for social security number or email
    if (SSN_REGEX.test(text) || EMAIL_REGEX.test(text)) {
      return false;
    }

    // Check for additional mask text patterns
    for (const pattern of this.additionalMaskTextPatterns) {
      try {
        // Global (`g`) and sticky (`y`) regexes remember `lastIndex` between
        // calls to `test`; without this reset a caller-supplied `/…/g` pattern
        // would alternate between matching and not matching the same text.
        pattern.lastIndex = 0;
        if (pattern.test(text)) {
          return false;
        }
      } catch {
        // ignore invalid pattern
      }
    }

    return true;
  };

  /**
   * Walks up the ancestor chain looking for a label-like element and returns
   * its text.
   *
   * At each ancestor the first element matching the selector below is used.
   * Its `textContent` is returned if it passes `isNonSensitiveString`,
   * otherwise `''`. Returns `''` when the top of the tree is reached without
   * a match. A `querySelector` failure is treated as "no match at this level".
   *
   * NOTE(review): in `:scope>span,h1,…,h6` the `:scope>` combinator binds only
   * to `span`; the heading selectors match at any depth under the parent.
   * Confirm whether that is intended.
   */
  getNearestLabel = (element: Element): string => {
    const parent = element.parentElement;
    if (!parent) {
      return '';
    }
    let labelElement: Element | null;
    try {
      labelElement = parent.querySelector(':scope>span,h1,h2,h3,h4,h5,h6');
    } catch {
      /* istanbul ignore next */
      labelElement = null;
    }
    if (labelElement) {
      /* istanbul ignore next */
      const labelText = labelElement.textContent || '';
      return this.isNonSensitiveString(labelText) ? labelText : '';
    }
    return this.getNearestLabel(parent);
  };

  /**
   * Returns the Amplitude event properties for the given element.
   *
   * Collects id, class, hierarchy, tag, filtered text, rounded viewport
   * position, `aria-label`, attributes with the given prefix, nearest label,
   * page URL (the part before the first `?`), page title and viewport size.
   * For a `click` on an anchor element the `href` is added as well. The map is
   * passed through `removeEmptyProperties` before being returned.
   *
   * @param actionType - interaction type; only `'click'` affects the output.
   * @param element - element to describe.
   * @param dataAttributePrefix - prefix handed to `getAttributesWithPrefix`.
   */
  getEventProperties = (actionType: ActionType, element: Element, dataAttributePrefix: string) => {
    /* istanbul ignore next */
    const tag = element?.tagName?.toLowerCase?.();
    /* istanbul ignore next */
    const rect =
      typeof element.getBoundingClientRect === 'function' ? element.getBoundingClientRect() : { left: null, top: null };
    const ariaLabel = element.getAttribute('aria-label');
    const attributes = getAttributesWithPrefix(element, dataAttributePrefix);
    const nearestLabel = this.getNearestLabel(element);
    /* istanbul ignore next */
    const properties: Record<string, unknown> = {
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_ID]: element.getAttribute('id') || '',
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_CLASS]: element.getAttribute('class'),
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_HIERARCHY]: getHierarchy(element),
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_TAG]: tag,
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_TEXT]: this.getText(element),
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_POSITION_LEFT]: rect.left == null ? null : Math.round(rect.left),
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_POSITION_TOP]: rect.top == null ? null : Math.round(rect.top),
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_ARIA_LABEL]: ariaLabel,
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_ATTRIBUTES]: attributes,
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_PARENT_LABEL]: nearestLabel,
      [constants.AMPLITUDE_EVENT_PROP_PAGE_URL]: window.location.href.split('?')[0],
      [constants.AMPLITUDE_EVENT_PROP_PAGE_TITLE]: (typeof document !== 'undefined' && document.title) || '',
      [constants.AMPLITUDE_EVENT_PROP_VIEWPORT_HEIGHT]: window.innerHeight,
      [constants.AMPLITUDE_EVENT_PROP_VIEWPORT_WIDTH]: window.innerWidth,
    };
    if (tag === 'a' && actionType === 'click' && element instanceof HTMLAnchorElement) {
      properties[constants.AMPLITUDE_EVENT_PROP_ELEMENT_HREF] = element.href;
    }
    return removeEmptyProperties(properties);
  };

  /**
   * Wraps a raw event with a timestamp and type and, for element-based events
   * with a non-null target, attaches the tracked element and its properties.
   *
   * When `isCapturingCursorPointer` is set and `isElementPointerCursor`
   * accepts the target, the target itself becomes the tracked element.
   * Otherwise the tracked element is whatever `getClosestElement` returns for
   * the target and `selectorAllowlist`; if that is falsy, the event is
   * returned without element properties.
   *
   * @param event - the raw event.
   * @param type - event type stored on the wrapper.
   * @param selectorAllowlist - selectors passed to `getClosestElement`.
   * @param dataAttributePrefix - forwarded to `getEventProperties`.
   * @param isCapturingCursorPointer - see the inline comment on the parameter.
   */
  addAdditionalEventProperties = <T>(
    event: T,
    type: TimestampedEvent<T>['type'],
    selectorAllowlist: string[],
    dataAttributePrefix: string,
    // capture the event if the cursor is a "pointer" when this element is clicked on
    // reason: a "pointer" cursor indicates that an element should be interactable
    // regardless of the element's tag name
    isCapturingCursorPointer = false,
  ): TimestampedEvent<T> | ElementBasedTimestampedEvent<T> => {
    const baseEvent: BaseTimestampedEvent<T> | ElementBasedTimestampedEvent<T> = {
      event,
      timestamp: Date.now(),
      type,
    };

    if (isElementBasedEvent(baseEvent) && baseEvent.event.target !== null) {
      if (isCapturingCursorPointer) {
        const isCursorPointer = isElementPointerCursor(baseEvent.event.target as Element, baseEvent.type);
        if (isCursorPointer) {
          baseEvent.closestTrackedAncestor = baseEvent.event.target as HTMLElement;
          baseEvent.targetElementProperties = this.getEventProperties(
            baseEvent.type,
            baseEvent.closestTrackedAncestor,
            dataAttributePrefix,
          );
          return baseEvent;
        }
      }
      // Retrieve additional event properties from the target element
      const closestTrackedAncestor = getClosestElement(baseEvent.event.target as HTMLElement, selectorAllowlist);
      if (closestTrackedAncestor) {
        baseEvent.closestTrackedAncestor = closestTrackedAncestor;
        baseEvent.targetElementProperties = this.getEventProperties(
          baseEvent.type,
          closestTrackedAncestor,
          dataAttributePrefix,
        );
      }
      return baseEvent;
    }

    return baseEvent;
  };

  /**
   * Resolves a configured data source to a value.
   *
   * Only `DOM_ELEMENT` sources are handled: the source element is located with
   * `getDataSource`, then either its filtered text (`TEXT`) or the named
   * attribute (`ATTRIBUTE`) is returned.
   *
   * @returns the extracted value, or `undefined` when the source type is not
   *   supported, the source element is not found, or the extract type is not
   *   recognised.
   */
  extractDataFromDataSource = (dataSource: DataSource, contextElement: HTMLElement) => {
    // Extract from DOM Element
    if (dataSource.sourceType === 'DOM_ELEMENT') {
      const sourceElement = getDataSource(dataSource, contextElement);
      if (!sourceElement) {
        return undefined;
      }

      if (dataSource.elementExtractType === 'TEXT') {
        return this.getText(sourceElement);
      } else if (dataSource.elementExtractType === 'ATTRIBUTE' && dataSource.attribute) {
        return sourceElement.getAttribute(dataSource.attribute);
      }
      return undefined;
    }

    // TODO: Extract from other source types
    return undefined;
  };

  /**
   * Recursively concatenates the text under `element`, dropping sensitive
   * tokens.
   *
   * Elements rejected by `isNonSensitiveElement` contribute nothing. Each
   * child's text is split on whitespace runs (the separators are kept), tokens
   * failing `isNonSensitiveString` are removed, line breaks become spaces and
   * runs of spaces collapse to one.
   *
   * NOTE(review): the 255-character cap is applied to each child's
   * contribution, not to the accumulated result, so the returned string can
   * exceed 255 characters when an element has several children. Confirm
   * whether a total cap was intended.
   */
  combineText = (element: Element): string => {
    let text = '';
    if (isNonSensitiveElement(element) && element.childNodes && element.childNodes.length) {
      element.childNodes.forEach((child) => {
        let childText = '';
        if (isTextNode(child)) {
          if (child.textContent) {
            childText = child.textContent;
          }
        } else {
          childText = this.combineText(child as Element);
        }
        text += childText
          .split(/(\s+)/)
          .filter(this.isNonSensitiveString)
          .join('')
          .replace(/[\r\n]/g, ' ')
          .replace(/[ ]+/g, ' ')
          .substring(0, 255);
      });
    }
    return text;
  };

  /** Returns the filtered text of `element` (see `combineText`), trimmed. */
  getText = (element: Element): string => {
    return this.combineText(element).trim();
  };

  /**
   * Returns the element properties for the given element in Visual Labeling:
   * tag, filtered text and page URL (the part before the first `?`), passed
   * through `removeEmptyProperties`. A falsy `element` yields `{}`.
   */
  getEventTagProps = (element: Element): Record<string, JSONValue> => {
    if (!element) {
      return {};
    }
    /* istanbul ignore next */
    const tag = element?.tagName?.toLowerCase?.();

    const properties = {
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_TAG]: tag,
      [constants.AMPLITUDE_EVENT_PROP_ELEMENT_TEXT]: this.getText(element),
      [constants.AMPLITUDE_EVENT_PROP_PAGE_URL]: window.location.href.split('?')[0],
    };
    return removeEmptyProperties(properties) as Record<string, JSONValue>;
  };
}
0 commit comments