feat: compress replay data #1436
@@ -25,6 +25,7 @@ import { assignableWindow, document, window } from '../../utils/globals'
 import { buildNetworkRequestOptions } from './config'
 import { isLocalhost } from '../../utils/request-utils'
 import { MutationRateLimiter } from './mutation-rate-limiter'
+import { gzipSync, strFromU8, strToU8 } from 'fflate'

 const BASE_ENDPOINT = '/s/'

@@ -88,6 +89,95 @@ const newQueuedEvent = (rrwebMethod: () => void): QueuedRRWebEvent => ({

 const LOGGER_PREFIX = '[SessionRecording]'

+type compressedFullSnapshotEvent = {
+    type: EventType.FullSnapshot
+    data: string
+}
+
+type compressedIncrementalSnapshotEvent = {
+    type: EventType.IncrementalSnapshot
+    data: {
+        source: IncrementalSource
+        texts: string
+        attributes: string
+        removes: string
+        adds: string
+    }
+}
+
+type compressedIncrementalStyleSnapshotEvent = {
+    type: EventType.IncrementalSnapshot
+    data: {
+        source: IncrementalSource.StyleSheetRule
+        id?: number
+        styleId?: number
+        replace?: string
+        replaceSync?: string
+        adds: string
+        removes: string
+    }
+}
+
+export type compressedEvent =
+    | compressedIncrementalStyleSnapshotEvent
+    | compressedFullSnapshotEvent
+    | compressedIncrementalSnapshotEvent
+export type compressedEventWithTime = compressedEvent & {
+    timestamp: number
+    delay?: number
+    // marker for compression version
+    cv: '2024-10'
+}
+
+function gzipToString(data: unknown): string {
+    return strFromU8(gzipSync(strToU8(JSON.stringify(data))), true)
+}
+
+// rrweb's packer takes an event and returns a string, or the reverse on unpack,
+// but we want to be able to inspect metadata during ingestion, and don't want to compress the entire event,
+// so we have a custom packer that only compresses part of some events
+function compressEvent(event: eventWithTime, ph: PostHog): eventWithTime | compressedEventWithTime {
+    try {
+        if (event.type === EventType.FullSnapshot) {
+            return {
+                ...event,
+                data: gzipToString(event.data),
+                cv: '2024-10',
+            }
+        }
+        if (event.type === EventType.IncrementalSnapshot && event.data.source === IncrementalSource.Mutation) {
+            return {
+                ...event,
+                cv: '2024-10',
+                data: {
+                    ...event.data,

[Inline review thread on the change above]

Review comment: Not sure what gains this really gives.

Reply: So... we know that when compressed the data fits into Kafka, but when uncompressed it doesn't always (and we reject it unnecessarily). When I looked at pulling the metadata out (and it's fair that I don't have this anywhere but in my head), I at least had to change the SDK, Django capture, blobby, and maybe playback, depending on how it was implemented, and we'd still have multiple compress/decompress steps since we're doing that at network boundaries anyway. This way I can test the impact with a relatively small change.

Reply: It should mean that the data that is currently not making it into Kafka from Rust or Django capture does make it in.

Reply: #BiasForImpact :p

+                    texts: gzipToString(event.data.texts),
+                    attributes: gzipToString(event.data.attributes),
+                    removes: gzipToString(event.data.removes),
+                    adds: gzipToString(event.data.adds),
+                },
+            }
+        }
+        if (event.type === EventType.IncrementalSnapshot && event.data.source === IncrementalSource.StyleSheetRule) {
+            return {
+                ...event,
+                cv: '2024-10',
+                data: {
+                    ...event.data,
+                    adds: gzipToString(event.data.adds),
+                    removes: gzipToString(event.data.removes),
+                },
+            }
+        }
+    } catch (e: unknown) {
+        logger.error(LOGGER_PREFIX + ' could not compress event', e)
+        ph.captureException((e as Error) || 'e was not an error', {
+            attempted_event_type: event?.type || 'no event type',
+        })
+    }
+    return event
+}

 export class SessionRecording {
     private _endpoint: string
     private flushBufferTimer?: any
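
Aside (not part of this diff): on the consuming side, undoing gzipToString is its mirror image. A minimal sketch, assuming the consumer also uses fflate; the helper name is illustrative and the real ingestion pipeline may decompress differently:

    import { gunzipSync, strFromU8, strToU8 } from 'fflate'

    // reverse of gzipToString: latin1 string -> bytes -> gunzip -> UTF-8 string -> JSON
    // (illustrative only, not part of this PR)
    function gunzipFromString<T = unknown>(compressed: string): T {
        return JSON.parse(strFromU8(gunzipSync(strToU8(compressed, true)))) as T
    }

For a compressed FullSnapshot event this would be applied to event.data as a whole; for the mutation and stylesheet cases it would be applied per field (texts, attributes, adds, removes).
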
@@ -795,11 +885,10 @@

         // TODO: Re-add ensureMaxMessageSize once we are confident in it
         const event = truncateLargeConsoleLogs(throttledEvent)
-        const size = estimateSize(event)

         this._updateWindowAndSessionIds(event)

-        // When in an idle state we keep recording, but don't capture the events
+        // When in an idle state we keep recording, but don't capture the events,
         // but we allow custom events even when idle
         if (this.isIdle && event.type !== EventType.Custom) {
             return
@@ -817,9 +906,13 @@
             }
         }

+        const eventToSend = this.instance.config.session_recording.compress_events
+            ? compressEvent(event, this.instance)
+            : event
+        const size = estimateSize(eventToSend)
         const properties = {
             $snapshot_bytes: size,
-            $snapshot_data: event,
+            $snapshot_data: eventToSend,
             $session_id: this.sessionId,
             $window_id: this.windowId,
         }
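
For reference, the new path only runs when config.session_recording.compress_events is set. A hypothetical opt-in sketch from the SDK consumer's side; whether and how compress_events is exposed in the public init options is not shown in this file's diff:

    import posthog from 'posthog-js'

    posthog.init('<project-api-key>', {
        api_host: 'https://us.i.posthog.com',
        session_recording: {
            // hypothetical flag exposure; this diff only shows the internal read of the flag
            compress_events: true,
        },
    })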

Review comment: Have you considered streaming, for at least the full snapshots? It seems memory intensive to do the Object -> String -> Array -> Gzip Array -> String transformation synchronously. (Though I imagine it would need a bigger refactor of the network code.)

Reply: Streaming in the browser? 🤔 Certainly not a thing I've ever heard of?

Reply: It's new-ish (at least for sending; reading the response as a stream has been available for a while): https://developer.chrome.com/docs/capabilities/web-apis/fetch-streaming-requests#using_with_writable_streams

Reply: We'd not... that's interesting. Certainly we've talked about having a fast path for full snapshots at least, since if all we capture are full snapshots we can play something back.
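
As a rough illustration of the streaming idea discussed above (purely hypothetical, not part of this PR): it assumes CompressionStream and streaming request bodies are available, which today effectively means Chromium-based browsers, and that the capture endpoint accepts a chunked gzip-encoded body. Note the JSON string is still built in memory; streaming only avoids buffering the compressed output before upload.

    // hypothetical sketch of a streamed, compressed full-snapshot upload
    async function streamCompressedSnapshot(endpoint: string, snapshotData: unknown): Promise<Response> {
        const json = JSON.stringify(snapshotData) // still serialised synchronously
        // gzip on the fly as the body uploads, instead of compressing the whole payload up front
        const body = new Blob([json]).stream().pipeThrough(new CompressionStream('gzip'))
        const init: RequestInit & { duplex: 'half' } = {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'Content-Encoding': 'gzip' },
            body,
            // required for streaming request bodies
            duplex: 'half',
        }
        return fetch(endpoint, init)
    }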