Skip to content

Commit

Permalink
cache full rendering (github#25424)
Browse files Browse the repository at this point in the history
* cache full rendering

* still not working with gzip

* progress progress progress

* smaller

* hacky progress

* small fixes

* wip

* lock file

* wip

* wip

* package-lock updates

* wip

* search DOM in lowercase

* simplify

* with instrument

* improve test coverage

* mutateCheeriobodyByRequest

* fix

* remove renderContentCacheByContext

* disable render caching in sync-search

* disable things in github/github link checker

* gzip lru

* tidying up

* updated

* correct tests

* fix: move userLanguage to LanguagesContext

* Revert "fix: move userLanguage to LanguagesContext"

This reverts commit d7c05d958c71eaad496eb46764eb845d80b866ca.

* contexts ftw

* fixed rendering tests

* oops forgot new file

* nits addressed

Co-authored-by: Mike Surowiec <mikesurowiec@users.noreply.github.com>
  • Loading branch information
peterbe and mikesurowiec authored May 23, 2022
1 parent 00d0f82 commit 1850487
Show file tree
Hide file tree
Showing 23 changed files with 700 additions and 572 deletions.
5 changes: 4 additions & 1 deletion .github/workflows/check-broken-links-github-github.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,11 @@ jobs:
env:
NODE_ENV: production
PORT: 4000
# Overload protection is on by default (when NODE_ENV==production)
# but it would not help in this context.
DISABLE_OVERLOAD_PROTECTION: true
DISABLE_RENDER_CACHING: true
# Render caching won't help when we visit every page exactly once.
DISABLE_RENDERING_CACHE: true
run: |
node server.mjs &
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/sync-search-indices.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ jobs:
# Because the overload protection runs in NODE_ENV==production
# and it can break the sync-search.
DISABLE_OVERLOAD_PROTECTION: true
# Render caching won't help when we visit every page exactly once.
DISABLE_RENDERING_CACHE: true

run: npm run sync-search

Expand Down
1 change: 1 addition & 0 deletions components/DefaultLayout.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ export const DefaultLayout = (props: Props) => {
const { t } = useTranslation(['errors', 'meta', 'scroll_button'])
const router = useRouter()
const metaDescription = page.introPlainText ? page.introPlainText : t('default_description')

return (
<div className="d-lg-flex">
<Head>
Expand Down
19 changes: 19 additions & 0 deletions components/context/DotComAuthenticatedContext.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { createContext, useContext } from 'react'

// Shape of the value carried by DotComAuthenticatedContext.
export type DotComAuthenticatedContextT = {
// Whether the visitor counts as signed in to GitHub.com. On the server this
// is derived from the truthiness of the `dotcom_user` request cookie.
isDotComAuthenticated: boolean
}

// React context for the auth flag. The default value is `null` so that
// `useAuth` can detect when it is called without a surrounding Provider.
export const DotComAuthenticatedContext = createContext<DotComAuthenticatedContextT | null>(null)

/**
 * Hook returning the value supplied by the nearest
 * `DotComAuthenticatedContext.Provider`.
 *
 * @returns The current context value (never `null`).
 * @throws Error when no provider is mounted above the calling component,
 *   i.e. when the context still holds its `null` default.
 */
export const useAuth = (): DotComAuthenticatedContextT => {
  const context = useContext(DotComAuthenticatedContext)

  if (!context) {
    // Name the hook that is actually exported so the message is greppable.
    throw new Error('"useAuth" may only be used inside "DotComAuthenticatedContext.Provider"')
  }

  return context
}
1 change: 1 addition & 0 deletions components/context/LanguagesContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ type LanguageItem = {

// Shape of the value carried by LanguagesContext.
export type LanguagesContextT = {
// Language entries keyed by string (presumably the language code — confirm
// against where the context value is built).
languages: Record<string, LanguageItem>
// Language attributed to the current visitor. The full-rendering cache
// middleware overwrites this per request from `req.context.userLanguage`.
userLanguage: string
}

export const LanguagesContext = createContext<LanguagesContextT | null>(null)
Expand Down
4 changes: 0 additions & 4 deletions components/context/MainContext.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ export type MainContextT = {
relativePath?: string
enterpriseServerReleases: EnterpriseServerReleases
currentPathWithoutLanguage: string
userLanguage: string
allVersions: Record<string, VersionItem>
currentVersion?: string
currentProductTree?: ProductTreeNode | null
Expand Down Expand Up @@ -125,7 +124,6 @@ export type MainContextT = {

status: number
fullUrl: string
isDotComAuthenticated: boolean
}

export const getMainContext = (req: any, res: any): MainContextT => {
Expand Down Expand Up @@ -181,7 +179,6 @@ export const getMainContext = (req: any, res: any): MainContextT => {
'supported',
]),
enterpriseServerVersions: req.context.enterpriseServerVersions,
userLanguage: req.context.userLanguage || '',
allVersions: req.context.allVersions,
currentVersion: req.context.currentVersion,
currentProductTree: req.context.currentProductTree
Expand All @@ -192,7 +189,6 @@ export const getMainContext = (req: any, res: any): MainContextT => {
nonEnterpriseDefaultVersion: req.context.nonEnterpriseDefaultVersion,
status: res.statusCode,
fullUrl: req.protocol + '://' + req.get('host') + req.originalUrl,
isDotComAuthenticated: Boolean(req.cookies.dotcom_user),
}
}

Expand Down
5 changes: 4 additions & 1 deletion components/page-header/Header.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { useVersion } from 'components/hooks/useVersion'

import { Link } from 'components/Link'
import { useMainContext } from 'components/context/MainContext'
import { useAuth } from 'components/context/DotComAuthenticatedContext'
import { LanguagePicker } from './LanguagePicker'
import { HeaderNotifications } from 'components/page-header/HeaderNotifications'
import { ProductPicker } from 'components/page-header/ProductPicker'
Expand All @@ -17,14 +18,16 @@ import styles from './Header.module.scss'

export const Header = () => {
const router = useRouter()
const { isDotComAuthenticated, error } = useMainContext()
const { error } = useMainContext()
const { currentVersion } = useVersion()
const { t } = useTranslation(['header', 'homepage'])
const [isMenuOpen, setIsMenuOpen] = useState(
router.pathname !== '/' && router.query.query && true
)
const [scroll, setScroll] = useState(false)

const { isDotComAuthenticated } = useAuth()

const signupCTAVisible =
!isDotComAuthenticated &&
(currentVersion === 'free-pro-team@latest' || currentVersion === 'enterprise-cloud@latest')
Expand Down
6 changes: 3 additions & 3 deletions components/page-header/HeaderNotifications.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ type Notif = {
export const HeaderNotifications = () => {
const router = useRouter()
const { currentVersion } = useVersion()
const { relativePath, allVersions, data, userLanguage, currentPathWithoutLanguage, page } =
useMainContext()
const { languages } = useLanguages()
const { relativePath, allVersions, data, currentPathWithoutLanguage, page } = useMainContext()
const { languages, userLanguage } = useLanguages()

const { t } = useTranslation('header')

const translationNotices: Array<Notif> = []
Expand Down
2 changes: 0 additions & 2 deletions lib/get-theme.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
// Default CSS theme values. The three fields match the ones the
// full-rendering cache middleware copies from a resolved CSS theme onto
// <body> as `data-color-mode`, `data-dark-theme` and `data-light-theme`.
// NOTE(review): presumably this is the fallback used by `getTheme` when the
// request carries no theme preference — confirm in the rest of this module.
export const defaultCSSTheme = {
colorMode: 'auto', // light, dark, auto
nightTheme: 'dark',
dayTheme: 'light',
}

// export const defaultComponentThemeProps = {
export const defaultComponentTheme = {
colorMode: 'auto', // day, night, auto
nightTheme: 'dark',
Expand Down
35 changes: 8 additions & 27 deletions lib/page.js
Original file line number Diff line number Diff line change
Expand Up @@ -24,22 +24,6 @@ import { union } from 'lodash-es'
// every single time, we turn it into a Set once.
const productMapKeysAsSet = new Set(Object.keys(productMap))

// Memoizing wrapper around renderContent(). Results are stored in a
// module-level Map under `prefix + context.currentPath`; the template text
// and the options are NOT part of the key, so each call site must pass a
// prefix that is unique to what it renders.
const _renderContentCache = new Map()

function renderContentCacheByContext(prefix) {
  return async function (template = '', context = {}, options = {}) {
    const cacheKey = prefix + context.currentPath

    // Warm entry: hand back whatever was rendered the first time.
    if (_renderContentCache.has(cacheKey)) {
      return _renderContentCache.get(cacheKey)
    }

    // Cold entry: render, remember, return.
    const rendered = await renderContent(template, context, options)
    _renderContentCache.set(cacheKey, rendered)
    return rendered
  }
}

class Page {
static async init(opts) {
opts = await Page.read(opts)
Expand Down Expand Up @@ -186,26 +170,26 @@ class Page {
context.englishHeadings = englishHeadings
}

this.intro = await renderContentCacheByContext('intro')(this.rawIntro, context)
this.introPlainText = await renderContentCacheByContext('rawIntro')(this.rawIntro, context, {
this.intro = await renderContent(this.rawIntro, context)
this.introPlainText = await renderContent(this.rawIntro, context, {
textOnly: true,
})
this.title = await renderContentCacheByContext('rawTitle')(this.rawTitle, context, {
this.title = await renderContent(this.rawTitle, context, {
textOnly: true,
encodeEntities: true,
})
this.titlePlainText = await renderContentCacheByContext('titleText')(this.rawTitle, context, {
this.titlePlainText = await renderContent(this.rawTitle, context, {
textOnly: true,
})
this.shortTitle = await renderContentCacheByContext('shortTitle')(this.shortTitle, context, {
this.shortTitle = await renderContent(this.shortTitle, context, {
textOnly: true,
encodeEntities: true,
})

this.product_video = await renderContent(this.raw_product_video, context, { textOnly: true })

context.relativePath = this.relativePath
const html = await renderContentCacheByContext('markdown')(this.markdown, context)
const html = await renderContent(this.markdown, context)

// Adding communityRedirect for Discussions, Sponsors, and Codespaces - request from Product
if (
Expand All @@ -222,15 +206,12 @@ class Page {

// product frontmatter may contain liquid
if (this.rawProduct) {
this.product = await renderContentCacheByContext('product')(this.rawProduct, context)
this.product = await renderContent(this.rawProduct, context)
}

// permissions frontmatter may contain liquid
if (this.rawPermissions) {
this.permissions = await renderContentCacheByContext('permissions')(
this.rawPermissions,
context
)
this.permissions = await renderContent(this.rawPermissions, context)
}

// Learning tracks may contain Liquid and need to have versioning processed.
Expand Down
167 changes: 167 additions & 0 deletions middleware/cache-full-rendering.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
import zlib from 'zlib'

import cheerio from 'cheerio'
import QuickLRU from 'quick-lru'

// This is what NextJS uses when it injects the JSON serialized
// in the `<script id="__NEXT_DATA__">`
import { htmlEscapeJsonString } from 'next/dist/server/htmlescape.js'

import { getTheme } from '../lib/get-theme.js'
import statsd from '../lib/statsd.js'

// Response header reporting how the rendering cache treated the request.
const HEADER_NAME = 'x-middleware-cache'
// Served from the in-memory cheerio cache.
const HEADER_VALUE_HIT = 'hit'
// Not cached yet; the response is rendered normally (and recorded).
const HEADER_VALUE_MISS = 'miss'
// Caching switched off through the DISABLE_RENDERING_CACHE env var.
const HEADER_VALUE_DISABLED = 'disabled'
// Found only in the gzip cache; promoted into the cheerio cache while served.
const HEADER_VALUE_TRANSFERRING = 'transferring'

// The env var is read as JSON (`true`, `false`, `1`, `0`, ...). Unset or
// empty means "not disabled"; a value that is not valid JSON throws at import.
const DISABLE_RENDERING_CACHE = !!JSON.parse(process.env.DISABLE_RENDERING_CACHE || 'false')

// LRU of [cheerio object, headers] tuples keyed by request URL.
const cheerioCache = new QuickLRU({
  // NOTE: Apr 20, when storing about 200 cheerio instances, the total
  // heap size becomes about 2.3GB.
  maxSize: 100,
  // Method shorthand rather than an arrow function: `this` must be the
  // cache instance so its current size can be attached as a tag.
  onEviction() {
    statsd.gauge('rendering_cache_cheerio', process.memoryUsage().heapUsed, [`size:${this.size}`])
  },
})

// LRU of [gzipped html Buffer, headers] tuples keyed by request URL.
const gzipCache = new QuickLRU({
  maxSize: 1000,
  // Method shorthand rather than an arrow function: `this` is the cache
  // instance (i.e. `gzipCache`), whose size is attached as a tag.
  onEviction() {
    statsd.gauge('rendering_cache_gzip', process.memoryUsage().heapUsed, [`size:${this.size}`])
  },
})

/**
 * Express middleware that serves fully rendered HTML pages from memory and,
 * on a miss, records the outgoing response so the next request for the same
 * URL can skip rendering.
 *
 * Every GET response gets an `x-middleware-cache` header set to one of
 * `hit`, `transferring`, `miss` or `disabled`.
 *
 * @param req Express request. Only `GET` requests are considered.
 * @param res Express response. On a miss its `end` method is wrapped so the
 *   body can be captured when the downstream handler finishes.
 * @param next Called for non-GET requests and for cache misses.
 */
export default async function cacheFullRendering(req, res, next) {
  // Even if you use `app.get('/*', myMiddleware)` in Express, the
  // middleware will be executed for HEAD requests.
  if (req.method !== 'GET') return next()

  // Keyed on `req.url` rather than `req.pagePath`: the pagePath is identical
  // for a regular HTML GET and for the matching /_next/data/... URL.
  const key = req.url

  // We have 2 LRU caches.
  //  - Tuples of [cheerio object, headers]
  //  - Tuples of [html gzipped, headers]
  // The reason for having two is that many cheerio objects will
  // significantly bloat the heap memory, whereas storing the
  // html strings as gzip buffers is tiny.
  // The point of using cheerio objects is to avoid deserializing the
  // HTML on every warm hit (e.g. stampeding herd) and only pay
  // for the mutation + serialization which is unavoidable.
  // Since the gzip cache is larger than the cheerio cache,
  // we elevate from one cache to the other. Like layers of caching.

  if (!cheerioCache.has(key) && gzipCache.has(key)) {
    res.setHeader(HEADER_NAME, HEADER_VALUE_TRANSFERRING)
    const [htmlBuffer, headers] = gzipCache.get(key)
    setHeaders(headers, res)
    const html = zlib.gunzipSync(htmlBuffer).toString()
    const $ = cheerio.load(html)
    // Promote to the faster layer before personalizing for this request.
    cheerioCache.set(key, [$, headers])
    mutateCheeriobodyByRequest($, req)
    return res.status(200).send($.html())
  }

  if (cheerioCache.has(key)) {
    res.setHeader(HEADER_NAME, HEADER_VALUE_HIT)
    const [$, headers] = cheerioCache.get(key)
    setHeaders(headers, res)
    mutateCheeriobodyByRequest($, req)
    return res.status(200).send($.html())
  }

  res.setHeader(HEADER_NAME, DISABLE_RENDERING_CACHE ? HEADER_VALUE_DISABLED : HEADER_VALUE_MISS)

  if (!DISABLE_RENDERING_CACHE) {
    const originalEndFunc = res.end.bind(res)
    // `res.end` can be called as end(), end(callback), end(data, callback)
    // or end(data, encoding, callback). Everything after the first argument
    // is forwarded untouched so encodings and completion callbacks survive.
    res.end = function (body, ...rest) {
      // Only a non-empty string or Buffer is a payload worth caching; this
      // also keeps a callback passed as the sole argument out of cheerio.
      const isPayload = (typeof body === 'string' || Buffer.isBuffer(body)) && body.length > 0
      if (isPayload && res.statusCode === 200) {
        // It's important to note that we only cache the HTML outputs.
        // Why? Because JSON outputs should be cached in the CDN.
        // The only JSON outputs we have today are the search API
        // and the NextJS data requests. These are not dependent on the
        // request cookie, so they're primed for caching in the CDN.
        const ct = res.get('content-type')
        // The header may be absent entirely; treat that as "not HTML".
        if (typeof ct === 'string' && ct.startsWith('text/html')) {
          const $ = cheerio.load(body)
          const headers = res.getHeaders()
          cheerioCache.set(key, [$, headers])
          const gzipped = zlib.gzipSync(Buffer.from(body))
          gzipCache.set(key, [gzipped, headers])
        }
        // If it's not HTML or JSON, it's probably an image (binary)
        // or some plain text. Let's ignore all of those.
      }
      return originalEndFunc(body, ...rest)
    }
  }

  next()
}

/**
 * Replays cached response headers onto `res`.
 *
 * The cache-status header and `set-cookie` are never replayed: the former is
 * set per request by the middleware, the latter belongs to whoever made the
 * request that populated the cache.
 *
 * @param headers Object of header name to value, as captured from a response.
 * @param res Response to copy the headers onto.
 */
function setHeaders(headers, res) {
  for (const [name, value] of Object.entries(headers)) {
    if (name === HEADER_NAME || name === 'set-cookie') continue
    res.setHeader(name, value)
  }
}

/**
 * Personalizes a cached cheerio document, in place, for the given request:
 * CSRF token, signed-in flag, user language and theme.
 *
 * @param $ Loaded cheerio document; it is mutated.
 * @param req Express request providing `csrfToken()`, `cookies`, `context`
 *   and whatever `getTheme` reads.
 */
function mutateCheeriobodyByRequest($, req) {
  // Read the JSON held by the <script> matching `selector`, let `patch`
  // edit the parsed object, then write it back escaped the way NextJS does.
  const rewriteJsonScript = (selector, patch) => {
    const script = $(selector)
    console.assert(script.length === 1, 'Not exactly 1')
    // Note, once we upgrade to cheerio >= v1.0.0-rc.11
    // we can access this with `.text()`.
    // See https://github.com/cheeriojs/cheerio/releases/tag/v1.0.0-rc.11
    // and https://github.com/cheeriojs/cheerio/pull/2509
    const payload = JSON.parse(script.get()[0].children[0].data)
    patch(payload)
    script.text(htmlEscapeJsonString(JSON.stringify(payload)))
  }

  // A fresh CSRF token into the <meta> tag
  const csrfToken = req.csrfToken()
  $('meta[name="csrf-token"]').attr('content', csrfToken)

  // Signed in means: has a `dotcom_user` cookie and it's truthy
  const signedIn = Boolean(req.cookies?.dotcom_user)

  const cssTheme = getTheme(req, true)
  const componentTheme = getTheme(req, false)

  // The <body> needs attributes pertaining to the parsed theme.
  // Don't use `$.data()` because it doesn't actually mutate the "DOM"
  // https://github.com/cheeriojs/cheerio/issues/950#issuecomment-274324269
  $('body')
    .attr('data-color-mode', cssTheme.colorMode)
    .attr('data-dark-theme', cssTheme.nightTheme)
    .attr('data-light-theme', cssTheme.dayTheme)

  // Mirror the same request-specific pieces into __NEXT_DATA__
  rewriteJsonScript('script#__NEXT_DATA__', (nextData) => {
    const { props } = nextData
    props.csrfToken = csrfToken
    props.dotComAuthenticatedContext.isDotComAuthenticated = signedIn
    props.languagesContext.userLanguage = req.context.userLanguage
    props.themeProps = {
      colorMode: componentTheme.colorMode,
      nightTheme: componentTheme.nightTheme,
      dayTheme: componentTheme.dayTheme,
    }
  })

  // The <ThemeProvider {...} preventSSRMismatch> component will
  // inject a script tag too that looks like this:
  //
  //  <script
  //    type="application/json"
  //    id="__PRIMER_DATA__">{"resolvedServerColorMode":"night"}</script>
  //
  rewriteJsonScript('script#__PRIMER_DATA__', (primerData) => {
    primerData.resolvedServerColorMode = cssTheme.colorMode === 'dark' ? 'night' : 'day'
  })
}
Loading

0 comments on commit 1850487

Please sign in to comment.