Skip to content

Commit

Permalink
warmup remotejson cache (#36116)
Browse files Browse the repository at this point in the history
  • Loading branch information
peterbe authored May 2, 2023
1 parent be73901 commit 1778d58
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 10 deletions.
43 changes: 43 additions & 0 deletions .github/actions/warmup-remotejson-cache/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Composite action that pre-populates the `.remotejson-cache` disk cache used
# by the getRemoteJSON middleware, so deploys don't rely on network fetches.
name: Warmup getRemoteJSON's cache

description: Run the script that prepares the disk-cache for getRemoteJSON

inputs:
  restore-only:
    description: Only attempt to restore, don't warm up
    required: false

runs:
  using: 'composite'
  steps:
    # The caching technique here is to unboundedly add and add to the cache.
    # You "wrap" the step that appends to disk and it will possibly retrieve
    # some from the cache, then save it when it's got more in it.
    - name: Cache .remotejson-cache (restore)
      # You can't use a SHA on these. Only possible with `actions/cache@SHA...`
      uses: actions/cache/restore@v3
      with:
        path: .remotejson-cache
        # NOTE(review): the restore `key` is a bare prefix that will never
        # exactly match the per-SHA save keys below; restores work through
        # the `restore-keys` prefix match — confirm this is intentional.
        key: remotejson-cache-
        restore-keys: remotejson-cache-

    # When we use this composite action from the workflows like
    # Azure Preview Deploy and Azure Production Deploy, we don't have
    # any Node installed or any of its packages. I.e. we never
    # run `npm ci` in those actions, for security's sake.
    # So we can't do things that require Node code.
    # Tests and others will omit the `restore-only` input, but
    # prepping for Docker build and push will set it to a non-empty
    # string which basically means "If you can restore it, great.
    # If not, that's fine, don't bother".
    - name: Run script
      if: ${{ inputs.restore-only == '' }}
      shell: bash
      run: node script/warmup-remotejson.js

    # Save under a per-commit key so each run's (possibly grown) cache is kept.
    - name: Cache .remotejson-cache (save)
      if: ${{ inputs.restore-only == '' }}
      uses: actions/cache/save@v3
      with:
        path: .remotejson-cache
        key: remotejson-cache-${{ github.sha }}
4 changes: 4 additions & 0 deletions .github/workflows/azure-preview-env-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,10 @@ jobs:
rsync -rptovR ./user-code/pages/./**/*.tsx ./pages
rsync -rptovR ./user-code/stylesheets/./**/*.scss ./stylesheets
- uses: ./.github/actions/warmup-remotejson-cache
with:
restore-only: true

# In addition to making the final image smaller, we also save time by not sending unnecessary files to the docker build context
- name: 'Prune for preview env'
run: .github/actions-scripts/prune-for-preview-env.sh
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/azure-prod-build-deploy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,10 @@ jobs:
- name: Merge docs-early-access repo's folders
run: .github/actions-scripts/merge-early-access.sh

- uses: ./.github/actions/warmup-remotejson-cache
with:
restore-only: true

- uses: ./.github/actions/clone-translations
with:
token: ${{ secrets.DOCUBOT_REPO_PAT }}
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/keep-caches-warm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,5 @@ jobs:

- name: Build
run: npm run build

- uses: ./.github/actions/warmup-remotejson-cache
12 changes: 4 additions & 8 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,10 @@ jobs:
- name: Run build script
run: npm run build

- name: Disk cache used by getRemoteJSON function in middleware
uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
with:
path: .remotejson-cache
# Very liberal cache key. Note, for this to become populated
# for other branches, you have to manually run this workflow
# at least once using the "Run workflow" button.
key: ${{ runner.os }}-remotejson
- uses: ./.github/actions/warmup-remotejson-cache
# Only the 'routing' tests include end-to-end tests about
# archived enterprise server URLs.
if: ${{ matrix.name == 'routing' }}

- name: Index fixtures into the local Elasticsearch
# For the sake of saving time, only run this step if the group
Expand Down
3 changes: 3 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ COPY pages ./pages
COPY components ./components
COPY lib ./lib
COPY src ./src
# The star is because it's an optional directory
COPY .remotejson-cache* ./.remotejson-cache
# Certain content is necessary for being able to build
COPY content/index.md ./content/index.md
COPY content/rest ./content/rest
Expand Down Expand Up @@ -90,6 +92,7 @@ COPY --chown=node:node assets ./assets
COPY --chown=node:node content ./content
COPY --chown=node:node lib ./lib
COPY --chown=node:node src ./src
COPY --chown=node:node .remotejson-cache* ./.remotejson-cache
COPY --chown=node:node middleware ./middleware
COPY --chown=node:node data ./data
COPY --chown=node:node next.config.js ./
Expand Down
8 changes: 6 additions & 2 deletions middleware/get-remote-json.js
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ export default async function getRemoteJSON(url, config) {
const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'

const onDisk = path.join(ROOT, `${tempFilename}.json`)
// Never even try reading from disk in production.
if (!inProd && fs.existsSync(onDisk)) {

try {
const body = fs.readFileSync(onDisk, 'utf-8')
// It might exist on disk, but it could be empty
if (body) {
Expand All @@ -58,6 +58,10 @@ export default async function getRemoteJSON(url, config) {
}
}
}
} catch (error) {
if (!(error instanceof SyntaxError || error.code === 'ENOENT')) {
throw error
}
}

if (!foundOnDisk) {
Expand Down
71 changes: 71 additions & 0 deletions script/warmup-remotejson.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env node

// [start-readme]
//
// This calls a function directly that is used by our archived enterprise
// middleware. Namely, the `getRemoteJSON` function. That function is
// able to use the disk to cache responses quite aggressively. So when
// it's been run once, with the same disk, next time it can draw from disk
// rather than having to rely on network.
//
// We have this script to avoid excessive network fetches in production
// where, due to production deploys restarting new Node services, we
// can't rely on in-memory caching often enough.
//
// The list of URLs hardcoded in here is based on analyzing the URLs that
// were logged as tags in Datadog for entries that couldn't rely on
// in-memory cache.
//
// [end-readme]

import { program } from 'commander'
import semver from 'semver'

import getRemoteJSON from '../middleware/get-remote-json.js'
import {
deprecated,
firstReleaseStoredInBlobStorage,
lastVersionWithoutArchivedRedirectsFile,
} from '../lib/enterprise-server-releases.js'

// Register the CLI description so `--help` is meaningful; commander also
// handles unknown-flag errors for us.
program
  .description(
    "Visit a bunch of archived redirects.json URLs to warm up getRemoteJSON's disk cache"
  )
  .parse(process.argv)

// Don't leave the promise floating: an unhandled rejection could be missed by
// CI. Surface any failure as a non-zero exit so the calling workflow step
// fails loudly.
main().catch((error) => {
  console.error(error)
  process.exit(1)
})

// Return the redirects.json URL for a deprecated enterprise version.
// Versions at or above `firstReleaseStoredInBlobStorage` are served from
// Azure blob storage; older ones from the GitHub Pages archive repo.
function version2url(version) {
  const coerced = semver.coerce(version).raw
  const threshold = semver.coerce(firstReleaseStoredInBlobStorage).raw
  if (semver.gte(coerced, threshold)) {
    return `https://githubdocs.azureedge.net/enterprise/${version}/redirects.json`
  }
  return `https://github.github.com/help-docs-archived-enterprise-versions/${version}/redirects.json`
}

// True when `version` is exactly `lastVersionWithoutArchivedRedirectsFile`.
// NOTE(review): despite the "with" in the name, this compares against the last
// version *without* an archived redirects file — used by main() as the loop's
// stop sentinel; confirm the name matches the intent.
function withArchivedRedirectsFile(version) {
  const candidate = semver.coerce(version).raw
  const sentinel = semver.coerce(lastVersionWithoutArchivedRedirectsFile).raw
  return semver.eq(candidate, sentinel)
}

// Warm up getRemoteJSON's disk cache by fetching redirects.json for each
// deprecated enterprise version, in parallel.
async function main() {
  // Collect URLs until we hit the sentinel version; presumably `deprecated`
  // is ordered so everything after it has no redirects file to fetch —
  // TODO confirm against lib/enterprise-server-releases.js.
  const urls = []
  for (const version of deprecated) {
    if (withArchivedRedirectsFile(version)) break
    urls.push(version2url(version))
  }

  const config = {
    retry: { limit: 3 },
    timeout: { response: 1000 },
  }

  const label = `Time to fetch ${urls.length} URLs`
  console.time(label)
  await Promise.all(urls.map((url) => getRemoteJSON(url, config)))
  console.timeEnd(label)
}

0 comments on commit 1778d58

Please sign in to comment.