From 1778d583782900c652ef65f597ffc8a71058d218 Mon Sep 17 00:00:00 2001
From: Peter Bengtsson
Date: Tue, 2 May 2023 07:41:43 -0400
Subject: [PATCH] warmup remotejson cache (#36116)

---
 .../warmup-remotejson-cache/action.yml        | 43 +++++++++++
 .../workflows/azure-preview-env-deploy.yml    |  4 ++
 .github/workflows/azure-prod-build-deploy.yml |  4 ++
 .github/workflows/keep-caches-warm.yml        |  2 +
 .github/workflows/test.yml                    | 12 ++--
 Dockerfile                                    |  3 +
 middleware/get-remote-json.js                 |  8 ++-
 script/warmup-remotejson.js                   | 71 +++++++++++++++++++
 8 files changed, 137 insertions(+), 10 deletions(-)
 create mode 100644 .github/actions/warmup-remotejson-cache/action.yml
 create mode 100755 script/warmup-remotejson.js

diff --git a/.github/actions/warmup-remotejson-cache/action.yml b/.github/actions/warmup-remotejson-cache/action.yml
new file mode 100644
index 000000000000..02b1c916fe9c
--- /dev/null
+++ b/.github/actions/warmup-remotejson-cache/action.yml
@@ -0,0 +1,43 @@
+name: Warmup getRemoteJSON's cache
+
+description: Run the script that prepares the disk-cache for getRemoteJSON
+
+inputs:
+  restore-only:
+    description: Only attempt to restore, don't warm up
+    required: false
+
+runs:
+  using: 'composite'
+  steps:
+    # The caching technique here is to keep adding to the cache without bound.
+    # You "wrap" the step that appends to disk: it may first restore entries
+    # from the cache, then save the cache again once it contains more.
+    - name: Cache .remotejson-cache (restore)
+      # You can't use a SHA on these. Only possible with `actions/cache@SHA...`
+      uses: actions/cache/restore@v3
+      with:
+        path: .remotejson-cache
+        key: remotejson-cache-
+        restore-keys: remotejson-cache-
+
+    # When we use this composite action from workflows like
+    # Azure Preview Deploy and Azure Production Deploy, we don't have
+    # Node installed or any of its packages, i.e. we never
+    # run `npm ci` in those actions, for security's sake.
+    # So we can't do anything that requires Node code.
+    # Tests and other workflows omit the `restore-only` input, but
+    # prepping for Docker build and push sets it to a non-empty
+    # string, which basically means "If you can restore it, great.
+    # If not, that's fine, don't bother".
+    - name: Run script
+      if: ${{ inputs.restore-only == '' }}
+      shell: bash
+      run: node script/warmup-remotejson.js
+
+    - name: Cache .remotejson-cache (save)
+      if: ${{ inputs.restore-only == '' }}
+      uses: actions/cache/save@v3
+      with:
+        path: .remotejson-cache
+        key: remotejson-cache-${{ github.sha }}
diff --git a/.github/workflows/azure-preview-env-deploy.yml b/.github/workflows/azure-preview-env-deploy.yml
index 1aa78f242f19..20f417b0f662 100644
--- a/.github/workflows/azure-preview-env-deploy.yml
+++ b/.github/workflows/azure-preview-env-deploy.yml
@@ -175,6 +175,10 @@ jobs:
           rsync -rptovR ./user-code/pages/./**/*.tsx ./pages
           rsync -rptovR ./user-code/stylesheets/./**/*.scss ./stylesheets
 
+      - uses: ./.github/actions/warmup-remotejson-cache
+        with:
+          restore-only: true
+
       # In addition to making the final image smaller, we also save time by not sending unnecessary files to the docker build context
       - name: 'Prune for preview env'
         run: .github/actions-scripts/prune-for-preview-env.sh
diff --git a/.github/workflows/azure-prod-build-deploy.yml b/.github/workflows/azure-prod-build-deploy.yml
index efbe0298e6e3..f33ef78a201d 100644
--- a/.github/workflows/azure-prod-build-deploy.yml
+++ b/.github/workflows/azure-prod-build-deploy.yml
@@ -71,6 +71,10 @@ jobs:
       - name: Merge docs-early-access repo's folders
         run: .github/actions-scripts/merge-early-access.sh
 
+      - uses: ./.github/actions/warmup-remotejson-cache
+        with:
+          restore-only: true
+
       - uses: ./.github/actions/clone-translations
         with:
           token: ${{ secrets.DOCUBOT_REPO_PAT }}
diff --git a/.github/workflows/keep-caches-warm.yml b/.github/workflows/keep-caches-warm.yml
index 4cf202512348..5dd816a7ea76 100644
--- a/.github/workflows/keep-caches-warm.yml
+++ b/.github/workflows/keep-caches-warm.yml
@@ -33,3 +33,5 @@ jobs:
 
       - name: Build
         run: npm run build
+
+      - uses: ./.github/actions/warmup-remotejson-cache
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 00a7bcfaf4b8..0050dad1c905 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -153,14 +153,10 @@ jobs:
       - name: Run build script
         run: npm run build
 
-      - name: Disk cache used by getRemoteJSON function in middleware
-        uses: actions/cache@88522ab9f39a2ea568f7027eddc7d8d8bc9d59c8
-        with:
-          path: .remotejson-cache
-          # Very liberal cache key. Note, for this to become populated
-          # for other branches, you have to manually run this workflow
-          # at least once using the "Run workflow" button.
-          key: ${{ runner.os }}-remotejson
+      - uses: ./.github/actions/warmup-remotejson-cache
+        # Only the 'routing' tests include end-to-end tests about
+        # archived enterprise server URLs.
+        if: ${{ matrix.name == 'routing' }}
 
       - name: Index fixtures into the local Elasticsearch
         # For the sake of saving time, only run this step if the group
diff --git a/Dockerfile b/Dockerfile
index 98cb084310f3..759e2674d8bf 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,6 +49,8 @@ COPY pages ./pages
 COPY components ./components
 COPY lib ./lib
 COPY src ./src
+# The star is because it's an optional directory
+COPY .remotejson-cache* ./.remotejson-cache
 # Certain content is necessary for being able to build
 COPY content/index.md ./content/index.md
 COPY content/rest ./content/rest
@@ -90,6 +92,7 @@ COPY --chown=node:node assets ./assets
 COPY --chown=node:node content ./content
 COPY --chown=node:node lib ./lib
 COPY --chown=node:node src ./src
+COPY --chown=node:node .remotejson-cache* ./.remotejson-cache
 COPY --chown=node:node middleware ./middleware
 COPY --chown=node:node data ./data
 COPY --chown=node:node next.config.js ./
diff --git a/middleware/get-remote-json.js b/middleware/get-remote-json.js
index e0ea61b19e49..404c4a308b6c 100644
--- a/middleware/get-remote-json.js
+++ b/middleware/get-remote-json.js
@@ -42,8 +42,8 @@ export default async function getRemoteJSON(url, config) {
   const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'
   const onDisk = path.join(ROOT, `${tempFilename}.json`)
 
-  // Never even try reading from disk in production.
-  if (!inProd && fs.existsSync(onDisk)) {
+
+  try {
     const body = fs.readFileSync(onDisk, 'utf-8')
     // It might exist on disk, but it could be empty
     if (body) {
@@ -58,6 +58,10 @@ export default async function getRemoteJSON(url, config) {
         }
       }
     }
+  } catch (error) {
+    if (!(error instanceof SyntaxError || error.code === 'ENOENT')) {
+      throw error
+    }
   }
 
   if (!foundOnDisk) {
diff --git a/script/warmup-remotejson.js b/script/warmup-remotejson.js
new file mode 100755
index 000000000000..5ae39cd8b355
--- /dev/null
+++ b/script/warmup-remotejson.js
@@ -0,0 +1,71 @@
+#!/usr/bin/env node
+
+// [start-readme]
+//
+// This directly calls a function used by our archived enterprise
+// middleware, namely the `getRemoteJSON` function. That function can
+// cache responses on disk quite aggressively. So once it has been run
+// with the same disk, the next run can read from disk rather than
+// having to rely on the network.
+//
+// We have this script to avoid excessive network fetches in production,
+// where deploys restart the Node services, so we can't rely on
+// in-memory caching often enough.
+//
+// The list of URLs hardcoded in here is based on analyzing the URLs that
+// were logged as tags in Datadog for entries that couldn't rely on
+// the in-memory cache.
+//
+// [end-readme]
+
+import { program } from 'commander'
+import semver from 'semver'
+
+import getRemoteJSON from '../middleware/get-remote-json.js'
+import {
+  deprecated,
+  firstReleaseStoredInBlobStorage,
+  lastVersionWithoutArchivedRedirectsFile,
+} from '../lib/enterprise-server-releases.js'
+
+program
+  .description(
+    "Visit a bunch of archived redirects.json URLs to warm up getRemoteJSON's disk cache"
+  )
+  .parse(process.argv)
+
+main()
+
+function version2url(version) {
+  const inBlobStorage = semver.gte(
+    semver.coerce(version).raw,
+    semver.coerce(firstReleaseStoredInBlobStorage).raw
+  )
+  return inBlobStorage
+    ? `https://githubdocs.azureedge.net/enterprise/${version}/redirects.json`
+    : `https://github.github.com/help-docs-archived-enterprise-versions/${version}/redirects.json`
+}
+
+function withArchivedRedirectsFile(version) {
+  return semver.eq(
+    semver.coerce(version).raw,
+    semver.coerce(lastVersionWithoutArchivedRedirectsFile).raw
+  )
+}
+
+async function main() {
+  const urls = []
+  for (const version of deprecated) {
+    if (withArchivedRedirectsFile(version)) {
+      break
+    }
+    urls.push(version2url(version))
+  }
+  const config = {
+    retry: { limit: 3 },
+    timeout: { response: 1000 },
+  }
+  console.time(`Time to fetch ${urls.length} URLs`)
+  await Promise.all(urls.map((url) => getRemoteJSON(url, config)))
+  console.timeEnd(`Time to fetch ${urls.length} URLs`)
+}
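
Note: the get-remote-json.js hunks above only show the read half of the disk cache. The sketch below illustrates the read-through pattern the warmup relies on, under stated assumptions: the function name getCachedJSON, the md5-derived filename, the fetcher callback, and the write-back step are illustrative, not the repo's exact implementation. The error handling mirrors the diff: a missing file (ENOENT) or an empty/corrupt one (SyntaxError) counts as a cache miss, and anything else is re-thrown.

// Minimal sketch (assumed shape, not the repo's exact code) of a
// read-through disk cache like the one getRemoteJSON uses.
import fs from 'fs'
import path from 'path'
import crypto from 'crypto'

const ROOT = process.env.GET_REMOTE_JSON_DISK_CACHE_ROOT || '.remotejson-cache'

export async function getCachedJSON(url, fetcher) {
  // Derive a stable filename from the URL (the hashing choice is an assumption).
  const hash = crypto.createHash('md5').update(url).digest('hex')
  const onDisk = path.join(ROOT, `${hash}.json`)

  // Read path: tolerate a missing file (ENOENT) and an empty or corrupt one
  // (SyntaxError from JSON.parse); any other error is a real failure.
  try {
    const body = fs.readFileSync(onDisk, 'utf-8')
    if (body) {
      return JSON.parse(body)
    }
  } catch (error) {
    if (!(error instanceof SyntaxError || error.code === 'ENOENT')) {
      throw error
    }
  }

  // Miss: fetch over the network, then write back so the next process
  // (or a Docker image that COPYs .remotejson-cache) can skip the fetch.
  const data = await fetcher(url)
  fs.mkdirSync(ROOT, { recursive: true })
  fs.writeFileSync(onDisk, JSON.stringify(data), 'utf-8')
  return data
}

A caller would supply its own fetcher, for example await getCachedJSON(url, (u) => fetch(u).then((res) => res.json())), which is what lets the warmup script populate the same directory that the Dockerfile later copies into the image.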