From 2f361ccdbbd11c43495d54f8202919757831a023 Mon Sep 17 00:00:00 2001 From: Ward Peeters Date: Thu, 3 Mar 2022 16:04:01 +0100 Subject: [PATCH] fix(gatsby-core-utils): multiple requests with different outputdir (#35039) --- .../src/__tests__/fetch-remote-file.js | 33 +++++- .../src/fetch-remote-file.ts | 102 +++++++++++------- 2 files changed, 93 insertions(+), 42 deletions(-) diff --git a/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js b/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js index 76b0b4cc8b035..eae06145d79fa 100644 --- a/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js +++ b/packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js @@ -513,7 +513,38 @@ Fetch details: expect(cachedFilePath).toStartWith(path.join(cache.directory, `public`)) expect(gotStream).toBeCalledTimes(1) expect(fs.pathExists).toBeCalledTimes(1) - expect(fs.copy).toBeCalled() + expect(fs.copy).toBeCalledTimes(1) + expect(await fs.pathExists(cachedFilePath)).toBe(true) + global.__GATSBY = currentGlobal + }) + + it(`should not re-download but copy file to public folder when the same url is requested`, async () => { + const currentGlobal = global.__GATSBY + global.__GATSBY = { + root: cache.directory, + } + await fs.ensureDir(path.join(cache.directory, `public`)) + const filePathPromise = fetchRemoteFile({ + url: `http://external.com/dog.jpg?v=4`, + directory: cache.directory, + cacheKey: `4`, + }) + const cachedFilePathPromise = fetchRemoteFile({ + url: `http://external.com/dog.jpg?v=4`, + directory: path.join(cache.directory, `public`), + cacheKey: `4`, + }) + + const [filePath, cachedFilePath] = await Promise.all([ + filePathPromise, + cachedFilePathPromise, + ]) + + expect(filePath).not.toBe(cachedFilePath) + expect(cachedFilePath).toStartWith(path.join(cache.directory, `public`)) + expect(gotStream).toBeCalledTimes(1) + expect(fs.pathExists).toBeCalledTimes(0) + expect(fs.copy).toBeCalledTimes(1) global.__GATSBY = currentGlobal }) diff --git a/packages/gatsby-core-utils/src/fetch-remote-file.ts b/packages/gatsby-core-utils/src/fetch-remote-file.ts index 2f4a2f893db5c..770677ffb7744 100644 --- a/packages/gatsby-core-utils/src/fetch-remote-file.ts +++ b/packages/gatsby-core-utils/src/fetch-remote-file.ts @@ -1,6 +1,6 @@ import fileType from "file-type" import path from "path" -import fs, { pathExists } from "fs-extra" +import fs from "fs-extra" import Queue from "fastq" import { createContentDigest } from "./create-content-digest" import { @@ -49,32 +49,11 @@ export async function fetchRemoteFile( if (await fs.pathExists(cachedPath)) { // If the cached directory is not part of the public directory, we don't need to copy it // as it won't be part of the build. - if ( - !downloadPath.startsWith( - path.join(global.__GATSBY?.root ?? process.cwd(), `public`) - ) - ) { - return cachedPath + if (isPublicPath(downloadPath)) { + return copyCachedPathToDownloadPath({ cachedPath, downloadPath }) } - // Create a mutex to do our copy - we could do a md5 hash check as well but that's also expensive - if (!alreadyCopiedFiles.has(downloadPath)) { - const copyFileMutex = createMutex( - `gatsby-core-utils:copy-fetch:${downloadPath}`, - 200 - ) - await copyFileMutex.acquire() - if (!alreadyCopiedFiles.has(downloadPath)) { - await fs.copy(cachedPath, downloadPath, { - overwrite: true, - }) - } - - alreadyCopiedFiles.add(downloadPath) - await copyFileMutex.release() - } - - return downloadPath + return cachedPath } } } @@ -82,6 +61,39 @@ export async function fetchRemoteFile( return pushTask({ args }) } +function isPublicPath(downloadPath: string): boolean { + return downloadPath.startsWith( + path.join(global.__GATSBY?.root ?? process.cwd(), `public`) + ) +} + +async function copyCachedPathToDownloadPath({ + cachedPath, + downloadPath, +}: { + cachedPath: string + downloadPath: string +}): Promise { + // Create a mutex to do our copy - we could do a md5 hash check as well but that's also expensive + if (!alreadyCopiedFiles.has(downloadPath)) { + const copyFileMutex = createMutex( + `gatsby-core-utils:copy-fetch:${downloadPath}`, + 200 + ) + await copyFileMutex.acquire() + if (!alreadyCopiedFiles.has(downloadPath)) { + await fs.copy(cachedPath, downloadPath, { + overwrite: true, + }) + } + + alreadyCopiedFiles.add(downloadPath) + await copyFileMutex.release() + } + + return downloadPath +} + const queue = Queue( /** * fetchWorker @@ -144,13 +156,33 @@ async function fetchFile({ // Fetch the file. try { - const inFlightValue = getInFlightObject(url, BUILD_ID) - if (inFlightValue) { - return inFlightValue + const digest = createContentDigest(url) + const finalDirectory = excludeDigest + ? fileDirectory + : path.join(fileDirectory, digest) + + if (!name) { + name = getRemoteFileName(url) + } + + if (!ext) { + ext = getRemoteFileExtension(url) } const cachedEntry = await storage.remoteFileInfo.get(url) + const inFlightValue = getInFlightObject(url, BUILD_ID) + if (inFlightValue) { + if (!isPublicPath(finalDirectory)) { + return inFlightValue + } + + return await copyCachedPathToDownloadPath({ + cachedPath: inFlightValue, + downloadPath: createFilePath(finalDirectory, name, ext), + }) + } + // Add htaccess authentication if passed in. This isn't particularly // extensible. We should define a proper API that we validate. const httpOptions: Options = {} @@ -159,18 +191,6 @@ async function fetchFile({ httpOptions.password = auth.htaccess_pass } - if (!name) { - name = getRemoteFileName(url) - } - - if (!ext) { - ext = getRemoteFileExtension(url) - } - - const digest = createContentDigest(url) - const finalDirectory = excludeDigest - ? fileDirectory - : path.join(fileDirectory, digest) await fs.ensureDir(finalDirectory) const tmpFilename = createFilePath(fileDirectory, `tmp-${digest}`, ext) @@ -179,7 +199,7 @@ async function fetchFile({ // See if there's response headers for this url // from a previous request. const headers = { ...httpHeaders } - if (cachedEntry?.headers?.etag && (await pathExists(filename))) { + if (cachedEntry?.headers?.etag && (await fs.pathExists(filename))) { // eslint-disable-next-line @typescript-eslint/no-non-null-assertion headers[`If-None-Match`] = cachedEntry.headers.etag }