Skip to content

Commit

Permalink
fix(gatsby-core-utils): multiple requests with different outputdir (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
wardpeet authored Mar 3, 2022
1 parent f21a63b commit 2f361cc
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 42 deletions.
33 changes: 32 additions & 1 deletion packages/gatsby-core-utils/src/__tests__/fetch-remote-file.js
Original file line number Diff line number Diff line change
Expand Up @@ -513,7 +513,38 @@ Fetch details:
expect(cachedFilePath).toStartWith(path.join(cache.directory, `public`))
expect(gotStream).toBeCalledTimes(1)
expect(fs.pathExists).toBeCalledTimes(1)
expect(fs.copy).toBeCalled()
expect(fs.copy).toBeCalledTimes(1)
expect(await fs.pathExists(cachedFilePath)).toBe(true)
global.__GATSBY = currentGlobal
})

it(`should not re-download but copy file to public folder when the same url is requested`, async () => {
const currentGlobal = global.__GATSBY
global.__GATSBY = {
root: cache.directory,
}
await fs.ensureDir(path.join(cache.directory, `public`))
const filePathPromise = fetchRemoteFile({
url: `http://external.com/dog.jpg?v=4`,
directory: cache.directory,
cacheKey: `4`,
})
const cachedFilePathPromise = fetchRemoteFile({
url: `http://external.com/dog.jpg?v=4`,
directory: path.join(cache.directory, `public`),
cacheKey: `4`,
})

const [filePath, cachedFilePath] = await Promise.all([
filePathPromise,
cachedFilePathPromise,
])

expect(filePath).not.toBe(cachedFilePath)
expect(cachedFilePath).toStartWith(path.join(cache.directory, `public`))
expect(gotStream).toBeCalledTimes(1)
expect(fs.pathExists).toBeCalledTimes(0)
expect(fs.copy).toBeCalledTimes(1)
global.__GATSBY = currentGlobal
})

Expand Down
102 changes: 61 additions & 41 deletions packages/gatsby-core-utils/src/fetch-remote-file.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import fileType from "file-type"
import path from "path"
import fs, { pathExists } from "fs-extra"
import fs from "fs-extra"
import Queue from "fastq"
import { createContentDigest } from "./create-content-digest"
import {
Expand Down Expand Up @@ -49,39 +49,51 @@ export async function fetchRemoteFile(
if (await fs.pathExists(cachedPath)) {
// If the cached directory is not part of the public directory, we don't need to copy it
// as it won't be part of the build.
if (
!downloadPath.startsWith(
path.join(global.__GATSBY?.root ?? process.cwd(), `public`)
)
) {
return cachedPath
if (isPublicPath(downloadPath)) {
return copyCachedPathToDownloadPath({ cachedPath, downloadPath })
}

// Create a mutex to do our copy - we could do a md5 hash check as well but that's also expensive
if (!alreadyCopiedFiles.has(downloadPath)) {
const copyFileMutex = createMutex(
`gatsby-core-utils:copy-fetch:${downloadPath}`,
200
)
await copyFileMutex.acquire()
if (!alreadyCopiedFiles.has(downloadPath)) {
await fs.copy(cachedPath, downloadPath, {
overwrite: true,
})
}

alreadyCopiedFiles.add(downloadPath)
await copyFileMutex.release()
}

return downloadPath
return cachedPath
}
}
}

return pushTask({ args })
}

function isPublicPath(downloadPath: string): boolean {
return downloadPath.startsWith(
path.join(global.__GATSBY?.root ?? process.cwd(), `public`)
)
}

async function copyCachedPathToDownloadPath({
cachedPath,
downloadPath,
}: {
cachedPath: string
downloadPath: string
}): Promise<string> {
// Create a mutex to do our copy - we could do a md5 hash check as well but that's also expensive
if (!alreadyCopiedFiles.has(downloadPath)) {
const copyFileMutex = createMutex(
`gatsby-core-utils:copy-fetch:${downloadPath}`,
200
)
await copyFileMutex.acquire()
if (!alreadyCopiedFiles.has(downloadPath)) {
await fs.copy(cachedPath, downloadPath, {
overwrite: true,
})
}

alreadyCopiedFiles.add(downloadPath)
await copyFileMutex.release()
}

return downloadPath
}

const queue = Queue<null, ITask, string>(
/**
* fetchWorker
Expand Down Expand Up @@ -144,13 +156,33 @@ async function fetchFile({

// Fetch the file.
try {
const inFlightValue = getInFlightObject(url, BUILD_ID)
if (inFlightValue) {
return inFlightValue
const digest = createContentDigest(url)
const finalDirectory = excludeDigest
? fileDirectory
: path.join(fileDirectory, digest)

if (!name) {
name = getRemoteFileName(url)
}

if (!ext) {
ext = getRemoteFileExtension(url)
}

const cachedEntry = await storage.remoteFileInfo.get(url)

const inFlightValue = getInFlightObject(url, BUILD_ID)
if (inFlightValue) {
if (!isPublicPath(finalDirectory)) {
return inFlightValue
}

return await copyCachedPathToDownloadPath({
cachedPath: inFlightValue,
downloadPath: createFilePath(finalDirectory, name, ext),
})
}

// Add htaccess authentication if passed in. This isn't particularly
// extensible. We should define a proper API that we validate.
const httpOptions: Options = {}
Expand All @@ -159,18 +191,6 @@ async function fetchFile({
httpOptions.password = auth.htaccess_pass
}

if (!name) {
name = getRemoteFileName(url)
}

if (!ext) {
ext = getRemoteFileExtension(url)
}

const digest = createContentDigest(url)
const finalDirectory = excludeDigest
? fileDirectory
: path.join(fileDirectory, digest)
await fs.ensureDir(finalDirectory)

const tmpFilename = createFilePath(fileDirectory, `tmp-${digest}`, ext)
Expand All @@ -179,7 +199,7 @@ async function fetchFile({
// See if there's response headers for this url
// from a previous request.
const headers = { ...httpHeaders }
if (cachedEntry?.headers?.etag && (await pathExists(filename))) {
if (cachedEntry?.headers?.etag && (await fs.pathExists(filename))) {
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
headers[`If-None-Match`] = cachedEntry.headers.etag
}
Expand Down

0 comments on commit 2f361cc

Please sign in to comment.