fix: extract tarball to temp directory on Windows #2846

Merged
181 changes: 123 additions & 58 deletions lib/install.js
@@ -2,6 +2,8 @@

const fs = require('graceful-fs')
const os = require('os')
const { backOff } = require('exponential-backoff')
const rm = require('rimraf')
const tar = require('tar')
const path = require('path')
const util = require('util')
@@ -98,6 +100,40 @@ async function install (fs, gyp, argv) {
}
}

async function copyDirectory (src, dest) {
try {
await fs.promises.stat(src)
} catch {
throw new Error(`Missing source directory for copy: ${src}`)
}
await fs.promises.mkdir(dest, { recursive: true })
const entries = await fs.promises.readdir(src, { withFileTypes: true })
for (const entry of entries) {
if (entry.isDirectory()) {
await copyDirectory(path.join(src, entry.name), path.join(dest, entry.name))
} else if (entry.isFile()) {
// with parallel installs, copying files may cause file errors on
// Windows so use an exponential backoff to resolve collisions
await backOff(async () => {
try {
await fs.promises.copyFile(path.join(src, entry.name), path.join(dest, entry.name))
} catch (err) {
// if ensure, check if file already exists and that's good enough
if (gyp.opts.ensure && err.code === 'EBUSY') {
try {
await fs.promises.stat(path.join(dest, entry.name))
return
} catch {}
}
throw err
}
})
} else {
throw new Error('Unexpected file directory entry type')
}
}
}

async function go () {
log.verbose('ensuring nodedir is created', devDir)

@@ -118,6 +154,7 @@ async function install (fs, gyp, argv) {

// now download the node tarball
const tarPath = gyp.opts.tarball
let extractErrors = false
let extractCount = 0
const contentShasums = {}
const expectShasums = {}
@@ -136,71 +173,99 @@ async function install (fs, gyp, argv) {
return isValid
}

function onwarn (code, message) {
extractErrors = true
log.error('error while extracting tarball', code, message)
}

// download the tarball and extract!

if (tarPath) {
await tar.extract({
file: tarPath,
strip: 1,
filter: isValid,
cwd: devDir
})
} else {
try {
const res = await download(gyp, release.tarballUrl)
// on Windows there can be file errors from tar if parallel installs
// are happening (not uncommon with multiple native modules) so
// extract the tarball to a temp directory first and then copy over
const tarExtractDir = win ? await fs.promises.mkdtemp(path.join(os.tmpdir(), 'node-gyp-tmp-')) : devDir

if (res.status !== 200) {
throw new Error(`${res.status} response downloading ${release.tarballUrl}`)
}
try {
if (tarPath) {
await tar.extract({
file: tarPath,
strip: 1,
filter: isValid,
onwarn,
cwd: tarExtractDir
})
} else {
try {
const res = await download(gyp, release.tarballUrl)

await streamPipeline(
res.body,
// content checksum
new ShaSum((_, checksum) => {
const filename = path.basename(release.tarballUrl).trim()
contentShasums[filename] = checksum
log.verbose('content checksum', filename, checksum)
}),
tar.extract({
strip: 1,
cwd: devDir,
filter: isValid
})
)
} catch (err) {
// something went wrong downloading the tarball?
if (err.code === 'ENOTFOUND') {
throw new Error('This is most likely not a problem with node-gyp or the package itself and\n' +
'is related to network connectivity. In most cases you are behind a proxy or have bad \n' +
'network settings.')
if (res.status !== 200) {
throw new Error(`${res.status} response downloading ${release.tarballUrl}`)
}

await streamPipeline(
res.body,
// content checksum
new ShaSum((_, checksum) => {
const filename = path.basename(release.tarballUrl).trim()
contentShasums[filename] = checksum
log.verbose('content checksum', filename, checksum)
}),
tar.extract({
strip: 1,
cwd: tarExtractDir,
filter: isValid,
onwarn
})
)
} catch (err) {
// something went wrong downloading the tarball?
if (err.code === 'ENOTFOUND') {
throw new Error('This is most likely not a problem with node-gyp or the package itself and\n' +
'is related to network connectivity. In most cases you are behind a proxy or have bad \n' +
'network settings.')
}
throw err
}
throw err
}
}

// invoked after the tarball has finished being extracted
if (extractCount === 0) {
throw new Error('There was a fatal problem while downloading/extracting the tarball')
}
// invoked after the tarball has finished being extracted
if (extractErrors || extractCount === 0) {
throw new Error('There was a fatal problem while downloading/extracting the tarball')
}

log.verbose('tarball', 'done parsing tarball')

const installVersionPath = path.resolve(tarExtractDir, 'installVersion')
await Promise.all([
// need to download node.lib
...(win ? downloadNodeLib() : []),
// write the "installVersion" file
fs.promises.writeFile(installVersionPath, gyp.package.installVersion + '\n'),
// Only download SHASUMS.txt if we downloaded something in need of SHA verification
...(!tarPath || win ? [downloadShasums()] : [])
])

log.verbose('download contents checksum', JSON.stringify(contentShasums))
// check content shasums
for (const k in contentShasums) {
log.verbose('validating download checksum for ' + k, '(%s == %s)', contentShasums[k], expectShasums[k])
if (contentShasums[k] !== expectShasums[k]) {
throw new Error(k + ' local checksum ' + contentShasums[k] + ' not match remote ' + expectShasums[k])
}
}

log.verbose('tarball', 'done parsing tarball')

const installVersionPath = path.resolve(devDir, 'installVersion')
await Promise.all([
// need to download node.lib
...(win ? downloadNodeLib() : []),
// write the "installVersion" file
fs.promises.writeFile(installVersionPath, gyp.package.installVersion + '\n'),
// Only download SHASUMS.txt if we downloaded something in need of SHA verification
...(!tarPath || win ? [downloadShasums()] : [])
])

log.verbose('download contents checksum', JSON.stringify(contentShasums))
// check content shasums
for (const k in contentShasums) {
log.verbose('validating download checksum for ' + k, '(%s == %s)', contentShasums[k], expectShasums[k])
if (contentShasums[k] !== expectShasums[k]) {
throw new Error(k + ' local checksum ' + contentShasums[k] + ' not match remote ' + expectShasums[k])
// copy over the files from the temp tarball extract directory to devDir
if (tarExtractDir !== devDir) {
await copyDirectory(tarExtractDir, devDir)
Review comment (Member):

Is there a reason we wouldn't want to rename / move here instead of copy? This could still race condition if it's not atomic?

Reply (Contributor, Author):

> Is there a reason we wouldn't want to rename / move here instead of copy?

Since there are parallel installs happening, there's no way to atomically rename to devDir. That directory is created (if it doesn't already exist) at the start of the install process to confirm permissions, so a direct rename of tarExtractDir to devDir would fail. Any rename process would need to first rename devDir, which opens a race condition where devDir disappears momentarily during an ongoing build, causing a failure. That was my original approach for fixing this issue, but I switched to the current implementation to close that race condition.

> This could still race condition if it's not atomic?

The copyFile approach stays similar to the existing implementation, with node-tar overwriting the existing files when there are parallel installs going on. The main difference is that on Windows node-tar always unlinks the destination file path before writing to it to avoid certain conditions (same path multiple times in an archive) which don't exist for this usage.

I've stress tested this change (on CI and locally with a real package) with 35 parallel installs and didn't trip over any race conditions in practice.

}
} finally {
if (tarExtractDir !== devDir) {
try {
// try to cleanup temp dir
await util.promisify(rm)(tarExtractDir)
} catch {
log.warn('failed to clean up temp tarball extract directory')
}
}
}

@@ -232,7 +297,7 @@ async function install (fs, gyp, argv) {
log.verbose('on Windows; need to download `' + release.name + '.lib`...')
const archs = ['ia32', 'x64', 'arm64']
return archs.map(async (arch) => {
const dir = path.resolve(devDir, arch)
const dir = path.resolve(tarExtractDir, arch)
const targetLibPath = path.resolve(dir, release.name + '.lib')
const { libUrl, libPath } = release[arch]
const name = `${arch} ${release.name}.lib`
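To make the new flow easier to follow outside the diff, here is a minimal standalone sketch of the pattern the change introduces: extract the headers tarball into a per-process temp directory, then copy the tree into the shared devDir with exponential backoff to ride out transient Windows file locks. The `copyTree` helper and the `numOfAttempts`/`startingDelay` options are illustrative only; the PR's `copyDirectory` calls `backOff` with the library's defaults and uses rimraf for cleanup to support older Node versions.

```js
// Sketch only (not the PR's exact code): extract to a unique temp dir, then
// copy into the shared devDir, retrying copies that hit transient errors.
const fs = require('fs')
const os = require('os')
const path = require('path')
const tar = require('tar')
const { backOff } = require('exponential-backoff')

async function copyTree (src, dest) {
  await fs.promises.mkdir(dest, { recursive: true })
  for (const entry of await fs.promises.readdir(src, { withFileTypes: true })) {
    const from = path.join(src, entry.name)
    const to = path.join(dest, entry.name)
    if (entry.isDirectory()) {
      await copyTree(from, to)
    } else {
      // retry individual copies that fail with EBUSY/EPERM while another
      // install briefly holds the destination file open
      await backOff(() => fs.promises.copyFile(from, to), {
        numOfAttempts: 5, // illustrative; the PR relies on the library defaults
        startingDelay: 100 // ms before the first retry
      })
    }
  }
}

async function installHeaders (tarball, devDir) {
  // a unique temp dir per install means parallel extractions never write
  // the same destination path at the same time
  const tmp = await fs.promises.mkdtemp(path.join(os.tmpdir(), 'node-gyp-tmp-'))
  try {
    await tar.extract({ file: tarball, strip: 1, cwd: tmp })
    await copyTree(tmp, devDir)
  } finally {
    await fs.promises.rm(tmp, { recursive: true, force: true }) // the PR uses rimraf here
  }
}
```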
5 changes: 3 additions & 2 deletions package.json
@@ -12,7 +12,7 @@
"gyp"
],
"version": "9.3.1",
"installVersion": 9,
"installVersion": 10,
"author": "Nathan Rajlich <nathan@tootallnate.net> (http://tootallnate.net)",
"repository": {
"type": "git",
@@ -23,6 +23,7 @@
"main": "./lib/node-gyp.js",
"dependencies": {
"env-paths": "^2.2.0",
"exponential-backoff": "^3.1.1",
"glob": "^7.1.4",
"graceful-fs": "^4.2.6",
"make-fetch-happen": "^11.0.3",
@@ -45,6 +46,6 @@
},
"scripts": {
"lint": "standard */*.js test/**/*.js",
"test": "npm run lint && tap --timeout=600 test/test-*"
"test": "npm run lint && tap --timeout=1200 test/test-*"
}
}
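The `installVersion` bump from 9 to 10 is what forces already-populated dev directories to pick up the new extraction logic: node-gyp writes an `installVersion` file into devDir and re-installs when the value on disk is lower than `gyp.package.installVersion`. A rough sketch of that check, stated as an assumption about the surrounding install.js code that is not shown in this diff:

```js
// Rough sketch (assumed behavior of install.js outside this diff): a dev dir
// whose installVersion file is older than package.json's value gets reinstalled.
const fs = require('fs')
const path = require('path')

async function needsReinstall (devDir, packageInstallVersion) {
  try {
    const raw = await fs.promises.readFile(path.join(devDir, 'installVersion'), 'ascii')
    return parseInt(raw, 10) < packageInstallVersion
  } catch {
    return true // no installVersion file yet, so install from scratch
  }
}
```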
2 changes: 1 addition & 1 deletion test/test-download.js
@@ -188,7 +188,7 @@ test('download headers (actual)', async (t) => {
await util.promisify(install)(prog, [])

const data = await fs.promises.readFile(path.join(expectedDir, 'installVersion'), 'utf8')
t.strictEqual(data, '9\n', 'correct installVersion')
t.strictEqual(data, '10\n', 'correct installVersion')

const list = await fs.promises.readdir(path.join(expectedDir, 'include/node'))
t.ok(list.includes('common.gypi'))
86 changes: 85 additions & 1 deletion test/test-install.js
@@ -1,8 +1,16 @@
'use strict'

const { test } = require('tap')
const { test: { install } } = require('../lib/install')
const path = require('path')
const os = require('os')
const util = require('util')
const { test: { download, install } } = require('../lib/install')
const rimraf = require('rimraf')
const gyp = require('../lib/node-gyp')
const log = require('npmlog')
const semver = require('semver')
const stream = require('stream')
const streamPipeline = util.promisify(stream.pipeline)

log.level = 'error' // we expect a warning

@@ -44,3 +52,79 @@ test('EACCES retry once', async (t) => {
}
}
})

// only run these tests if we are running a version of Node with predictable version path behavior
const skipParallelInstallTests = process.env.FAST_TEST ||
process.release.name !== 'node' ||
semver.prerelease(process.version) !== null ||
semver.satisfies(process.version, '<10')

async function parallelInstallsTest (t, fs, devDir, prog) {
if (skipParallelInstallTests) {
return t.skip('Skipping parallel installs test due to test environment configuration')
}

t.tearDown(async () => {
await util.promisify(rimraf)(devDir)
})

const expectedDir = path.join(devDir, process.version.replace(/^v/, ''))
await util.promisify(rimraf)(expectedDir)

await Promise.all([
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, []),
install(fs, prog, [])
])
}

test('parallel installs (ensure=true)', async (t) => {
const fs = require('graceful-fs')
const devDir = await util.promisify(fs.mkdtemp)(path.join(os.tmpdir(), 'node-gyp-test-'))

const prog = gyp()
prog.parseArgv([])
prog.devDir = devDir
prog.opts.ensure = true
log.level = 'warn'

await parallelInstallsTest(t, fs, devDir, prog)
})

test('parallel installs (ensure=false)', async (t) => {
const fs = require('graceful-fs')
const devDir = await util.promisify(fs.mkdtemp)(path.join(os.tmpdir(), 'node-gyp-test-'))

const prog = gyp()
prog.parseArgv([])
prog.devDir = devDir
prog.opts.ensure = false
log.level = 'warn'

await parallelInstallsTest(t, fs, devDir, prog)
})

test('parallel installs (tarball)', async (t) => {
const fs = require('graceful-fs')
const devDir = await util.promisify(fs.mkdtemp)(path.join(os.tmpdir(), 'node-gyp-test-'))

const prog = gyp()
prog.parseArgv([])
prog.devDir = devDir
prog.opts.tarball = path.join(devDir, 'node-headers.tar.gz')
log.level = 'warn'

await streamPipeline(
(await download(prog, 'https://nodejs.org/dist/v16.0.0/node-v16.0.0.tar.gz')).body,
fs.createWriteStream(prog.opts.tarball)
)

await parallelInstallsTest(t, fs, devDir, prog)
})
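
For context, a minimal sketch (not part of the PR or its tests) of the failure mode these parallel-install tests exercise: two tar extractions racing into the same directory. On Windows, node-tar unlinks and rewrites the same destination paths in both extractions, which can intermittently fail with EBUSY or EPERM; extracting into per-process temp directories, as this PR does, keeps those paths private during extraction.

```js
// Hypothetical repro of the pre-PR failure mode: concurrent extractions of
// the same tarball into one shared directory.
const tar = require('tar')

async function racyExtract (tarball, sharedDir) {
  await Promise.all([
    tar.extract({ file: tarball, strip: 1, cwd: sharedDir }),
    tar.extract({ file: tarball, strip: 1, cwd: sharedDir })
  ])
}
```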