-
-
Notifications
You must be signed in to change notification settings - Fork 34.6k
fix: eliminate TOCTOU Race Condition (Hackerone report #3407207) #61664
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
giulioAZ
wants to merge
1
commit into
nodejs:main
Choose a base branch
from
giulioAZ:fix-worker-cwd-race-condition
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+227
−1
Open
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,226 @@ | ||
| import { Worker, isMainThread, workerData, threadName } from 'worker_threads' | ||
| import fs from 'fs/promises' | ||
| import path from 'path' | ||
|
|
||
| // Number of iterations for each test | ||
| const n = 1000; | ||
|
|
||
| // Artificial delays to add in mock process | ||
| const mockDelayMs = 1; | ||
|
|
||
| async function main() { | ||
| const { barrier, mockCWD, mockCWDCount } = await setup() | ||
| if (isMainThread) { | ||
| console.log("Node", process.version) | ||
| } | ||
| test("real process", barrier, process) | ||
| test("mock process", barrier, fakeProcess(mockCWDCount, mockCWD)) | ||
| test("mock fixed process", barrier, fixedProcess(mockCWDCount, mockCWD)) | ||
| } | ||
|
|
||
| function test(label: string, barrier: Barrier, testProcess: TestProcess) { | ||
| let racesWon = 0; | ||
| let problems = 0; | ||
|
|
||
| for (let i = 0; i < n; i++) { | ||
| const expected = dir(i) | ||
| let first = '' | ||
|
|
||
| barrier.wait() | ||
|
|
||
| // The race! | ||
| // main thread changes directory while tester checks what the directory is | ||
| if (isMainThread) { | ||
| testProcess.chdir(expected) | ||
| } else if (threadName === 'tester') { | ||
| first = testProcess.cwd() | ||
| } | ||
|
|
||
| barrier.wait() | ||
|
|
||
| // We know the main thread has completed chdir so tester cwd should equal expected, | ||
| // but we think there's a TOCTOU bug. If tester won the race and checked cwd simultaneously | ||
| // with chdir then cwd will continue to return the stale value. | ||
| if (threadName === 'tester') { | ||
| const actual = testProcess.cwd() | ||
| if (first != expected) { | ||
| racesWon++ | ||
| if (actual == first) problems++ | ||
| } else { | ||
| if (actual != expected) throw Error("Bug in tests") | ||
| } | ||
| } | ||
| } | ||
|
|
||
| //barrier.wait() | ||
|
|
||
| if (threadName === 'tester') { | ||
| console.log("Test case:", label) | ||
| const wonPercent = (racesWon * 100 / n).toFixed(2) | ||
| if (racesWon == 0) { | ||
| console.log(" ⚠️The tester never called cwd before main thread chdir.") | ||
| console.log(" Results are inconclusive, bad luck or bad test code.") | ||
| } else if (problems > 0) { | ||
| const problemPercent = (problems * 100 / racesWon).toFixed(2) | ||
| console.log(` ❌ cwd in worker thread reflected stale value`) | ||
| console.log(` errors: ${problemPercent}% ${problems}/${racesWon}`) | ||
| } else { | ||
| const percent = (racesWon * 100 / n).toFixed(2) | ||
| console.log(` ✅ cwd in worker thread always had expected value`) | ||
| } | ||
| console.log(` races won: ${wonPercent}% ${racesWon}/${n}`) | ||
| } | ||
| } | ||
|
|
||
| // Create directories to chdir to, start worker threads, and create shared resources. | ||
| async function setup() { | ||
| const shraedMemSize = Barrier.BYTES_PER_ELEMENT * 1 + Int32Array.BYTES_PER_ELEMENT * 2 | ||
| const sharedBuffer = isMainThread ? new SharedArrayBuffer(shraedMemSize) : workerData.sharedBuffer; | ||
| let off = 0; | ||
|
|
||
| const barrier = new Barrier(sharedBuffer, off, 2); | ||
| off += Barrier.BYTES_PER_ELEMENT; | ||
|
|
||
| const mockCWDCount = new Int32Array(sharedBuffer, off, 1) | ||
| off += mockCWDCount.byteLength | ||
|
|
||
| const mockCWD = new Int32Array(sharedBuffer, off, 1) | ||
| off += mockCWD.byteLength | ||
|
|
||
| if (isMainThread) { | ||
| // Create test directories | ||
| for (let i = 0; i < n; i++) { | ||
| await fs.mkdir(dir(i), { recursive: true }) | ||
| } | ||
|
|
||
| // Spawn the worker | ||
| new Worker(import.meta.filename, { workerData: { sharedBuffer }, name: 'coordinator' }) | ||
| new Worker(import.meta.filename, { workerData: { sharedBuffer }, name: 'tester' }) | ||
| } | ||
| return { barrier, mockCWD, mockCWDCount } | ||
| } | ||
|
|
||
|
|
||
| // simulation of the real process | ||
| function fakeProcess(mockCWDCount: Int32Array, mockCWD: Int32Array) { | ||
| let cachedCwd = ''; | ||
| let lastCounter = 0; | ||
|
|
||
| return { | ||
| chdir: (dir: string) => { | ||
| const cwd = Number(path.basename(dir)) | ||
|
|
||
| // Increment counter, then store dir - like node's real chdir. | ||
| busyWait(Math.random() * mockDelayMs) | ||
| lastCounter = Atomics.add(mockCWDCount, 0, 1) + 1; | ||
| busyWait(Math.random() * mockDelayMs) | ||
| Atomics.store(mockCWD, 0, cwd); | ||
| }, | ||
| cwd: () => { | ||
| busyWait(Math.random() * mockDelayMs) | ||
| const currentCounter = Atomics.load(mockCWDCount, 0); | ||
| if (currentCounter === lastCounter) { | ||
| return cachedCwd; | ||
| } | ||
| busyWait(Math.random() * mockDelayMs) | ||
| lastCounter = currentCounter; | ||
| cachedCwd = dir(Atomics.load(mockCWD, 0)) | ||
| return cachedCwd; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // same as fakeProcess but counter order changed, this should fix the bug | ||
| function fixedProcess(mockCWDCount: Int32Array, mockCWD: Int32Array) { | ||
| let cachedCwd = ''; | ||
| let lastCounter = 0; | ||
|
|
||
| return { | ||
| chdir: (dir: string) => { | ||
| const cwd = Number(path.basename(dir)) | ||
|
|
||
| // store dir, then increment counter which should fix the bug | ||
| busyWait(Math.random() * mockDelayMs) | ||
| Atomics.store(mockCWD, 0, cwd); | ||
| busyWait(Math.random() * mockDelayMs) | ||
| lastCounter = Atomics.add(mockCWDCount, 0, 1) + 1; | ||
| }, | ||
| cwd: () => { | ||
| busyWait(Math.random() * mockDelayMs) | ||
| const currentCounter = Atomics.load(mockCWDCount, 0); | ||
| if (currentCounter === lastCounter) { | ||
| return cachedCwd; | ||
| } | ||
| busyWait(Math.random() * mockDelayMs) | ||
| lastCounter = currentCounter; | ||
| cachedCwd = dir(Atomics.load(mockCWD, 0)) | ||
| return cachedCwd; | ||
| } | ||
| } | ||
| } | ||
|
|
||
| const originalCwd = process.cwd() | ||
|
|
||
| // Return expected dir for Nth test | ||
| function dir(n: number): string { | ||
| return `${originalCwd}/.tmp/${n}` | ||
| } | ||
|
|
||
| // block for a number of ms, including fractional sub-ms values. | ||
| // this is used in the mock processes so we don't have 100% races won or problems | ||
| // not necessary but it makes me more confident in the test/mock setup. | ||
| function busyWait(ms: number) { | ||
| const end = performance.now() + ms; | ||
| do { | ||
| // @ts-ignore-error | ||
| Atomics.pause() | ||
| } while (performance.now() < end); | ||
| } | ||
|
|
||
| // class to synchronize threads | ||
| // this version relies on a 'coordinator' thread that releases | ||
| // the other threads, I think this makes the race more 'fair' | ||
| // as neither of the racers are holding the starting gun | ||
| // but you can reproduce the bug without a coordinator | ||
| class Barrier { | ||
| static ELEMENTS = 2 | ||
| static BYTES_PER_ELEMENT = Int32Array.BYTES_PER_ELEMENT * Barrier.ELEMENTS | ||
|
|
||
| count: number | ||
| state: Int32Array | ||
| constructor(shared: SharedArrayBuffer, off: number, count: number) { | ||
| this.count = count; | ||
| this.state = new Int32Array(shared, off, Barrier.ELEMENTS) | ||
| } | ||
|
|
||
| wait() { | ||
| // index for number of waiting threads | ||
| const WAITING = 0; | ||
| // index for how many times this barrier has been used | ||
| const GEN = 1; | ||
|
|
||
| if (threadName === 'coordinator') { | ||
| // the coordinator will reset waiting, notifying the threads | ||
| // to resume | ||
| while (true) { | ||
| const waiting = Atomics.load(this.state, WAITING) | ||
| if (waiting >= this.count) { | ||
| Atomics.add(this.state, GEN, 1); | ||
| Atomics.store(this.state, WAITING, 0); | ||
| Atomics.notify(this.state, GEN); | ||
| return; | ||
| } | ||
| Atomics.wait(this.state, WAITING, waiting) | ||
| } | ||
| } else { | ||
| const counter = Atomics.load(this.state, GEN) | ||
| Atomics.add(this.state, WAITING, 1); | ||
| Atomics.notify(this.state, WAITING); | ||
| Atomics.wait(this.state, GEN, counter); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| type TestProcess = Pick<typeof process, 'chdir' | 'cwd'> | ||
|
|
||
| await main() |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This doesn't guarantee execution order either; that would require wait/notify.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
execution order is guaranteed (originalChdir is a blocking syscall that completes before AtomicsAdd executes). the Atomic provides a memory barrier for cross-thread.
the current fix (lines swapped) already ensures the cwdCounter only signals freshness after the directory actually changes. This s turning the race condition (workers cache stale data as current) into a safe eventual consistency (workers may be conservative but never trust stale data).
the test results confirm 0% error rate across the 832 races of the PoC, what race condition do you see remaining?