Breaking change: cluster connection behavior when no worker is running #10427
Description
On OS X, I've noticed a big difference in how a node.js master process handles connections that arrive when no workers are ready to accept them. In node 0.10.36 (and before), the connection would be held open, and a worker started after the request was made would get the chance to handle it. In node 0.12.0, incoming connections that arrive between workers are outright refused.
At the very least, this should be documented.
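One client-side way to cope with the new behavior (a hypothetical sketch, not part of the repro below) is to retry a request when the connection is refused while a replacement worker spins up:

  // Hypothetical workaround sketch: retry on ECONNREFUSED so a request made
  // while no worker is listening can still succeed once a new worker is up.
  var http = require('http');

  function getWithRetry(path, retries, callback) {
    http.get({ port: 3000, path: path }, function (res) {
      callback(null, res);
    }).on('error', function (err) {
      if (err.code === 'ECONNREFUSED' && retries > 0) {
        // Give the master time to fork and the worker time to listen.
        setTimeout(function () {
          getWithRetry(path, retries - 1, callback);
        }, 500);
      }
      else {
        callback(err);
      }
    });
  }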
Example code and output on both 0.10.36 and 0.12.0 follow:
var cluster = require('cluster');
var http = require('http');
var supertest = require('supertest');

var PORT = 3000;

// cluster.schedulingPolicy = cluster.SCHED_NONE;

if (!cluster.isMaster) {
  // Worker: serve 'Hello World', but crash asynchronously on /error so the
  // uncaught exception takes the worker down.
  http.createServer(function (req, res) {
    if (req.url === '/error') {
      setTimeout(function() {
        throw new Error('something went wrong!');
      }, 500);
    }
    else {
      res.writeHead(200, {'Content-Type': 'text/plain'});
      res.end('Hello World\n');
    }
  }).listen(PORT);
  console.log('Worker %s running at port %s', cluster.worker.id, PORT);
}
else {
  var count = 0;
  var request = supertest('http://localhost:' + PORT);

  // Hit /error to kill the worker, then immediately make a second request
  // while no worker is listening.
  var hitWorker = function(count) {
    console.log('%s: Worker listening! Hitting it...', count);
    request
      .get('/error')
      .expect(200, function(err, res) {
        console.log('%s: Worker taken down, now making second request', count);
        request
          .get('/')
          .expect('Hello World\n')
          .expect(200, function(err, res) {
            console.log('%s: Second request complete. Error:', count, err);
          });
      });
  };

  // Replace the dead worker once, so the scenario runs twice in total.
  cluster.on('disconnect', function() {
    count += 1;
    if (count < 2) {
      cluster.fork();
    }
  });

  cluster.on('listening', function() {
    hitWorker(count);
  });

  // start just one worker
  cluster.fork();

  // Print a heartbeat while waiting; unref so the timer alone doesn't keep
  // the master process alive.
  var interval = setInterval(function() {
    console.log('...');
  }, 1000);
  interval.unref();
}
Output
node 0.12.0 (scheduling policy does not make a difference):
Worker 1 running at port 3000
0: Worker listening! Hitting it...
/Users/scottnonnenberg/Development/thehelp/cluster/test.js:13
throw new Error('something went wrong!');
^
Error: something went wrong!
at null._onTimeout (/test.js:13:15)
at Timer.listOnTimeout (timers.js:110:15)
0: Worker taken down, now making second request
0: Second request complete. Error: { [Error: connect ECONNREFUSED]
code: 'ECONNREFUSED',
errno: 'ECONNREFUSED',
syscall: 'connect' }
Worker 2 running at port 3000
1: Worker listening! Hitting it...
...
/Users/scottnonnenberg/Development/thehelp/cluster/test.js:13
throw new Error('something went wrong!');
^
Error: something went wrong!
at null._onTimeout (/test.js:13:15)
at Timer.listOnTimeout (timers.js:110:15)
1: Worker taken down, now making second request
1: Second request complete. Error: { [Error: connect ECONNREFUSED]
code: 'ECONNREFUSED',
errno: 'ECONNREFUSED',
syscall: 'connect' }
node 0.10.36:
Worker 1 running at port 3000
0: Worker listening! Hitting it...
/Users/scottnonnenberg/Development/thehelp/cluster/test.js:13
throw new Error('something went wrong!');
^
Error: something went wrong!
at null._onTimeout (/test.js:13:15)
at Timer.listOnTimeout [as ontimeout] (timers.js:112:15)
0: Worker taken down, now making second request
Worker 2 running at port 3000
1: Worker listening! Hitting it...
0: Second request complete. Error: null
...
/Users/scottnonnenberg/Development/thehelp/cluster/test.js:13
throw new Error('something went wrong!');
^
Error: something went wrong!
at null._onTimeout (/test.js:13:15)
at Timer.listOnTimeout [as ontimeout] (timers.js:112:15)
1: Worker taken down, now making second request
...
...
...
...
^C
This version hangs because a third worker is never started, and the master keeps the connection open. Note also that '0: Second request complete' actually comes after '1: Worker listening!'. This is because the first iteration's second request actually ends up being handled by the second worker.
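For reference, a server-side mitigation sketch (again hypothetical, and not a fix for the underlying behavior change): fork a replacement as soon as a worker exits, to keep the window with zero listening workers as short as possible.

  // Hypothetical mitigation sketch: respawn on 'exit' so the gap during
  // which no worker is listening stays as short as possible.
  cluster.on('exit', function (worker) {
    console.log('Worker %s died, forking a replacement', worker.id);
    cluster.fork();
  });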