Skip to content

Breaking change: cluster connection behavior when between workers #1239

Closed
@scottnonnenberg

Description

@scottnonnenberg

Already reported this on node 0.12, thought I should report it here as well.

On OSX, I've noticed a big difference in the way a master process handles incoming connections when no worker is ready to accept them. In node 0.10.36 (and before), the connection would be held open, so a worker that hadn't yet been started when the request was made would have the chance to handle it. In all versions of iojs (and node 0.12.0), connections that arrive between workers are refused outright.

At the very least, this should be documented.

Example code and output on both node 0.10.36 and iojs 1.6.1 follows:

var cluster = require('cluster');
var http = require('http');
var supertest = require('supertest');
var PORT = 3000;
// Repro script: the master forks one worker at a time, crashes it via GET /error, then immediately requests GET /.
// cluster.schedulingPolicy = cluster.SCHED_NONE;
// ^ Uncomment to run the same scenario with the SCHED_NONE scheduling policy.
if (!cluster.isMaster) {
  http.createServer(function (req, res) {
    if (req.url === '/error') { // no response is written on this path
      setTimeout(function() { // throw from a timer callback, 500 ms after the request arrives
        throw new Error('something went wrong!'); // uncaught on purpose: the script relies on this to take the worker down
      }, 500);
    }
    else {
      res.writeHead(200, {'Content-Type': 'text/plain'});
      res.end('Hello World\n');
    }
  }).listen(PORT);
  // Worker side: every worker is a plain HTTP server listening on PORT.
  console.log('Worker %s running at port %s', cluster.worker.id, PORT);
}
else {
  var count = 0; // number of worker 'disconnect' events seen so far
  var request = supertest('http://localhost:' + PORT);
  // Crash the current worker via /error, then issue a follow-up request to / as soon as the first request ends.
  var hitWorker = function(count) { // this `count` parameter shadows the outer counter; it only tags the log lines
    console.log('%s: Worker listening! Hitting it...', count);

    request
      .get('/error')
      .expect(200, function(err, res) { // `err` is ignored here; the callback only marks the end of the first request
        console.log('%s: Worker taken down, now making second request', count);

        request
          .get('/')
          .expect('Hello World\n')
          .expect(200, function(err, res) {
            console.log('%s: Second request complete. Error:', count, err); // `err` shows whether the request was served
          });
      });
  };
  // Replace a disconnected worker, but only once: with the `count < 2` guard at most two workers are ever forked.
  cluster.on('disconnect', function() {
    count +=1;
    if (count < 2) {
      cluster.fork();
    }
  });
  // Each time a worker starts listening, run the crash-then-request sequence against it.
  cluster.on('listening', function() {
    hitWorker(count);
  });

  // start just one worker
  cluster.fork();
  // Print a heartbeat every second; unref() so this timer alone does not keep the master process alive.
  var interval = setInterval(function() {
    console.log('...');
  }, 1000);
  interval.unref();
}

output

iojs 1.6.1 (scheduling policy does not make a difference):

Worker 1 running at port 3000
0: Worker listening! Hitting it...
/Users/scottnonnenberg/Development/thehelp/cluster/test.js:12
        throw new Error('something went wrong!');
              ^
Error: something went wrong!
    at null._onTimeout (/Users/scottnonnenberg/Development/thehelp/cluster/test.js:12:15)
    at Timer.listOnTimeout (timers.js:88:15)
0: Worker taken down, now making second request
0: Second request complete. Error: { [Error: connect ECONNREFUSED 127.0.0.1:3000]
  code: 'ECONNREFUSED',
  errno: 'ECONNREFUSED',
  syscall: 'connect',
  address: '127.0.0.1',
  port: 3000 }
Worker 2 running at port 3000
1: Worker listening! Hitting it...
...
/Users/scottnonnenberg/Development/thehelp/cluster/test.js:12
        throw new Error('something went wrong!');
              ^
Error: something went wrong!
    at null._onTimeout (/Users/scottnonnenberg/Development/thehelp/cluster/test.js:12:15)
    at Timer.listOnTimeout (timers.js:88:15)
1: Worker taken down, now making second request
1: Second request complete. Error: { [Error: connect ECONNREFUSED 127.0.0.1:3000]
  code: 'ECONNREFUSED',
  errno: 'ECONNREFUSED',
  syscall: 'connect',
  address: '127.0.0.1',
  port: 3000 }
...

node 0.10.36:

Worker 1 running at port 3000
0: Worker listening! Hitting it...

/Users/scottnonnenberg/Development/thehelp/cluster/test.js:13
        throw new Error('something went wrong!');
              ^
Error: something went wrong!
    at null._onTimeout (/test.js:13:15)
    at Timer.listOnTimeout [as ontimeout] (timers.js:112:15)
0: Worker taken down, now making second request
Worker 2 running at port 3000
1: Worker listening! Hitting it...
0: Second request complete. Error: null
...

/Users/scottnonnenberg/Development/thehelp/cluster/test.js:13
        throw new Error('something went wrong!');
              ^
Error: something went wrong!
    at null._onTimeout (/test.js:13:15)
    at Timer.listOnTimeout [as ontimeout] (timers.js:112:15)
1: Worker taken down, now making second request
...
...
...
...
^C

This version hangs because a third worker is never started and the master keeps the connection open. Note also that '0: Second request complete' actually comes after '1: Worker listening!'. This is because that initial second request actually ends up hitting the second worker.

Metadata

Metadata

Assignees

Labels

cluster: Issues and PRs related to the cluster subsystem. doc: Issues and PRs related to the documentations.

Type

No type

Projects

No projects

Milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions