Skip to content

Commit

Permalink
Fix IO thread hang on releasing a socket.
Browse files Browse the repository at this point in the history
The problem was we assumed ProcessPendingRequest() would always make progress once the group's releasing socket went down to zero.  However, in OnAvailableSocketSlot(), since the top stalled group might still have a releasing socket, it won't necessarily make progress.  The algorithmic solution is to simply never do any socket slot processing in DoReleaseSocket() if there are any releasing sockets.  This requires us to search for any stalled groups after releasing sockets gets back down to 0, so it requires the full group scan each time num_releasing_sockets goes back to 0.  There is a performance hit here, but I think a linear search through 256~ groups in the worst case is ok.
TODO(willchan): evaluate the perf hit and consider adding a secondary data structure to improve the stalled group search.
BUG=42267

Review URL: http://codereview.chromium.org/2013009

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46757 0039d316-1c4b-4281-b951-d872f2087c98
  • Loading branch information
willchan@chromium.org committed May 7, 2010
1 parent 9e64440 commit 7da0176
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 26 deletions.
56 changes: 30 additions & 26 deletions net/socket/client_socket_pool_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ ClientSocketPoolBaseHelper::ClientSocketPoolBaseHelper(
: idle_socket_count_(0),
connecting_socket_count_(0),
handed_out_socket_count_(0),
num_releasing_sockets_(0),
max_sockets_(max_sockets),
max_sockets_per_group_(max_sockets_per_group),
unused_idle_socket_timeout_(unused_idle_socket_timeout),
Expand Down Expand Up @@ -358,6 +359,7 @@ void ClientSocketPoolBaseHelper::ReleaseSocket(const std::string& group_name,
ClientSocket* socket) {
Group& group = group_map_[group_name];
group.num_releasing_sockets++;
num_releasing_sockets_++;
DCHECK_LE(group.num_releasing_sockets, group.active_socket_count);
// Run this asynchronously to allow the caller to finish before we let
// another to begin doing work. This also avoids nasty recursion issues.
Expand Down Expand Up @@ -482,43 +484,45 @@ void ClientSocketPoolBaseHelper::DoReleaseSocket(const std::string& group_name,
CHECK_GT(group.active_socket_count, 0);
group.active_socket_count--;

CHECK_GT(num_releasing_sockets_, 0);
num_releasing_sockets_--;

const bool can_reuse = socket->IsConnectedAndIdle();
if (can_reuse) {
AddIdleSocket(socket, true /* used socket */, &group);
} else {
delete socket;
}

OnAvailableSocketSlot(group_name, &group);

// If there are no more releasing sockets, then we might have to process
// multiple available socket slots, since we stalled their processing until
// all sockets have been released.
i = group_map_.find(group_name);
if (i == group_map_.end() || i->second.num_releasing_sockets > 0)
return;

while (true) {
// We can't activate more sockets since we're already at our global limit.
if (ReachedMaxSocketsLimit())
return;

// |group| might now be deleted.
i = group_map_.find(group_name);
if (i == group_map_.end())
return;

group = i->second;

// If we already have enough ConnectJobs to satisfy the pending requests,
// don't bother starting up more.
if (group.pending_requests.size() <= group.jobs.size())
return;
// all sockets have been released. Note that ProcessPendingRequest() will
// invoke user callbacks, so |num_releasing_sockets_| may change.
//
// This code has been known to infinite loop. Set a counter and CHECK to make
// sure it doesn't get ridiculously high.

int iterations = 0;
while (num_releasing_sockets_ == 0) {
CHECK_LT(iterations, 100000) << "Probably stuck in an infinite loop.";
std::string top_group_name;
Group* top_group = NULL;
int stalled_group_count = FindTopStalledGroup(&top_group, &top_group_name);
if (stalled_group_count >= 1) {
if (ReachedMaxSocketsLimit()) {
// We can't activate more sockets since we're already at our global
// limit.
may_have_stalled_group_ = true;
return;
}

if (!group.HasAvailableSocketSlot(max_sockets_per_group_))
ProcessPendingRequest(top_group_name, top_group);
} else {
may_have_stalled_group_ = false;
return;
}

OnAvailableSocketSlot(group_name, &group);
iterations++;
}
}

Expand Down Expand Up @@ -640,7 +644,7 @@ void ClientSocketPoolBaseHelper::OnAvailableSocketSlot(
std::string top_group_name;
Group* top_group = NULL;
int stalled_group_count = FindTopStalledGroup(&top_group, &top_group_name);
if (stalled_group_count <= 1)
if (stalled_group_count <= 1 && top_group->num_releasing_sockets == 0)
may_have_stalled_group_ = false;
if (stalled_group_count >= 1)
ProcessPendingRequest(top_group_name, top_group);
Expand Down
3 changes: 3 additions & 0 deletions net/socket/client_socket_pool_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,9 @@ class ClientSocketPoolBaseHelper
// Number of connected sockets we handed out across all groups.
int handed_out_socket_count_;

// Number of sockets being released.
int num_releasing_sockets_;

// The maximum total number of sockets. See ReachedMaxSocketsLimit.
const int max_sockets_;

Expand Down
64 changes: 64 additions & 0 deletions net/socket/client_socket_pool_base_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1455,6 +1455,70 @@ TEST_F(ClientSocketPoolBaseTest, MultipleReleasingDisconnectedSockets) {
EXPECT_FALSE(req4.handle()->is_reused());
}

// Regression test for http://crbug.com/42267.
// When DoReleaseSocket() is processed for one socket, it is blocked because the
// other stalled groups all have releasing sockets, so no progress can be made.
TEST_F(ClientSocketPoolBaseTest, SocketLimitReleasingSockets) {
CreatePoolWithIdleTimeouts(
4 /* socket limit */, 4 /* socket limit per group */,
base::TimeDelta(), // Time out unused sockets immediately.
base::TimeDelta::FromDays(1)); // Don't time out used sockets.

connect_job_factory_->set_job_type(TestConnectJob::kMockJob);

// Max out the socket limit with 2 per group.

scoped_ptr<TestSocketRequest> req_a[4];
scoped_ptr<TestSocketRequest> req_b[4];

for (int i = 0; i < 2; ++i) {
req_a[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
req_b[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
EXPECT_EQ(OK,
InitHandle(req_a[i]->handle(), "a", LOWEST, req_a[i].get(), pool_,
BoundNetLog()));
EXPECT_EQ(OK,
InitHandle(req_b[i]->handle(), "b", LOWEST, req_b[i].get(), pool_,
BoundNetLog()));
}

// Make 4 pending requests, 2 per group.

for (int i = 2; i < 4; ++i) {
req_a[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
req_b[i].reset(new TestSocketRequest(&request_order_, &completion_count_));
EXPECT_EQ(ERR_IO_PENDING,
InitHandle(req_a[i]->handle(), "a", LOWEST, req_a[i].get(), pool_,
BoundNetLog()));
EXPECT_EQ(ERR_IO_PENDING,
InitHandle(req_b[i]->handle(), "b", LOWEST, req_b[i].get(), pool_,
BoundNetLog()));
}

// Release b's socket first. The order is important, because in
// DoReleaseSocket(), we'll process b's released socket, and since both b and
// a are stalled, but 'a' is lower lexicographically, we'll process group 'a'
// first, which has a releasing socket, so it refuses to start up another
// ConnectJob. So, we used to infinite loop on this.
req_b[0]->handle()->socket()->Disconnect();
req_b[0]->handle()->Reset();
req_a[0]->handle()->socket()->Disconnect();
req_a[0]->handle()->Reset();

// Used to get stuck here.
MessageLoop::current()->RunAllPending();

req_b[1]->handle()->socket()->Disconnect();
req_b[1]->handle()->Reset();
req_a[1]->handle()->socket()->Disconnect();
req_a[1]->handle()->Reset();

for (int i = 2; i < 4; ++i) {
EXPECT_EQ(OK, req_b[i]->WaitForResult());
EXPECT_EQ(OK, req_a[i]->WaitForResult());
}
}

TEST_F(ClientSocketPoolBaseTest,
ReleasingDisconnectedSocketsMaintainsPriorityOrder) {
CreatePool(kDefaultMaxSockets, kDefaultMaxSocketsPerGroup);
Expand Down
7 changes: 7 additions & 0 deletions net/socket/tcp_client_socket_pool_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,13 @@ TEST_F(TCPClientSocketPoolTest, BackupSocketConnect) {
// One socket is stalled, the other is active.
EXPECT_EQ(0, pool_->IdleSocketCount());
handle.Reset();

pool_ = new TCPClientSocketPool(kMaxSockets,
kMaxSocketsPerGroup,
"TCPUnitTest",
host_resolver_,
&client_socket_factory_,
NULL);
}
}

Expand Down

0 comments on commit 7da0176

Please sign in to comment.