Skip to content

Ratchet on 100% CPU load #939

@ollm

Description

@ollm

Hi.

During the last month, I am having a problem with running a Ratchet server in a production environment, suddenly the process goes from normally using 0-1% of a CPU core to 98-100%, the websocket server still working but it takes longer to respond, this happens in a variety of ways, sometimes it takes a few hours to happen and others it takes days / weeks.

When it enters this state it does not exit again until I kill the process and supervisor starts a new one.

I am also checking the logic of my application so that it is not a problem on my part.

Apache version

Server Version: Apache/2.4.51 (cPanel) OpenSSL/1.1.1g mod_bwlimited/1.4
Server MPM: event
Server Built: Nov 7 2021 17:32:31

PHP version

PHP 8.0.12 (cli) (built: Nov 10 2021 01:18:26) ( NTS )
Copyright (c) The PHP Group
Zend Engine v4.0.12, Copyright (c) Zend Technologies
    with Zend OPcache v8.0.12, Copyright (c), by Zend Technologies

Ratchet version being used

cboden/ratchet v0.4.4 PHP WebSocket library
├──guzzlehttp/psr7 v1.8.3
├──ratchet/rfc6455 v0.3.1
├──react/event-loop v1.2.0
├──react/socket v1.10.0
├──symfony/http-foundation v6.0.1
└──symfony/routing v4.4.34

Event loop

ExtEvLoop
ev (1.1.4)

supervisor.conf

[unix_http_server]
file = /tmp/supervisor.sock

[supervisord]
logfile          = ./logs/supervisord.log
logfile_maxbytes = 50MB
logfile_backups  = 10
loglevel         = info
pidfile          = /tmp/supervisord.pid
nodaemon         = false
minfds           = 4096
minprocs         = 200

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

[supervisorctl]
serverurl = unix:///tmp/supervisor.sock

[program:ratchet]
directory				= /home/username/public_html
command                 = bash -c "ulimit -n 10000; exec php chat.php"
process_name            = Chat
numprocs                = 1
autostart               = true
autorestart             = true
user                    = root
stdout_logfile          = ./logs/chat_info.log
stdout_logfile_maxbytes = 1MB
stderr_logfile          = ./logs/chat_error.log
stderr_logfile_maxbytes = 1MB

I have this code in the app, but there is no record in chat_info.log or chat_error.log

public function onError(ConnectionInterface $conn, \Exception $e)
{
	echo "An error has occurred: {$e->getMessage()}\n";

	$conn->close();
}

This is what appears if I run strace in the pid

strace

--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10400) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10399) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10398) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)
--- SIGPIPE {si_signo=SIGPIPE, si_code=SI_USER, si_pid=3517427, si_uid=0} ---
epoll_wait(6, [{EPOLLOUT|EPOLLHUP, {u32=144, u64=3702261809296}}], 680, 10397) = 1
epoll_ctl(6, EPOLL_CTL_MOD, 144, {EPOLLOUT, {u32=144, u64=3702261809296}}) = 0
write(144, "\27\3\3\0\25\261\3\360_\3138\365>\260H\207\321X\226\361\206\240v\326:\351", 26) = -1 EPIPE (Broken pipe)

Thank you for your replies.

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions