Open
Description
- Version: master
- Platform:
Linux foo 5.0.0-36-generic #39~18.04.1-Ubuntu SMP Tue Nov 12 11:09:50 UTC 2019 x86_64 x86_64 x86_64 GNU/Linux
- Subsystem: buffer
I was running some benchmarks (for private code) and noticed a significant slowdown with some Buffer methods. Here is a comparison of the C++ portion of --prof
between v12.16.1 and master:
v12.16.1:
[C++]:
ticks total nonlib name
66 4.2% 8.4% void node::Buffer::(anonymous namespace)::StringSlice<(node::encoding)1>(v8::FunctionCallbackInfo<v8::Value> const&)
54 3.4% 6.9% __libc_read
48 3.0% 6.1% node::Buffer::(anonymous namespace)::ParseArrayIndex(node::Environment*, v8::Local<v8::Value>, unsigned long, unsigned long*)
35 2.2% 4.5% v8::ArrayBuffer::GetContents()
35 2.2% 4.5% epoll_pwait
27 1.7% 3.4% v8::internal::Builtin_TypedArrayPrototypeBuffer(int, unsigned long*, v8::internal::Isolate*)
26 1.6% 3.3% v8::String::NewFromUtf8(v8::Isolate*, char const*, v8::NewStringType, int)
24 1.5% 3.1% node::native_module::NativeModuleEnv::CompileFunction(v8::FunctionCallbackInfo<v8::Value> const&)
22 1.4% 2.8% __pthread_cond_signal
20 1.3% 2.5% node::StringBytes::Encode(v8::Isolate*, char const*, unsigned long, node::encoding, v8::Local<v8::Value>*)
16 1.0% 2.0% v8::Value::IsArrayBufferView() const
14 0.9% 1.8% v8::ArrayBufferView::Buffer()
9 0.6% 1.1% v8::internal::FixedArray::set(int, v8::internal::Object)
8 0.5% 1.0% v8::ArrayBufferView::ByteLength()
7 0.4% 0.9% v8::Value::IntegerValue(v8::Local<v8::Context>) const
6 0.4% 0.8% v8::ArrayBufferView::ByteOffset()
5 0.3% 0.6% __libc_malloc
4 0.3% 0.5% write
4 0.3% 0.5% v8::internal::libc_memmove(void*, void const*, unsigned long)
4 0.3% 0.5% node::binding::GetInternalBinding(v8::FunctionCallbackInfo<v8::Value> const&)
3 0.2% 0.4% v8::internal::libc_memset(void*, int, unsigned long)
3 0.2% 0.4% __lll_unlock_wake
2 0.1% 0.3% void node::StreamBase::JSMethod<&node::StreamBase::WriteBuffer>(v8::FunctionCallbackInfo<v8::Value> const&)
2 0.1% 0.3% fwrite
2 0.1% 0.3% do_futex_wait.constprop.1
2 0.1% 0.3% __clock_gettime
2 0.1% 0.3% __GI___pthread_mutex_unlock
1 0.1% 0.1% void node::StreamBase::JSMethod<&(int node::StreamBase::WriteString<(node::encoding)1>(v8::FunctionCallbackInfo<v8::Value> const&))>(v8::FunctionCallbackInfo<v8::Value> const&)
1 0.1% 0.1% v8::internal::Scope::DeserializeScopeChain(v8::internal::Isolate*, v8::internal::Zone*, v8::internal::ScopeInfo, v8::internal::DeclarationScope*, v8::internal::AstValueFactory*, v8::internal::Scope::DeserializationMode)
1 0.1% 0.1% v8::internal::RuntimeCallTimerScope::RuntimeCallTimerScope(v8::internal::Isolate*, v8::internal::RuntimeCallCounterId)
1 0.1% 0.1% v8::EscapableHandleScope::Escape(unsigned long*)
1 0.1% 0.1% std::ostreambuf_iterator<char, std::char_traits<char> > std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::_M_insert_int<long>(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const
1 0.1% 0.1% std::ostream::sentry::sentry(std::ostream&)
1 0.1% 0.1% std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
1 0.1% 0.1% std::__detail::_Prime_rehash_policy::_M_need_rehash(unsigned long, unsigned long, unsigned long) const
1 0.1% 0.1% non-virtual thunk to node::LibuvStreamWrap::GetAsyncWrap()
1 0.1% 0.1% node::LibuvStreamWrap::ReadStart()::{lambda(uv_stream_s*, long, uv_buf_t const*)#2}::_FUN(uv_stream_s*, long, uv_buf_t const*)
1 0.1% 0.1% node::CustomBufferJSListener::OnStreamRead(long, uv_buf_t const&)
1 0.1% 0.1% node::AsyncWrap::EmitTraceEventBefore()
1 0.1% 0.1% mprotect
1 0.1% 0.1% getpid
1 0.1% 0.1% cfree
1 0.1% 0.1% __lll_lock_wait
master:
[C++]:
ticks total nonlib name
155 6.5% 14.0% std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()
88 3.7% 7.9% __GI___pthread_mutex_lock
87 3.7% 7.8% v8::ArrayBuffer::GetBackingStore()
71 3.0% 6.4% __GI___pthread_mutex_unlock
54 2.3% 4.9% void node::Buffer::(anonymous namespace)::StringSlice<(node::encoding)1>(v8::FunctionCallbackInfo<v8::Value> const&)
36 1.5% 3.2% node::Buffer::(anonymous namespace)::ParseArrayIndex(node::Environment*, v8::Local<v8::Value>, unsigned long, unsigned long*)
32 1.4% 2.9% v8::internal::Builtin_TypedArrayPrototypeBuffer(int, unsigned long*, v8::internal::Isolate*)
31 1.3% 2.8% v8::Value::IntegerValue(v8::Local<v8::Context>) const
27 1.1% 2.4% node::native_module::NativeModuleEnv::CompileFunction(v8::FunctionCallbackInfo<v8::Value> const&)
27 1.1% 2.4% epoll_pwait
26 1.1% 2.3% v8::String::NewFromUtf8(v8::Isolate*, char const*, v8::NewStringType, int)
20 0.8% 1.8% v8::Value::IsArrayBufferView() const
20 0.8% 1.8% __libc_read
19 0.8% 1.7% __pthread_cond_signal
16 0.7% 1.4% node::StringBytes::Encode(v8::Isolate*, char const*, unsigned long, node::encoding, v8::Local<v8::Value>*)
13 0.5% 1.2% v8::ArrayBufferView::Buffer()
10 0.4% 0.9% v8::internal::FixedArray::set(int, v8::internal::Object)
6 0.3% 0.5% v8::internal::libc_memset(void*, int, unsigned long)
6 0.3% 0.5% v8::internal::RuntimeCallTimerScope::RuntimeCallTimerScope(v8::internal::Isolate*, v8::internal::RuntimeCallCounterId)
5 0.2% 0.5% v8::internal::libc_memmove(void*, void const*, unsigned long)
5 0.2% 0.5% v8::ArrayBufferView::ByteLength()
4 0.2% 0.4% write
4 0.2% 0.4% void node::StreamBase::JSMethod<&node::StreamBase::WriteBuffer>(v8::FunctionCallbackInfo<v8::Value> const&)
4 0.2% 0.4% do_futex_wait.constprop.1
4 0.2% 0.4% __lll_lock_wait
3 0.1% 0.3% node::binding::GetInternalBinding(v8::FunctionCallbackInfo<v8::Value> const&)
3 0.1% 0.3% _init
2 0.1% 0.2% v8::BackingStore::Data() const
2 0.1% 0.2% fwrite
2 0.1% 0.2% cfree
2 0.1% 0.2% __lll_unlock_wake
2 0.1% 0.2% __libc_malloc
1 0.0% 0.1% v8::internal::DeclarationScope::DeclareDefaultFunctionVariables(v8::internal::AstValueFactory*)
1 0.0% 0.1% v8::internal::Builtin_HandleApiCall(int, unsigned long*, v8::internal::Isolate*)
1 0.0% 0.1% v8::internal::AstValueFactory::GetOneByteStringInternal(v8::internal::Vector<unsigned char const>)
1 0.0% 0.1% std::num_put<char, std::ostreambuf_iterator<char, std::char_traits<char> > >::do_put(std::ostreambuf_iterator<char, std::char_traits<char> >, std::ios_base&, char, long) const
1 0.0% 0.1% operator new[](unsigned long)
1 0.0% 0.1% node::contextify::ContextifyContext::CompileFunction(v8::FunctionCallbackInfo<v8::Value> const&)
1 0.0% 0.1% node::TCPWrap::Connect(v8::FunctionCallbackInfo<v8::Value> const&)
1 0.0% 0.1% node::Environment::RunAndClearNativeImmediates(bool)
1 0.0% 0.1% node::AsyncWrap::EmitTraceEventBefore()
1 0.0% 0.1% mprotect
1 0.0% 0.1% __pthread_cond_timedwait
1 0.0% 0.1% __fxstat
1 0.0% 0.1% __GI___pthread_getspecific
As you will see, master has these additional items at the top of the list:
155 6.5% 14.0% std::_Sp_counted_base<(__gnu_cxx::_Lock_policy)2>::_M_release()
88 3.7% 7.9% __GI___pthread_mutex_lock
87 3.7% 7.8% v8::ArrayBuffer::GetBackingStore()
71 3.0% 6.4% __GI___pthread_mutex_unlock
Is there some way we can avoid this slowdown?