Skip to content

Suboptimal code generation for thread_local! #104033

Open
@stepancheg

Description

@stepancheg

Code:

use std::cell::*;

thread_local! {
    // Per-thread vector wrapped in a `Cell`. The initializer is written as a
    // `const { .. }` block, so the starting value (an empty `Vec`) is a
    // constant expression rather than the result of a runtime initializer.
    static X: Cell<Vec<u32>> = const { Cell::new(Vec::new()) };
}

/// Minimal reproducer: removes the last element (if any) from the
/// thread-local vector `X`.
///
/// The closure only receives a shared reference to the `Cell`, so the vector
/// is moved out, modified, and then moved back in.
pub fn thread_local() {
    X.with(|x| {
        // Move the vector out of the cell; the cell is left holding the
        // default value, i.e. an empty `Vec`.
        let mut xx = x.take();
        // Remove the last element; the popped value is intentionally ignored.
        xx.pop();
        // Put the (possibly shortened) vector back into the cell.
        x.set(xx);
    })
}

Emits:

example::thread_local:
  push rbx
  sub rsp, 16
  lea rdi, [rip + example::X::__getit::STATE.0@TLSLD]
  call __tls_get_addr@PLT
  mov rbx, rax
  movzx eax, byte ptr [rax + example::X::__getit::STATE.0@DTPOFF]
  cmp eax, 1
  je .LBB1_3
  test eax, eax
  jne .LBB1_4
  lea rdi, [rbx + example::X::__getit::VAL@DTPOFF]
  lea rsi, [rip + example::X::__getit::destroy]
  call qword ptr [rip + std::sys::unix::thread_local_dtor::register_dtor@GOTPCREL]
  mov rax, rbx
  mov byte ptr [rbx + example::X::__getit::STATE.0@DTPOFF], 1
.LBB1_3:
  mov rcx, qword ptr [rbx + example::X::__getit::VAL@DTPOFF+16]
  xor edx, edx
  sub rcx, 1
  cmovae rdx, rcx
  mov qword ptr [rbx + example::X::__getit::VAL@DTPOFF+16], rdx
  add rsp, 16
  pop rbx
  ret
.LBB1_4:
  lea rdi, [rip + .L__unnamed_1]
  lea rcx, [rip + .L__unnamed_2]
  lea r8, [rip + .L__unnamed_3]
  lea rdx, [rsp + 8]
  mov esi, 70
  call qword ptr [rip + core::result::unwrap_failed@GOTPCREL]
  ud2

(Compiler explorer)

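Here the default path (when the thread-local is already initialized, i.e. STATE == 1) has to take the jump to .LBB1_3, while the one-time destructor-registration path is laid out as the fall-through.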

The issue seems to be a missing #[cold] annotation on the register_dtor function, or a missing likely(STATE == 1) hint:

match STATE {
// 0 == we haven't registered a destructor, so do
// so now.
0 => {
$crate::thread::__FastLocalKeyInner::<$t>::register_dtor(
$crate::ptr::addr_of_mut!(VAL) as *mut $crate::primitive::u8,
destroy,
);
STATE = 1;
$crate::option::Option::Some(&VAL)
}
// 1 == the destructor is registered and the value
// is valid, so return the pointer.
1 => $crate::option::Option::Some(&VAL),
// otherwise the destructor has already run, so we
// can't give access.
_ => $crate::option::Option::None,
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-thread-locals — Area: Thread local storage (TLS)
    C-optimization — Category: An issue highlighting optimization opportunities or PRs implementing such
    I-slow — Issue: Problems and improvements with respect to performance of generated code.
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.
    T-libs — Relevant to the library team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions