Skip to content

String::push is slow #116235

Closed
Closed
@lincot

Description

@lincot

An alternative implementation of String::push, which reserves space if needed and then performs push_unchecked, gives a significant performance improvement.

current push:

test bench_push_1_byte  ... bench:      10,927 ns/iter (+/- 147) = 915 MB/s
test bench_push_2_bytes ... bench:      49,662 ns/iter (+/- 373) = 402 MB/s
test bench_push_3_bytes ... bench:      49,780 ns/iter (+/- 102) = 602 MB/s
test bench_push_4_bytes ... bench:      40,556 ns/iter (+/- 349) = 986 MB/s

new push:

test bench_push_1_byte  ... bench:      10,366 ns/iter (+/- 155) = 964 MB/s
test bench_push_2_bytes ... bench:      14,446 ns/iter (+/- 137) = 1384 MB/s
test bench_push_3_bytes ... bench:      19,238 ns/iter (+/- 265) = 1559 MB/s
test bench_push_4_bytes ... bench:      23,922 ns/iter (+/- 213) = 1672 MB/s
bench.rs
#![no_std]
#![feature(test)]

extern crate alloc;
extern crate test;
use alloc::string::String;
use test::{black_box, Bencher};

// #[inline]
// fn push(s: &mut String, ch: char) {
//     s.push(ch);
// }

#[inline]
fn push(s: &mut String, ch: char) {
    if s.capacity() - s.len() < ch.len_utf8() {
        s.reserve(ch.len_utf8());
    }
    unsafe { s.push_unchecked(ch) };
}

trait PushUnchecked<T> {
    unsafe fn push_unchecked(&mut self, value: T);
}

impl PushUnchecked<char> for String {
    #[inline]
    unsafe fn push_unchecked(&mut self, ch: char) {
        let len = self.len();
        let ptr = self.as_mut_vec().as_mut_ptr().add(len);
        let count = ch.len_utf8();
        debug_assert!(len + count <= self.capacity());
        match count {
            1 => {
                core::ptr::write(ptr, ch as u8);
            }
            2 => {
                core::ptr::write(ptr, (ch as u32 >> 6 & 0x1F) as u8 | 0b1100_0000);
                core::ptr::write(ptr.add(1), (ch as u32 & 0x3F) as u8 | 0b1000_0000);
            }
            3 => {
                core::ptr::write(ptr, (ch as u32 >> 12 & 0x0F) as u8 | 0b1110_0000);
                core::ptr::write(ptr.add(1), (ch as u32 >> 6 & 0x3F) as u8 | 0b1000_0000);
                core::ptr::write(ptr.add(2), (ch as u32 & 0x3F) as u8 | 0b1000_0000);
            }
            4 => {
                core::ptr::write(ptr, (ch as u32 >> 18 & 0x07) as u8 | 0b1111_0000);
                core::ptr::write(ptr.add(1), (ch as u32 >> 12 & 0x3F) as u8 | 0b1000_0000);
                core::ptr::write(ptr.add(2), (ch as u32 >> 6 & 0x3F) as u8 | 0b1000_0000);
                core::ptr::write(ptr.add(3), (ch as u32 & 0x3F) as u8 | 0b1000_0000);
            }
            _ => core::hint::unreachable_unchecked(),
        }
        self.as_mut_vec().set_len(len + count);
    }
}

const ITERATIONS: u64 = 10_000;

#[bench]
fn bench_push_1_byte(bencher: &mut Bencher) {
    const CHAR: char = '0';
    assert_eq!(CHAR.len_utf8(), 1);
    bencher.bytes = ITERATIONS;
    bencher.iter(|| {
        let mut s = String::new();
        for _ in 0..black_box(ITERATIONS) {
            push(&mut s, black_box(CHAR));
        }
        s
    });
}

#[bench]
fn bench_push_2_bytes(bencher: &mut Bencher) {
    const CHAR: char = 'д';
    assert_eq!(CHAR.len_utf8(), 2);
    bencher.bytes = 2 * ITERATIONS;
    bencher.iter(|| {
        let mut s = String::new();
        for _ in 0..black_box(ITERATIONS) {
            push(&mut s, black_box(CHAR));
        }
        s
    });
}

#[bench]
fn bench_push_3_bytes(bencher: &mut Bencher) {
    const CHAR: char = '❗';
    assert_eq!(CHAR.len_utf8(), 3);
    bencher.bytes = 3 * ITERATIONS;
    bencher.iter(|| {
        let mut s = String::new();
        for _ in 0..black_box(ITERATIONS) {
            push(&mut s, black_box(CHAR));
        }
        s
    });
}

#[bench]
fn bench_push_4_bytes(bencher: &mut Bencher) {
    const CHAR: char = '🤨';
    assert_eq!(CHAR.len_utf8(), 4);
    bencher.bytes = 4 * ITERATIONS;
    bencher.iter(|| {
        let mut s = String::new();
        for _ in 0..black_box(ITERATIONS) {
            push(&mut s, black_box(CHAR));
        }
        s
    });
}

It's worth noting that the current String::push implementation encodes characters into a temporary zero-initialized buffer. The zeroing alone is an unnecessary instruction, but this method is used in several places, notably String::insert and the default Write::write_char.

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-strArea: str and StringC-optimizationCategory: An issue highlighting optimization opportunities or PRs implementing suchI-slowIssue: Problems and improvements with respect to performance of generated code.T-libsRelevant to the library team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions