Skip to content

/std:c++latest makes headers much slower to include, up to 10 times #3599

Open

Description

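Times (in seconds) are those reported by the compiler front end via the /Bt flag. Only headers whose difference exceeds both 5 ms and 1% are shown, and each value is the minimum of 10 runs. /permissive- was used with every /std setting to exclude preprocessor/parser differences: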

default c++17 c++20 c++latest slowdown header
0.121 0.275 1.248 1.308 981% <chrono>
0.033 0.037 0.182 0.186 464% <cmath>
0.166 0.193 0.291 0.537 223% <queue>
0.137 0.162 0.210 0.438 220% <stack>
..... 0.581 1.394 1.474 154% <filesystem>
0.079 0.124 0.169 0.174 120% <numeric>
0.053 0.061 0.111 0.114 115% <utility>
0.081 0.092 0.169 0.172 112% <array>
0.062 0.070 0.124 0.128 106% <tuple>
0.056 0.064 0.114 0.114 104% <typeindex>
0.143 0.159 0.263 0.287 101% <algorithm>
..... 0.397 0.559 0.758 91% <execution>
0.149 0.214 0.269 0.284 91% <functional>
0.147 0.159 0.249 0.259 76% <memory>
0.171 0.209 0.281 0.291 70% <stdexcept>
0.172 0.210 0.280 0.292 70% <bitset>
0.188 0.227 0.302 0.312 66% <string>
0.199 0.239 0.320 0.330 66% <system_error>
0.391 0.450 0.611 0.645 65% <regex>
0.109 0.120 0.174 0.178 63% <iterator>
0.192 0.204 0.303 0.312 62% <thread>
0.138 0.160 0.209 0.223 62% <list>
0.318 0.378 0.498 0.510 60% <sstream>
0.406 0.458 0.618 0.646 59% <random>
0.148 0.165 0.226 0.235 59% <valarray>
0.259 0.298 0.397 0.411 59% <streambuf>
0.287 0.332 0.442 0.455 59% <ostream>
0.282 0.325 0.438 0.447 59% <ios>
0.531 0.604 0.801 0.841 58% <future>
0.292 0.339 0.449 0.461 58% <iostream>
0.134 0.158 0.205 0.211 57% <deque>
0.135 0.157 0.205 0.212 57% <forward_list>
0.271 0.304 0.414 0.425 57% <mutex>
0.298 0.342 0.454 0.465 56% <locale>
0.140 0.166 0.212 0.218 56% <vector>
0.307 0.353 0.465 0.478 56% <strstream>
0.301 0.342 0.459 0.468 55% <fstream>
0.279 0.310 0.423 0.433 55% <shared_mutex>
0.304 0.349 0.456 0.470 55% <codecvt>
0.279 0.307 0.418 0.431 54% <condition_variable>
0.307 0.371 0.464 0.471 53% <iomanip>
..... 0.166 0.246 0.252 52% <charconv>
0.145 0.171 0.210 0.220 52% <set>
0.334 0.383 0.494 0.505 51% <complex>
0.137 0.153 0.200 0.206 50% <scoped_allocator>
0.175 0.199 0.254 0.263 50% <unordered_map>
0.308 0.355 0.452 0.460 49% <istream>
0.176 0.198 0.255 0.262 49% <unordered_set>
0.148 0.165 0.212 0.218 47% <map>
..... 0.150 0.201 0.215 43% <any>
..... 0.342 0.473 0.490 43% <memory_resource>
..... 0.149 0.206 0.213 43% <optional>
..... 0.206 0.279 0.293 42% <string_view>
..... 0.213 0.273 0.277 30% <variant>
0.072 0.080 0.090 0.090 25% <atomic>
0.053 0.062 0.066 0.066 25% <new>
0.048 0.055 0.058 0.059 23% <type_traits>
0.054 0.062 0.065 0.065 20% <exception>
0.056 0.063 0.067 0.067 20% <typeinfo>
0.052 0.059 0.062 0.062 19% <ratio>
..... ..... 0.385 0.418 9% <ranges>
..... ..... 0.826 0.867 5% <format>
..... ..... 0.229 0.237 3% <stop_token>
..... ..... 0.514 0.527 3% <syncstream>
..... ..... 0.221 0.226 2% <barrier>
>cl
Microsoft (R) C/C++ Optimizing Compiler Version 19.35.32216.1 for x64

Repro:

import subprocess
import json
import os
import sys
import colorama
from colorama import Fore, Back, Style
from collections import defaultdict
colorama.init()


def parse_msvc_wall(output):
    """Extract the number that follows ``c1xx.dll)=`` in compiler output.

    ``output`` is a bytes object. The value is taken from just after the
    first ``b'c1xx.dll)='`` marker up to the next ``b's'`` found at or after
    the marker, and converted with ``float``.

    Returns the parsed float, or ``None`` when the marker is absent.
    """
    marker = b'c1xx.dll)='
    marker_at = output.find(marker)
    if marker_at < 0:
        return None
    value_begin = marker_at + len(marker)
    # The search for the terminating 's' starts at the marker itself.
    value_end = output.find(b's', marker_at)
    return float(output[value_begin:value_end])


def msvc_get_parsing_time(*fnames, std=None):
    """Run ``cl`` on an empty source with the given headers force-included.

    Args:
        *fnames: Header names written with their delimiters, e.g.
            ``'<vector>'``; the first and last character of each name are
            stripped before it is passed to ``/FI``. A single list or tuple
            of such names is accepted as well.
        std: Suffix for ``/std:c++<std>`` (e.g. ``'17'``, ``'latest'``), or
            ``None`` to pass no ``/std`` option.

    Returns:
        The time parsed by ``parse_msvc_wall`` from the compiler output, or
        ``None`` when the compiler fails and its stdout mentions ``C1083``
        (or when no time can be found in a successful run's stdout).

    Raises:
        subprocess.CalledProcessError: the compiler failed, stdout did not
            mention ``C1083`` and no time could be parsed from stderr. Both
            captured streams are printed before re-raising.
    """
    # `fnames` itself is always a tuple, so the single-sequence form has to be
    # detected on its only element.
    if len(fnames) == 1 and isinstance(fnames[0], (list, tuple)):
        fnames = fnames[0]

    # '.empty.tmp' is the translation unit; it must already exist in the
    # current working directory.
    cmd = ['cl', '/nologo', '/Bt', '/Zs', '/TP', '/w', '.empty.tmp']

    if std is not None:
        cmd.append(f'/std:c++{std}')

    cmd += [f'/FI{fn[1:-1]}' for fn in fnames]

    try:
        output = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True).stdout
    except subprocess.CalledProcessError as e:
        # NOTE(review): C1083 is presumably "cannot open include file", i.e.
        # the header does not exist for this toolset - confirm.
        if b'C1083' in e.stdout:
            return None
        elapsed = parse_msvc_wall(e.stderr)
        if elapsed is not None:
            return elapsed
        print(f'stderr={e.stderr}')
        print(f'stdout={e.stdout}')
        raise

    return parse_msvc_wall(output)


# Header names to benchmark, one per line, written with their angle brackets.
# Source of the list: https://eel.is/c++draft/headers
# The '🔗' lines (presumably link markers copied along with the table) are
# removed by the set difference at the end of the expression.
headers = set('''
<algorithm>
<flat_set>
<mutex>
<stdexcept>
<any>
<format>
<new>
<stdfloat>
<array>
<forward_list>
<numbers>
<stop_token>
<atomic>
<fstream>
<numeric>
<streambuf>
<barrier>
<functional>
<optional>
<string>
<bit>
<future>
<ostream>
<string_view>
<bitset>
<generator>
<print>
<strstream>
<charconv>
<initializer_list>
<queue>
<syncstream>
<chrono>
<iomanip>
<random>
<system_error>
<codecvt>
<ios>
<ranges>
<thread>
🔗
<compare>
<iosfwd>
<ratio>
<tuple>
🔗
<complex>
<iostream>
<regex>
<type_traits>
🔗
<concepts>
<istream>
<scoped_allocator>
<typeindex>
🔗
<condition_variable>
<iterator>
<semaphore>
<typeinfo>
🔗
<coroutine>
<latch>
<set>
<unordered_map>
🔗
<deque>
<limits>
<shared_mutex>
<unordered_set>
🔗
<exception>
<list>
<source_location>
<utility>
🔗
<execution>
<locale>
<span>
<valarray>
🔗
<expected>
<map>
<spanstream>
<variant>
🔗
<filesystem>
<mdspan>
<sstream>
<vector>
🔗
<flat_map>
<memory>
<stack>
<version>
🔗
<memory_resource>
<stacktrace>
<cassert>
<cfenv>
<climits>
<csetjmp>
<cstddef>
<cstdlib>
<cuchar>
🔗
<cctype>
<cfloat>
<clocale>
<csignal>
<cstdint>
<cstring>
<cwchar>
🔗
<cerrno>
<cinttypes>
<cmath>
<cstdarg>
<cstdio>
<ctime>
<cwctype>
'''.strip().splitlines()) - {'🔗'}
#headers = ['<string_view>', '<chrono>']
# Length of the longest header name; used to size the last column of the
# table separator printed by print_slowdown.
max_header_name_len = max(map(len, headers))


def time_to_color(t):
    """Pick the colorama foreground/style pair for a measured time ``t``.

    The first band whose exclusive upper bound is greater than ``t`` wins.
    Values that fall below no bound (including NaN, for which every
    comparison is false) get the final magenta/dim pair.

    Returns a ``(foreground, style)`` tuple; both entries may be empty
    strings for the "no color" band.
    """
    # (exclusive upper bound, foreground, style), ascending by bound.
    bands = (
        (0.010, Fore.BLACK, Style.BRIGHT),
        (0.020, Fore.WHITE, Style.DIM),
        (0.040, '', ''),
        (0.060, Fore.CYAN, Style.BRIGHT),
        (0.080, Fore.WHITE, Style.BRIGHT),
        (0.100, Fore.YELLOW, Style.BRIGHT),
        (0.150, Fore.RED, Style.BRIGHT),
        (0.200, Fore.RED, Style.DIM),
        (0.300, Fore.MAGENTA, Style.BRIGHT),
    )
    for upper_bound, fore, style in bands:
        if t < upper_bound:
            return fore, style
    return Fore.MAGENTA, Style.DIM


# Sequence appended after each colored cell (see info) to reset style and
# foreground color.
reset_colors = Style.RESET_ALL + Fore.RESET

# Language modes to measure, in column order. None means no /std option is
# passed to the compiler and is labelled 'default' in the output.
stds = [None, '17', '20', 'latest']


def ttc(time):
    """Return the color prefix for ``time``: foreground and style joined."""
    return ''.join(time_to_color(time))


def info(timings, expected_count=len(stds), reverse=False, delim=' ', fill=' .....'):
    """Format one row of timings as colored, fixed-width cells.

    Args:
        timings: Measured times; each is rendered as ``{:>6.3f}`` wrapped in
            its color prefix and the reset sequence.
        expected_count: Total number of cells in the row. Missing cells are
            filled with ``fill``. The default is captured once, when the
            function is defined.
        reverse: When true the filler cells come first, otherwise last.
        delim: String placed between cells.
        fill: Text used for each missing cell.

    Returns:
        The joined row as a single string.
    """
    cells = []
    for time in timings:
        cells.append(f'{ttc(time)}{time:>6.3f}{reset_colors}')
    padding = [fill] * (expected_count - len(timings))
    if reverse:
        ordered = padding + cells
    else:
        ordered = cells + padding
    return delim.join(ordered)


def print_slowdown(all_timings, considered_disabled=0.030, min_diff=0.005, min_diff_rel=0.01):
    """Print a table of headers ordered by slowdown, largest first.

    Args:
        all_timings: Mapping of header name to its list of timings, one per
            entry of ``stds`` in the same order.
        considered_disabled: Leading timings below this value are dropped
            before the comparison, so the baseline is the first mode whose
            time reaches the threshold.
        min_diff: Minimum absolute difference between the last and the
            baseline timing for a header to be listed.
        min_diff_rel: Minimum absolute relative slowdown for a header to be
            listed.
    """
    labels = [f'c++{std}' if std else 'default' for std in stds]
    heading = ' | '.join(labels)
    print(f'{heading} | slowdown | header')
    print('------:|' * (len(stds) + 1) + '-' * max_header_name_len)

    rows = []
    for header, timings in all_timings.items():
        # Skip the leading modes whose time is below the threshold.
        first = 0
        while first < len(timings) and timings[first] < considered_disabled:
            first += 1
        enabled = timings[first:]
        if not enabled:
            continue

        baseline, latest = enabled[0], enabled[-1]
        slowdown = latest / baseline - 1
        if abs(latest - baseline) > min_diff and abs(slowdown) > min_diff_rel:
            rows.append((slowdown, header, enabled))

    rows.sort(reverse=True)
    for slowdown, header, enabled in rows:
        cells = info(enabled, reverse=True, delim=' |')
        print(f'{cells} | {slowdown:>5.0%} | `{header}`')


def get_timings():
    """Measure every header in ``headers`` under every mode in ``stds``.

    Creates (or truncates) ``.empty.tmp`` in the current directory, which
    ``msvc_get_parsing_time`` compiles with the header force-included. Each
    header/mode pair is measured up to 10 times and the minimum is kept.
    Sampling for a pair stops at the first run that yields no time; if no
    run yielded one, NaN is recorded. Progress is printed by rewriting the
    current line as each mode finishes.

    Returns:
        A ``defaultdict(list)`` mapping header name to its list of best
        times, in the order of ``stds``.
    """
    # Only the file's existence matters; it is left empty.
    with open('.empty.tmp', 'w'):
        pass

    all_timings = defaultdict(list)
    print('Timing standard library headers:')
    # Sorted so the run order does not depend on set iteration order.
    for header in sorted(headers):
        print(f'{info([])} {header}', end='')

        for std in stds:
            samples = []
            for _ in range(10):
                sample = msvc_get_parsing_time(header, std=std)
                if sample is None:
                    break
                samples.append(sample)

            # NaN marks "no measurement at all" for this header/mode.
            best = min(samples) if samples else float('nan')
            all_timings[header].append(best)
            print(f'\r{info(all_timings[header])} {header}', end='')
        print(f'\r{info(all_timings[header])} {header}')
    return all_timings


if __name__ == "__main__":
    # Results are cached in a JSON file next to the working directory. They
    # are reused when the file exists unless '-r' is given on the command
    # line, in which case (or when there is no cache) everything is measured
    # again and the cache is rewritten.
    cache_path = 'bench_syshdrs.json'
    reuse_cache = os.path.exists(cache_path) and '-r' not in sys.argv
    if reuse_cache:
        with open(cache_path) as f:
            timings = json.load(f)
    else:
        timings = get_timings()
        with open(cache_path, 'w+') as f:
            json.dump(timings, f)
        # Blank lines separate the progress output from the summary table.
        print('\n' * 3)
    print_slowdown(timings)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions