Skip to content

Heisen-misoptimisation on x86_64 #112213

Open
@cbeuw

Description

@cbeuw

Fuzzer-generated code, minimised and rewritten into surface Rust. Miri reports no UB under either aliasing model. The only uses of unsafe here are for mutating the static mut hasher and for a pointer write on a not-taken branch (the pointer happens to be dangling, so it would have been UB had the branch been taken).

#![feature(const_hash)]
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::ptr;
/// Program-wide hasher that `dump_var` feeds and `main` reads back via `finish()`.
/// It is a `static mut`, so every access to it has to sit inside `unsafe`.
static mut H: DefaultHasher = DefaultHasher::new();

/// Hashes the four arguments into the global hasher `H`, in argument order.
///
/// All arguments are taken by value and nothing is returned; the only effect
/// is the state change of `H`.
fn dump_var<T: Hash, U: Hash, V: Hash, W: Hash>(val0: T, val1: U, val2: V, val3: W) {
    // SAFETY: `H` is a `static mut`; nothing in this program spawns threads, so
    // these mutable accesses cannot race with each other.
    unsafe {
        val0.hash(&mut H);
        val1.hash(&mut H);
        val2.hash(&mut H);
        val3.hash(&mut H);
    }
}
/// Top-level driver of the reproducer: calls `fn1` and `fn5`, hashes a few more
/// values through `dump_var`, and returns the constant array `[13; 2]`.
///
/// `_2` is forwarded to `fn1` and (three times) to `fn5`, `_3` is forwarded to
/// `fn5` only, `_4` goes to `fn5` and is hashed here, `_5` is only hashed here.
/// The return value does not depend on any argument.
pub fn fn0(mut _2: [isize; 7], mut _3: usize, mut _4: i32, mut _5: u64) -> [isize; 2] {
    let mut _13: ([bool; 1],) = ([false; 1],);
    let mut _14: [isize; 6] = [0; 6];
    let mut _29: [isize; 6] = [0; 6];
    let ret: [isize; 2] = [13; 2];
    // `fn1` returns a raw pointer into one of its own locals, so `_6` is
    // already dangling when it is stored here.
    let _6 = fn1(_2);
    // `fn5` only writes through `_6` on a match arm that is not taken.
    fn5(ret, _2, _3, _2, _2, _4, _6);
    dump_var(_4, _5, 0, 0);
    dump_var([-112; 6], _13.0, 0, (4, 10));
    _14 = [(-9223372036854775808_isize); 6];
    // `_29` has not been reassigned, so it is still all zeros here.
    dump_var((), (), _29, _14);
    return ret;
}

/// Hashes `_10` (twice, followed by two zeros) and two zero-initialised tuples
/// into `H`, then returns a raw pointer to field `.3` of the local `_21`.
///
/// The returned pointer refers to a local of this function, so it is dangling
/// as soon as the function returns and must not be dereferenced by the caller.
pub fn fn1(mut _10: [isize; 7]) -> *mut isize {
    let arr = [0; 3];
    let mut _21: (u8, u128, [u64; 3], isize, i16) = (0, 0, arr, 0, 0);
    let mut _24: (u8, u128, [u64; 3], isize, i16) = (0, 0, arr, 0, 0);
    dump_var(_10, _10, 0, 0);
    // SAFETY: `H` is a `static mut`; nothing in this program spawns threads, so
    // the mutable accesses cannot race. Note the order: `_24` first, then `_21`.
    unsafe {
        _24.hash(&mut H);
        _21.hash(&mut H);
    }
    // Address of a stack local escaping the function: intentionally dangling.
    return core::ptr::addr_of_mut!(_21.3);
}
/// Hashes a fixed sequence of values into `H` via `dump_var`, then returns.
///
/// With the constants written in the body, the inner `match` always sees `9`,
/// so the loop body runs exactly once: one `dump_var` before the `match`, two
/// more inside the `9` arm, then `return`. The `1` and `_` arms are never
/// taken; the `_` arm contains the only dereference of `_16`.
///
/// `_16` is therefore never dereferenced on the executed path, which is why
/// passing a dangling pointer (as `fn0` does) causes no UB.
fn fn5(
    mut _2: [isize; 2],
    mut _6: [isize; 7],
    mut _7: usize,
    mut _9: [isize; 7],
    mut _10: [isize; 7],
    mut _12: i32,
    mut _16: *mut isize,
) {
    // Large nested tuple; below only `.0` and `.4 .0` are written, and the
    // address of `.1 .0` is taken.
    let mut _23: (
        *const usize,
        (usize, u8),
        (char, i32, (i64,), u64),
        char,
        (
            (f64, i64, i8),
            [f32; 1],
            (usize, u8),
            i8,
            (isize, i64),
            [f32; 1],
        ),
        *const u8,
    ) = (
        ptr::null(),
        (0, 0),
        ('a', 0, (0,), 0),
        'a',
        ((0., 0, 0), [0.; 1], (0, 0), 0, (0, 0), [0.; 1]),
        ptr::null(),
    );
    // Starts out as the `Default` value, i.e. all fields zero.
    let mut _27: ((f64, i64, i8),) = Default::default();
    loop {
        _9 = _6;
        let mut _20 = (-9223372036854775808_isize) as i128;
        let mut _21 = (0,);
        loop {
            // Self-referential pointer: `_23.0` points at another field of `_23`.
            _23.0 = core::ptr::addr_of!(_23.1 .0);
            // The third component (9) is what the `match` below dispatches on.
            _23.4 .0 = (f64::NAN, (-1102345069964335552_i64), 9_i8);
            // `_27.0 .1` is hashed *before* `_27.0` is overwritten on the next
            // line, so on the first (and only) pass it is still the default 0.
            dump_var(0, 0, _6, _27.0 .1);
            _27.0 = _23.4 .0;
            match _27.0 .2 {
                // Always taken: `_27.0 .2` was just copied from the literal 9 above.
                9 => {
                    dump_var(_9, _21, _20, _12);
                    dump_var(_2, _10, _6, _7);
                    return;
                }
                // Not taken with the constants above; would restart the outer loop.
                1 => break,
                // Not taken with the constants above.
                // SAFETY: not actually upheld - `fn0` passes a dangling pointer.
                // The program is free of UB only because this arm never executes.
                _ => unsafe {
                    (*_16) = 88_isize;
                },
            }
        }
    }
}
/// Runs `fn0` on fixed arguments, prints the array it returns with `{:?}`, and
/// then prints the digest accumulated in the global hasher `H`.
pub fn main() {
    println!(
        "{:?}",
        fn0(
            [(-56_isize); 7],
            15609822513776909592_usize,
            -652623562_i32,
            18399139786288871729_u64
        )
    );
    // SAFETY: `H` is a `static mut`; all hashing has finished by this point and
    // nothing in this program spawns threads, so the read cannot race.
    unsafe {
        println!("hash: {}", H.finish());
    }
}

The correct hash should be 12326103344558250442, which Miri agrees with.

$ rustc -Zmir-opt-level=3 -Copt-level=1 repro.rs && ./repro
[13, 13]
hash: 12326103344558250442

With -Zmir-opt-level>=3 and -Copt-level>=2, it prints something different

$ rustc -Zmir-opt-level=3 -Copt-level=2 repro.rs && ./repro
[13, 13]
hash: 5661618040229725087

I'm fairly certain that the bug is in LLVM, because in this alternative version, where fn5 is written in custom MIR, the miscompilation can be triggered even with -Zmir-opt-level=0

$ rustc -Zmir-opt-level=0 -Copt-level=1 repro-alt.rs && ./repro-alt
[13, 13]
hash: 17317577282543711276 # right
$ rustc -Zmir-opt-level=0 -Copt-level=2 repro-alt.rs && ./repro-alt
[13, 13]
hash: 469181360651188025 # wrong
repro-alt.rs
#![feature(const_hash)]
#![feature(custom_mir, core_intrinsics)]
extern crate core;
use core::intrinsics::mir::*;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
/// Program-wide hasher that `dump_var` feeds and `main` reads back via `finish()`.
/// It is a `static mut`, so every access to it has to sit inside `unsafe`.
static mut H: DefaultHasher = DefaultHasher::new();
/// Hashes the four arguments into the global hasher `H`, in argument order.
///
/// All arguments are taken by value and nothing is returned; the only effect
/// is the state change of `H`.
fn dump_var<T: Hash, U: Hash, V: Hash, W: Hash>(val0: T, val1: U, val2: V, val3: W) {
    // SAFETY: `H` is a `static mut`; nothing in this program spawns threads, so
    // these mutable accesses cannot race with each other.
    unsafe {
        val0.hash(&mut H);
        val1.hash(&mut H);
        val2.hash(&mut H);
        val3.hash(&mut H);
    }
}
/// Top-level driver of the reproducer: calls `fn1` and `fn5`, hashes a few more
/// values through `dump_var`, and returns the constant array `[13; 2]`.
///
/// `_2` is forwarded to `fn1` and (three times) to `fn5`, `_3` is forwarded to
/// `fn5` only, `_4` goes to `fn5` and is hashed here, `_5` is only hashed here.
/// The return value does not depend on any argument.
pub fn fn0(mut _2: [isize; 7], mut _3: usize, mut _4: i32, mut _5: u64) -> [isize; 2] {
    let mut _13: ([bool; 1],) = ([false; 1],);
    let mut _14: [isize; 6] = [0; 6];
    let mut _29: [isize; 6] = [0; 6];
    let ret: [isize; 2] = [13; 2];
    // `fn1` returns a raw pointer into one of its own locals, so `_6` is
    // already dangling when it is stored here.
    let _6 = fn1(_2);
    // `fn5` only writes through `_6` in a basic block that is not reached.
    fn5(ret, _2, _3, _2, _2, _4, _6);
    dump_var(_4, _5, 0, 0);
    dump_var([-112; 6], _13.0, 0, (4, 10));
    // Both arrays are overwritten before being hashed in the final call.
    _14 = [(-9223372036854775808_isize); 6];
    _29 = [9223372036854775807_isize; 6];
    dump_var((), (), _29, _14);
    return ret;
}
/// Hashes `_10` (twice, followed by two zeros) and two zero-initialised tuples
/// into `H`, then returns a raw pointer to field `.3` of the local `_21`.
///
/// The returned pointer refers to a local of this function, so it is dangling
/// as soon as the function returns and must not be dereferenced by the caller.
pub fn fn1(mut _10: [isize; 7]) -> *mut isize {
    let arr = [0; 3];
    let mut _21: (u8, u128, [u64; 3], isize, i16) = (0, 0, arr, 0, 0);
    let mut _24: (u8, u128, [u64; 3], isize, i16) = (0, 0, arr, 0, 0);
    dump_var(_10, _10, 0, 0);
    // SAFETY: `H` is a `static mut`; nothing in this program spawns threads, so
    // the mutable accesses cannot race. Note the order: `_24` first, then `_21`.
    unsafe {
        _24.hash(&mut H);
        _21.hash(&mut H);
    }
    // Address of a stack local escaping the function: intentionally dangling.
    return core::ptr::addr_of_mut!(_21.3);
}
/// Custom-MIR version of `fn5`, written with the `mir!` macro from
/// `core::intrinsics::mir` (dialect "runtime", phase "initial").
///
/// Control flow as written: entry -> bb7 -> bb9 -> (call) -> bb11. bb11
/// switches on `_27.0.2`: 1 -> bb7, 9 -> bb17, anything else -> bb15.
/// bb17 and bb18 each call `dump_var`, and bb19 returns. bb15 holds the only
/// write through `_16` and then jumps back to bb9.
///
/// Because bb9 stores the literal 9 into the value bb11 switches on, the
/// executed path is bb7 -> bb9 -> bb11 -> bb17 -> bb18 -> bb19, and `_16` is
/// never dereferenced.
#[custom_mir(dialect = "runtime", phase = "initial")]
fn fn5(
    mut _2: [isize; 2],
    mut _6: [isize; 7],
    mut _7: usize,
    mut _9: [isize; 7],
    mut _10: [isize; 7],
    mut _12: i32,
    mut _16: *mut isize,
) {
    mir! {
    // Locals are declared without initialisers; only the fields assigned in
    // the blocks below are ever written.
    let _20: i128;
    let _21: (i64,);
    let _23: (*const usize, (usize, u8), (char, i32, (i64,), u64), char, ((f64, i64, i8), [f32; 1], (usize, u8), i8, (isize, i64), [f32; 1]), *const u8);
    let _27: ((f64, i64, i8), [f32; 1], (usize, u8), i8, (isize, i64), [f32; 1]);
    // Destination for the unit results of the `dump_var` calls.
    let _36: ();
    // Entry block.
    {
    Goto(bb7)
    }
    bb7 = {
    _9 = _6;
    _20 = (-9223372036854775808_isize) as i128;
    _21 = (0,);
    Goto(bb9)
    }
    bb9 = {
    // Self-referential pointer: `_23.0` points at another field of `_23`.
    _23.0 = core::ptr::addr_of!(_23.1.0);
    // The third component (9) is what bb11 switches on after the copy there.
    _23.4.0 = (f64::NAN, (-1102345069964335552_i64), 9_i8);
    // Written just before the call so that the hashed field is initialised.
    _27.0.1 = _7 as i64;
    Call(_36, bb11, dump_var(0,0, _6, _27.0.1))
    }
    bb11 = {
    // Overwrites all of `_27.0`, including the `.1` field assigned in bb9.
    _27.0 = _23.4.0;
    match _27.0.2 {
    1 => bb7,
    9 => bb17,
    _ => bb15
    }
    }
    // Not reached with the constant stored in bb9. If it were, this would write
    // through `_16`, which `fn0` passes as a dangling pointer.
    bb15 = {
    (*_16) = 88_isize;
    Goto(bb9)
    }
    bb17 = {
    Call(_36, bb18, dump_var(Move(_9), Move(_21), Move(_20), Move(_12)))
    }
    bb18 = {
    Call(_36, bb19, dump_var(Move(_2), Move(_10), Move(_6), Move(_7)))
    }
    bb19 = {
    Return()
    }
    }
}
/// Runs `fn0` on fixed arguments, prints the array it returns with `{:?}`, and
/// then prints the digest accumulated in the global hasher `H`.
pub fn main() {
    // Each argument goes through `std::hint::black_box` - presumably so the
    // optimiser cannot treat the inputs as known constants.
    println!(
        "{:?}",
        fn0(
            std::hint::black_box([(-56_isize); 7]),
            std::hint::black_box(15609822513776909592_usize),
            std::hint::black_box(-652623562_i32),
            std::hint::black_box(18399139786288871729_u64)
        )
    );
    // SAFETY: `H` is a `static mut`; all hashing has finished by this point and
    // nothing in this program spawns threads, so the read cannot race.
    unsafe {
        println!("hash: {}", H.finish());
    }
}

Unfortunately this is a Heisenbug: if you replace dump_var with something that debug-prints its arguments, the misoptimisation goes away, so I don't know which variable got the wrong value.

Recent LLVM miscompilation fixes (llvm/llvm-project@e506bfa and llvm/llvm-project@97f0e7b) do not fix this.

I can only reproduce this on x86_64 Linux, not Apple Silicon

cc @nikic

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVM — Area: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
    A-rustlantis — A miscompilation found by Rustlantis
    I-unsound — Issue: A soundness hole (worst kind of bug), see: https://en.wikipedia.org/wiki/Soundness
    P-medium — Medium priority
    T-compiler — Relevant to the compiler team, which will review and decide on the PR/issue.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions