diff --git a/.github/workflows/failures.yml b/.github/workflows/failures.yml index 66129f805d4c8..e5d94767fe7d7 100644 --- a/.github/workflows/failures.yml +++ b/.github/workflows/failures.yml @@ -94,12 +94,16 @@ jobs: run: cat tests/failing-non-lto-tests.txt >> tests/failing-ui-tests.txt - name: Run tests + # TODO: re-enable those tests for libgccjit 12. + if: matrix.libgccjit_version.gcc != 'libgccjit12.so' id: tests run: | ${{ matrix.libgccjit_version.env_extra }} ./y.sh test --release --clean --build-sysroot --test-failing-rustc ${{ matrix.libgccjit_version.extra }} | tee output_log rg --text "test result" output_log >> $GITHUB_STEP_SUMMARY - name: Run failing ui pattern tests for ICE + # TODO: re-enable those tests for libgccjit 12. + if: matrix.libgccjit_version.gcc != 'libgccjit12.so' id: ui-tests run: | ${{ matrix.libgccjit_version.env_extra }} ./y.sh test --release --test-failing-ui-pattern-tests ${{ matrix.libgccjit_version.extra }} | tee output_log_ui diff --git a/.github/workflows/gcc12.yml b/.github/workflows/gcc12.yml index 44dca6be6a19a..5977ed33c56ec 100644 --- a/.github/workflows/gcc12.yml +++ b/.github/workflows/gcc12.yml @@ -67,22 +67,24 @@ jobs: - name: Build run: | ./y.sh prepare --only-libcore --libgccjit12-patches - ./y.sh build --sysroot --no-default-features --sysroot-panic-abort - cargo test --no-default-features - ./y.sh clean all - - - name: Prepare dependencies - run: | - git config --global user.email "user@example.com" - git config --global user.name "User" - ./y.sh prepare --libgccjit12-patches - - - name: Add more failing tests for GCC 12 - run: cat tests/failing-ui-tests12.txt >> tests/failing-ui-tests.txt - - - name: Add more failing tests because the sysroot is not compiled with LTO - run: cat tests/failing-non-lto-tests.txt >> tests/failing-ui-tests.txt - - - name: Run tests - run: | - ./y.sh test --release --clean --build-sysroot ${{ matrix.commands }} --no-default-features + ./y.sh build --no-default-features --sysroot-panic-abort + # Uncomment when we no longer need to remove global variables. + #./y.sh build --sysroot --no-default-features --sysroot-panic-abort + #cargo test --no-default-features + #./y.sh clean all + + #- name: Prepare dependencies + #run: | + #git config --global user.email "user@example.com" + #git config --global user.name "User" + #./y.sh prepare --libgccjit12-patches + + #- name: Add more failing tests for GCC 12 + #run: cat tests/failing-ui-tests12.txt >> tests/failing-ui-tests.txt + + #- name: Add more failing tests because the sysroot is not compiled with LTO + #run: cat tests/failing-non-lto-tests.txt >> tests/failing-ui-tests.txt + + #- name: Run tests + #run: | + #./y.sh test --release --clean --build-sysroot ${{ matrix.commands }} --no-default-features diff --git a/.github/workflows/stdarch.yml b/.github/workflows/stdarch.yml index 20341ae53d7ee..e24b25b73690f 100644 --- a/.github/workflows/stdarch.yml +++ b/.github/workflows/stdarch.yml @@ -95,4 +95,5 @@ jobs: if: ${{ matrix.cargo_runner }} run: | # FIXME: these tests fail when the sysroot is compiled with LTO because of a missing symbol in proc-macro. - STDARCH_TEST_EVERYTHING=1 CHANNEL=release CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="${{ matrix.cargo_runner }}" TARGET=x86_64-unknown-linux-gnu CG_RUSTFLAGS="-Ainternal_features" ./y.sh cargo test --manifest-path build/build_sysroot/sysroot_src/library/stdarch/Cargo.toml -- --skip rtm --skip tbm --skip sse4a + # TODO: remove --skip test_mm512_stream_ps when stdarch is updated in rustc. 
+ STDARCH_TEST_EVERYTHING=1 CHANNEL=release CARGO_TARGET_X86_64_UNKNOWN_LINUX_GNU_RUNNER="${{ matrix.cargo_runner }}" TARGET=x86_64-unknown-linux-gnu CG_RUSTFLAGS="-Ainternal_features" ./y.sh cargo test --manifest-path build/build_sysroot/sysroot_src/library/stdarch/Cargo.toml -- --skip rtm --skip tbm --skip sse4a --skip test_mm512_stream_ps diff --git a/Cargo.lock b/Cargo.lock index 2ce9eb081eecc..cd693835ded1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -80,7 +80,7 @@ dependencies = [ [[package]] name = "gccjit" version = "2.0.0" -source = "git+https://github.com/antoyo/gccjit.rs#f1545d7c2c13e42d78eaac8032d49ab8f7d43b6e" +source = "git+https://github.com/rust-lang/gccjit.rs#328cb1b414f67dfa15162ba7a55ed01931f1b219" dependencies = [ "gccjit_sys", ] @@ -88,7 +88,7 @@ dependencies = [ [[package]] name = "gccjit_sys" version = "0.1.0" -source = "git+https://github.com/antoyo/gccjit.rs#f1545d7c2c13e42d78eaac8032d49ab8f7d43b6e" +source = "git+https://github.com/rust-lang/gccjit.rs#328cb1b414f67dfa15162ba7a55ed01931f1b219" dependencies = [ "libc", ] diff --git a/Cargo.toml b/Cargo.toml index 100c10ef1d7af..5caca63f63480 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ master = ["gccjit/master"] default = ["master"] [dependencies] -gccjit = { git = "https://github.com/antoyo/gccjit.rs" } +gccjit = "2.1" # Local copy. #gccjit = { path = "../gccjit.rs" } diff --git a/Readme.md b/Readme.md index bd552b84f9282..e92c16ece2f10 100644 --- a/Readme.md +++ b/Readme.md @@ -121,8 +121,7 @@ If you compiled cg_gccjit in debug mode (aka you didn't pass `--release` to `./y ### LTO -To use LTO, you need to set the variable `FAT_LTO=1` and `EMBED_LTO_BITCODE=1` in addition to setting `lto = "fat"` in the `Cargo.toml`. -Don't set `FAT_LTO` when compiling the sysroot, though: only set `EMBED_LTO_BITCODE=1`. +To use LTO, you need to set the variable `EMBED_LTO_BITCODE=1` in addition to setting `lto = "fat"` in the `Cargo.toml`. Failing to set `EMBED_LTO_BITCODE` will give you the following error: diff --git a/build_system/src/config.rs b/build_system/src/config.rs index 081e7d2e250e0..965aedd8be891 100644 --- a/build_system/src/config.rs +++ b/build_system/src/config.rs @@ -387,11 +387,6 @@ impl ConfigInfo { rustflags.push("-Csymbol-mangling-version=v0".to_string()); } - // Since we don't support ThinLTO, disable LTO completely when not trying to do LTO. - // TODO(antoyo): remove when we can handle ThinLTO. - if !env.contains_key(&"FAT_LTO".to_string()) { - rustflags.push("-Clto=off".to_string()); - } // FIXME(antoyo): remove once the atomic shim is gone if os_name == "Darwin" { rustflags.extend_from_slice(&[ diff --git a/build_system/src/test.rs b/build_system/src/test.rs index 462d20d728d12..8d088a3aac318 100644 --- a/build_system/src/test.rs +++ b/build_system/src/test.rs @@ -645,12 +645,16 @@ fn test_projects(env: &Env, args: &TestArg) -> Result<(), String> { //"https://github.com/rust-lang/cargo", // TODO: very slow, only run on master? 
]; + let mut env = env.clone(); + let rustflags = + format!("{} --cap-lints allow", env.get("RUSTFLAGS").cloned().unwrap_or_default()); + env.insert("RUSTFLAGS".to_string(), rustflags); let run_tests = |projects_path, iter: &mut dyn Iterator| -> Result<(), String> { for project in iter { let clone_result = git_clone_root_dir(project, projects_path, true)?; let repo_path = Path::new(&clone_result.repo_dir); - run_cargo_command(&[&"build", &"--release"], Some(repo_path), env, args)?; - run_cargo_command(&[&"test"], Some(repo_path), env, args)?; + run_cargo_command(&[&"build", &"--release"], Some(repo_path), &env, args)?; + run_cargo_command(&[&"test"], Some(repo_path), &env, args)?; } Ok(()) @@ -1034,18 +1038,19 @@ where } fn test_rustc(env: &Env, args: &TestArg) -> Result<(), String> { - test_rustc_inner(env, args, |_| Ok(false), false, "run-make")?; + //test_rustc_inner(env, args, |_| Ok(false), false, "run-make")?; test_rustc_inner(env, args, |_| Ok(false), false, "ui") } fn test_failing_rustc(env: &Env, args: &TestArg) -> Result<(), String> { - let result1 = test_rustc_inner( + let result1 = Ok(()); + /*test_rustc_inner( env, args, retain_files_callback("tests/failing-run-make-tests.txt", "run-make"), false, "run-make", - ); + )*/ let result2 = test_rustc_inner( env, @@ -1066,13 +1071,14 @@ fn test_successful_rustc(env: &Env, args: &TestArg) -> Result<(), String> { false, "ui", )?; - test_rustc_inner( + Ok(()) + /*test_rustc_inner( env, args, remove_files_callback("tests/failing-run-make-tests.txt", "run-make"), false, "run-make", - ) + )*/ } fn test_failing_ui_pattern_tests(env: &Env, args: &TestArg) -> Result<(), String> { diff --git a/example/example.rs b/example/example.rs index 5878e8548d926..03470b74d0a13 100644 --- a/example/example.rs +++ b/example/example.rs @@ -153,7 +153,7 @@ fn array_as_slice(arr: &[u8; 3]) -> &[u8] { arr } -unsafe fn use_ctlz_nonzero(a: u16) -> u16 { +unsafe fn use_ctlz_nonzero(a: u16) -> u32 { intrinsics::ctlz_nonzero(a) } diff --git a/example/mini_core.rs b/example/mini_core.rs index cc3d647c8c82f..a48c0a4450c29 100644 --- a/example/mini_core.rs +++ b/example/mini_core.rs @@ -1,6 +1,6 @@ #![feature( no_core, lang_items, intrinsics, unboxed_closures, type_ascription, extern_types, - decl_macro, rustc_attrs, transparent_unions, auto_traits, + decl_macro, rustc_attrs, transparent_unions, auto_traits, freeze_impls, thread_local )] #![no_core] @@ -418,6 +418,35 @@ pub fn panic(_msg: &'static str) -> ! { } } +macro_rules! panic_const { + ($($lang:ident = $message:expr,)+) => { + pub mod panic_const { + use super::*; + + $( + #[track_caller] + #[lang = stringify!($lang)] + pub fn $lang() -> ! { + panic($message); + } + )+ + } + } +} + +panic_const! { + panic_const_add_overflow = "attempt to add with overflow", + panic_const_sub_overflow = "attempt to subtract with overflow", + panic_const_mul_overflow = "attempt to multiply with overflow", + panic_const_div_overflow = "attempt to divide with overflow", + panic_const_rem_overflow = "attempt to calculate the remainder with overflow", + panic_const_neg_overflow = "attempt to negate with overflow", + panic_const_shr_overflow = "attempt to shift right with overflow", + panic_const_shl_overflow = "attempt to shift left with overflow", + panic_const_div_by_zero = "attempt to divide by zero", + panic_const_rem_by_zero = "attempt to calculate the remainder with a divisor of zero", +} + #[lang = "panic_cannot_unwind"] fn panic_cannot_unwind() -> ! 
{ unsafe { @@ -563,7 +592,7 @@ pub mod intrinsics { pub fn min_align_of_val(val: *const T) -> usize; pub fn copy(src: *const T, dst: *mut T, count: usize); pub fn transmute(e: T) -> U; - pub fn ctlz_nonzero(x: T) -> T; + pub fn ctlz_nonzero(x: T) -> u32; #[rustc_safe_intrinsic] pub fn needs_drop() -> bool; #[rustc_safe_intrinsic] diff --git a/example/mini_core_hello_world.rs b/example/mini_core_hello_world.rs index add77880716c8..5a7ddc4cd7fa5 100644 --- a/example/mini_core_hello_world.rs +++ b/example/mini_core_hello_world.rs @@ -2,7 +2,7 @@ #![feature( no_core, unboxed_closures, start, lang_items, never_type, linkage, - extern_types, thread_local + extern_types, thread_local, raw_ref_op )] #![no_core] #![allow(dead_code, internal_features, non_camel_case_types)] @@ -99,9 +99,7 @@ fn start( static mut NUM: u8 = 6 * 7; -// FIXME: Use `SyncUnsafeCell` instead of allowing `static_mut_refs` lint -#[allow(static_mut_refs)] -static NUM_REF: &'static u8 = unsafe { &NUM }; +static NUM_REF: &'static u8 = unsafe { &* &raw const NUM }; macro_rules! assert { ($e:expr) => { diff --git a/example/std_example.rs b/example/std_example.rs index ad69409eb6590..8ab8fcc525e5c 100644 --- a/example/std_example.rs +++ b/example/std_example.rs @@ -1,5 +1,5 @@ #![allow(internal_features)] -#![feature(core_intrinsics, coroutines, coroutine_trait, is_sorted)] +#![feature(core_intrinsics, coroutines, coroutine_trait, is_sorted, stmt_expr_attributes)] #[cfg(feature="master")] #[cfg(target_arch="x86_64")] @@ -103,7 +103,7 @@ fn main() { test_simd(); } - Box::pin(move |mut _task_context| { + Box::pin(#[coroutine] move |mut _task_context| { yield (); }).as_mut().resume(0); diff --git a/libgccjit.version b/libgccjit.version index 8cce735832139..23ca7f022155e 100644 --- a/libgccjit.version +++ b/libgccjit.version @@ -1 +1 @@ -d61ce945badf4c9d8237a13ca135e3c46ad13be3 +341be3b7d7ac6976cfed8ed59da3573c040d0776 diff --git a/patches/0001-Add-stdarch-Cargo.toml-for-testing.patch b/patches/0001-Add-stdarch-Cargo.toml-for-testing.patch index 2a55f2cb796f9..9cc377850b9b7 100644 --- a/patches/0001-Add-stdarch-Cargo.toml-for-testing.patch +++ b/patches/0001-Add-stdarch-Cargo.toml-for-testing.patch @@ -19,7 +19,7 @@ index 0000000..4c63700 +members = [ + "crates/core_arch", + "crates/std_detect", -+ "crates/stdarch-gen", ++ "crates/stdarch-gen-arm", + #"examples/" +] +exclude = [ diff --git a/patches/0022-core-Disable-not-compiling-tests.patch b/patches/0022-core-Disable-not-compiling-tests.patch index a7d523f940826..ea1a5a8d35535 100644 --- a/patches/0022-core-Disable-not-compiling-tests.patch +++ b/patches/0022-core-Disable-not-compiling-tests.patch @@ -39,4 +39,4 @@ index 42a26ae..5ac1042 100644 +#![cfg(test)] #![feature(alloc_layout_extra)] #![feature(array_chunks)] - #![feature(array_windows)] + #![feature(array_ptr_get)] diff --git a/patches/libgccjit12/0001-core-Disable-portable-simd-test.patch b/patches/libgccjit12/0001-core-Disable-portable-simd-test.patch index 914ae986b50e7..c060300f44f65 100644 --- a/patches/libgccjit12/0001-core-Disable-portable-simd-test.patch +++ b/patches/libgccjit12/0001-core-Disable-portable-simd-test.patch @@ -1,4 +1,4 @@ -From a5663265f797a43c502915c356fe7899c16cee92 Mon Sep 17 00:00:00 2001 +From 124a11ce086952a5794d5cfbaa45175809497b81 Mon Sep 17 00:00:00 2001 From: None Date: Sat, 18 Nov 2023 10:50:36 -0500 Subject: [PATCH] [core] Disable portable-simd test @@ -8,18 +8,18 @@ Subject: [PATCH] [core] Disable portable-simd test 1 file changed, 2 deletions(-) diff --git a/library/core/tests/lib.rs 
b/library/core/tests/lib.rs -index d0a119c..76fdece 100644 +index b71786c..cf484d5 100644 --- a/library/core/tests/lib.rs +++ b/library/core/tests/lib.rs -@@ -89,7 +89,6 @@ +@@ -95,7 +95,6 @@ #![feature(never_type)] #![feature(unwrap_infallible)] - #![feature(pointer_is_aligned)] + #![feature(pointer_is_aligned_to)] -#![feature(portable_simd)] #![feature(ptr_metadata)] - #![feature(lazy_cell)] #![feature(unsized_tuple_coercion)] -@@ -155,7 +154,6 @@ mod pin; + #![feature(const_option)] +@@ -157,7 +156,6 @@ mod pin; mod pin_macro; mod ptr; mod result; @@ -28,5 +28,5 @@ index d0a119c..76fdece 100644 mod str; mod str_lossy; -- -2.42.1 +2.45.2 diff --git a/rust-toolchain b/rust-toolchain index 42c26df058a60..3c83f4b4608de 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -1,3 +1,3 @@ [toolchain] -channel = "nightly-2024-03-10" +channel = "nightly-2024-07-02" components = ["rust-src", "rustc-dev", "llvm-tools-preview"] diff --git a/src/asm.rs b/src/asm.rs index b446843a106c2..aa485846cd429 100644 --- a/src/asm.rs +++ b/src/asm.rs @@ -539,7 +539,7 @@ impl<'a, 'gcc, 'tcx> AsmBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let builtin_unreachable = self.context.get_builtin_function("__builtin_unreachable"); let builtin_unreachable: RValue<'gcc> = unsafe { std::mem::transmute(builtin_unreachable) }; - self.call(self.type_void(), None, None, builtin_unreachable, &[], None); + self.call(self.type_void(), None, None, builtin_unreachable, &[], None, None); } // Write results to outputs. diff --git a/src/back/lto.rs b/src/back/lto.rs index 96e0b84450f8c..6b2dbbbed6771 100644 --- a/src/back/lto.rs +++ b/src/back/lto.rs @@ -16,20 +16,22 @@ // /usr/bin/ld: warning: type of symbol `_RNvNvNvNtCs5JWOrf9uCus_5rayon11thread_pool19WORKER_THREAD_STATE7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o // /usr/bin/ld: warning: type of symbol `_RNvNvNvNvNtNtNtCsAj5i4SGTR7_3std4sync4mpmc5waker17current_thread_id5DUMMY7___getit5___KEY' changed from 1 to 6 in /tmp/ccKeUSiR.ltrans0.ltrans.o // /usr/bin/ld: warning: incremental linking of LTO and non-LTO objects; using -flinker-output=nolto-rel which will bypass whole program optimization -use std::ffi::CString; +use std::ffi::{CStr, CString}; use std::fs::{self, File}; use std::path::{Path, PathBuf}; +use std::sync::Arc; -use gccjit::OutputKind; +use gccjit::{Context, OutputKind}; use object::read::archive::ArchiveFile; -use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule}; +use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared}; use rustc_codegen_ssa::back::symbol_export; use rustc_codegen_ssa::back::write::{CodegenContext, FatLtoInput}; use rustc_codegen_ssa::traits::*; use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind}; use rustc_data_structures::memmap::Mmap; -use rustc_errors::{DiagCtxt, FatalError}; +use rustc_errors::{DiagCtxtHandle, FatalError}; use rustc_hir::def_id::LOCAL_CRATE; +use rustc_middle::bug; use rustc_middle::dep_graph::WorkProduct; use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel}; use rustc_session::config::{CrateType, Lto}; @@ -37,7 +39,7 @@ use tempfile::{tempdir, TempDir}; use crate::back::write::save_temp_bitcode; use crate::errors::{DynamicLinkingWithLTO, LtoBitcodeFromRlib, LtoDisallowed, LtoDylib}; -use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext}; +use crate::{to_gcc_opt_level, GccCodegenBackend, GccContext, SyncContext}; /// We keep track of the computed LTO cache keys from the 
previous /// session to determine which CGUs we can reuse. @@ -59,7 +61,7 @@ struct LtoData { fn prepare_lto( cgcx: &CodegenContext, - dcx: &DiagCtxt, + dcx: DiagCtxtHandle<'_>, ) -> Result { let export_threshold = match cgcx.lto { // We're just doing LTO for our one crate @@ -178,12 +180,13 @@ pub(crate) fn run_fat( cached_modules: Vec<(SerializedModule, WorkProduct)>, ) -> Result, FatalError> { let dcx = cgcx.create_dcx(); - let lto_data = prepare_lto(cgcx, &dcx)?; + let dcx = dcx.handle(); + let lto_data = prepare_lto(cgcx, dcx)?; /*let symbols_below_threshold = lto_data.symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::>();*/ fat_lto( cgcx, - &dcx, + dcx, modules, cached_modules, lto_data.upstream_modules, @@ -194,7 +197,7 @@ pub(crate) fn run_fat( fn fat_lto( cgcx: &CodegenContext, - _dcx: &DiagCtxt, + _dcx: DiagCtxtHandle<'_>, modules: Vec>, cached_modules: Vec<(SerializedModule, WorkProduct)>, mut serialized_modules: Vec<(SerializedModule, CString)>, @@ -347,6 +350,395 @@ impl ModuleBuffer { impl ModuleBufferMethods for ModuleBuffer { fn data(&self) -> &[u8] { - unimplemented!("data not needed for GCC codegen"); + &[] } } + +/// Performs thin LTO by running the necessary global analysis and returning two +/// lists, one of the modules that need optimization and another for modules that +/// can simply be copied over from the incr. comp. cache. +pub(crate) fn run_thin( + cgcx: &CodegenContext, + modules: Vec<(String, ThinBuffer)>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, +) -> Result<(Vec>, Vec), FatalError> { + let dcx = cgcx.create_dcx(); + let dcx = dcx.handle(); + let lto_data = prepare_lto(cgcx, dcx)?; + /*let symbols_below_threshold = + symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::>();*/ + if cgcx.opts.cg.linker_plugin_lto.enabled() { + unreachable!( + "We should never reach this case if the LTO step \ + is deferred to the linker" + ); + } + thin_lto( + cgcx, + dcx, + modules, + lto_data.upstream_modules, + lto_data.tmp_path, + cached_modules, /*, &symbols_below_threshold*/ + ) +} + +pub(crate) fn prepare_thin( + module: ModuleCodegen, + _emit_summary: bool, +) -> (String, ThinBuffer) { + let name = module.name; + //let buffer = ThinBuffer::new(module.module_llvm.context, true, emit_summary); + let buffer = ThinBuffer::new(&module.module_llvm.context); + (name, buffer) +} + +/// Prepare "thin" LTO to get run on these modules. +/// +/// The general structure of ThinLTO is quite different from the structure of +/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into +/// one giant LLVM module, and then we run more optimization passes over this +/// big module after internalizing most symbols. Thin LTO, on the other hand, +/// avoids this large bottleneck through more targeted optimization. +/// +/// At a high level Thin LTO looks like: +/// +/// 1. Prepare a "summary" of each LLVM module in question which describes +/// the values inside, cost of the values, etc. +/// 2. Merge the summaries of all modules in question into one "index" +/// 3. Perform some global analysis on this index +/// 4. For each module, use the index and analysis calculated previously to +/// perform local transformations on the module, for example inlining +/// small functions from other modules. +/// 5. Run thin-specific optimization passes over each module, and then code +/// generate everything at the end.
+/// +/// The summary for each module is intended to be quite cheap, and the global +/// index is relatively quite cheap to create as well. As a result, the goal of +/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more +/// situations. For example, one cheap optimization is that we can parallelize +/// all codegen modules, easily making use of all the cores on a machine. +/// +/// With all that in mind, the function here is designed specifically to +/// calculate the *index* for ThinLTO. This index will then be shared amongst +/// all of the `LtoModuleCodegen` units returned below and destroyed once +/// they all go out of scope. +fn thin_lto( + cgcx: &CodegenContext, + _dcx: DiagCtxtHandle<'_>, + modules: Vec<(String, ThinBuffer)>, + serialized_modules: Vec<(SerializedModule, CString)>, + tmp_path: TempDir, + cached_modules: Vec<(SerializedModule, WorkProduct)>, + //symbols_below_threshold: &[*const libc::c_char], +) -> Result<(Vec>, Vec), FatalError> { + let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis"); + info!("going for that thin, thin LTO"); + + /*let green_modules: FxHashMap<_, _> = + cached_modules.iter().map(|(_, wp)| (wp.cgu_name.clone(), wp.clone())).collect();*/ + + let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len(); + let mut thin_buffers = Vec::with_capacity(modules.len()); + let mut module_names = Vec::with_capacity(full_scope_len); + //let mut thin_modules = Vec::with_capacity(full_scope_len); + + for (i, (name, buffer)) in modules.into_iter().enumerate() { + info!("local module: {} - {}", i, name); + let cname = CString::new(name.as_bytes()).unwrap(); + /*thin_modules.push(llvm::ThinLTOModule { + identifier: cname.as_ptr(), + data: buffer.data().as_ptr(), + len: buffer.data().len(), + });*/ + thin_buffers.push(buffer); + module_names.push(cname); + } + + // FIXME: All upstream crates are deserialized internally in the + // function below to extract their summary and modules. Note that + // unlike the loop above we *must* decode and/or read something + // here as these are all just serialized files on disk. An + // improvement, however, to make here would be to store the + // module summary separately from the actual module itself. Right + // now this is stored in one large bitcode file, and the entire + // file is deflate-compressed. We could try to bypass some of the + // decompression by storing the index uncompressed and only + // lazily decompressing the bytecode if necessary. + // + // Note that truly taking advantage of this optimization will + // likely be further down the road. We'd have to implement + // incremental ThinLTO first where we could actually avoid + // looking at upstream modules entirely sometimes (the contents, + // that is; we must always unconditionally look at the index).
+ let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len()); + + let cached_modules = + cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap())); + + for (module, name) in serialized_modules.into_iter().chain(cached_modules) { + info!("upstream or cached module {:?}", name); + /*thin_modules.push(llvm::ThinLTOModule { + identifier: name.as_ptr(), + data: module.data().as_ptr(), + len: module.data().len(), + });*/ + + match module { + SerializedModule::Local(_) => { + //let path = module_buffer.0.to_str().expect("path"); + //let my_path = PathBuf::from(path); + //let exists = my_path.exists(); + /*module.module_llvm.should_combine_object_files = true; + module + .module_llvm + .context + .add_driver_option(module_buffer.0.to_str().expect("path"));*/ + } + SerializedModule::FromRlib(_) => unimplemented!("from rlib"), + SerializedModule::FromUncompressedFile(_) => { + unimplemented!("from uncompressed file") + } + } + + serialized.push(module); + module_names.push(name); + } + + // Sanity check + //assert_eq!(thin_modules.len(), module_names.len()); + + // Delegate to the C++ bindings to create some data here. Once this is a + // tried-and-true interface we may wish to try to upstream some of this + // to LLVM itself, right now we reimplement a lot of what they do + // upstream... + /*let data = llvm::LLVMRustCreateThinLTOData( + thin_modules.as_ptr(), + thin_modules.len() as u32, + symbols_below_threshold.as_ptr(), + symbols_below_threshold.len() as u32, + ) + .ok_or_else(|| write::llvm_err(dcx, LlvmError::PrepareThinLtoContext))?; + */ + + let data = ThinData; //(Arc::new(tmp_path))/*(data)*/; + + info!("thin LTO data created"); + + /*let (key_map_path, prev_key_map, curr_key_map) = + if let Some(ref incr_comp_session_dir) = cgcx.incr_comp_session_dir { + let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME); + // If the previous file was deleted, or we get an IO error + // reading the file, then we'll just use `None` as the + // prev_key_map, which will force the code to be recompiled. + let prev = + if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None }; + let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names); + (Some(path), prev, curr) + } + else { + // If we don't compile incrementally, we don't need to load the + // import data from LLVM. + assert!(green_modules.is_empty()); + let curr = ThinLTOKeysMap::default(); + (None, None, curr) + }; + info!("thin LTO cache key map loaded"); + info!("prev_key_map: {:#?}", prev_key_map); + info!("curr_key_map: {:#?}", curr_key_map);*/ + + // Throw our data in an `Arc` as we'll be sharing it across threads. We + // also put all memory referenced by the C++ data (buffers, ids, etc) + // into the arc as well. After this we'll create a thin module + // codegen per module in this data. 
+ let shared = + Arc::new(ThinShared { data, thin_buffers, serialized_modules: serialized, module_names }); + + let copy_jobs = vec![]; + let mut opt_jobs = vec![]; + + info!("checking which modules can be reused and which have to be re-optimized."); + for (module_index, module_name) in shared.module_names.iter().enumerate() { + let module_name = module_name_to_str(module_name); + /*if let (Some(prev_key_map), true) = + (prev_key_map.as_ref(), green_modules.contains_key(module_name)) + { + assert!(cgcx.incr_comp_session_dir.is_some()); + + // If a module exists in both the current and the previous session, + // and has the same LTO cache key in both sessions, then we can re-use it + if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) { + let work_product = green_modules[module_name].clone(); + copy_jobs.push(work_product); + info!(" - {}: re-used", module_name); + assert!(cgcx.incr_comp_session_dir.is_some()); + continue; + } + }*/ + + info!(" - {}: re-compiled", module_name); + opt_jobs + .push(LtoModuleCodegen::Thin(ThinModule { shared: shared.clone(), idx: module_index })); + } + + // Save the current ThinLTO import information for the next compilation + // session, overwriting the previous serialized data (if any). + /*if let Some(path) = key_map_path { + if let Err(err) = curr_key_map.save_to_file(&path) { + return Err(write::llvm_err(dcx, LlvmError::WriteThinLtoKey { err })); + } + }*/ + + // NOTE: save the temporary directory used by LTO so that it gets deleted after linking instead + // of now. + //module.module_llvm.temp_dir = Some(tmp_path); + // TODO: save the directory so that it gets deleted later. + std::mem::forget(tmp_path); + + Ok((opt_jobs, copy_jobs)) +} + +pub unsafe fn optimize_thin_module( + thin_module: ThinModule, + _cgcx: &CodegenContext, +) -> Result, FatalError> { + //let dcx = cgcx.create_dcx(); + + //let module_name = &thin_module.shared.module_names[thin_module.idx]; + /*let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap()); + let tm = (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&dcx, e))?;*/ + + // Right now the implementation we've got only works over serialized + // modules, so we create a fresh LLVM context and parse the module + // into that context. One day, however, we may do this for upstream + // crates, but for locally codegened modules we may be able to reuse + // that LLVM Context and Module. + //let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); + //let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &dcx)?
as *const _; + let mut should_combine_object_files = false; + let context = match thin_module.shared.thin_buffers.get(thin_module.idx) { + Some(thin_buffer) => Arc::clone(&thin_buffer.context), + None => { + let context = Context::default(); + let len = thin_module.shared.thin_buffers.len(); + let module = &thin_module.shared.serialized_modules[thin_module.idx - len]; + match *module { + SerializedModule::Local(ref module_buffer) => { + let path = module_buffer.0.to_str().expect("path"); + context.add_driver_option(path); + should_combine_object_files = true; + /*module.module_llvm.should_combine_object_files = true; + module + .module_llvm + .context + .add_driver_option(module_buffer.0.to_str().expect("path"));*/ + } + SerializedModule::FromRlib(_) => unimplemented!("from rlib"), + SerializedModule::FromUncompressedFile(_) => { + unimplemented!("from uncompressed file") + } + } + Arc::new(SyncContext::new(context)) + } + }; + let module = ModuleCodegen { + module_llvm: GccContext { context, should_combine_object_files, temp_dir: None }, + name: thin_module.name().to_string(), + kind: ModuleKind::Regular, + }; + /*{ + let target = &*module.module_llvm.tm; + let llmod = module.module_llvm.llmod(); + save_temp_bitcode(cgcx, &module, "thin-lto-input"); + + // Up next comes the per-module local analyses that we do for Thin LTO. + // Each of these functions is basically copied from the LLVM + // implementation and then tailored to suit this implementation. Ideally + // each of these would be supported by upstream LLVM but that's perhaps + // a patch for another day! + // + // You can find some more comments about these functions in the LLVM + // bindings we've got (currently `PassWrapper.cpp`) + { + let _timer = + cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-rename"); + } + + { + let _timer = cgcx + .prof + .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve"); + } + + { + let _timer = cgcx + .prof + .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize"); + } + + { + let _timer = + cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name()); + if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) { + return Err(write::llvm_err(&dcx, LlvmError::PrepareThinLtoModule)); + } + save_temp_bitcode(cgcx, &module, "thin-lto-after-import"); + } + + // Alright now that we've done everything related to the ThinLTO + // analysis it's time to run some optimizations! Here we use the same + // `run_pass_manager` as the "fat" LTO above except that we tell it to + // populate a thin-specific pass manager, which presumably LLVM treats a + // little differently. 
+ { + info!("running thin lto passes over {}", module.name); + run_pass_manager(cgcx, &dcx, &mut module, true)?; + save_temp_bitcode(cgcx, &module, "thin-lto-after-pm"); + } + }*/ + Ok(module) +} + +pub struct ThinBuffer { + context: Arc, +} + +// TODO: check whether it makes sense to make ThinBuffer Send and Sync. +unsafe impl Send for ThinBuffer {} +unsafe impl Sync for ThinBuffer {} + +impl ThinBuffer { + pub(crate) fn new(context: &Arc) -> Self { + Self { context: Arc::clone(context) } + } +} + +impl ThinBufferMethods for ThinBuffer { + fn data(&self) -> &[u8] { + &[] + } + + fn thin_link_data(&self) -> &[u8] { + unimplemented!(); + } +} + +pub struct ThinData; //(Arc); + +fn module_name_to_str(c_str: &CStr) -> &str { + c_str.to_str().unwrap_or_else(|e| { + bug!("Encountered non-utf8 GCC module name `{}`: {}", c_str.to_string_lossy(), e) + }) +} diff --git a/src/back/write.rs b/src/back/write.rs index a38fc8c14fbb7..802968979c722 100644 --- a/src/back/write.rs +++ b/src/back/write.rs @@ -4,7 +4,7 @@ use gccjit::OutputKind; use rustc_codegen_ssa::back::link::ensure_removed; use rustc_codegen_ssa::back::write::{BitcodeSection, CodegenContext, EmitObj, ModuleConfig}; use rustc_codegen_ssa::{CompiledModule, ModuleCodegen}; -use rustc_errors::DiagCtxt; +use rustc_errors::DiagCtxtHandle; use rustc_fs_util::link_or_copy; use rustc_session::config::OutputType; use rustc_span::fatal_error::FatalError; @@ -15,7 +15,7 @@ use crate::{GccCodegenBackend, GccContext}; pub(crate) unsafe fn codegen( cgcx: &CodegenContext, - dcx: &DiagCtxt, + dcx: DiagCtxtHandle<'_>, module: ModuleCodegen, config: &ModuleConfig, ) -> Result { @@ -31,6 +31,7 @@ pub(crate) unsafe fn codegen( // NOTE: Only generate object files with GIMPLE when this environment variable is set for // now because this requires a particular setup (same gcc/lto1/lto-wrapper commit as libgccjit). + // TODO: remove this environment variable. let fat_lto = env::var("EMBED_LTO_BITCODE").as_deref() == Ok("1"); let bc_out = cgcx.output_filenames.temp_path(OutputType::Bitcode, module_name); @@ -56,6 +57,8 @@ pub(crate) unsafe fn codegen( .generic_activity_with_arg("GCC_module_codegen_emit_bitcode", &*module.name); context.add_command_line_option("-flto=auto"); context.add_command_line_option("-flto-partition=one"); + // TODO: remove this option, since we don't want fat objects when we only need the bitcode.
+ context.add_command_line_option("-ffat-lto-objects"); context .compile_to_file(OutputKind::ObjectFile, bc_out.to_str().expect("path to str")); } @@ -113,17 +116,20 @@ pub(crate) unsafe fn codegen( context.set_debug_info(true); context.dump_to_file(path, true); } - if should_combine_object_files && fat_lto { - context.add_command_line_option("-flto=auto"); - context.add_command_line_option("-flto-partition=one"); + if should_combine_object_files { + if fat_lto { + context.add_command_line_option("-flto=auto"); + context.add_command_line_option("-flto-partition=one"); + + // NOTE: without -fuse-linker-plugin, we get the following error: + // lto1: internal compiler error: decompressed stream: Destination buffer is too small + context.add_driver_option("-fuse-linker-plugin"); + } context.add_driver_option("-Wl,-r"); // NOTE: we need -nostdlib, otherwise, we get the following error: // /usr/bin/ld: cannot find -lgcc_s: No such file or directory context.add_driver_option("-nostdlib"); - // NOTE: without -fuse-linker-plugin, we get the following error: - // lto1: internal compiler error: decompressed stream: Destination buffer is too small - context.add_driver_option("-fuse-linker-plugin"); // NOTE: this doesn't actually generate an executable. With the above flags, it combines the .o files together in another .o. context.compile_to_file( @@ -158,13 +164,15 @@ pub(crate) unsafe fn codegen( config.emit_obj != EmitObj::None, cgcx.target_can_use_split_dwarf && cgcx.split_debuginfo == SplitDebuginfo::Unpacked, config.emit_bc, + config.emit_asm, + config.emit_ir, &cgcx.output_filenames, )) } pub(crate) fn link( _cgcx: &CodegenContext, - _dcx: &DiagCtxt, + _dcx: DiagCtxtHandle<'_>, mut _modules: Vec>, ) -> Result, FatalError> { unimplemented!(); diff --git a/src/base.rs b/src/base.rs index e88fde8ebef71..be149ffe5a16f 100644 --- a/src/base.rs +++ b/src/base.rs @@ -1,5 +1,6 @@ use std::collections::HashSet; use std::env; +use std::sync::Arc; use std::time::Instant; use gccjit::{CType, FunctionType, GlobalKind}; @@ -18,8 +19,8 @@ use rustc_target::spec::PanicStrategy; use crate::builder::Builder; use crate::context::CodegenCx; -use crate::GccContext; use crate::{gcc_util, new_context, LockedTargetInfo}; +use crate::{GccContext, SyncContext}; #[cfg(feature = "master")] pub fn visibility_to_gcc(linkage: Visibility) -> gccjit::Visibility { @@ -222,7 +223,11 @@ pub fn compile_codegen_unit( ModuleCodegen { name: cgu_name.to_string(), - module_llvm: GccContext { context, should_combine_object_files: false, temp_dir: None }, + module_llvm: GccContext { + context: Arc::new(SyncContext::new(context)), + should_combine_object_files: false, + temp_dir: None, + }, kind: ModuleKind::Regular, } } diff --git a/src/builder.rs b/src/builder.rs index 30343f2e17b48..307348f595dc9 100644 --- a/src/builder.rs +++ b/src/builder.rs @@ -25,13 +25,13 @@ use rustc_middle::ty::layout::{ FnAbiError, FnAbiOfHelpers, FnAbiRequest, HasParamEnv, HasTyCtxt, LayoutError, LayoutOfHelpers, TyAndLayout, }; -use rustc_middle::ty::{ParamEnv, Ty, TyCtxt}; +use rustc_middle::ty::{Instance, ParamEnv, Ty, TyCtxt}; use rustc_span::def_id::DefId; use rustc_span::Span; use rustc_target::abi::{ self, call::FnAbi, Align, HasDataLayout, Size, TargetDataLayout, WrappingRange, }; -use rustc_target::spec::{HasTargetSpec, Target}; +use rustc_target::spec::{HasTargetSpec, HasWasmCAbiOpt, Target, WasmCAbi}; use crate::common::{type_is_pointer, SignType, TypeReflection}; use crate::context::CodegenCx; @@ -68,7 +68,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 
'gcc, 'tcx> { src: RValue<'gcc>, order: AtomicOrdering, ) -> RValue<'gcc> { - let size = src.get_type().get_size(); + let size = get_maybe_pointer_size(src); let func = self.current_func(); @@ -138,7 +138,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { failure_order: AtomicOrdering, weak: bool, ) -> RValue<'gcc> { - let size = src.get_type().get_size(); + let size = get_maybe_pointer_size(src); let compare_exchange = self.context.get_builtin_function(&format!("__atomic_compare_exchange_{}", size)); let order = self.context.new_rvalue_from_int(self.i32_type, order.to_gcc()); @@ -153,7 +153,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { // NOTE: not sure why, but we have the wrong type here. let int_type = compare_exchange.get_param(2).to_rvalue().get_type(); - let src = self.context.new_cast(self.location, src, int_type); + let src = self.context.new_bitcast(self.location, src, int_type); self.context.new_call( self.location, compare_exchange, @@ -190,8 +190,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let casted_args: Vec<_> = param_types .into_iter() .zip(args.iter()) - .enumerate() - .map(|(_i, (expected_ty, &actual_val))| { + .map(|(expected_ty, &actual_val)| { let actual_ty = actual_val.get_type(); if expected_ty != actual_ty { self.bitcast(actual_val, expected_ty) @@ -253,7 +252,22 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { { self.context.new_cast(self.location, actual_val, expected_ty) } else if on_stack_param_indices.contains(&index) { - actual_val.dereference(self.location).to_rvalue() + let ty = actual_val.get_type(); + // It's possible that the value behind the pointer is actually not exactly + // the expected type, so to work around that, we add a cast before + // dereferencing the value. + if let Some(pointee_val) = ty.get_pointee() + && pointee_val != expected_ty + { + let new_val = self.context.new_cast( + self.location, + actual_val, + expected_ty.make_pointer(), + ); + new_val.dereference(self.location).to_rvalue() + } else { + actual_val.dereference(self.location).to_rvalue() + } } else { assert!( (!expected_ty.is_vector() || actual_ty.is_vector()) @@ -592,12 +606,13 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>, + instance: Option>, ) -> RValue<'gcc> { let try_block = self.current_func().new_block("try"); let current_block = self.block; self.block = try_block; - let call = self.call(typ, fn_attrs, None, func, args, None); // TODO(antoyo): use funclet here? + let call = self.call(typ, fn_attrs, None, func, args, None, instance); // TODO(antoyo): use funclet here? self.block = current_block; let return_value = @@ -629,8 +644,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { then: Block<'gcc>, catch: Block<'gcc>, _funclet: Option<&Funclet>, + instance: Option>, ) -> RValue<'gcc> { - let call_site = self.call(typ, fn_attrs, None, func, args, None); + let call_site = self.call(typ, fn_attrs, None, func, args, None, instance); let condition = self.context.new_rvalue_from_int(self.bool_type, 1); self.llbb().end_with_conditional(self.location, condition, then, catch); if let Some(_fn_abi) = fn_abi { @@ -915,26 +931,16 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { self.gcc_checked_binop(oop, typ, lhs, rhs) } - fn alloca(&mut self, ty: Type<'gcc>, align: Align) -> RValue<'gcc> { - // FIXME(antoyo): this check that we don't call get_aligned() a second time on a type.
- // Ideally, we shouldn't need to do this check. - let aligned_type = if ty == self.cx.u128_type || ty == self.cx.i128_type { - ty - } else { - ty.get_aligned(align.bytes()) - }; + fn alloca(&mut self, size: Size, align: Align) -> RValue<'gcc> { + let ty = self.cx.type_array(self.cx.type_i8(), size.bytes()).get_aligned(align.bytes()); // TODO(antoyo): It might be better to return a LValue, but fixing the rustc API is non-trivial. self.stack_var_count.set(self.stack_var_count.get() + 1); self.current_func() - .new_local( - self.location, - aligned_type, - &format!("stack_var_{}", self.stack_var_count.get()), - ) + .new_local(self.location, ty, &format!("stack_var_{}", self.stack_var_count.get())) .get_address(self.location) } - fn byte_array_alloca(&mut self, _len: RValue<'gcc>, _align: Align) -> RValue<'gcc> { + fn dynamic_alloca(&mut self, _len: RValue<'gcc>, _align: Align) -> RValue<'gcc> { unimplemented!(); } @@ -991,7 +997,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { &mut self, place: PlaceRef<'tcx, RValue<'gcc>>, ) -> OperandRef<'tcx, RValue<'gcc>> { - assert_eq!(place.llextra.is_some(), place.layout.is_unsized()); + assert_eq!(place.val.llextra.is_some(), place.layout.is_unsized()); if place.layout.is_zst() { return OperandRef::zero_sized(place.layout); @@ -1016,10 +1022,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { } } - let val = if let Some(llextra) = place.llextra { - OperandValue::Ref(place.llval, Some(llextra), place.align) + let val = if place.val.llextra.is_some() { + // FIXME: Merge with the `else` below? + OperandValue::Ref(place.val) } else if place.layout.is_gcc_immediate() { - let load = self.load(place.layout.gcc_type(self), place.llval, place.align); + let load = self.load(place.layout.gcc_type(self), place.val.llval, place.val.align); if let abi::Abi::Scalar(ref scalar) = place.layout.abi { scalar_load_metadata(self, load, scalar); } @@ -1029,9 +1036,9 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { let mut load = |i, scalar: &abi::Scalar, align| { let llptr = if i == 0 { - place.llval + place.val.llval } else { - self.inbounds_ptradd(place.llval, self.const_usize(b_offset.bytes())) + self.inbounds_ptradd(place.val.llval, self.const_usize(b_offset.bytes())) }; let llty = place.layout.scalar_pair_element_gcc_type(self, i); let load = self.load(llty, llptr, align); @@ -1044,11 +1051,11 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { }; OperandValue::Pair( - load(0, a, place.align), - load(1, b, place.align.restrict_for_offset(b_offset)), + load(0, a, place.val.align), + load(1, b, place.val.align.restrict_for_offset(b_offset)), ) } else { - OperandValue::Ref(place.llval, None, place.align) + OperandValue::Ref(place.val) }; OperandRef { val, layout: place.layout } @@ -1062,8 +1069,8 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { ) { let zero = self.const_usize(0); let count = self.const_usize(count); - let start = dest.project_index(self, zero).llval; - let end = dest.project_index(self, count).llval; + let start = dest.project_index(self, zero).val.llval; + let end = dest.project_index(self, count).val.llval; let header_bb = self.append_sibling_block("repeat_loop_header"); let body_bb = self.append_sibling_block("repeat_loop_body"); @@ -1081,7 +1088,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { self.cond_br(keep_going, body_bb, next_bb); self.switch_to_block(body_bb); - 
let align = dest.align.restrict_for_offset(dest.layout.field(self.cx(), 0).size); + let align = dest.val.align.restrict_for_offset(dest.layout.field(self.cx(), 0).size); cg_elem.val.store(self, PlaceRef::new_sized_aligned(current_val, cg_elem.layout, align)); let next = self.inbounds_gep( @@ -1323,19 +1330,13 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { fn memmove( &mut self, dst: RValue<'gcc>, - dst_align: Align, + _dst_align: Align, src: RValue<'gcc>, - src_align: Align, + _src_align: Align, size: RValue<'gcc>, flags: MemFlags, ) { - if flags.contains(MemFlags::NONTEMPORAL) { - // HACK(nox): This is inefficient but there is no nontemporal memmove. - let val = self.load(src.get_type().get_pointee().expect("get_pointee"), src, src_align); - let ptr = self.pointercast(dst, self.type_ptr_to(self.val_ty(val))); - self.store_with_flags(val, ptr, dst_align, flags); - return; - } + assert!(!flags.contains(MemFlags::NONTEMPORAL), "non-temporal memmove not supported"); let size = self.intcast(size, self.type_size_t(), false); let _is_volatile = flags.contains(MemFlags::VOLATILE); let dst = self.pointercast(dst, self.type_i8p()); @@ -1357,6 +1358,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { _align: Align, flags: MemFlags, ) { + assert!(!flags.contains(MemFlags::NONTEMPORAL), "non-temporal memset not supported"); let _is_volatile = flags.contains(MemFlags::VOLATILE); let ptr = self.pointercast(ptr, self.type_i8p()); let memset = self.context.get_builtin_function("memset"); @@ -1616,7 +1618,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { src: RValue<'gcc>, order: AtomicOrdering, ) -> RValue<'gcc> { - let size = src.get_type().get_size(); + let size = get_maybe_pointer_size(src); let name = match op { AtomicRmwBinOp::AtomicXchg => format!("__atomic_exchange_{}", size), AtomicRmwBinOp::AtomicAdd => format!("__atomic_fetch_add_{}", size), @@ -1647,7 +1649,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { let dst = self.context.new_cast(self.location, dst, volatile_void_ptr_type); // FIXME(antoyo): not sure why, but we have the wrong type here. let new_src_type = atomic_function.get_param(1).to_rvalue().get_type(); - let src = self.context.new_cast(self.location, src, new_src_type); + let src = self.context.new_bitcast(self.location, src, new_src_type); let res = self.context.new_call(self.location, atomic_function, &[dst, src, order]); self.context.new_cast(self.location, res, src.get_type()) } @@ -1685,9 +1687,10 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { func: RValue<'gcc>, args: &[RValue<'gcc>], funclet: Option<&Funclet>, + _instance: Option>, ) -> RValue<'gcc> { // FIXME(antoyo): remove when having a proper API. - let gcc_func = unsafe { std::mem::transmute(func) }; + let gcc_func = unsafe { std::mem::transmute::, Function<'gcc>>(func) }; let call = if self.functions.borrow().values().any(|value| *value == gcc_func) { self.function_call(func, args, funclet) } else { @@ -1702,11 +1705,6 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> { fn zext(&mut self, value: RValue<'gcc>, dest_typ: Type<'gcc>) -> RValue<'gcc> { // FIXME(antoyo): this does not zero-extend. - if value.get_type().is_bool() && dest_typ.is_i8(self.cx) { - // FIXME(antoyo): hack because base::from_immediate converts i1 to i8. - // Fix the code in codegen_ssa::base::from_immediate. 
- return value; - } self.gcc_int_cast(value, dest_typ) } @@ -2365,6 +2363,12 @@ impl<'tcx> HasTargetSpec for Builder<'_, '_, 'tcx> { } } +impl<'tcx> HasWasmCAbiOpt for Builder<'_, '_, 'tcx> { + fn wasm_c_abi_opt(&self) -> WasmCAbi { + self.cx.wasm_c_abi_opt() + } +} + pub trait ToGccComp { fn to_gcc_comparison(&self) -> ComparisonOp; } @@ -2440,3 +2444,19 @@ impl ToGccOrdering for AtomicOrdering { ordering as i32 } } + +// Needed because gcc 12 `get_size()` doesn't work on pointers. +#[cfg(feature = "master")] +fn get_maybe_pointer_size(value: RValue<'_>) -> u32 { + value.get_type().get_size() +} + +#[cfg(not(feature = "master"))] +fn get_maybe_pointer_size(value: RValue<'_>) -> u32 { + let type_ = value.get_type(); + if type_.get_pointee().is_some() { + std::mem::size_of::<*const ()>() as _ + } else { + type_.get_size() + } +} diff --git a/src/common.rs b/src/common.rs index 6a3b5442adf27..35699346b2979 100644 --- a/src/common.rs +++ b/src/common.rs @@ -28,6 +28,19 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { global // TODO(antoyo): set linkage. } + + pub fn const_bitcast(&self, value: RValue<'gcc>, typ: Type<'gcc>) -> RValue<'gcc> { + if value.get_type() == self.bool_type.make_pointer() { + if let Some(pointee) = typ.get_pointee() { + if pointee.dyncast_vector().is_some() { + panic!() + } + } + } + // NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some + // SIMD builtins require a constant value. + self.bitcast_if_needed(value, typ) + } } pub fn bytes_in_context<'gcc, 'tcx>(cx: &CodegenCx<'gcc, 'tcx>, bytes: &[u8]) -> RValue<'gcc> { @@ -94,6 +107,10 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { self.const_int(self.type_i32(), i as i64) } + fn const_i8(&self, i: i8) -> RValue<'gcc> { + self.const_int(self.type_i8(), i as i64) + } + fn const_u32(&self, i: u32) -> RValue<'gcc> { self.const_uint(self.type_u32(), i as u64) } @@ -162,7 +179,7 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { let bitsize = if layout.is_bool() { 1 } else { layout.size(self).bits() }; match cv { Scalar::Int(int) => { - let data = int.assert_bits(layout.size(self)); + let data = int.to_bits(layout.size(self)); // FIXME(antoyo): there's some issues with using the u128 code that follows, so hard-code // the paths for floating-point values. @@ -236,19 +253,6 @@ impl<'gcc, 'tcx> ConstMethods<'tcx> for CodegenCx<'gcc, 'tcx> { const_alloc_to_gcc(self, alloc) } - fn const_bitcast(&self, value: RValue<'gcc>, typ: Type<'gcc>) -> RValue<'gcc> { - if value.get_type() == self.bool_type.make_pointer() { - if let Some(pointee) = typ.get_pointee() { - if pointee.dyncast_vector().is_some() { - panic!() - } - } - } - // NOTE: since bitcast makes a value non-constant, don't bitcast if not necessary as some - // SIMD builtins require a constant value. 
- self.bitcast_if_needed(value, typ) - } - fn const_ptr_byte_offset(&self, base_addr: Self::Value, offset: abi::Size) -> Self::Value { self.context .new_array_access(None, base_addr, self.const_usize(offset.bytes())) diff --git a/src/consts.rs b/src/consts.rs index f47fd56553323..ba7e08e33efa3 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,15 +1,16 @@ #[cfg(feature = "master")] use gccjit::{FnAttribute, VarAttribute, Visibility}; -use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue}; -use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, DerivedTypeMethods, StaticMethods}; +use gccjit::{Function, GlobalKind, LValue, RValue, ToRValue, Type}; +use rustc_codegen_ssa::traits::{BaseTypeMethods, ConstMethods, StaticMethods}; +use rustc_hir::def::DefKind; +use rustc_middle::bug; use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs}; use rustc_middle::mir::interpret::{ self, read_target_uint, ConstAllocation, ErrorHandled, Scalar as InterpScalar, }; -use rustc_middle::mir::mono::MonoItem; use rustc_middle::span_bug; use rustc_middle::ty::layout::LayoutOf; -use rustc_middle::ty::{self, Instance, Ty}; +use rustc_middle::ty::{self, Instance}; use rustc_span::def_id::DefId; use rustc_target::abi::{self, Align, HasDataLayout, Primitive, Size, WrappingRange}; @@ -63,16 +64,15 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> { global_value } - fn codegen_static(&self, def_id: DefId, is_mutable: bool) { + #[cfg_attr(not(feature = "master"), allow(unused_mut))] + fn codegen_static(&self, def_id: DefId) { let attrs = self.tcx.codegen_fn_attrs(def_id); - let value = match codegen_static_initializer(self, def_id) { - Ok((value, _)) => value, + let Ok((value, alloc)) = codegen_static_initializer(self, def_id) else { // Error has already been reported - Err(_) => return, + return; }; - - let global = self.get_static(def_id); + let alloc = alloc.inner(); // boolean SSA values are i1, but they have to be stored in i8 slots, // otherwise some LLVM optimization passes don't work as expected @@ -81,23 +81,25 @@ impl<'gcc, 'tcx> StaticMethods for CodegenCx<'gcc, 'tcx> { unimplemented!(); }; - let instance = Instance::mono(self.tcx, def_id); - let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); - let gcc_type = self.layout_of(ty).gcc_type(self); + let is_thread_local = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); + let global = self.get_static_inner(def_id, val_llty); - set_global_alignment(self, global, self.align_of(ty)); + #[cfg(feature = "master")] + if global.to_rvalue().get_type() != val_llty { + global.to_rvalue().set_type(val_llty); + } + set_global_alignment(self, global, alloc.align); - let value = self.bitcast_if_needed(value, gcc_type); global.global_set_initializer_rvalue(value); // As an optimization, all shared statics which do not have interior // mutability are placed into read-only memory. - if !is_mutable && self.type_is_freeze(ty) { + if alloc.mutability.is_not() { #[cfg(feature = "master")] global.global_set_readonly(); } - if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) { + if is_thread_local { // Do not allow LLVM to change the alignment of a TLS on macOS. // // By default a global's alignment can be freely increased. @@ -205,34 +207,53 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { pub fn get_static(&self, def_id: DefId) -> LValue<'gcc> { let instance = Instance::mono(self.tcx, def_id); - let fn_attrs = self.tcx.codegen_fn_attrs(def_id); + let DefKind::Static { nested, .. 
} = self.tcx.def_kind(def_id) else { bug!() }; + // Nested statics do not have a type, so pick a random type and let `define_static` figure out + // the gcc type from the actual evaluated initializer. + let gcc_type = if nested { + self.type_i8() + } else { + let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); + self.layout_of(ty).gcc_type(self) + }; + + self.get_static_inner(def_id, gcc_type) + } + + pub(crate) fn get_static_inner(&self, def_id: DefId, gcc_type: Type<'gcc>) -> LValue<'gcc> { + let instance = Instance::mono(self.tcx, def_id); if let Some(&global) = self.instances.borrow().get(&instance) { + trace!("used cached value"); return global; } - let defined_in_current_codegen_unit = - self.codegen_unit.items().contains_key(&MonoItem::Static(def_id)); - assert!( - !defined_in_current_codegen_unit, - "consts::get_static() should always hit the cache for \ - statics defined in the same CGU, but did not for `{:?}`", - def_id - ); - - let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); + // FIXME: Once we stop removing globals in `codegen_static`, we can uncomment this code. + // let defined_in_current_codegen_unit = + // self.codegen_unit.items().contains_key(&MonoItem::Static(def_id)); + // assert!( + // !defined_in_current_codegen_unit, + // "consts::get_static() should always hit the cache for \ + // statics defined in the same CGU, but did not for `{:?}`", + // def_id + // ); let sym = self.tcx.symbol_name(instance).name; + let fn_attrs = self.tcx.codegen_fn_attrs(def_id); let global = if def_id.is_local() && !self.tcx.is_foreign_item(def_id) { - let llty = self.layout_of(ty).gcc_type(self); if let Some(global) = self.get_declared_value(sym) { - if self.val_ty(global) != self.type_ptr_to(llty) { + if self.val_ty(global) != self.type_ptr_to(gcc_type) { span_bug!(self.tcx.def_span(def_id), "Conflicting types for static"); } } let is_tls = fn_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); - let global = - self.declare_global(sym, llty, GlobalKind::Exported, is_tls, fn_attrs.link_section); + let global = self.declare_global( + sym, + gcc_type, + GlobalKind::Exported, + is_tls, + fn_attrs.link_section, + ); if !self.tcx.is_reachable_non_generic(def_id) { #[cfg(feature = "master")] @@ -241,7 +262,7 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { global } else { - check_and_apply_linkage(self, fn_attrs, ty, sym) + check_and_apply_linkage(self, fn_attrs, gcc_type, sym) }; if !def_id.is_local() { @@ -344,7 +365,7 @@ pub fn const_alloc_to_gcc<'gcc, 'tcx>( cx.const_struct(&llvals, true) } -pub fn codegen_static_initializer<'gcc, 'tcx>( +fn codegen_static_initializer<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, def_id: DefId, ) -> Result<(RValue<'gcc>, ConstAllocation<'tcx>), ErrorHandled> { @@ -355,11 +376,10 @@ pub fn codegen_static_initializer<'gcc, 'tcx>( fn check_and_apply_linkage<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, attrs: &CodegenFnAttrs, - ty: Ty<'tcx>, + gcc_type: Type<'gcc>, sym: &str, ) -> LValue<'gcc> { let is_tls = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); - let gcc_type = cx.layout_of(ty).gcc_type(cx); if let Some(linkage) = attrs.import_linkage { // Declare a symbol `foo` with the desired linkage. 
let global1 = diff --git a/src/context.rs b/src/context.rs index 5ece10cc70d12..4458ca84bbb07 100644 --- a/src/context.rs +++ b/src/context.rs @@ -6,7 +6,8 @@ use gccjit::{ use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::errors as ssa_errors; use rustc_codegen_ssa::traits::{BackendTypes, BaseTypeMethods, MiscMethods}; -use rustc_data_structures::base_n; +use rustc_data_structures::base_n::ToBaseN; +use rustc_data_structures::base_n::ALPHANUMERIC_ONLY; use rustc_data_structures::fx::{FxHashMap, FxHashSet}; use rustc_middle::mir::mono::CodegenUnit; use rustc_middle::span_bug; @@ -20,13 +21,12 @@ use rustc_span::{source_map::respan, Span}; use rustc_target::abi::{ call::FnAbi, HasDataLayout, PointeeInfo, Size, TargetDataLayout, VariantIdx, }; -use rustc_target::spec::{HasTargetSpec, Target, TlsModel}; +use rustc_target::spec::{HasTargetSpec, HasWasmCAbiOpt, Target, TlsModel, WasmCAbi}; use crate::callee::get_fn; use crate::common::SignType; pub struct CodegenCx<'gcc, 'tcx> { - pub check_overflow: bool, pub codegen_unit: &'tcx CodegenUnit<'tcx>, pub context: &'gcc Context<'gcc>, @@ -114,6 +114,7 @@ pub struct CodegenCx<'gcc, 'tcx> { local_gen_sym_counter: Cell, eh_personality: Cell>>, + #[cfg(feature = "master")] pub rust_try_fn: Cell, Function<'gcc>)>>, pub pointee_infos: RefCell, Size), Option>>, @@ -125,6 +126,7 @@ pub struct CodegenCx<'gcc, 'tcx> { /// FIXME(antoyo): fix the rustc API to avoid having this hack. pub structs_as_pointer: RefCell>>, + #[cfg(feature = "master")] pub cleanup_blocks: RefCell>>, } @@ -140,8 +142,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { supports_f64_type: bool, supports_f128_type: bool, ) -> Self { - let check_overflow = tcx.sess.overflow_checks(); - let create_type = |ctype, rust_type| { let layout = tcx.layout_of(ParamEnv::reveal_all().and(rust_type)).unwrap(); let align = layout.align.abi.bytes(); @@ -277,7 +277,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } let mut cx = Self { - check_overflow, codegen_unit, context, current_func: RefCell::new(None), @@ -338,9 +337,11 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { struct_types: Default::default(), local_gen_sym_counter: Cell::new(0), eh_personality: Cell::new(None), + #[cfg(feature = "master")] rust_try_fn: Cell::new(None), pointee_infos: Default::default(), structs_as_pointer: Default::default(), + #[cfg(feature = "master")] cleanup_blocks: Default::default(), }; // TODO(antoyo): instead of doing this, add SsizeT to libgccjit. @@ -488,14 +489,12 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { let tcx = self.tcx; let func = match tcx.lang_items().eh_personality() { Some(def_id) if !wants_msvc_seh(self.sess()) => { - let instance = ty::Instance::resolve( + let instance = ty::Instance::expect_resolve( tcx, ty::ParamEnv::reveal_all(), def_id, ty::List::empty(), - ) - .unwrap() - .unwrap(); + ); let symbol_name = tcx.symbol_name(instance).name; let fn_abi = self.fn_abi_of_instance(instance, ty::List::empty()); @@ -511,7 +510,7 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { "rust_eh_personality" }; let func = self.declare_func(name, self.type_i32(), &[], true); - unsafe { std::mem::transmute(func) } + unsafe { std::mem::transmute::, RValue<'gcc>>(func) } } }; // TODO(antoyo): apply target cpu attributes. 
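
For context on the repeated `transmute` rewrites in this hunk (and in the intrinsic code further down): adding the turbofish makes both the source and target type explicit, so inference can no longer silently pick a different type if surrounding code changes. A minimal sketch with ordinary types standing in for the gccjit pair `Function<'gcc>` → `RValue<'gcc>`:

```rust
use std::mem;

fn main() {
    let bits: u32 = 0x3f80_0000;
    // With the turbofish, the source and destination types are fixed in the
    // call itself instead of being inferred from the surrounding bindings.
    let float = unsafe { mem::transmute::<u32, f32>(bits) };
    assert_eq!(float, 1.0);
    println!("{float}");
}
```
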
@@ -523,10 +522,6 @@ impl<'gcc, 'tcx> MiscMethods<'tcx> for CodegenCx<'gcc, 'tcx> { self.tcx.sess } - fn check_overflow(&self) -> bool { - self.check_overflow - } - fn codegen_unit(&self) -> &'tcx CodegenUnit<'tcx> { self.codegen_unit } @@ -570,6 +565,12 @@ impl<'gcc, 'tcx> HasTargetSpec for CodegenCx<'gcc, 'tcx> { } } +impl<'gcc, 'tcx> HasWasmCAbiOpt for CodegenCx<'gcc, 'tcx> { + fn wasm_c_abi_opt(&self) -> WasmCAbi { + self.tcx.sess.opts.unstable_opts.wasm_c_abi + } +} + impl<'gcc, 'tcx> LayoutOfHelpers<'tcx> for CodegenCx<'gcc, 'tcx> { type LayoutOfResult = TyAndLayout<'tcx>; @@ -628,7 +629,7 @@ impl<'b, 'tcx> CodegenCx<'b, 'tcx> { let mut name = String::with_capacity(prefix.len() + 6); name.push_str(prefix); name.push('.'); - base_n::push_str(idx as u128, base_n::ALPHANUMERIC_ONLY, &mut name); + name.push_str(&(idx as u64).to_base(ALPHANUMERIC_ONLY)); name } } diff --git a/src/coverageinfo.rs b/src/coverageinfo.rs index 849e9886ef39d..4e44f78f23c26 100644 --- a/src/coverageinfo.rs +++ b/src/coverageinfo.rs @@ -1,11 +1,11 @@ use rustc_codegen_ssa::traits::CoverageInfoBuilderMethods; -use rustc_middle::mir::Coverage; +use rustc_middle::mir::coverage::CoverageKind; use rustc_middle::ty::Instance; use crate::builder::Builder; impl<'a, 'gcc, 'tcx> CoverageInfoBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { - fn add_coverage(&mut self, _instance: Instance<'tcx>, _coverage: &Coverage) { + fn add_coverage(&mut self, _instance: Instance<'tcx>, _kind: &CoverageKind) { // TODO(antoyo) } } diff --git a/src/debuginfo.rs b/src/debuginfo.rs index 04c517e7d2952..3d9ea278a6399 100644 --- a/src/debuginfo.rs +++ b/src/debuginfo.rs @@ -1,10 +1,9 @@ -use crate::rustc_index::Idx; use gccjit::{Location, RValue}; use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind}; use rustc_codegen_ssa::traits::{DebugInfoBuilderMethods, DebugInfoMethods}; use rustc_data_structures::sync::Lrc; use rustc_index::bit_set::BitSet; -use rustc_index::IndexVec; +use rustc_index::{Idx, IndexVec}; use rustc_middle::mir::{self, Body, SourceScope}; use rustc_middle::ty::{Instance, PolyExistentialTraitRef, Ty}; use rustc_session::config::DebugInfo; diff --git a/src/errors.rs b/src/errors.rs index f963a153fbaba..6bada3d334ce4 100644 --- a/src/errors.rs +++ b/src/errors.rs @@ -1,4 +1,4 @@ -use rustc_errors::{Diag, DiagCtxt, EmissionGuarantee, IntoDiagnostic, Level}; +use rustc_errors::{Diag, DiagCtxtHandle, Diagnostic, EmissionGuarantee, Level}; use rustc_macros::{Diagnostic, Subdiagnostic}; use rustc_span::Span; @@ -89,14 +89,14 @@ pub(crate) struct TargetFeatureDisableOrEnable<'a> { #[help(codegen_gcc_missing_features)] pub(crate) struct MissingFeatures; -impl IntoDiagnostic<'_, G> for TargetFeatureDisableOrEnable<'_> { - fn into_diagnostic(self, dcx: &'_ DiagCtxt, level: Level) -> Diag<'_, G> { +impl Diagnostic<'_, G> for TargetFeatureDisableOrEnable<'_> { + fn into_diag(self, dcx: DiagCtxtHandle<'_>, level: Level) -> Diag<'_, G> { let mut diag = Diag::new(dcx, level, fluent::codegen_gcc_target_feature_disable_or_enable); if let Some(span) = self.span { diag.span(span); }; if let Some(missing_features) = self.missing_features { - diag.subdiagnostic(dcx, missing_features); + diag.subdiagnostic(missing_features); } diag.arg("features", self.features.join(", ")); diag diff --git a/src/int.rs b/src/int.rs index b92db7a742467..e4c5eb9137317 100644 --- a/src/int.rs +++ b/src/int.rs @@ -81,7 +81,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let b = self.context.new_cast(self.location, b, a_type); a 
>> b } else { - a >> b + let a_size = a_type.get_size(); + let b_size = b_type.get_size(); + match a_size.cmp(&b_size) { + std::cmp::Ordering::Less => { + let a = self.context.new_cast(self.location, a, b_type); + a >> b + } + std::cmp::Ordering::Equal => a >> b, + std::cmp::Ordering::Greater => { + let b = self.context.new_cast(self.location, b, a_type); + a >> b + } + } } } else if a_type.is_vector() && a_type.is_vector() { a >> b @@ -636,7 +648,19 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let b = self.context.new_cast(self.location, b, a_type); a << b } else { - a << b + let a_size = a_type.get_size(); + let b_size = b_type.get_size(); + match a_size.cmp(&b_size) { + std::cmp::Ordering::Less => { + let a = self.context.new_cast(self.location, a, b_type); + a << b + } + std::cmp::Ordering::Equal => a << b, + std::cmp::Ordering::Greater => { + let b = self.context.new_cast(self.location, b, a_type); + a << b + } + } } } else if a_type.is_vector() && a_type.is_vector() { a << b diff --git a/src/intrinsic/mod.rs b/src/intrinsic/mod.rs index eca4a72c756a0..839ebf3f29876 100644 --- a/src/intrinsic/mod.rs +++ b/src/intrinsic/mod.rs @@ -11,7 +11,7 @@ use rustc_codegen_ssa::base::wants_msvc_seh; use rustc_codegen_ssa::common::IntPredicate; use rustc_codegen_ssa::errors::InvalidMonomorphization; use rustc_codegen_ssa::mir::operand::{OperandRef, OperandValue}; -use rustc_codegen_ssa::mir::place::PlaceRef; +use rustc_codegen_ssa::mir::place::{PlaceRef, PlaceValue}; use rustc_codegen_ssa::traits::{ ArgAbiMethods, BuilderMethods, ConstMethods, IntrinsicCallMethods, }; @@ -130,7 +130,9 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let llval = match name { _ if simple.is_some() => { // FIXME(antoyo): remove this cast when the API supports function. - let func = unsafe { std::mem::transmute(simple.expect("simple")) }; + let func = unsafe { + std::mem::transmute::, RValue<'gcc>>(simple.expect("simple")) + }; self.call( self.type_void(), None, @@ -138,6 +140,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { func, &args.iter().map(|arg| arg.immediate()).collect::>(), None, + None, ) } sym::likely => self.expect(args[0].immediate(), true), @@ -217,12 +220,12 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let after_block = func.new_block("after"); let arg = args[0].immediate(); - let result = func.new_local(None, arg.get_type(), "zeros"); + let result = func.new_local(None, self.u32_type, "zeros"); let zero = self.cx.gcc_zero(arg.get_type()); let cond = self.gcc_icmp(IntPredicate::IntEQ, arg, zero); self.llbb().end_with_conditional(None, cond, then_block, else_block); - let zero_result = self.cx.gcc_uint(arg.get_type(), width); + let zero_result = self.cx.gcc_uint(self.u32_type, width); then_block.add_assignment(None, result, zero_result); then_block.end_with_jump(None, after_block); @@ -358,7 +361,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { let block = self.llbb(); let extended_asm = block.add_extended_asm(None, ""); - extended_asm.add_input_operand(None, "r", result.llval); + extended_asm.add_input_operand(None, "r", result.val.llval); extended_asm.add_clobber("memory"); extended_asm.set_volatile_flag(true); @@ -392,8 +395,8 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { if !fn_abi.ret.is_ignore() { if let PassMode::Cast { cast: ref ty, .. 
} = fn_abi.ret.mode { let ptr_llty = self.type_ptr_to(ty.gcc_type(self)); - let ptr = self.pointercast(result.llval, ptr_llty); - self.store(llval, ptr, result.align); + let ptr = self.pointercast(result.val.llval, ptr_llty); + self.store(llval, ptr, result.val.align); } else { OperandRef::from_immediate_or_packed_pair(self, llval, result.layout) .val @@ -406,7 +409,7 @@ impl<'a, 'gcc, 'tcx> IntrinsicCallMethods<'tcx> for Builder<'a, 'gcc, 'tcx> { fn abort(&mut self) { let func = self.context.get_builtin_function("abort"); let func: RValue<'gcc> = unsafe { std::mem::transmute(func) }; - self.call(self.type_void(), None, None, func, &[], None); + self.call(self.type_void(), None, None, func, &[], None, None); } fn assume(&mut self, value: Self::Value) { @@ -506,7 +509,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { return; } if self.is_sized_indirect() { - OperandValue::Ref(val, None, self.layout.align.abi).store(bx, dst) + OperandValue::Ref(PlaceValue::new_sized(val, self.layout.align.abi)).store(bx, dst) } else if self.is_unsized_indirect() { bug!("unsized `ArgAbi` must be handled through `store_fn_arg`"); } else if let PassMode::Cast { ref cast, .. } = self.mode { @@ -515,7 +518,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { let can_store_through_cast_ptr = false; if can_store_through_cast_ptr { let cast_ptr_llty = bx.type_ptr_to(cast.gcc_type(bx)); - let cast_dst = bx.pointercast(dst.llval, cast_ptr_llty); + let cast_dst = bx.pointercast(dst.val.llval, cast_ptr_llty); bx.store(val, cast_dst, self.layout.align.abi); } else { // The actual return type is a struct, but the ABI @@ -535,7 +538,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { // We instead thus allocate some scratch space... let scratch_size = cast.size(bx); let scratch_align = cast.align(bx); - let llscratch = bx.alloca(cast.gcc_type(bx), scratch_align); + let llscratch = bx.alloca(scratch_size, scratch_align); bx.lifetime_start(llscratch, scratch_size); // ... where we first store the value... @@ -543,7 +546,7 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { // ... and then memcpy it to the intended destination. bx.memcpy( - dst.llval, + dst.val.llval, self.layout.align.abi, llscratch, scratch_align, @@ -575,7 +578,12 @@ impl<'gcc, 'tcx> ArgAbiExt<'gcc, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> { OperandValue::Pair(next(), next()).store(bx, dst); } PassMode::Indirect { meta_attrs: Some(_), .. } => { - OperandValue::Ref(next(), Some(next()), self.layout.align.abi).store(bx, dst); + let place_val = PlaceValue { + llval: next(), + llextra: Some(next()), + align: self.layout.align.abi, + }; + OperandValue::Ref(place_val).store(bx, dst); } PassMode::Direct(_) | PassMode::Indirect { meta_attrs: None, .. } @@ -701,6 +709,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { fn count_leading_zeroes(&mut self, width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { // TODO(antoyo): use width? let arg_type = arg.get_type(); + let result_type = self.u32_type; let count_leading_zeroes = // TODO(antoyo): write a new function Type::is_compatible_with(&Type) and use it here // instead of using is_uint(). 
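
The hunks around here change `count_leading_zeroes`/`count_trailing_zeroes` (and `pop_count` below) to always produce a `u32`, matching the updated fixed return type of the `ctlz`/`cttz`/`ctpop` intrinsics, instead of returning the argument's own type. When a narrow value is widened so a 64-bit builtin can be used, the leading zeros introduced by the widening have to be subtracted. A plain-Rust sketch of that adjustment, using `leading_zeros` as a stand-in for `__builtin_clzll`:

```rust
// The builtin counts leading zeros in 64 bits, so for a narrower argument the
// zeros introduced by the widening cast must be subtracted. The result is
// deliberately `u32`, matching the new intrinsic signature.
fn ctlz_u16_via_u64(value: u16) -> u32 {
    let wide = value as u64;
    let clz64 = wide.leading_zeros();
    // 64 - 16 = 48 of those zeros come from widening, not from the value.
    clz64 - (u64::BITS - u16::BITS)
}

fn main() {
    assert_eq!(ctlz_u16_via_u64(0b0000_0001_0000_0000), 7);
    assert_eq!(ctlz_u16_via_u64(1), 15);
    println!("ok");
}
```
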
@@ -758,7 +767,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let res = self.context.new_array_access(self.location, result, index); - return self.gcc_int_cast(res.to_rvalue(), arg_type); + return self.gcc_int_cast(res.to_rvalue(), result_type); } else { let count_leading_zeroes = self.context.get_builtin_function("__builtin_clzll"); @@ -766,17 +775,18 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { let diff = self.ulonglong_type.get_size() as i64 - arg_type.get_size() as i64; let diff = self.context.new_rvalue_from_long(self.int_type, diff * 8); let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]) - diff; - return self.context.new_cast(self.location, res, arg_type); + return self.context.new_cast(self.location, res, result_type); }; let count_leading_zeroes = self.context.get_builtin_function(count_leading_zeroes); let res = self.context.new_call(self.location, count_leading_zeroes, &[arg]); - self.context.new_cast(self.location, res, arg_type) + self.context.new_cast(self.location, res, result_type) } fn count_trailing_zeroes(&mut self, _width: u64, arg: RValue<'gcc>) -> RValue<'gcc> { - let result_type = arg.get_type(); - let arg = if result_type.is_signed(self.cx) { - let new_type = result_type.to_unsigned(self.cx); + let arg_type = arg.get_type(); + let result_type = self.u32_type; + let arg = if arg_type.is_signed(self.cx) { + let new_type = arg_type.to_unsigned(self.cx); self.gcc_int_cast(arg, new_type) } else { arg @@ -866,14 +876,12 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> { fn pop_count(&mut self, value: RValue<'gcc>) -> RValue<'gcc> { // TODO(antoyo): use the optimized version with fewer operations. - let result_type = value.get_type(); - let value_type = result_type.to_unsigned(self.cx); + let result_type = self.u32_type; + let arg_type = value.get_type(); + let value_type = arg_type.to_unsigned(self.cx); - let value = if result_type.is_signed(self.cx) { - self.gcc_int_cast(value, value_type) - } else { - value - }; + let value = + if arg_type.is_signed(self.cx) { self.gcc_int_cast(value, value_type) } else { value }; // only break apart 128-bit ints if they're not natively supported // TODO(antoyo): remove this if/when native 128-bit integers land in libgccjit @@ -1108,7 +1116,7 @@ fn try_intrinsic<'a, 'b, 'gcc, 'tcx>( dest: RValue<'gcc>, ) { if bx.sess().panic_strategy() == PanicStrategy::Abort { - bx.call(bx.type_void(), None, None, try_func, &[data], None); + bx.call(bx.type_void(), None, None, try_func, &[data], None, None); // Return 0 unconditionally from the intrinsic call; // we can never unwind. let ret_align = bx.tcx.data_layout.i32_align.abi; @@ -1182,21 +1190,21 @@ fn codegen_gnu_try<'gcc>( let zero = bx.cx.context.new_rvalue_zero(bx.int_type); let ptr = bx.cx.context.new_call(None, eh_pointer_builtin, &[zero]); let catch_ty = bx.type_func(&[bx.type_i8p(), bx.type_i8p()], bx.type_void()); - bx.call(catch_ty, None, None, catch_func, &[data, ptr], None); + bx.call(catch_ty, None, None, catch_func, &[data, ptr], None, None); bx.ret(bx.const_i32(1)); // NOTE: the blocks must be filled before adding the try/catch, otherwise gcc will not // generate a try/catch. // FIXME(antoyo): add a check in the libgccjit API to prevent this. 
bx.switch_to_block(current_block); - bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None); + bx.invoke(try_func_ty, None, None, try_func, &[data], then, catch, None, None); }); - let func = unsafe { std::mem::transmute(func) }; + let func = unsafe { std::mem::transmute::, RValue<'gcc>>(func) }; // Note that no invoke is used here because by definition this function // can't panic (that's what it's catching). - let ret = bx.call(llty, None, None, func, &[try_func, data, catch_func], None); + let ret = bx.call(llty, None, None, func, &[try_func, data, catch_func], None, None); let i32_align = bx.tcx().data_layout.i32_align.abi; bx.store(ret, dest, i32_align); } @@ -1222,9 +1230,9 @@ fn get_rust_try_fn<'a, 'gcc, 'tcx>( tcx, ty::Binder::dummy(tcx.mk_fn_sig( iter::once(i8p), - Ty::new_unit(tcx), + tcx.types.unit, false, - rustc_hir::Unsafety::Unsafe, + rustc_hir::Safety::Unsafe, Abi::Rust, )), ); @@ -1233,9 +1241,9 @@ fn get_rust_try_fn<'a, 'gcc, 'tcx>( tcx, ty::Binder::dummy(tcx.mk_fn_sig( [i8p, i8p].iter().cloned(), - Ty::new_unit(tcx), + tcx.types.unit, false, - rustc_hir::Unsafety::Unsafe, + rustc_hir::Safety::Unsafe, Abi::Rust, )), ); @@ -1244,7 +1252,7 @@ fn get_rust_try_fn<'a, 'gcc, 'tcx>( [try_fn_ty, i8p, catch_fn_ty], tcx.types.i32, false, - rustc_hir::Unsafety::Unsafe, + rustc_hir::Safety::Unsafe, Abi::Rust, )); let rust_try = gen_fn(cx, "__rust_try", rust_fn_sig, codegen); @@ -1266,7 +1274,7 @@ fn gen_fn<'a, 'gcc, 'tcx>( // FIXME(eddyb) find a nicer way to do this. cx.linkage.set(FunctionType::Internal); let func = cx.declare_fn(name, fn_abi); - let func_val = unsafe { std::mem::transmute(func) }; + let func_val = unsafe { std::mem::transmute::, RValue<'gcc>>(func) }; cx.set_frame_pointer_type(func_val); cx.apply_target_cpu_attr(func_val); let block = Builder::append_block(cx, func_val, "entry-block"); diff --git a/src/intrinsic/simd.rs b/src/intrinsic/simd.rs index a8d7c37dc8717..ba214a9c24cff 100644 --- a/src/intrinsic/simd.rs +++ b/src/intrinsic/simd.rs @@ -13,12 +13,14 @@ use rustc_codegen_ssa::errors::InvalidMonomorphization; use rustc_codegen_ssa::mir::operand::OperandRef; use rustc_codegen_ssa::mir::place::PlaceRef; use rustc_codegen_ssa::traits::{BaseTypeMethods, BuilderMethods}; +#[cfg(feature = "master")] use rustc_hir as hir; +use rustc_middle::mir::BinOp; use rustc_middle::span_bug; use rustc_middle::ty::layout::HasTyCtxt; use rustc_middle::ty::{self, Ty}; use rustc_span::{sym, Span, Symbol}; -use rustc_target::abi::Align; +use rustc_target::abi::{Align, Size}; use crate::builder::Builder; #[cfg(not(feature = "master"))] @@ -82,7 +84,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let place = PlaceRef::alloca(bx, args[0].layout); args[0].val.store(bx, place); let int_ty = bx.type_ix(expected_bytes * 8); - let ptr = bx.pointercast(place.llval, bx.cx.type_ptr_to(int_ty)); + let ptr = bx.pointercast(place.val.llval, bx.cx.type_ptr_to(int_ty)); bx.load(int_ty, ptr, Align::ONE) } _ => return_error!(InvalidMonomorphization::InvalidBitmask { @@ -122,12 +124,12 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let in_ty = arg_tys[0]; let comparison = match name { - sym::simd_eq => Some(hir::BinOpKind::Eq), - sym::simd_ne => Some(hir::BinOpKind::Ne), - sym::simd_lt => Some(hir::BinOpKind::Lt), - sym::simd_le => Some(hir::BinOpKind::Le), - sym::simd_gt => Some(hir::BinOpKind::Gt), - sym::simd_ge => Some(hir::BinOpKind::Ge), + sym::simd_eq => Some(BinOp::Eq), + sym::simd_ne => Some(BinOp::Ne), + sym::simd_lt => Some(BinOp::Lt), + sym::simd_le => 
Some(BinOp::Le), + sym::simd_gt => Some(BinOp::Gt), + sym::simd_ge => Some(BinOp::Ge), _ => None, }; @@ -340,11 +342,13 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( .map(|i| { let index = bx.context.new_rvalue_from_long(bx.i32_type, i as i64); let value = bx.extract_element(vector, index).to_rvalue(); - if name == sym::simd_ctlz { - bx.count_leading_zeroes(value.get_type().get_size() as u64 * 8, value) + let value_type = value.get_type(); + let element = if name == sym::simd_ctlz { + bx.count_leading_zeroes(value_type.get_size() as u64 * 8, value) } else { - bx.count_trailing_zeroes(value.get_type().get_size() as u64 * 8, value) - } + bx.count_trailing_zeroes(value_type.get_size() as u64 * 8, value) + }; + bx.context.new_cast(None, element, value_type) }) .collect(); return Ok(bx.context.new_rvalue_from_vector(None, vector.get_type(), &elements)); @@ -451,8 +455,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( ); match *in_elem.kind() { - ty::RawPtr(p) => { - let metadata = p.ty.ptr_metadata_ty(bx.tcx, |ty| { + ty::RawPtr(p_ty, _) => { + let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| { bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty) }); require!( @@ -465,8 +469,8 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( } } match *out_elem.kind() { - ty::RawPtr(p) => { - let metadata = p.ty.ptr_metadata_ty(bx.tcx, |ty| { + ty::RawPtr(p_ty, _) => { + let metadata = p_ty.ptr_metadata_ty(bx.tcx, |ty| { bx.tcx.normalize_erasing_regions(ty::ParamEnv::reveal_all(), ty) }); require!( @@ -491,7 +495,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values)); } - if name == sym::simd_expose_addr { + if name == sym::simd_expose_provenance { require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty }); let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); @@ -508,7 +512,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( ); match *in_elem.kind() { - ty::RawPtr(_) => {} + ty::RawPtr(_, _) => {} _ => { return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: in_elem }) } @@ -530,7 +534,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( return Ok(bx.context.new_rvalue_from_vector(bx.location, llret_ty, &values)); } - if name == sym::simd_from_exposed_addr { + if name == sym::simd_with_exposed_provenance { require_simd!(ret_ty, InvalidMonomorphization::SimdReturn { span, name, ty: ret_ty }); let (out_len, out_elem) = ret_ty.simd_size_and_type(bx.tcx()); @@ -551,7 +555,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( _ => return_error!(InvalidMonomorphization::ExpectedUsize { span, name, ty: in_elem }), } match *out_elem.kind() { - ty::RawPtr(_) => {} + ty::RawPtr(_, _) => {} _ => { return_error!(InvalidMonomorphization::ExpectedPointer { span, name, ty: out_elem }) } @@ -690,7 +694,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let ze = bx.zext(result, bx.type_ix(expected_bytes * 8)); // Convert the integer to a byte array - let ptr = bx.alloca(bx.type_ix(expected_bytes * 8), Align::ONE); + let ptr = bx.alloca(Size::from_bytes(expected_bytes), Align::ONE); bx.store(ze, ptr, Align::ONE); let array_ty = bx.type_array(bx.type_i8(), expected_bytes); let ptr = bx.pointercast(ptr, bx.cx.type_ptr_to(array_ty)); @@ -929,7 +933,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( // This counts how many pointers fn ptr_count(t: Ty<'_>) -> usize { match *t.kind() { - ty::RawPtr(p) => 1 + ptr_count(p.ty), + ty::RawPtr(p_ty, _) => 1 + ptr_count(p_ty), _ => 0, } } @@ 
-937,7 +941,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( // Non-ptr type fn non_ptr(t: Ty<'_>) -> Ty<'_> { match *t.kind() { - ty::RawPtr(p) => non_ptr(p.ty), + ty::RawPtr(p_ty, _) => non_ptr(p_ty), _ => t, } } @@ -947,7 +951,9 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let (_, element_ty0) = arg_tys[0].simd_size_and_type(bx.tcx()); let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx()); let (pointer_count, underlying_ty) = match *element_ty1.kind() { - ty::RawPtr(p) if p.ty == in_elem => (ptr_count(element_ty1), non_ptr(element_ty1)), + ty::RawPtr(p_ty, _) if p_ty == in_elem => { + (ptr_count(element_ty1), non_ptr(element_ty1)) + } _ => { require!( false, @@ -1043,7 +1049,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( // This counts how many pointers fn ptr_count(t: Ty<'_>) -> usize { match *t.kind() { - ty::RawPtr(p) => 1 + ptr_count(p.ty), + ty::RawPtr(p_ty, _) => 1 + ptr_count(p_ty), _ => 0, } } @@ -1051,7 +1057,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( // Non-ptr type fn non_ptr(t: Ty<'_>) -> Ty<'_> { match *t.kind() { - ty::RawPtr(p) => non_ptr(p.ty), + ty::RawPtr(p_ty, _) => non_ptr(p_ty), _ => t, } } @@ -1062,7 +1068,7 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>( let (_, element_ty1) = arg_tys[1].simd_size_and_type(bx.tcx()); let (_, element_ty2) = arg_tys[2].simd_size_and_type(bx.tcx()); let (pointer_count, underlying_ty) = match *element_ty1.kind() { - ty::RawPtr(p) if p.ty == in_elem && p.mutbl == hir::Mutability::Mut => { + ty::RawPtr(p_ty, mutbl) if p_ty == in_elem && mutbl == hir::Mutability::Mut => { (ptr_count(element_ty1), non_ptr(element_ty1)) } _ => { diff --git a/src/lib.rs b/src/lib.rs index 9b9b97b9595db..1132b0cd2f5ab 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ * TODO(antoyo): support LTO (gcc's equivalent to Full LTO is -flto -flto-partition=one — https://documentation.suse.com/sbp/all/html/SBP-GCC-10/index.html). * For Thin LTO, this might be helpful: * In gcc 4.6 -fwhopr was removed and became default with -flto. The non-whopr path can still be executed via -flto-partition=none. - * Or the new incremental LTO? + * Or the new incremental LTO (https://www.phoronix.com/news/GCC-Incremental-LTO-Patches)? * * Maybe some missing optizations enabled by rustc's LTO is in there: https://gcc.gnu.org/onlinedocs/gcc/Optimize-Options.html * Like -fipa-icf (should be already enabled) and maybe -fdevirtualize-at-ltrans. 
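
On the `ty::RawPtr(p)` → `ty::RawPtr(p_ty, _)` rewrites in the SIMD hunks above: `TyKind::RawPtr` now carries the pointee type and the mutability as two variant fields rather than a `TypeAndMut` struct, so the recursive `ptr_count`/`non_ptr` helpers destructure two fields. An illustrative stand-alone sketch of the same recursion, with a toy `Ty` type rather than rustc's:

```rust
// Toy stand-ins, not rustc's types: `RawPtr` carries (pointee, mutability)
// as two variant fields, so matches destructure two fields.
enum Ty {
    RawPtr(Box<Ty>, bool),
    Int,
}

// Counts levels of raw-pointer indirection, as in the gather/scatter checks.
fn ptr_count(t: &Ty) -> usize {
    match t {
        Ty::RawPtr(p_ty, _) => 1 + ptr_count(p_ty),
        _ => 0,
    }
}

// Strips all raw-pointer layers to reach the underlying element type.
fn non_ptr(t: &Ty) -> &Ty {
    match t {
        Ty::RawPtr(p_ty, _) => non_ptr(p_ty),
        _ => t,
    }
}

fn main() {
    // Models `*mut *const i32`: two pointer layers over an integer.
    let ty = Ty::RawPtr(Box::new(Ty::RawPtr(Box::new(Ty::Int), false)), true);
    assert_eq!(ptr_count(&ty), 2);
    assert!(matches!(non_ptr(&ty), Ty::Int));
}
```
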
@@ -16,14 +16,7 @@ #![allow(internal_features)] #![doc(rust_logo)] #![feature(rustdoc_internals)] -#![feature( - rustc_private, - decl_macro, - associated_type_bounds, - never_type, - trusted_len, - hash_raw_entry -)] +#![feature(rustc_private, decl_macro, never_type, trusted_len, hash_raw_entry, let_chains)] #![allow(broken_intra_doc_links)] #![recursion_limit = "256"] #![warn(rust_2018_idioms)] @@ -81,6 +74,7 @@ mod type_of; use std::any::Any; use std::fmt::Debug; +use std::ops::Deref; #[cfg(not(feature = "master"))] use std::sync::atomic::AtomicBool; #[cfg(not(feature = "master"))] @@ -88,6 +82,8 @@ use std::sync::atomic::Ordering; use std::sync::Arc; use std::sync::Mutex; +use back::lto::ThinBuffer; +use back::lto::ThinData; use errors::LTONotSupported; use gccjit::CType; use gccjit::{Context, OptimizationLevel}; @@ -99,13 +95,11 @@ use rustc_codegen_ssa::back::write::{ CodegenContext, FatLtoInput, ModuleConfig, TargetMachineFactoryFn, }; use rustc_codegen_ssa::base::codegen_crate; -use rustc_codegen_ssa::traits::{ - CodegenBackend, ExtraBackendMethods, ThinBufferMethods, WriteBackendMethods, -}; +use rustc_codegen_ssa::traits::{CodegenBackend, ExtraBackendMethods, WriteBackendMethods}; use rustc_codegen_ssa::{CodegenResults, CompiledModule, ModuleCodegen}; use rustc_data_structures::fx::FxIndexMap; use rustc_data_structures::sync::IntoDynSyncSend; -use rustc_errors::{DiagCtxt, ErrorGuaranteed}; +use rustc_errors::{DiagCtxtHandle, ErrorGuaranteed}; use rustc_metadata::EncodedMetadata; use rustc_middle::dep_graph::{WorkProduct, WorkProductId}; use rustc_middle::ty::TyCtxt; @@ -203,6 +197,7 @@ impl CodegenBackend for GccCodegenBackend { #[cfg(feature = "master")] gccjit::set_global_personality_function_name(b"rust_eh_personality\0"); + if sess.lto() == Lto::Thin { sess.dcx().emit_warn(LTONotSupported {}); } @@ -308,7 +303,7 @@ impl ExtraBackendMethods for GccCodegenBackend { alloc_error_handler_kind: AllocatorKind, ) -> Self::Module { let mut mods = GccContext { - context: new_context(tcx), + context: Arc::new(SyncContext::new(new_context(tcx))), should_combine_object_files: false, temp_dir: None, }; @@ -338,31 +333,42 @@ impl ExtraBackendMethods for GccCodegenBackend { } } -pub struct ThinBuffer; - -impl ThinBufferMethods for ThinBuffer { - fn data(&self) -> &[u8] { - unimplemented!(); - } -} - pub struct GccContext { - context: Context<'static>, + context: Arc, should_combine_object_files: bool, // Temporary directory used by LTO. We keep it here so that it's not removed before linking. temp_dir: Option, } -unsafe impl Send for GccContext {} -// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm". Try to disable it here. -unsafe impl Sync for GccContext {} +struct SyncContext { + context: Context<'static>, +} + +impl SyncContext { + fn new(context: Context<'static>) -> Self { + Self { context } + } +} + +impl Deref for SyncContext { + type Target = Context<'static>; + + fn deref(&self) -> &Self::Target { + &self.context + } +} + +unsafe impl Send for SyncContext {} +// FIXME(antoyo): that shouldn't be Sync. Parallel compilation is currently disabled with "-Zno-parallel-llvm". +// TODO: disable it here by returning false in CodegenBackend::supports_parallel(). 
+unsafe impl Sync for SyncContext {} impl WriteBackendMethods for GccCodegenBackend { type Module = GccContext; type TargetMachine = (); type TargetMachineError = (); type ModuleBuffer = ModuleBuffer; - type ThinData = (); + type ThinData = ThinData; type ThinBuffer = ThinBuffer; fn run_fat_lto( @@ -374,11 +380,11 @@ impl WriteBackendMethods for GccCodegenBackend { } fn run_thin_lto( - _cgcx: &CodegenContext, - _modules: Vec<(String, Self::ThinBuffer)>, - _cached_modules: Vec<(SerializedModule, WorkProduct)>, + cgcx: &CodegenContext, + modules: Vec<(String, Self::ThinBuffer)>, + cached_modules: Vec<(SerializedModule, WorkProduct)>, ) -> Result<(Vec>, Vec), FatalError> { - unimplemented!(); + back::lto::run_thin(cgcx, modules, cached_modules) } fn print_pass_timings(&self) { @@ -391,7 +397,7 @@ impl WriteBackendMethods for GccCodegenBackend { unsafe fn optimize( _cgcx: &CodegenContext, - _dcx: &DiagCtxt, + _dcx: DiagCtxtHandle<'_>, module: &ModuleCodegen, config: &ModuleConfig, ) -> Result<(), FatalError> { @@ -408,23 +414,26 @@ impl WriteBackendMethods for GccCodegenBackend { } unsafe fn optimize_thin( - _cgcx: &CodegenContext, - _thin: ThinModule, + cgcx: &CodegenContext, + thin: ThinModule, ) -> Result, FatalError> { - unimplemented!(); + back::lto::optimize_thin_module(thin, cgcx) } unsafe fn codegen( cgcx: &CodegenContext, - dcx: &DiagCtxt, + dcx: DiagCtxtHandle<'_>, module: ModuleCodegen, config: &ModuleConfig, ) -> Result { back::write::codegen(cgcx, dcx, module, config) } - fn prepare_thin(_module: ModuleCodegen) -> (String, Self::ThinBuffer) { - unimplemented!(); + fn prepare_thin( + module: ModuleCodegen, + emit_summary: bool, + ) -> (String, Self::ThinBuffer) { + back::lto::prepare_thin(module, emit_summary) } fn serialize_module(_module: ModuleCodegen) -> (String, Self::ModuleBuffer) { @@ -433,7 +442,7 @@ impl WriteBackendMethods for GccCodegenBackend { fn run_link( cgcx: &CodegenContext, - dcx: &DiagCtxt, + dcx: DiagCtxtHandle<'_>, modules: Vec>, ) -> Result, FatalError> { back::write::link(cgcx, dcx, modules) diff --git a/src/mono_item.rs b/src/mono_item.rs index e56c49686c012..44657ad4f6e25 100644 --- a/src/mono_item.rs +++ b/src/mono_item.rs @@ -1,7 +1,9 @@ #[cfg(feature = "master")] use gccjit::{FnAttribute, VarAttribute}; use rustc_codegen_ssa::traits::PreDefineMethods; +use rustc_hir::def::DefKind; use rustc_hir::def_id::{DefId, LOCAL_CRATE}; +use rustc_middle::bug; use rustc_middle::middle::codegen_fn_attrs::CodegenFnAttrFlags; use rustc_middle::mir::mono::{Linkage, Visibility}; use rustc_middle::ty::layout::{FnAbiOf, LayoutOf}; @@ -23,7 +25,14 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> { ) { let attrs = self.tcx.codegen_fn_attrs(def_id); let instance = Instance::mono(self.tcx, def_id); - let ty = instance.ty(self.tcx, ty::ParamEnv::reveal_all()); + let DefKind::Static { nested, .. } = self.tcx.def_kind(def_id) else { bug!() }; + // Nested statics do not have a type, so pick a dummy type and let `codegen_static` figure out + // the gcc type from the actual evaluated initializer. + let ty = if nested { + self.tcx.types.unit + } else { + instance.ty(self.tcx, ty::ParamEnv::reveal_all()) + }; let gcc_type = self.layout_of(ty).gcc_type(self); let is_tls = attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL); @@ -72,6 +81,6 @@ impl<'gcc, 'tcx> PreDefineMethods<'tcx> for CodegenCx<'gcc, 'tcx> { // TODO(antoyo): use inline attribute from there in linkage.set() above. 
self.functions.borrow_mut().insert(symbol_name.to_string(), decl); - self.function_instances.borrow_mut().insert(instance, unsafe { std::mem::transmute(decl) }); + self.function_instances.borrow_mut().insert(instance, decl); } } diff --git a/src/type_.rs b/src/type_.rs index 184367e9cde1c..e20234c5ce79c 100644 --- a/src/type_.rs +++ b/src/type_.rs @@ -94,13 +94,34 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { ty::FloatTy::F128 => self.type_f128(), } } -} -impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { - fn type_i1(&self) -> Type<'gcc> { + pub fn type_i1(&self) -> Type<'gcc> { self.bool_type } + pub fn type_struct(&self, fields: &[Type<'gcc>], packed: bool) -> Type<'gcc> { + let types = fields.to_vec(); + if let Some(typ) = self.struct_types.borrow().get(fields) { + return *typ; + } + let fields: Vec<_> = fields + .iter() + .enumerate() + .map(|(index, field)| { + self.context.new_field(None, *field, format!("field{}_TODO", index)) + }) + .collect(); + let typ = self.context.new_struct_type(None, "struct", &fields).as_type(); + if packed { + #[cfg(feature = "master")] + typ.set_packed(); + } + self.struct_types.borrow_mut().insert(types, typ); + typ + } +} + +impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { fn type_i8(&self) -> Type<'gcc> { self.i8_type } @@ -161,27 +182,6 @@ impl<'gcc, 'tcx> BaseTypeMethods<'tcx> for CodegenCx<'gcc, 'tcx> { self.context.new_function_pointer_type(None, return_type, params, false) } - fn type_struct(&self, fields: &[Type<'gcc>], packed: bool) -> Type<'gcc> { - let types = fields.to_vec(); - if let Some(typ) = self.struct_types.borrow().get(fields) { - return *typ; - } - let fields: Vec<_> = fields - .iter() - .enumerate() - .map(|(index, field)| { - self.context.new_field(None, *field, format!("field{}_TODO", index)) - }) - .collect(); - let typ = self.context.new_struct_type(None, "struct", &fields).as_type(); - if packed { - #[cfg(feature = "master")] - typ.set_packed(); - } - self.struct_types.borrow_mut().insert(types, typ); - typ - } - #[cfg(feature = "master")] fn type_kind(&self, typ: Type<'gcc>) -> TypeKind { if self.is_int_type_or_bool(typ) { diff --git a/src/type_of.rs b/src/type_of.rs index 1c9a41649aae1..d85ed4f12ffab 100644 --- a/src/type_of.rs +++ b/src/type_of.rs @@ -1,15 +1,15 @@ use std::fmt::Write; -use crate::rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods, LayoutTypeMethods}; use gccjit::{Struct, Type}; +use rustc_codegen_ssa::traits::{BaseTypeMethods, DerivedTypeMethods, LayoutTypeMethods}; use rustc_middle::bug; use rustc_middle::ty::layout::{LayoutOf, TyAndLayout}; use rustc_middle::ty::print::with_no_trimmed_paths; -use rustc_middle::ty::{self, Ty, TypeVisitableExt}; +use rustc_middle::ty::{self, CoroutineArgsExt, Ty, TypeVisitableExt}; use rustc_target::abi::call::{CastTarget, FnAbi, Reg}; use rustc_target::abi::{ - self, Abi, Align, FieldsShape, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface, - Variants, F128, F16, F32, F64, + self, Abi, FieldsShape, Float, Int, Integer, PointeeInfo, Pointer, Size, TyAbiInterface, + Variants, }; use crate::abi::{FnAbiGcc, FnAbiGccExt, GccType}; @@ -53,12 +53,6 @@ impl<'gcc, 'tcx> CodegenCx<'gcc, 'tcx> { } } -impl<'a, 'tcx> CodegenCx<'a, 'tcx> { - pub fn align_of(&self, ty: Ty<'tcx>) -> Align { - self.layout_of(ty).align.abi - } -} - fn uncached_gcc_type<'gcc, 'tcx>( cx: &CodegenCx<'gcc, 'tcx>, layout: TyAndLayout<'tcx>, @@ -205,7 +199,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> { /// of that field's type - this is useful for 
taking the address of /// that field and ensuring the struct has the right alignment. fn gcc_type<'gcc>(&self, cx: &CodegenCx<'gcc, 'tcx>) -> Type<'gcc> { - use crate::rustc_middle::ty::layout::FnAbiOf; + use rustc_middle::ty::layout::FnAbiOf; // This must produce the same result for `repr(transparent)` wrappers as for the inner type! // In other words, this should generally not look at the type at all, but only at the // layout. @@ -283,10 +277,7 @@ impl<'tcx> LayoutGccExt<'tcx> for TyAndLayout<'tcx> { match scalar.primitive() { Int(i, true) => cx.type_from_integer(i), Int(i, false) => cx.type_from_unsigned_integer(i), - F16 => cx.type_f16(), - F32 => cx.type_f32(), - F64 => cx.type_f64(), - F128 => cx.type_f128(), + Float(f) => cx.type_from_float(f), Pointer(address_space) => { // If we know the alignment, pick something better than i8. let pointee = if let Some(pointee) = self.pointee_info_at(cx, offset) { diff --git a/tests/failing-run-make-tests.txt b/tests/failing-run-make-tests.txt index 058261a3ceea0..842533cd3c62c 100644 --- a/tests/failing-run-make-tests.txt +++ b/tests/failing-run-make-tests.txt @@ -7,7 +7,6 @@ tests/run-make/doctests-runtool/ tests/run-make/emit-shared-files/ tests/run-make/exit-code/ tests/run-make/issue-22131/ -tests/run-make/issue-38237/ tests/run-make/issue-64153/ tests/run-make/llvm-ident/ tests/run-make/native-link-modifier-bundle/ diff --git a/tests/failing-ui-tests.txt b/tests/failing-ui-tests.txt index 1521a4d254701..5a55bdb156e39 100644 --- a/tests/failing-ui-tests.txt +++ b/tests/failing-ui-tests.txt @@ -13,7 +13,6 @@ tests/ui/sepcomp/sepcomp-fns-backwards.rs tests/ui/sepcomp/sepcomp-fns.rs tests/ui/sepcomp/sepcomp-statics.rs tests/ui/asm/x86_64/may_unwind.rs -tests/ui/backtrace.rs tests/ui/catch-unwind-bang.rs tests/ui/drop/dynamic-drop-async.rs tests/ui/cfg/cfg-panic-abort.rs @@ -73,4 +72,26 @@ tests/ui/consts/issue-73976-monomorphic.rs tests/ui/consts/issue-94675.rs tests/ui/rfcs/rfc-2632-const-trait-impl/const-drop-fail.rs tests/ui/rfcs/rfc-2632-const-trait-impl/const-drop.rs - +tests/ui/runtime/on-broken-pipe/child-processes.rs +tests/ui/sanitizer/cfi-assoc-ty-lifetime-issue-123053.rs +tests/ui/sanitizer/cfi-async-closures.rs +tests/ui/sanitizer/cfi-closures.rs +tests/ui/sanitizer/cfi-complex-receiver.rs +tests/ui/sanitizer/cfi-coroutine.rs +tests/ui/sanitizer/cfi-drop-in-place.rs +tests/ui/sanitizer/cfi-drop-no-principal.rs +tests/ui/sanitizer/cfi-fn-ptr.rs +tests/ui/sanitizer/cfi-self-ref.rs +tests/ui/sanitizer/cfi-supertraits.rs +tests/ui/sanitizer/cfi-virtual-auto.rs +tests/ui/sanitizer/kcfi-mangling.rs +tests/ui/statics/const_generics.rs +tests/ui/backtrace/dylib-dep.rs +tests/ui/errors/pic-linker.rs +tests/ui/delegation/fn-header.rs +tests/ui/consts/zst_no_llvm_alloc.rs +tests/ui/consts/const-eval/parse_ints.rs +tests/ui/simd/intrinsic/generic-arithmetic-pass.rs +tests/ui/backtrace/backtrace.rs +tests/ui/lifetimes/tail-expr-lock-poisoning.rs +tests/ui/runtime/rt-explody-panic-payloads.rs diff --git a/tests/lang_tests_common.rs b/tests/lang_tests_common.rs index c0fb5fa40733d..aecea37ab5a91 100644 --- a/tests/lang_tests_common.rs +++ b/tests/lang_tests_common.rs @@ -1,4 +1,5 @@ //! 
The common code for `tests/lang_tests_*.rs` + use std::{ env::{self, current_dir}, path::{Path, PathBuf}, @@ -80,7 +81,6 @@ pub fn main_inner(profile: Profile) { &format!("-Zcodegen-backend={}/target/debug/librustc_codegen_gcc.so", current_dir), "--sysroot", &format!("{}/build/build_sysroot/sysroot/", current_dir), - "-Zno-parallel-llvm", "-C", "link-arg=-lc", "-o", diff --git a/tests/run/array.rs b/tests/run/array.rs index afd0eed82004c..3fe8917c9a3c4 100644 --- a/tests/run/array.rs +++ b/tests/run/array.rs @@ -205,6 +205,17 @@ impl Sub for i16 { } } +#[track_caller] +#[lang = "panic_const_add_overflow"] +pub fn panic_const_add_overflow() -> ! { + panic("attempt to add with overflow"); +} + +#[track_caller] +#[lang = "panic_const_sub_overflow"] +pub fn panic_const_sub_overflow() -> ! { + panic("attempt to subtract with overflow"); +} /* * Code diff --git a/tests/run/assign.rs b/tests/run/assign.rs index 5b0db2da294dd..e105d64a8ad8f 100644 --- a/tests/run/assign.rs +++ b/tests/run/assign.rs @@ -120,6 +120,12 @@ impl Add for isize { } } +#[track_caller] +#[lang = "panic_const_add_overflow"] +pub fn panic_const_add_overflow() -> ! { + panic("attempt to add with overflow"); +} + /* * Code */ diff --git a/tests/run/closure.rs b/tests/run/closure.rs index 4ce528f86800c..355f0acee7473 100644 --- a/tests/run/closure.rs +++ b/tests/run/closure.rs @@ -189,6 +189,12 @@ pub fn panic(_msg: &'static str) -> ! { } } +#[track_caller] +#[lang = "panic_const_add_overflow"] +pub fn panic_const_add_overflow() -> ! { + panic("attempt to add with overflow"); +} + /* * Code */ diff --git a/tests/run/mut_ref.rs b/tests/run/mut_ref.rs index 194e55a3deaeb..5a3f72b69047e 100644 --- a/tests/run/mut_ref.rs +++ b/tests/run/mut_ref.rs @@ -122,6 +122,12 @@ impl Add for isize { } } +#[track_caller] +#[lang = "panic_const_add_overflow"] +pub fn panic_const_add_overflow() -> ! { + panic("attempt to add with overflow"); +} + /* * Code */ diff --git a/tests/run/operations.rs b/tests/run/operations.rs index 2d781670873f0..d697bd921cd11 100644 --- a/tests/run/operations.rs +++ b/tests/run/operations.rs @@ -207,6 +207,24 @@ impl Mul for isize { } } +#[track_caller] +#[lang = "panic_const_add_overflow"] +pub fn panic_const_add_overflow() -> ! { + panic("attempt to add with overflow"); +} + +#[track_caller] +#[lang = "panic_const_sub_overflow"] +pub fn panic_const_sub_overflow() -> ! { + panic("attempt to subtract with overflow"); +} + +#[track_caller] +#[lang = "panic_const_mul_overflow"] +pub fn panic_const_mul_overflow() -> ! { + panic("attempt to multiply with overflow"); +} + /* * Code */
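
The `panic_const_*_overflow` lang items added to these no_core tests exist because overflow panics are now routed through dedicated, argument-free entry points per operation instead of a generic `panic(&str)` call, so every test that can hit checked arithmetic has to provide them. A plain-Rust sketch of the shape of that lowering; the helper names are illustrative, and `panic!` stands in for the tests' own `panic` function:

```rust
// A cold, argument-free panic function per operation mirrors what rustc now
// emits for `a + b` under overflow checks.
#[cold]
#[inline(never)]
fn panic_const_add_overflow() -> ! {
    panic!("attempt to add with overflow");
}

fn add_or_panic(a: i32, b: i32) -> i32 {
    match a.checked_add(b) {
        Some(sum) => sum,
        None => panic_const_add_overflow(),
    }
}

fn main() {
    assert_eq!(add_or_panic(2, 3), 5);
    // add_or_panic(i32::MAX, 1) would branch to the dedicated overflow panic.
}
```
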