-
Notifications
You must be signed in to change notification settings - Fork 14.1k
[libc] add checksum for jmpbuf #101110
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[libc] add checksum for jmpbuf #101110
Conversation
Co-authored-by: Nick Desaulniers <ndesaulniers@google.com>
@llvm/pr-subscribers-libc Author: Schrodinger ZHU Yifan (SchrodingerZhu) — Changes: This is a continuation of Nick's work based on #88054. Besides adjusting the way we populate the jump buffer, this PR also adds checksums for the jump buffer.
Full diff: https://github.com/llvm/llvm-project/pull/101110.diff 6 Files Affected:
diff --git a/libc/include/llvm-libc-types/jmp_buf.h b/libc/include/llvm-libc-types/jmp_buf.h
index 8949be9fa0ab7..fb48cac5c8f17 100644
--- a/libc/include/llvm-libc-types/jmp_buf.h
+++ b/libc/include/llvm-libc-types/jmp_buf.h
@@ -38,6 +38,10 @@ typedef struct {
#else
#error "__jmp_buf not available for your target architecture."
#endif
+ __UINT64_TYPE__ __sigmask;
+ __UINT64_TYPE__ __has_sigmask : 1;
+ __UINT64_TYPE__ __unused : 63;
+ __UINT64_TYPE__ __chksum;
} __jmp_buf;
typedef __jmp_buf jmp_buf[1];
diff --git a/libc/src/setjmp/CMakeLists.txt b/libc/src/setjmp/CMakeLists.txt
index d85c532e8636c..9120ba459f0a4 100644
--- a/libc/src/setjmp/CMakeLists.txt
+++ b/libc/src/setjmp/CMakeLists.txt
@@ -2,6 +2,16 @@ if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_ARCHITECTURE})
endif()
+add_header_library(
+ checksum
+ HDRS
+ checksum.h
+ DEPENDS
+ libc.src.__support.hash
+ libc.src.stdlib.abort
+ libc.src.unistd.write
+)
+
add_entrypoint_object(
setjmp
ALIAS
diff --git a/libc/src/setjmp/checksum.h b/libc/src/setjmp/checksum.h
new file mode 100644
index 0000000000000..4ce1777d2bd87
--- /dev/null
+++ b/libc/src/setjmp/checksum.h
@@ -0,0 +1,67 @@
+//===-- Implementation header for jmpbuf checksum ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_SETJMP_CHECKSUM_H
+#define LLVM_LIBC_SRC_SETJMP_CHECKSUM_H
+
+#include "src/__support/hash.h"
+#include "src/__support/macros/attributes.h"
+#include "src/__support/macros/config.h"
+#include "src/setjmp/setjmp_impl.h"
+#include "src/stdlib/abort.h"
+#include "src/unistd/write.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace jmpbuf {
+using HashState = internal::HashState;
+// Initial values generated by
+// https://www.random.org/cgi-bin/randbyte?nbytes=48&format=h
+// These values are only used for overlay targets.
+LIBC_INLINE_VAR uint64_t register_mangle_cookie = 0xdf8a883867040cbc;
+LIBC_INLINE_VAR uint64_t checksum_mangle_cookie = 0x9ed4fe406ebe9cf9;
+LIBC_INLINE_VAR uint64_t randomness[4] = {
+ 0x83b9df7dddf5ab3d,
+ 0x06c931cca75e15c6,
+ 0x08280ec9e9a778bf,
+ 0x111f67f4aafc9276,
+};
+
+LIBC_INLINE int update_checksum(__jmp_buf *buf) {
+ HashState state{
+ randomness[0],
+ randomness[1],
+ randomness[2],
+ randomness[3],
+ };
+ state.update(buf, offsetof(__jmp_buf, __chksum));
+ buf->__chksum = state.finish() ^ checksum_mangle_cookie;
+ return 0;
+}
+
+LIBC_INLINE void verify(const __jmp_buf *buf) {
+ HashState state{
+ randomness[0],
+ randomness[1],
+ randomness[2],
+ randomness[3],
+ };
+ state.update(buf, offsetof(__jmp_buf, __chksum));
+ auto chksum = state.finish() ^ checksum_mangle_cookie;
+ if (chksum != buf->__chksum) {
+ constexpr char MSG[] = "jump buffer corrupted\n";
+ LIBC_NAMESPACE::write(2, MSG, sizeof(MSG) - 1);
+ LIBC_NAMESPACE::abort();
+ }
+}
+
+} // namespace jmpbuf
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC_SETJMP_CHECKSUM_H
diff --git a/libc/src/setjmp/x86_64/CMakeLists.txt b/libc/src/setjmp/x86_64/CMakeLists.txt
index ae84322a65401..3f5797c607df7 100644
--- a/libc/src/setjmp/x86_64/CMakeLists.txt
+++ b/libc/src/setjmp/x86_64/CMakeLists.txt
@@ -6,14 +6,11 @@ add_entrypoint_object(
../setjmp_impl.h
DEPENDS
libc.include.setjmp
+ libc.src.setjmp.checksum
COMPILE_OPTIONS
-O3
-fno-omit-frame-pointer
- # TODO: Remove once one of these lands:
- # https://github.com/llvm/llvm-project/pull/87837
- # https://github.com/llvm/llvm-project/pull/88054
- # https://github.com/llvm/llvm-project/pull/88157
- -ftrivial-auto-var-init=uninitialized
+ -momit-leaf-frame-pointer
)
add_entrypoint_object(
@@ -24,6 +21,7 @@ add_entrypoint_object(
../longjmp.h
DEPENDS
libc.include.setjmp
+ libc.src.setjmp.checksum
COMPILE_OPTIONS
-O3
-fomit-frame-pointer
diff --git a/libc/src/setjmp/x86_64/longjmp.cpp b/libc/src/setjmp/x86_64/longjmp.cpp
index f479c7bc96c97..2f17de7b693d1 100644
--- a/libc/src/setjmp/x86_64/longjmp.cpp
+++ b/libc/src/setjmp/x86_64/longjmp.cpp
@@ -7,8 +7,10 @@
//===----------------------------------------------------------------------===//
#include "src/setjmp/longjmp.h"
+#include "include/llvm-libc-types/jmp_buf.h"
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
+#include "src/setjmp/checksum.h"
#if !defined(LIBC_TARGET_ARCH_IS_X86_64)
#error "Invalid file include"
@@ -16,30 +18,51 @@
namespace LIBC_NAMESPACE_DECL {
+[[gnu::naked]]
LLVM_LIBC_FUNCTION(void, longjmp, (__jmp_buf * buf, int val)) {
- register __UINT64_TYPE__ rbx __asm__("rbx");
- register __UINT64_TYPE__ rbp __asm__("rbp");
- register __UINT64_TYPE__ r12 __asm__("r12");
- register __UINT64_TYPE__ r13 __asm__("r13");
- register __UINT64_TYPE__ r14 __asm__("r14");
- register __UINT64_TYPE__ r15 __asm__("r15");
- register __UINT64_TYPE__ rsp __asm__("rsp");
- register __UINT64_TYPE__ rax __asm__("rax");
-
- // ABI requires that the return value should be stored in rax. So, we store
- // |val| in rax. Note that this has to happen before we restore the registers
- // from values in |buf|. Otherwise, once rsp and rbp are updated, we cannot
- // read |val|.
- val = val == 0 ? 1 : val;
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(rax) : "m"(val) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(rbx) : "m"(buf->rbx) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(rbp) : "m"(buf->rbp) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(r12) : "m"(buf->r12) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(r13) : "m"(buf->r13) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(r14) : "m"(buf->r14) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(r15) : "m"(buf->r15) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=r"(rsp) : "m"(buf->rsp) :);
- LIBC_INLINE_ASM("jmp *%0\n\t" : : "m"(buf->rip));
+ asm(R"(
+ pushq %%rbp
+ pushq %%rbx
+ mov %%rdi, %%rbp
+ mov %%esi, %%ebx
+ subq $8, %%rsp
+ call %P0
+ addq $8, %%rsp
+ mov %%ebx, %%esi
+ mov %%rbp, %%rdi
+ popq %%rbx
+ popq %%rbp
+ )" :: "i"(jmpbuf::verify) : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11");
+
+ register __UINT64_TYPE__ rcx __asm__("rcx");
+ // Load cookie
+ asm("mov %1, %0\n\t" : "=r"(rcx) : "m"(jmpbuf::register_mangle_cookie));
+
+ // load registers from buffer
+ // do not pass any invalid values into registers
+#define RECOVER(REG) \
+ asm("mov %c[" #REG "](%%rdi), %%rdx\n\t" \
+ "xor %%rdx, %%rcx\n\t" \
+ "mov %%rdx, %%" #REG "\n\t" ::[REG] "i"(offsetof(__jmp_buf, REG)) \
+ : "rdx");
+
+ RECOVER(rbx);
+ RECOVER(rbp);
+ RECOVER(r12);
+ RECOVER(r13);
+ RECOVER(r14);
+ RECOVER(r15);
+ RECOVER(rsp);
+
+ asm(R"(
+ xor %%eax,%%eax
+ cmp $1,%%esi
+ adc %%esi,%%eax
+ mov %c[rip](%%rdi),%%rdx
+ xor %%rdx, %%rcx
+ jmp *%%rdx
+ )" ::[rip] "i"(offsetof(__jmp_buf, rip))
+ : "rdx");
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/setjmp/x86_64/setjmp.cpp b/libc/src/setjmp/x86_64/setjmp.cpp
index 6a1cc7a83936a..b3c92ded08839 100644
--- a/libc/src/setjmp/x86_64/setjmp.cpp
+++ b/libc/src/setjmp/x86_64/setjmp.cpp
@@ -8,6 +8,7 @@
#include "src/__support/common.h"
#include "src/__support/macros/config.h"
+#include "src/setjmp/checksum.h"
#include "src/setjmp/setjmp_impl.h"
#if !defined(LIBC_TARGET_ARCH_IS_X86_64)
@@ -16,42 +17,40 @@
namespace LIBC_NAMESPACE_DECL {
+namespace jmpbuf {} // namespace jmpbuf
+[[gnu::naked]]
LLVM_LIBC_FUNCTION(int, setjmp, (__jmp_buf * buf)) {
- register __UINT64_TYPE__ rbx __asm__("rbx");
- register __UINT64_TYPE__ r12 __asm__("r12");
- register __UINT64_TYPE__ r13 __asm__("r13");
- register __UINT64_TYPE__ r14 __asm__("r14");
- register __UINT64_TYPE__ r15 __asm__("r15");
-
- // We want to store the register values as is. So, we will suppress the
- // compiler warnings about the uninitialized variables declared above.
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wuninitialized"
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=m"(buf->rbx) : "r"(rbx) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=m"(buf->r12) : "r"(r12) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=m"(buf->r13) : "r"(r13) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=m"(buf->r14) : "r"(r14) :);
- LIBC_INLINE_ASM("mov %1, %0\n\t" : "=m"(buf->r15) : "r"(r15) :);
-#pragma GCC diagnostic pop
-
- // We want the rbp of the caller, which is what __builtin_frame_address(1)
- // should return. But, compilers generate a warning that calling
- // __builtin_frame_address with non-zero argument is unsafe. So, we use
- // the knowledge of the x86_64 ABI to fetch the callers rbp. As per the ABI,
- // the rbp of the caller is pushed on to the stack and then new top is saved
- // in this function's rbp. So, we fetch it from location at which this
- // functions's rbp is pointing.
- buf->rbp = *reinterpret_cast<__UINTPTR_TYPE__ *>(__builtin_frame_address(0));
-
- // The callers stack address is exactly 2 pointer widths ahead of the current
- // frame pointer - between the current frame pointer and the rsp of the caller
- // are the return address (pushed by the x86_64 call instruction) and the
- // previous stack pointer as required by the x86_64 ABI.
- // The stack pointer is ahead because the stack grows down on x86_64.
- buf->rsp = reinterpret_cast<__UINTPTR_TYPE__>(__builtin_frame_address(0)) +
- sizeof(__UINTPTR_TYPE__) * 2;
- buf->rip = reinterpret_cast<__UINTPTR_TYPE__>(__builtin_return_address(0));
- return 0;
+ register __UINT64_TYPE__ rcx __asm__("rcx");
+ // Load cookie
+ asm("mov %1, %0\n\t" : "=r"(rcx) : "m"(jmpbuf::register_mangle_cookie));
+ // store registers to buffer
+ // do not pass any invalid values into registers
+#define STORE(REG) \
+ asm("mov %%" #REG ", %%rdx\n\t" \
+ "xor %%rdx, %%rcx\n\t" \
+ "mov %%rdx, %c[" #REG \
+ "](%%rdi)\n\t" ::[REG] "i"(offsetof(__jmp_buf, REG)) \
+ : "rdx");
+
+ STORE(rbx);
+ STORE(rbp);
+ STORE(r12);
+ STORE(r13);
+ STORE(r14);
+ STORE(r15);
+ asm(R"(
+ lea 8(%%rsp),%%rdx
+ xor %%rdx, %%rcx
+ mov %%rdx,%c[rsp](%%rdi)
+ mov (%%rsp),%%rdx
+ xor %%rdx, %%rcx
+ mov %%rdx,%c[rip](%%rdi)
+ )" ::[rsp] "i"(offsetof(__jmp_buf, rsp)),
+ [rip] "i"(offsetof(__jmp_buf, rip))
+ : "rdx");
+
+ // tail call to update checksum
+ asm("jmp %P0" : : "i"(jmpbuf::update_checksum));
}
} // namespace LIBC_NAMESPACE_DECL
|
8606f82
to
bc49269
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall mostly looks fine, but I've left a few questions inline. The other big question I have is if there is any way to support debug instructions for these routines? For RISC-V its easy to know that there is only ELF + DWARF because that's all that's ratified or documented in the psABI (and MS hasn't released anything about COFF) . But it isn't clear to me that you could make any assumptions like that for X86_64.
libc/src/setjmp/x86_64/longjmp.cpp
Outdated
asm("mov %c[" #REG "](%%rdi), %%rdx\n\t" \ | ||
"xor %%rdx, %%rcx\n\t" \ | ||
"mov %%rdx, %%" #REG "\n\t" ::[REG] "i"(offsetof(__jmp_buf, REG)) \ | ||
: "rdx"); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need rdx
? can't you load the value from buf into REG
and decrypt it directly instead of using rdx
as a scratch reg?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bionic suggests that for esp/ebp, invalid values may lead to bugs. I guess they are worried about signal frames within the instruction window:
https://android.googlesource.com/platform/bionic/+/master/libc/arch-x86/bionic/setjmp.S#166
// Initial values generated by | ||
// https://www.random.org/cgi-bin/randbyte?nbytes=48&format=h | ||
// These values are only used for overlay targets. | ||
LIBC_INLINE_VAR uint64_t register_mangle_cookie = 0xdf8a883867040cbc; | ||
LIBC_INLINE_VAR uint64_t checksum_mangle_cookie = 0x9ed4fe406ebe9cf9; | ||
LIBC_INLINE_VAR uint64_t randomness[4] = { | ||
0x83b9df7dddf5ab3d, | ||
0x06c931cca75e15c6, | ||
0x08280ec9e9a778bf, | ||
0x111f67f4aafc9276, | ||
}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it standard to use fixed keys in these implementations? xor
encryption is already quite weak, is there a way we can initialize these using some form of entropy on systems that can support that?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I plan to populate these keys on full build during startup.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, that sounds reasonable. For other targets, like baremetal, we may want to think about other means of initialization, but that's out of scope for this patch.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the initialization fails, we should make that as obvious as possible since a fixed key is as bad as no key at all. I'd recommend making these initial values 0xAAAAAAAAAAAAAAAA
, 0xBBBBBBBBBBBBBBBB
, and so on so that it's clear what has happened.
In general I guess we can mess with CFI directives. See https://android.googlesource.com/platform/bionic/+/master/libc/arch-arm64/bionic/setjmp.S#194 |
Right, but Android is ELF + DWARF for everything. I'd expect llvm-libc to also be able to support other formats, like COFF + PDB, etc. |
namespace internal {
// Folded multiplication: widen both operands to 128 bits, multiply, and
// collapse the product by xor-ing its high and low 64-bit halves together.
inline __UINT64_TYPE__ folded_multiply(__UINT64_TYPE__ x, __UINT64_TYPE__ y) {
  const __uint128_t product = static_cast<__uint128_t>(x) * y;
  const auto lo_half = static_cast<__UINT64_TYPE__>(product);
  const auto hi_half = static_cast<__UINT64_TYPE__>(product >> 64);
  return hi_half ^ lo_half;
}
// Read a value of type T from `ptr` interpreted as little endian.
// The original implementation wrote one union member and read another, which
// is undefined behavior in C++. Copying the (possibly reordered) bytes into
// the result with __builtin_memcpy is well-defined and compiles to the same
// code: a plain load on little-endian targets, and a load followed by a byte
// swap on big-endian ones. On aarch64 (-mbig-endian), this compiles to the
// following for int:
//   ldr w0, [x0]
//   rev w0, w0
//   ret
template <typename T> inline T read_little_endian(const void *ptr) {
  const __UINT8_TYPE__ *bytes = static_cast<const __UINT8_TYPE__ *>(ptr);
  __UINT8_TYPE__ buffer[sizeof(T)];
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  // Reverse the bytes so a native (big-endian) load of `buffer` yields the
  // little-endian interpretation of the input.
  for (__SIZE_TYPE__ i = 0; i < sizeof(T); ++i)
    buffer[i] = bytes[sizeof(T) - i - 1];
#else
  for (__SIZE_TYPE__ i = 0; i < sizeof(T); ++i)
    buffer[i] = bytes[i];
#endif
  T value;
  __builtin_memcpy(&value, buffer, sizeof(T));
  return value;
}
// Read up to 8 bytes at `ptr` into the `low`/`high` output halves.
// `size` must be <= 8. Overlapping reads are used so that every byte of the
// input contributes to at least one of the two halves.
inline void read_small_values(const void *ptr, __SIZE_TYPE__ size,
                              __UINT64_TYPE__ &low, __UINT64_TYPE__ &high) {
  const __UINT8_TYPE__ *bytes = static_cast<const __UINT8_TYPE__ *>(ptr);
  if (size == 0) {
    low = 0;
    high = 0;
    return;
  }
  if (size == 1) {
    // A single byte feeds both halves.
    low = static_cast<__UINT64_TYPE__>(bytes[0]);
    high = low;
    return;
  }
  if (size < 4) {
    // 2-3 bytes: a 16-bit prefix plus the trailing byte.
    low = static_cast<__UINT64_TYPE__>(
        read_little_endian<__UINT16_TYPE__>(&bytes[0]));
    high = static_cast<__UINT64_TYPE__>(bytes[size - 1]);
    return;
  }
  // 4-8 bytes: two possibly-overlapping 32-bit reads from each end.
  low = static_cast<__UINT64_TYPE__>(
      read_little_endian<__UINT32_TYPE__>(&bytes[0]));
  high = static_cast<__UINT64_TYPE__>(
      read_little_endian<__UINT32_TYPE__>(&bytes[size - 4]));
}
// This constant comes from Knuth's prng (it empirically works well).
inline constexpr __UINT64_TYPE__ MULTIPLE = 6364136223846793005;
// Rotation amount for mixing.
inline constexpr __UINT64_TYPE__ ROTATE = 23;
// Randomly generated values. For now, we use the same values as in aHash as
// they are widely tested.
// https://github.com/tkaitchuck/aHash/blob/9f6a2ad8b721fd28da8dc1d0b7996677b374357c/src/random_state.rs#L38
inline constexpr __UINT64_TYPE__ RANDOMNESS[2][4] = {
    {0x243f6a8885a308d3, 0x13198a2e03707344, 0xa4093822299f31d0,
     0x082efa98ec4e6c89},
    {0x452821e638d01377, 0xbe5466cf34e90c6c, 0xc0ac29b7c97c50dd,
     0x3f84d5b5b5470917},
};
// This is a portable string hasher. It is not cryptographically secure.
// The quality of the hash is good enough to pass all tests in SMHasher.
// The implementation is derived from the generic routine of aHash.
class HashState {
  // Running accumulator that absorbs each chunk of input.
  __UINT64_TYPE__ buffer;
  // Per-instance offset added to `buffer` before each mix step.
  __UINT64_TYPE__ pad;
  // Keys xor-ed into the two input words before the folded multiply.
  __UINT64_TYPE__ extra_keys[2];

  // Mix one pair of 64-bit words into the accumulator.
  inline void update(__UINT64_TYPE__ low, __UINT64_TYPE__ high) {
    __UINT64_TYPE__ combined =
        folded_multiply(low ^ extra_keys[0], high ^ extra_keys[1]);
    buffer = (buffer + pad) ^ combined;
    buffer = __builtin_rotateleft64(buffer, ROTATE);
  }

  // Derive a stronger value from `seed` by hashing it once with fixed keys.
  inline static __UINT64_TYPE__ mix(__UINT64_TYPE__ seed) {
    HashState mixer{RANDOMNESS[0][0], RANDOMNESS[0][1], RANDOMNESS[0][2],
                    RANDOMNESS[0][3]};
    mixer.update(seed, 0);
    return mixer.finish();
  }

public:
  // Construct directly from four key words (no additional mixing).
  inline constexpr HashState(__UINT64_TYPE__ a, __UINT64_TYPE__ b,
                             __UINT64_TYPE__ c, __UINT64_TYPE__ d)
      : buffer(a), pad(b), extra_keys{c, d} {}

  // Construct from a single seed; the seed is mixed once and xor-ed with
  // fixed random constants to populate all four state words.
  inline HashState(__UINT64_TYPE__ seed) {
    // Mix one more round of the seed to make it stronger.
    __UINT64_TYPE__ mixed = mix(seed);
    buffer = RANDOMNESS[1][0] ^ mixed;
    pad = RANDOMNESS[1][1] ^ mixed;
    extra_keys[0] = RANDOMNESS[1][2] ^ mixed;
    extra_keys[1] = RANDOMNESS[1][3] ^ mixed;
  }

  // Absorb `size` bytes at `ptr`. The length is folded into the state first;
  // inputs longer than 16 bytes mix the final 16 bytes before consuming the
  // leading 16-byte chunks, and shorter inputs use overlapping reads so every
  // byte contributes.
  inline void update(const void *ptr, __SIZE_TYPE__ size) {
    __UINT8_TYPE__ const *bytes = static_cast<const __UINT8_TYPE__ *>(ptr);
    buffer = (buffer + size) * MULTIPLE;
    __UINT64_TYPE__ low, high;
    if (size > 8) {
      if (size > 16) {
        // update tail
        low = read_little_endian<__UINT64_TYPE__>(&bytes[size - 16]);
        high = read_little_endian<__UINT64_TYPE__>(&bytes[size - 8]);
        update(low, high);
        while (size > 16) {
          low = read_little_endian<__UINT64_TYPE__>(&bytes[0]);
          high = read_little_endian<__UINT64_TYPE__>(&bytes[8]);
          update(low, high);
          bytes += 16;
          size -= 16;
        }
      } else {
        // 9-16 bytes: two overlapping 64-bit reads from each end.
        low = read_little_endian<__UINT64_TYPE__>(&bytes[0]);
        high = read_little_endian<__UINT64_TYPE__>(&bytes[size - 8]);
        update(low, high);
      }
    } else {
      read_small_values(ptr, size, low, high);
      update(low, high);
    }
  }

  // Finalize: fold the accumulator with the pad and rotate by a
  // data-dependent amount. Does not reset the state.
  inline __UINT64_TYPE__ finish() {
    int rot = buffer & 63;
    __UINT64_TYPE__ folded = folded_multiply(buffer, pad);
    return __builtin_rotateleft64(folded, rot);
  }
};
} // namespace internal
// The benchmark is built without libc headers, so map offsetof directly onto
// the compiler builtin; the inline asm below needs it as a constant
// expression for "i" operands.
#define offsetof(A, B) __builtin_offsetof(A, B)

// Standalone clone of the proposed __jmp_buf layout. Field order and offsets
// must match the ones baked into the setjmp/longjmp assembly below.
typedef struct {
  __UINT64_TYPE__ rbx; // callee-saved registers (x86-64 SysV ABI)
  __UINT64_TYPE__ rbp;
  __UINT64_TYPE__ r12;
  __UINT64_TYPE__ r13;
  __UINT64_TYPE__ r14;
  __UINT64_TYPE__ r15;
  __UINTPTR_TYPE__ rsp;               // caller's stack pointer (rsp after ret)
  __UINTPTR_TYPE__ rip;               // return address to resume at
  __UINT64_TYPE__ __sigmask;          // saved signal mask (not used here)
  __UINT64_TYPE__ __has_sigmask : 1;  // whether __sigmask holds a value
  __UINT64_TYPE__ __unused : 63;
  __UINT64_TYPE__ __chksum;           // hash of all preceding fields
} my_jmp_buf;
namespace jmpbuf {
using HashState = internal::HashState;
// Initial values generated by
// https://www.random.org/cgi-bin/randbyte?nbytes=48&format=h
// These values are only used for overlay targets.
inline __UINT64_TYPE__ register_mangle_cookie = 0xdf8a883867040cbc;
inline __UINT64_TYPE__ checksum_mangle_cookie = 0x9ed4fe406ebe9cf9;
inline __UINT64_TYPE__ randomness[4] = {
0x83b9df7dddf5ab3d,
0x06c931cca75e15c6,
0x08280ec9e9a778bf,
0x111f67f4aafc9276,
};
inline int update_checksum(my_jmp_buf *buf) {
HashState state{
randomness[0],
randomness[1],
randomness[2],
randomness[3],
};
state.update(buf, offsetof(my_jmp_buf, __chksum));
buf->__chksum = state.finish() ^ checksum_mangle_cookie;
return 0;
}
inline void verify(const my_jmp_buf *buf) {
HashState state{
randomness[0],
randomness[1],
randomness[2],
randomness[3],
};
state.update(buf, offsetof(my_jmp_buf, __chksum));
auto chksum = state.finish() ^ checksum_mangle_cookie;
if (chksum != buf->__chksum) {
__builtin_trap();
}
}
} // namespace jmpbuf
namespace test {
// Restore the register state saved by test::setjmp and resume at the saved
// return address with return value `val` (0 promoted to 1). [[gnu::naked]]
// suppresses the prologue/epilogue, so buf stays in %rdi and val in %esi.
[[gnu::naked]]
void longjmp (my_jmp_buf * buf, int val) {
#ifndef NO_CHECK
  // Verify the buffer checksum before touching any state. buf/val survive
  // the call in callee-saved rbp/rbx; the sub/add pair keeps %rsp 16-byte
  // aligned at the call site as the x86-64 SysV ABI requires.
  asm(R"(
pushq %%rbp
pushq %%rbx
mov %%rdi, %%rbp
mov %%esi, %%ebx
subq $8, %%rsp
call %P0
addq $8, %%rsp
mov %%ebx, %%esi
mov %%rbp, %%rdi
popq %%rbx
popq %%rbp
)" ::"i"(jmpbuf::verify)
      : "rax", "rcx", "rdx", "r8", "r9", "r10", "r11");
#endif
  register __UINT64_TYPE__ rcx __asm__("rcx");
  // Load cookie
  asm("mov %1, %0\n\t" : "=r"(rcx) : "m"(jmpbuf::register_mangle_cookie));
  // load registers from buffer
  // do not pass any invalid values into registers
  // NOTE(review): in AT&T syntax `xor %%rdx, %1` xors %rdx *into* operand 1
  // (the cookie register), so REG receives the raw stored word rather than a
  // demangled one, and an input operand is mutated. Presumably
  // `xor %1, %%rdx` was intended — confirm. The setjmp side mirrors this, so
  // round trips still behave correctly.
#define RECOVER(REG)                                                           \
  register __UINT64_TYPE__ REG __asm__(#REG);                                  \
  asm volatile("mov %c[" #REG "](%%rdi), %%rdx\n\t"                            \
               "xor %%rdx, %1\n\t"                                             \
               "mov %%rdx, %0\n\t"                                             \
               : "=r"(REG)                                                     \
               : "r"(rcx), [REG] "i"(offsetof(my_jmp_buf, REG))                \
               : "rdx");
  RECOVER(rbx);
  RECOVER(rbp);
  RECOVER(r12);
  RECOVER(r13);
  RECOVER(r14);
  RECOVER(r15);
  RECOVER(rsp);
  register int eax __asm__("eax");
  // eax = (val == 0) ? 1 : val: `cmp $1,%esi` sets the carry flag exactly
  // when esi is 0, and `adc` folds that borrow in. Then jump to the saved
  // return address.
  asm volatile(R"(
xor %0,%0
cmp $1,%%esi
adc %%esi,%0
mov %c[rip](%%rdi),%%rdx
xor %%rdx, %%rcx
jmp *%%rdx
)"
               : "=r"(eax)
               : [rip] "i"(offsetof(my_jmp_buf, rip))
               : "rdx");
}
// Save the callee-saved registers, the caller's stack pointer, and the return
// address into *buf. [[gnu::naked]] suppresses the prologue/epilogue, so buf
// remains in %rdi and %rsp still points at the return address.
[[gnu::naked]]
int setjmp (my_jmp_buf * buf) {
  register __UINT64_TYPE__ rcx __asm__("rcx");
  // Load cookie
  asm("mov %1, %0\n\t" : "=r"(rcx) : "m"(jmpbuf::register_mangle_cookie));
  // store registers to buffer
  // do not pass any invalid values into registers
  // NOTE(review): as in longjmp, `xor %%rdx, %%rcx` updates the cookie
  // register, not %rdx — the value written to the buffer is the raw register
  // contents. Presumably `xor %%rcx, %%rdx` was intended; the longjmp side
  // mirrors this, so round trips still work.
#define STORE(REG)                                                             \
  asm("mov %%" #REG ", %%rdx\n\t"                                              \
      "xor %%rdx, %%rcx\n\t"                                                   \
      "mov %%rdx, %c[" #REG                                                    \
      "](%%rdi)\n\t" ::[REG] "i"(offsetof(my_jmp_buf, REG))                    \
      : "rdx");
  STORE(rbx);
  STORE(rbp);
  STORE(r12);
  STORE(r13);
  STORE(r14);
  STORE(r15);
  // rsp: save %rsp+8, i.e. the stack pointer the caller will have after our
  // ret (skipping the return address). rip: the return address itself, read
  // from the top of the stack.
  asm(R"(
lea 8(%%rsp),%%rdx
xor %%rdx, %%rcx
mov %%rdx,%c[rsp](%%rdi)
mov (%%rsp),%%rdx
xor %%rdx, %%rcx
mov %%rdx,%c[rip](%%rdi)
)" ::[rsp] "i"(offsetof(my_jmp_buf, rsp)),
      [rip] "i"(offsetof(my_jmp_buf, rip))
      : "rdx");
#ifndef NO_CHECK
  // tail call to update checksum; its `return 0` becomes setjmp's return
  // value on the initial (non-longjmp) return
  asm("jmp %P0" : : "i"(jmpbuf::update_checksum));
#else
  asm("xor %eax, %eax\n\tret\n\t");
#endif
}
}
#include <setjmp.h>
#include <chrono>
#include <iostream>
// Micro-benchmark: one million setjmp/longjmp round trips through the local
// test:: implementation, then through the system libc; prints each loop's
// elapsed time in nanoseconds.
int main() {
  using namespace std::chrono;
  {
    auto x = high_resolution_clock::now();
    // <setjmp.h> may define setjmp as a macro; suppress it so the call below
    // resolves to test::setjmp.
#pragma push_macro("setjmp")
#undef setjmp
    for (int i = 0; i < 1000000; ++i) {
      my_jmp_buf buf;
      if (test::setjmp(&buf))
        continue;
      // val 0 is promoted to 1 by the implementation, so the second return
      // of setjmp takes the `continue` branch.
      test::longjmp(&buf, 0);
    }
    auto y = high_resolution_clock::now();
    std::cout << duration_cast<nanoseconds>(y - x).count() << std::endl;
  }
#pragma pop_macro("setjmp")
  {
    auto x = high_resolution_clock::now();
    for (int i = 0; i < 1000000; ++i) {
      jmp_buf buf;
      if (::setjmp(buf))
        continue;
      ::longjmp(buf, 0);
    }
    auto y = high_resolution_clock::now();
    std::cout << duration_cast<nanoseconds>(y - x).count() << std::endl;
  }
}
|
I think it's unsurprising that avoiding the encryption/masking/mangling is significantly faster, especially when executed in a loop. It suggests that the extra security measures should be configurable, since I imagine many users may not want/need that feature. That said, I think a program whose runtime is dominated by setjmp/longjmp is rare. Also, since I'm not the most seasoned libc reviewer, I know @frobtech has implemented similar schemes, and may have a few thoughts on ways to improve the implementation. |
I suppose the main cost just comes from spilling the values to the stack and then loading them. Inlining them should speed things up a lot but would make the solution less portable. |
This is a continuation of Nick's work based on #88054. Besides adjusting the way we populate the jump buffer, this PR also adds checksums for the jump buffer.
mul/add/xor
operations given constant size of the relevant fields, which should not be too costly to compute.