-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
111 additions
and
167 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,102 +1,105 @@ | ||
#include <chrono> | ||
#include <vector> | ||
|
||
#include "footprint.hpp" | ||
#include "msequence.hpp" | ||
#include <cassert> | ||
|
||
using Clock = std::chrono::system_clock; | ||
|
||
namespace timesync | ||
{ | ||
uint64_t sqrtsd_loop_(double* buffer, uint64_t elems, uint64_t repeat) | ||
void Footprint::high() | ||
{ | ||
unsigned long long passes, length, addr; | ||
unsigned long long a, b, c, d; | ||
uint64_t ret = 0; | ||
assert(elems >= 256 / sizeof(*buffer)); | ||
double m = 0.0; | ||
for (std::size_t r = 0; r < compute_rep; r++) | ||
{ | ||
for (size_t i = 0; i < compute_size; i++) | ||
{ | ||
m += compute_vec_a_[i] * compute_vec_b_[i]; | ||
} | ||
} | ||
if (m == 42.0) | ||
{ | ||
// prevent optimization, sure there is an easier way | ||
__asm__ __volatile__("mfence;" :::); | ||
} | ||
} | ||
|
||
passes = elems / 64; // 32 128-Bit accesses in inner loop | ||
length = passes * 32 * repeat; | ||
addr = (unsigned long long)buffer; | ||
void Footprint::low() | ||
{ | ||
for (uint64_t i = 0; i < nop_rep; i++) | ||
{ | ||
asm volatile("rep; nop" ::: "memory"); | ||
} | ||
} | ||
|
||
if (!passes) | ||
return ret; | ||
/* | ||
* Input: RAX: addr (pointer to the buffer) | ||
* RBX: passes (number of iterations) | ||
* RCX: length (total number of accesses) | ||
*/ | ||
__asm__ __volatile__("mfence;" | ||
"mov %%rax,%%r9;" // addr | ||
"mov %%rbx,%%r10;" // passes | ||
"mov %%rcx,%%r15;" // length | ||
"mov %%r9,%%r14;" // store addr | ||
"mov %%r10,%%r8;" // store passes | ||
"mov %%r15,%%r13;" // store length | ||
void Footprint::check_affinity() | ||
{ | ||
CPU_ZERO(&cpu_set_old_); | ||
auto err = sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_old_); | ||
if (err) | ||
{ | ||
Log::error() << "failed to get thread affinity: " << strerror(errno); | ||
return; | ||
} | ||
|
||
// initialize registers | ||
"movapd 0(%%r9), %%xmm0;" | ||
"movapd 0(%%r9), %%xmm8;" | ||
"movapd 16(%%r9), %%xmm9;" | ||
"movapd 32(%%r9), %%xmm10;" | ||
"movapd 48(%%r9), %%xmm11;" | ||
"movapd 64(%%r9), %%xmm12;" | ||
"movapd 80(%%r9), %%xmm13;" | ||
"movapd 96(%%r9), %%xmm14;" | ||
"movapd 112(%%r9), %%xmm15;" | ||
cpu_set_t cpu_set_target; | ||
CPU_ZERO(&cpu_set_target); | ||
CPU_SET(0, &cpu_set_target); | ||
err = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set_target); | ||
if (err) | ||
{ | ||
Log::error() << "failed to set thread affinity: " << strerror(errno); | ||
return; | ||
} | ||
restore_affinity_ = true; | ||
} | ||
|
||
".align 64;" | ||
"_work_loop_sqrt_sd:" | ||
"sqrtsd %%xmm8, %%xmm0;" | ||
"sqrtsd %%xmm9, %%xmm0;" | ||
"sqrtsd %%xmm10, %%xmm0;" | ||
"sqrtsd %%xmm11, %%xmm0;" | ||
"sqrtsd %%xmm12, %%xmm0;" | ||
"sqrtsd %%xmm13, %%xmm0;" | ||
"sqrtsd %%xmm14, %%xmm0;" | ||
"sqrtsd %%xmm15, %%xmm0;" | ||
"sqrtsd %%xmm8, %%xmm0;" | ||
"sqrtsd %%xmm9, %%xmm0;" | ||
"sqrtsd %%xmm10, %%xmm0;" | ||
"sqrtsd %%xmm11, %%xmm0;" | ||
"sqrtsd %%xmm12, %%xmm0;" | ||
"sqrtsd %%xmm13, %%xmm0;" | ||
"sqrtsd %%xmm14, %%xmm0;" | ||
"sqrtsd %%xmm15, %%xmm0;" | ||
"sqrtsd %%xmm8, %%xmm0;" | ||
"sqrtsd %%xmm9, %%xmm0;" | ||
"sqrtsd %%xmm10, %%xmm0;" | ||
"sqrtsd %%xmm11, %%xmm0;" | ||
"sqrtsd %%xmm12, %%xmm0;" | ||
"sqrtsd %%xmm13, %%xmm0;" | ||
"sqrtsd %%xmm14, %%xmm0;" | ||
"sqrtsd %%xmm15, %%xmm0;" | ||
"sqrtsd %%xmm8, %%xmm0;" | ||
"sqrtsd %%xmm9, %%xmm0;" | ||
"sqrtsd %%xmm10, %%xmm0;" | ||
"sqrtsd %%xmm11, %%xmm0;" | ||
"sqrtsd %%xmm12, %%xmm0;" | ||
"sqrtsd %%xmm13, %%xmm0;" | ||
"sqrtsd %%xmm14, %%xmm0;" | ||
"sqrtsd %%xmm15, %%xmm0;" | ||
"add $512,%%r9;" | ||
"sub $1,%%r10;" | ||
"jnz _skip_reset_sqrt_sd;" // reset buffer if the end is reached | ||
"mov %%r14,%%r9;" // restore addr | ||
"mov %%r8,%%r10;" // restore passes | ||
"_skip_reset_sqrt_sd:" | ||
"sub $32,%%r15;" | ||
"jnz _work_loop_sqrt_sd;" | ||
void Footprint::restore_affinity() | ||
{ | ||
if (restore_affinity_) | ||
{ | ||
auto err = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set_old_); | ||
if (err) | ||
{ | ||
Log::error() << "failed to restore thread affinity: " << strerror(errno); | ||
} | ||
} | ||
} | ||
|
||
void Footprint::run() | ||
{ | ||
check_affinity(); | ||
|
||
"mov %%r13,%%rcx;" // restore length | ||
: "=a"(a), "=b"(b), "=c"(c), "=d"(d) | ||
: "a"(addr), "b"(passes), "c"(length) | ||
: "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "xmm0", | ||
"xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", | ||
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15" | ||
recording_.resize(0); | ||
recording_.reserve(4096); | ||
|
||
); | ||
ret = c; | ||
constexpr int n = 6; | ||
constexpr auto time_quantum = std::chrono::milliseconds(32); | ||
auto sequence = GroupedBinaryMSequence(n); | ||
|
||
return ret; | ||
time_begin_ = low(std::chrono::seconds(3)); | ||
time_end_ = time_begin_; | ||
auto deadline = time_begin_; | ||
while (auto elem = sequence.take()) | ||
{ | ||
auto [is_high, length] = *elem; | ||
auto duration = time_quantum * length; | ||
deadline += duration; | ||
assert(deadline > time_end_); | ||
auto wait = deadline - time_end_; | ||
if (is_high) | ||
{ | ||
time_end_ = high(wait); | ||
} | ||
else | ||
{ | ||
time_end_ = low(wait); | ||
} | ||
} | ||
|
||
low(std::chrono::seconds(3)); | ||
|
||
restore_affinity(); | ||
} | ||
} // namespace timesync::footprint | ||
|
||
} // namespace timesync |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters