Skip to content

Commit

Permalink
m sequence?
Browse files Browse the repository at this point in the history
  • Loading branch information
tilsche committed Apr 15, 2019
1 parent 0512ae6 commit f7051b1
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 167 deletions.
175 changes: 89 additions & 86 deletions src/timesync/footprint.cpp
Original file line number Diff line number Diff line change
@@ -1,102 +1,105 @@
#include <chrono>
#include <vector>

#include "footprint.hpp"
#include "msequence.hpp"
#include <cassert>

using Clock = std::chrono::system_clock;

namespace timesync
{
uint64_t sqrtsd_loop_(double* buffer, uint64_t elems, uint64_t repeat)
// Busy-work routine: accumulates the sum of element-wise products of
// compute_vec_a_ and compute_vec_b_ over the first compute_size elements,
// repeating the whole pass compute_rep times. The result is never returned;
// it is only compared against a sentinel so the loops have an observable use.
void Footprint::high()
{
// NOTE(review): the next four lines refer to `elems` and `buffer`, which are
// not declared in this function; they look like leftovers of the removed
// sqrtsd_loop_ body in this diff view -- confirm before relying on them.
unsigned long long passes, length, addr;
unsigned long long a, b, c, d;
uint64_t ret = 0;
assert(elems >= 256 / sizeof(*buffer));
// Running sum of products across all repetitions.
double m = 0.0;
for (std::size_t r = 0; r < compute_rep; r++)
{
for (size_t i = 0; i < compute_size; i++)
{
m += compute_vec_a_[i] * compute_vec_b_[i];
}
}
// Consume the accumulated value so the compiler has to keep the loops above.
if (m == 42.0)
{
// prevent optimization, sure there is an easier way
__asm__ __volatile__("mfence;" :::);
}
}

passes = elems / 64; // 32 128-Bit accesses in inner loop
length = passes * 32 * repeat;
addr = (unsigned long long)buffer;
// Light-work routine: issues the `rep; nop` instruction nop_rep times.
// The "memory" clobber keeps the compiler from reordering memory accesses
// across each iteration or dropping the statement.
void Footprint::low()
{
    uint64_t issued = 0;
    while (issued < nop_rep)
    {
        asm volatile("rep; nop" ::: "memory");
        ++issued;
    }
}

if (!passes)
return ret;
/*
* Input: RAX: addr (pointer to the buffer)
* RBX: passes (number of iterations)
* RCX: length (total number of accesses)
*/
__asm__ __volatile__("mfence;"
"mov %%rax,%%r9;" // addr
"mov %%rbx,%%r10;" // passes
"mov %%rcx,%%r15;" // length
"mov %%r9,%%r14;" // store addr
"mov %%r10,%%r8;" // store passes
"mov %%r15,%%r13;" // store length
// Saves the calling thread's current CPU affinity mask into cpu_set_old_ and
// then restricts the thread to CPU 0. restore_affinity_ is set to true only
// when both system calls succeed; on any failure an error is logged and the
// function returns early, leaving restore_affinity_ untouched.
void Footprint::check_affinity()
{
// Start from an empty mask before asking the kernel to fill it in.
CPU_ZERO(&cpu_set_old_);
auto err = sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_old_);
if (err)
{
Log::error() << "failed to get thread affinity: " << strerror(errno);
return;
}

// NOTE(review): the string-literal lines below look like residue of the
// removed inline-assembly loop in this diff view rather than part of this
// function -- confirm against the actual file.
// initialize registers
"movapd 0(%%r9), %%xmm0;"
"movapd 0(%%r9), %%xmm8;"
"movapd 16(%%r9), %%xmm9;"
"movapd 32(%%r9), %%xmm10;"
"movapd 48(%%r9), %%xmm11;"
"movapd 64(%%r9), %%xmm12;"
"movapd 80(%%r9), %%xmm13;"
"movapd 96(%%r9), %%xmm14;"
"movapd 112(%%r9), %%xmm15;"
// Build a mask that contains CPU 0 only and apply it to the calling thread.
cpu_set_t cpu_set_target;
CPU_ZERO(&cpu_set_target);
CPU_SET(0, &cpu_set_target);
err = sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set_target);
if (err)
{
Log::error() << "failed to set thread affinity: " << strerror(errno);
return;
}
// The affinity was changed, so restore_affinity() has something to undo.
restore_affinity_ = true;
}

".align 64;"
"_work_loop_sqrt_sd:"
"sqrtsd %%xmm8, %%xmm0;"
"sqrtsd %%xmm9, %%xmm0;"
"sqrtsd %%xmm10, %%xmm0;"
"sqrtsd %%xmm11, %%xmm0;"
"sqrtsd %%xmm12, %%xmm0;"
"sqrtsd %%xmm13, %%xmm0;"
"sqrtsd %%xmm14, %%xmm0;"
"sqrtsd %%xmm15, %%xmm0;"
"sqrtsd %%xmm8, %%xmm0;"
"sqrtsd %%xmm9, %%xmm0;"
"sqrtsd %%xmm10, %%xmm0;"
"sqrtsd %%xmm11, %%xmm0;"
"sqrtsd %%xmm12, %%xmm0;"
"sqrtsd %%xmm13, %%xmm0;"
"sqrtsd %%xmm14, %%xmm0;"
"sqrtsd %%xmm15, %%xmm0;"
"sqrtsd %%xmm8, %%xmm0;"
"sqrtsd %%xmm9, %%xmm0;"
"sqrtsd %%xmm10, %%xmm0;"
"sqrtsd %%xmm11, %%xmm0;"
"sqrtsd %%xmm12, %%xmm0;"
"sqrtsd %%xmm13, %%xmm0;"
"sqrtsd %%xmm14, %%xmm0;"
"sqrtsd %%xmm15, %%xmm0;"
"sqrtsd %%xmm8, %%xmm0;"
"sqrtsd %%xmm9, %%xmm0;"
"sqrtsd %%xmm10, %%xmm0;"
"sqrtsd %%xmm11, %%xmm0;"
"sqrtsd %%xmm12, %%xmm0;"
"sqrtsd %%xmm13, %%xmm0;"
"sqrtsd %%xmm14, %%xmm0;"
"sqrtsd %%xmm15, %%xmm0;"
"add $512,%%r9;"
"sub $1,%%r10;"
"jnz _skip_reset_sqrt_sd;" // reset buffer if the end is reached
"mov %%r14,%%r9;" // restore addr
"mov %%r8,%%r10;" // restore passes
"_skip_reset_sqrt_sd:"
"sub $32,%%r15;"
"jnz _work_loop_sqrt_sd;"
// Re-applies the affinity mask stored in cpu_set_old_ to the calling thread,
// but only if restore_affinity_ is set. A failing system call is logged and
// otherwise ignored.
void Footprint::restore_affinity()
{
    if (!restore_affinity_)
    {
        return;
    }

    if (sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set_old_) != 0)
    {
        Log::error() << "failed to restore thread affinity: " << strerror(errno);
    }
}

// Emits the synchronization pattern: a 3 s low() phase, then one low/high
// phase per element taken from a GroupedBinaryMSequence(6), then another 3 s
// low() phase. The thread affinity is changed via check_affinity() for the
// duration and put back with restore_affinity() at the end.
void Footprint::run()
{
check_affinity();

// NOTE(review): the string literal and constraint lists below look like
// residue of the removed inline-assembly statement in this diff view, not
// part of run() -- confirm against the actual file.
"mov %%r13,%%rcx;" // restore length
: "=a"(a), "=b"(b), "=c"(c), "=d"(d)
: "a"(addr), "b"(passes), "c"(length)
: "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", "xmm0",
"xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9",
"xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
// Drop any earlier recording and pre-allocate room for 4096 entries.
recording_.resize(0);
recording_.reserve(4096);

// NOTE(review): `);` and `ret = c;` also appear to be diff residue.
);
ret = c;
// Argument passed to GroupedBinaryMSequence, and the duration that one
// unit of an element's `length` stands for.
constexpr int n = 6;
constexpr auto time_quantum = std::chrono::milliseconds(32);
auto sequence = GroupedBinaryMSequence(n);

// NOTE(review): `return ret;` appears to be diff residue as well.
return ret;
// Leading low phase; its return value is stored as the start of the pattern.
// Presumably low(duration)/high(duration) return the time point at which the
// phase ended -- confirm against their definitions.
time_begin_ = low(std::chrono::seconds(3));
time_end_ = time_begin_;
// Deadlines accumulate from time_begin_, and each phase is asked to run for
// `deadline - time_end_`, i.e. relative to the value the previous phase returned.
auto deadline = time_begin_;
while (auto elem = sequence.take())
{
auto [is_high, length] = *elem;
auto duration = time_quantum * length;
deadline += duration;
// The previous phase must not have run past the new deadline.
assert(deadline > time_end_);
auto wait = deadline - time_end_;
if (is_high)
{
time_end_ = high(wait);
}
else
{
time_end_ = low(wait);
}
}

// Trailing low phase; its return value is not stored.
low(std::chrono::seconds(3));

restore_affinity();
}
} // namespace timesync::footprint

} // namespace timesync
91 changes: 11 additions & 80 deletions src/timesync/footprint.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ uint64_t sqrtsd_loop_(double* buffer, uint64_t elems, uint64_t repeat);
class Footprint
{
public:
Footprint() : a(size, 1.0), b(size, 2.0)
Footprint() : compute_vec_a_(compute_size, 1.0), compute_vec_b_(compute_size, 2.0)
{
Log::info() << "staring synchronization pattern";
run();
Expand Down Expand Up @@ -53,24 +53,8 @@ class Footprint

private:
protected:
// Runs sqrtsd_loop_ over the storage of vector `a` with a repeat count of 256.
// The value returned by sqrtsd_loop_ is discarded.
void low()
{
    constexpr uint64_t repeat_count = 256;
    sqrtsd_loop_(a.data(), a.size(), repeat_count);
}

// Accumulates the sum of element-wise products of vectors `a` and `b` over
// a.size() elements. The sum is only compared against a sentinel so that the
// loop has an observable use.
void high()
{
    const size_t count = a.size();
    double acc = 0.0;
    for (size_t idx = 0; idx != count; ++idx)
    {
        acc += a[idx] * b[idx];
    }

    if (acc == 42.0)
    {
        // prevent optimization, sure there is an easier way
        __asm__ __volatile__("mfence;" :::);
    }
}
void low();
void high();

template <typename DURATION>
auto low(DURATION duration)
Expand Down Expand Up @@ -113,73 +97,20 @@ class Footprint
}
}

// Emits a fixed synchronization pattern: a 3 s low phase, a hand-written
// alternation of low/high phases with fixed durations, and a closing 3 s low
// phase. Thread affinity is changed via check_affinity() for the duration
// and put back with restore_affinity() at the end.
void run()
{
    using std::chrono::milliseconds;
    using std::chrono::seconds;

    check_affinity();

    // Drop any earlier recording and pre-allocate room for 12 entries.
    recording_.resize(0);
    recording_.reserve(12);

    // Leading low phase; its return value is stored as time_begin_.
    time_begin_ = low(seconds(3));

    low(seconds(1));
    high(milliseconds(419));
    low(milliseconds(283));
    high(milliseconds(179));
    low(milliseconds(73));
    high(milliseconds(31));
    low(milliseconds(127));
    high(milliseconds(233));
    low(milliseconds(353));
    high(milliseconds(467));
    // Last phase of the pattern; its return value is stored as time_end_.
    time_end_ = low(seconds(1));

    // Trailing low phase; its return value is not stored.
    low(seconds(3));

    restore_affinity();
}

// Saves the calling thread's current CPU affinity mask into cpu_set_old_ and
// then restricts the thread to CPU 0. restore_affinity_ becomes true only
// when both system calls succeed; a failure is logged and ends the function
// early.
void check_affinity()
{
    // Remember the mask currently in effect so restore_affinity() can re-apply it.
    CPU_ZERO(&cpu_set_old_);
    if (sched_getaffinity(0, sizeof(cpu_set_t), &cpu_set_old_) != 0)
    {
        Log::error() << "failed to get thread affinity: " << strerror(errno);
        return;
    }

    // Mask containing CPU 0 only.
    cpu_set_t pinned_set;
    CPU_ZERO(&pinned_set);
    CPU_SET(0, &pinned_set);
    if (sched_setaffinity(0, sizeof(cpu_set_t), &pinned_set) != 0)
    {
        Log::error() << "failed to set thread affinity: " << strerror(errno);
        return;
    }

    restore_affinity_ = true;
}
void run();

// Re-applies the affinity mask stored in cpu_set_old_ to the calling thread,
// but only if restore_affinity_ is set. A failing system call is logged and
// otherwise ignored.
void restore_affinity()
{
    if (!restore_affinity_)
    {
        return;
    }

    if (sched_setaffinity(0, sizeof(cpu_set_t), &cpu_set_old_) != 0)
    {
        Log::error() << "failed to restore thread affinity: " << strerror(errno);
    }
}
void check_affinity();
void restore_affinity();

private:
static constexpr std::size_t size = 2048;
static constexpr std::size_t compute_size = 256;
static constexpr std::size_t compute_rep = 58;
static constexpr std::size_t nop_rep = 209;
Clock::time_point time_begin_;
Clock::time_point time_end_;

std::vector<double> a;
std::vector<double> b;
std::vector<double> compute_vec_a_;
std::vector<double> compute_vec_b_;
std::vector<TimeValue> recording_;

bool restore_affinity_ = false;
Expand Down
10 changes: 10 additions & 0 deletions src/timesync/msequence.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#pragma once

#include <initializer_list>
#include <iostream>
#include <optional>
Expand Down Expand Up @@ -34,6 +36,14 @@ class BinaryMSequenceIter
return { 4, 1 };
case 5:
return { 5, 2 };
case 6:
return { 6, 1 };
case 7:
return { 7, 1 };
case 8:
return { 8, 6, 5, 1 };
case 9:
return { 9, 4 };
case 10:
return { 10, 3 };
case 11:
Expand Down
2 changes: 1 addition & 1 deletion src/timesync/timesync.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ class CCTimeSync

Shifter shifter(measured_signal.size(), tag);
auto result = shifter(footprint_signal, measured_signal);
Log::debug() << "completed timesync with correlation of " << result.second << " and "
Log::debug() << "completed timesync with correlation of " << result.second << " and offset "
<< result.first;
if (std::isnan(result.second) || result.second <= 0)
{
Expand Down

0 comments on commit f7051b1

Please sign in to comment.