Skip to content

Commit

Permalink
Bug fixes for warp-synchronous execution
Browse files Browse the repository at this point in the history
  • Loading branch information
lightsighter committed Feb 5, 2015
1 parent f553eaa commit 385165b
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 47 deletions.
130 changes: 90 additions & 40 deletions src/instruction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -380,9 +380,27 @@ PTXInstruction* PTXLabel::emulate(Thread *thread)
return next;
}

void PTXLabel::update_labels(std::map<std::string,PTXInstruction*> &labels)
// Warp-synchronous handling of a label. Scans every lane of the warp and
// re-enables any thread that is currently disabled with this label recorded
// as its resume point. The threads, shared_access_id and store arguments are
// not used by this override. Returns the instruction stored in `next`.
PTXInstruction* PTXLabel::emulate_warp(Thread **threads,
ThreadState *thread_state,
int &shared_access_id,
SharedStore &store)
{
std::map<std::string,PTXInstruction*>::const_iterator finder =
// Always check for convergence at the start of basic blocks
for (int i = 0; i < WARP_SIZE; i++)
{
// Only threads that are disabled AND waiting on this exact label qualify
if ((thread_state[i].status == THREAD_DISABLED) &&
(thread_state[i].next == this))
{
// Re-enable the thread and clear its pending resume point
thread_state[i].status = THREAD_ENABLED;
thread_state[i].next = NULL;
}
}
return next;
}

void PTXLabel::update_labels(std::map<std::string,PTXLabel*> &labels)
{
std::map<std::string,PTXLabel*>::const_iterator finder =
labels.find(label);
assert(finder == labels.end());
labels[label] = this;
Expand Down Expand Up @@ -537,18 +555,21 @@ PTXInstruction* PTXBranch::emulate_warp(Thread **threads,
thread_state[i].status = THREAD_ENABLED;
thread_state[i].next = NULL;
}
else // Disable all threads not going to next
else if (thread_state[i].status == THREAD_ENABLED)
{
// Disable all threads not going to next that
// weren't already disabled to begin with
assert(targets[i] == target);
thread_state[i].status = THREAD_DISABLED;
thread_state[i].next = targets[i];
thread_state[i].next = target;
}
}
return next;
}

void PTXBranch::set_targets(const std::map<std::string,PTXInstruction*> &labels)
void PTXBranch::set_targets(const std::map<std::string,PTXLabel*> &labels)
{
std::map<std::string,PTXInstruction*>::const_iterator finder =
std::map<std::string,PTXLabel*>::const_iterator finder =
labels.find(label);
assert(finder != labels.end());
assert(target == NULL);
Expand Down Expand Up @@ -1605,8 +1626,20 @@ PTXInstruction* PTXBarrier::emulate_warp(Thread **threads,
// In warp-synchronous execution, if any thread in a warp arrives
// at a barrier, then it is like all of the threads in a warp arrived
// regardless of whether the thread is enabled, disabled, or exited
bool one_enabled = false;
for (int i = 0; i < WARP_SIZE; i++)
emulate(threads[i]);
{
if (thread_state[i].status == THREAD_ENABLED)
{
one_enabled = true;
break;
}
}
if (one_enabled)
{
for (int i = 0; i < WARP_SIZE; i++)
emulate(threads[i]);
}
return next;
}

Expand Down Expand Up @@ -1660,9 +1693,9 @@ bool PTXBarrier::interpret(const std::string &line, int line_num,
}

PTXSharedAccess::PTXSharedAccess(int64_t ad, int64_t o, bool w,
int64_t ag, bool imm, int line_num)
bool has, int64_t ag, bool imm, int line_num)
: PTXInstruction(PTX_SHARED_ACCESS, line_num),
addr(ad), offset(o), arg(ag), write(w), immediate(imm)
addr(ad), offset(o), arg(ag), write(w), has_arg(has), immediate(imm)
{
}

Expand Down Expand Up @@ -1694,6 +1727,8 @@ PTXInstruction* PTXSharedAccess::emulate_warp(Thread **threads,
{
for (int i = 0; i < WARP_SIZE; i++)
{
if (thread_state[i].status != THREAD_ENABLED)
continue;
int64_t addr_value;
if (!threads[i]->get_value(addr, addr_value))
continue;
Expand All @@ -1702,20 +1737,25 @@ PTXInstruction* PTXSharedAccess::emulate_warp(Thread **threads,
new SharedWrite(address, this, threads[i], shared_access_id);
threads[i]->add_instruction(instruction);
threads[i]->update_shared_memory(instruction);
if (!immediate)
if (has_arg)
{
int64_t value;
if (threads[i]->get_value(arg, value))
store.write(address, value);
if (!immediate)
{
int64_t value;
if (threads[i]->get_value(arg, value))
store.write(address, value);
}
else
store.write(address, arg);
}
else
store.write(address, arg);
}
}
else
{
for (int i = 0; i < WARP_SIZE; i++)
{
if (thread_state[i].status != THREAD_ENABLED)
continue;
int64_t addr_value;
if (!threads[i]->get_value(addr, addr_value))
continue;
Expand All @@ -1724,10 +1764,13 @@ PTXInstruction* PTXSharedAccess::emulate_warp(Thread **threads,
new SharedRead(address, this, threads[i], shared_access_id);
threads[i]->add_instruction(instruction);
threads[i]->update_shared_memory(instruction);
assert(!immediate);
int64_t value;
if (store.read(address, value))
threads[i]->set_value(arg, value);
if (has_arg)
{
assert(!immediate);
int64_t value;
if (store.read(address, value))
threads[i]->set_value(arg, value);
}
}
}
// Increment the shared_access_id
Expand All @@ -1742,35 +1785,40 @@ bool PTXSharedAccess::interpret(const std::string &line, int line_num,
if ((line.find(".shared.") != std::string::npos) &&
(line.find(".align.") == std::string::npos))
{
std::vector<std::string> tokens;
split(tokens, line.c_str());
assert(tokens.size() == 3);
bool write = (tokens[0].find("st.") != std::string::npos);

bool write = (line.find("st.") != std::string::npos);
int64_t offset = 0;
int start_reg = line.find("[") + 1;
int end_reg = line.find("+")+1;
int64_t addr = parse_register(line.substr(start_reg));
if (end_reg != (int) std::string::npos)
offset = parse_immediate(line.substr(end_reg));
bool immediate;
int64_t arg;
if (write)
{
immediate = (tokens[2].find("%") == std::string::npos);
if (immediate)
arg = parse_immediate(tokens[2]);
else
arg = parse_register(tokens[2]);
}
else
std::vector<std::string> tokens;
split(tokens, line.c_str());
bool has_arg = false;
bool immediate = false;
int64_t arg = 0;
if (tokens.size() == 3)
{
immediate = (tokens[1].find("%") == std::string::npos);
if (immediate)
arg = parse_immediate(tokens[1]);
has_arg = true;
if (write)
{
immediate = (tokens[2].find("%") == std::string::npos);
if (immediate)
arg = parse_immediate(tokens[2]);
else
arg = parse_register(tokens[2]);
}
else
arg = parse_register(tokens[1]);
{
immediate = (tokens[1].find("%") == std::string::npos);
if (immediate)
arg = parse_immediate(tokens[1]);
else
arg = parse_register(tokens[1]);
}
}
result = new PTXSharedAccess(addr, offset, write,
result = new PTXSharedAccess(addr, offset, write, has_arg,
arg, immediate, line_num);
return true;
}
Expand Down Expand Up @@ -2153,7 +2201,9 @@ PTXInstruction* PTXExit::emulate_warp(Thread **threads,
bool PTXExit::interpret(const std::string &line, int line_num,
PTXInstruction *&result)
{
if (line.find("exit") != std::string::npos)
// We'll model both return and exit the same
if ((line.find("exit") != std::string::npos) ||
(line.find("ret") != std::string::npos))
{
std::vector<std::string> tokens;
split(tokens, line.c_str());
Expand Down
15 changes: 10 additions & 5 deletions src/instruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,12 +133,17 @@ class PTXLabel: public PTXInstruction {
PTXLabel& operator=(const PTXLabel &rhs) { assert(false); return *this; }
public:
virtual PTXInstruction* emulate(Thread *thread);
// Override for warp-synchronous execution
virtual PTXInstruction* emulate_warp(Thread **threads,
ThreadState *thread_state,
int &shared_access_id,
SharedStore &store);
public:
virtual bool is_label(void) const { return true; }
public:
virtual PTXLabel* as_label(void) { return this; }
public:
void update_labels(std::map<std::string,PTXInstruction*> &labels);
void update_labels(std::map<std::string,PTXLabel*> &labels);
protected:
std::string label;
public:
Expand Down Expand Up @@ -166,12 +171,12 @@ class PTXBranch : public PTXInstruction {
public:
virtual PTXBranch* as_branch(void) { return this; }
public:
void set_targets(const std::map<std::string,PTXInstruction*> &labels);
void set_targets(const std::map<std::string,PTXLabel*> &labels);
protected:
int64_t predicate;
bool negate;
std::string label;
PTXInstruction *target;
PTXLabel *target;
public:
static bool interpret(const std::string &line, int line_num,
PTXInstruction *&result);
Expand Down Expand Up @@ -485,7 +490,7 @@ class PTXBarrier : public PTXInstruction {
class PTXSharedAccess : public PTXInstruction {
public:
PTXSharedAccess(int64_t addr, int64_t offset, bool write,
int64_t arg, bool immediate, int line_num);
bool has_arg, int64_t arg, bool immediate, int line_num);
PTXSharedAccess(const PTXSharedAccess &rhs) { assert(false); }
virtual ~PTXSharedAccess(void) { }
public:
Expand All @@ -500,7 +505,7 @@ class PTXSharedAccess : public PTXInstruction {
SharedStore &store);
protected:
int64_t addr, offset, arg;
bool write, immediate;
bool write, has_arg, immediate;
public:
static bool interpret(const std::string &line, int line_num,
PTXInstruction *&result);
Expand Down
2 changes: 1 addition & 1 deletion src/program.cc
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ void Program::convert_to_instructions(int max_num_threads,
{
// Make a first pass and create all the instructions
// Track all the basic block program counters
std::map<std::string,PTXInstruction*> labels;
std::map<std::string,PTXLabel*> labels;
PTXInstruction *previous = NULL;
for (std::vector<std::pair<std::string,int> >::const_iterator it =
lines.begin(); it != lines.end(); it++)
Expand Down
3 changes: 2 additions & 1 deletion src/program.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ enum ThreadStatus {
class Weft;
class Thread;
class Happens;
class PTXLabel;
class WeftAccess;
class SharedMemory;
class PTXInstruction;
Expand All @@ -44,7 +45,7 @@ struct ThreadState {
: status(THREAD_ENABLED), next(NULL) { }
public:
ThreadStatus status;
PTXInstruction *next;
PTXLabel *next;
};

class Program {
Expand Down

0 comments on commit 385165b

Please sign in to comment.