Skip to content

[analysis] Simplify core analysis code #6034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Oct 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/analysis/cfg.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,8 @@ struct CFG {
reverse_iterator rbegin() const { return blocks.rbegin(); }
reverse_iterator rend() const { return blocks.rend(); }

const BasicBlock& operator[](size_t i) const { return *(begin() + i); }

static CFG fromFunction(Function* func);

void print(std::ostream& os, Module* wasm = nullptr) const;
Expand Down
5 changes: 3 additions & 2 deletions src/analysis/lattice.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#if __cplusplus >= 202002L
#include <concepts>
#endif
#endif // __cplusplus >= 202002L

namespace wasm::analysis {

Expand All @@ -45,6 +45,7 @@ concept Lattice = requires(const L& lattice,
typename L::Element& elem) {
// Lattices must have elements.
typename L::Element;
requires std::copyable<typename L::Element>;
// We need to be able to get the bottom element.
{ lattice.getBottom() } noexcept -> std::same_as<typename L::Element>;
// Elements should be comparable. TODO: use <=> and std::three_way_comparable
Expand All @@ -57,7 +58,7 @@ concept Lattice = requires(const L& lattice,
{ elem.makeLeastUpperBound(constElem) } noexcept -> std::same_as<bool>;
};

#else
#else // __cplusplus >= 202002L

#define Lattice typename

Expand Down
12 changes: 5 additions & 7 deletions src/analysis/liveness-transfer-function.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,23 +27,21 @@ struct LivenessTransferFunction
// to be passed in, where the temp copy is modified in place to produce the
// intermediate states.
void print(std::ostream& os,
const BasicBlock* cfgBlock,
const BasicBlock& bb,
FiniteIntPowersetLattice::Element& inputState) {
os << "Intermediate States (reverse order): " << std::endl;
currState = &inputState;
currState->print(os);
os << std::endl;
auto cfgIter = cfgBlock->rbegin();

// Since we don't store the intermediate states, we need to re-run the
// transfer function on all the CFG node expressions to reconstruct
// the intermediate states here.
while (cfgIter != cfgBlock->rend()) {
os << ShallowExpression{*cfgIter} << std::endl;
visit(*cfgIter);
for (auto it = bb.rbegin(); it != bb.rend(); ++it) {
os << ShallowExpression{*it} << "\n";
visit(*it);
currState->print(os);
os << std::endl;
++cfgIter;
os << "\n";
}
currState = nullptr;
}
Expand Down
99 changes: 40 additions & 59 deletions src/analysis/monotone-analyzer-impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,89 +5,61 @@
#include <unordered_map>

#include "monotone-analyzer.h"
#include "support/unique_deferring_queue.h"

namespace wasm::analysis {

// All states are set to the bottom lattice element using the lattice in this
// constructor.
template<Lattice L>
inline BlockState<L>::BlockState(const BasicBlock* underlyingBlock, L& lattice)
: cfgBlock(underlyingBlock), inputState(lattice.getBottom()) {}

// Prints out inforamtion about a CFG node's state, but not intermediate states.
template<Lattice L> inline void BlockState<L>::print(std::ostream& os) {
os << "CFG Block: " << cfgBlock->getIndex() << std::endl;
os << "Input State: ";
inputState.print(os);
os << std::endl << "Predecessors:";
for (auto pred : cfgBlock->preds()) {
os << " " << pred.getIndex();
}
os << std::endl << "Successors:";
for (auto succ : cfgBlock->succs()) {
os << " " << succ.getIndex();
}
os << std::endl;
}

template<Lattice L, TransferFunction TxFn>
inline MonotoneCFGAnalyzer<L, TxFn>::MonotoneCFGAnalyzer(L& lattice,
TxFn& txfn,
CFG& cfg)
: lattice(lattice), txfn(txfn), cfg(cfg) {

// Construct BlockStates for each BasicBlock.
for (auto it = cfg.begin(); it != cfg.end(); it++) {
stateBlocks.emplace_back(&(*it), lattice);
}
}
: lattice(lattice), txfn(txfn), cfg(cfg),
states(cfg.size(), lattice.getBottom()) {}

template<Lattice L, TransferFunction TxFn>
inline void
MonotoneCFGAnalyzer<L, TxFn>::evaluateFunctionEntry(Function* func) {
txfn.evaluateFunctionEntry(func, stateBlocks[0].inputState);
txfn.evaluateFunctionEntry(func, states[0]);
}

template<Lattice L, TransferFunction TxFn>
inline void MonotoneCFGAnalyzer<L, TxFn>::evaluate() {
std::queue<const BasicBlock*> worklist;
UniqueDeferredQueue<Index> worklist;

// Transfer function enqueues the work in some order which is efficient.
txfn.enqueueWorklist(cfg, worklist);
// Start with all blocks on the work list. TODO: optimize the iteration order
// using e.g. strongly-connected components.
for (Index i = 0; i < cfg.size(); ++i) {
worklist.push(i);
}

while (!worklist.empty()) {
BlockState<L>& currBlockState = stateBlocks[worklist.front()->getIndex()];
worklist.pop();

// For each expression, applies the transfer function, using the expression,
// on the state of the expression it depends upon (here the next expression)
// to arrive at the expression's state. The beginning and end states of the
// CFG block will be updated.
typename L::Element outputState = currBlockState.inputState;
txfn.transfer(currBlockState.cfgBlock, outputState);

// Propagate state to dependents of currBlockState.
for (auto& dep : txfn.getDependents(currBlockState.cfgBlock)) {
// If we need to change the input state of a dependent, we need
// to enqueue the dependent to recalculate it.
if (stateBlocks[dep.getIndex()].inputState.makeLeastUpperBound(
outputState)) {
worklist.push(&dep);
// The index of the block we will analyze.
Index i = worklist.pop();

// Apply the transfer function to the input state to compute the output
// state for the block.
auto state = states[i];
txfn.transfer(cfg[i], state);

// Propagate state to the dependent blocks.
for (auto& dep : txfn.getDependents(cfg[i])) {
// If the input state for the dependent block changes, we need to
// re-analyze it.
if (states[dep.getIndex()].makeLeastUpperBound(state)) {
worklist.push(dep.getIndex());
}
}
}
}

template<Lattice L, TransferFunction TxFn>
inline void MonotoneCFGAnalyzer<L, TxFn>::collectResults() {
for (BlockState currBlockState : stateBlocks) {
typename L::Element inputStateCopy = currBlockState.inputState;

for (Index i = 0, size = cfg.size(); i < size; ++i) {
// The transfer function generates the final set of states and uses it to
// produce useful information. For example, in reaching definitions
// analysis, these final states are used to populate a mapping of
// local.get's to a set of local.set's that affect its value.
txfn.collectResults(currBlockState.cfgBlock, inputStateCopy);
txfn.collectResults(cfg[i], states[i]);
}
}

Expand All @@ -96,12 +68,21 @@ inline void MonotoneCFGAnalyzer<L, TxFn>::collectResults() {
template<Lattice L, TransferFunction TxFn>
inline void MonotoneCFGAnalyzer<L, TxFn>::print(std::ostream& os) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you explain why this print function instead of a << overload? Does it have to do with Concepts?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No particular reason. The original author probably thought print was a nicer interface than <<, and that seems like a reasonable opinion to me. A << overload would have worked just as well. Print methods generally have a slight advantage that they are easier to call from a debugger, although I'm not sure whether it's easy to get the ostream to pass into this one from the debugger. toString methods are generally the easiest for debugging.

os << "CFG Analyzer" << std::endl;
for (auto state : stateBlocks) {
state.print(os);
typename L::Element temp = state.inputState;
txfn.print(os, state.cfgBlock, temp);
for (Index i = 0, size = cfg.size(); i < size; ++i) {
os << "CFG Block: " << cfg[i].getIndex() << std::endl;
os << "Input State: ";
states[i].print(os);
for (auto& pred : cfg[i].preds()) {
os << " " << pred.getIndex();
}
os << std::endl << "Successors:";
for (auto& succ : cfg[i].succs()) {
os << " " << succ.getIndex();
}
os << "\n";
txfn.print(os, cfg[i], states[i]);
}
os << "End" << std::endl;
os << "End\n";
}

} // namespace wasm::analysis
Expand Down
22 changes: 5 additions & 17 deletions src/analysis/monotone-analyzer.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,27 +11,15 @@

namespace wasm::analysis {

// A node which contains all the lattice states for a given CFG node.
template<Lattice L> struct BlockState {

// CFG node corresponding to this state block.
const BasicBlock* cfgBlock;
// State at which the analysis flow starts for a CFG. For instance, the ending
// state for backward analysis, or the beginning state for forward analysis.
typename L::Element inputState;

// All states are set to the bottom lattice element in this constructor.
BlockState(const BasicBlock* underlyingBlock, L& lattice);

// Prints out BlockState information, but not any intermediate states.
void print(std::ostream& os);
};

template<Lattice L, TransferFunction TxFn> class MonotoneCFGAnalyzer {
using Element = typename L::Element;

L& lattice;
TxFn& txfn;
CFG& cfg;
std::vector<BlockState<L>> stateBlocks;

// The lattice element representing the program state before each block.
std::vector<Element> states;

public:
// Will constuct BlockState objects corresponding to BasicBlocks from the
Expand Down
24 changes: 7 additions & 17 deletions src/analysis/transfer-function.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,50 +18,40 @@
#define wasm_analysis_transfer_function_h

#if __cplusplus >= 202002L

#include <concepts>
#include <iterator>
#include <ranges>
#endif

#include <queue>

#include "cfg.h"
#include "lattice.h"
#include "support/unique_deferring_queue.h"

namespace wasm::analysis {

#if __cplusplus >= 202002L

template<typename T>
concept BasicBlockInputRange =
std::ranges::input_range<T> &&
std::is_same<std::ranges::range_value_t<T>, BasicBlock>::value;

template<typename TxFn, typename L>
concept TransferFunctionImpl = requires(TxFn& txfn,
const CFG& cfg,
BasicBlock* bb,
typename L::Element& elem,
std::queue<const BasicBlock*>& bbq) {
concept TransferFunctionImpl = requires(
TxFn& txfn, const CFG& cfg, const BasicBlock& bb, typename L::Element& elem) {
// Apply the transfer function to update a lattice element with information
// from a basic block.
{ txfn.transfer(bb, elem) } noexcept -> std::same_as<void>;
// Initializes the worklist of basic blocks, which can affect performance
// depending on the direction of the analysis. TODO: Unlock performance
// benefits while exposing fewer implementation details.
{ txfn.enqueueWorklist(cfg, bbq) } noexcept -> std::same_as<void>;
// Get a range over the basic blocks that depend on the given block.
{ txfn.getDependents(bb) } noexcept -> BasicBlockInputRange;
};

#define TransferFunction TransferFunctionImpl<L>

#else
} // namespace wasm::analysis

#else // __cplusplus >= 202002L

#define TransferFunction typename

#endif // __cplusplus >= 202002L

} // namespace wasm::analysis

#endif // wasm_analysis_transfer_function_h
49 changes: 13 additions & 36 deletions src/analysis/visitor-transfer-function.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

#include "cfg.h"
#include "lattice.h"
#include "support/unique_deferring_queue.h"
#include "wasm-traversal.h"

namespace wasm::analysis {
Expand Down Expand Up @@ -36,71 +37,47 @@ struct VisitorTransferFunc : public Visitor<SubType> {
// Returns an iterable to all the BasicBlocks which depend on currBlock for
// information.
BasicBlock::BasicBlockIterable
getDependents(const BasicBlock* currBlock) noexcept {
getDependents(const BasicBlock& currBlock) noexcept {
if constexpr (Direction == AnalysisDirection::Backward) {
return currBlock->preds();
return currBlock.preds();
} else {
return currBlock->succs();
return currBlock.succs();
}
}

// Executes the transfer function on all the expressions of the corresponding
// CFG node, starting with the node's input state, and changes the input state
// to the final output state of the node in place.
void transfer(const BasicBlock* cfgBlock,
void transfer(const BasicBlock& bb,
typename L::Element& inputState) noexcept {
// If the block is empty, we propagate the state by inputState =
// outputState.

currState = &inputState;
if constexpr (Direction == AnalysisDirection::Backward) {
for (auto cfgIter = cfgBlock->rbegin(); cfgIter != cfgBlock->rend();
++cfgIter) {
for (auto cfgIter = bb.rbegin(); cfgIter != bb.rend(); ++cfgIter) {
static_cast<SubType*>(this)->visit(*cfgIter);
}
} else {
for (auto cfgIter = cfgBlock->begin(); cfgIter != cfgBlock->end();
++cfgIter) {
static_cast<SubType*>(this)->visit(*cfgIter);
for (auto* inst : bb) {
static_cast<SubType*>(this)->visit(inst);
}
}
currState = nullptr;
}

// Enqueues the worklist before the worklist algorithm is run. We want to
// evaluate the blocks in an order matching the "flow" of the analysis to
// reduce the number of state propagations needed. Thus, for a forward
// analysis, we push all the blocks in order, while for backward analysis, we
// push them in reverse order, so that later blocks are evaluated before
// earlier ones.
void enqueueWorklist(const CFG& cfg,
std::queue<const BasicBlock*>& worklist) noexcept {
if constexpr (Direction == AnalysisDirection::Backward) {
for (auto it = cfg.rbegin(); it != cfg.rend(); ++it) {
worklist.push(&(*it));
}
} else {
for (auto it = cfg.begin(); it != cfg.end(); ++it) {
worklist.push(&(*it));
}
}
}

// This is for collecting results after solving an analysis. Implemented in
// the same way as transfer(), but we also set the collectingResults flag.
void collectResults(const BasicBlock* cfgBlock,
typename L::Element& inputState) {
void collectResults(const BasicBlock& bb, typename L::Element& inputState) {
collectingResults = true;
currState = &inputState;
if constexpr (Direction == AnalysisDirection::Backward) {
for (auto cfgIter = cfgBlock->rbegin(); cfgIter != cfgBlock->rend();
++cfgIter) {
static_cast<SubType*>(this)->visit(*cfgIter);
for (auto it = bb.rbegin(); it != bb.rend(); ++it) {
static_cast<SubType*>(this)->visit(*it);
}
} else {
for (auto cfgIter = cfgBlock->begin(); cfgIter != cfgBlock->end();
++cfgIter) {
static_cast<SubType*>(this)->visit(*cfgIter);
for (auto it = bb.begin(); it != bb.end(); ++it) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you using the .begin() .end() iterator pattern for symmetry with the If-true block above? Just wondering why not the range-based loop that's used on line 263 in src/tools/wasm-fuzz-lattices.cpp?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's for symmetry. You're right that a range-based for loop would have worked here as well.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A range-based loop would be better imo, even if it looks more different than the other loop above. I see that as a benefit actually: the difference is more obvious, and separately this one becomes easier to read.

static_cast<SubType*>(this)->visit(*it);
}
}
currState = nullptr;
Expand Down
Loading