Skip to content

Commit

Permalink
Issue 115 (avast#422)
Browse files Browse the repository at this point in the history
* capstone2llvmir: check preconditions in translation routines.

* capstone2llvmir: add all registers for all architectures.

* capstone2llvmir: fix problems with regression tests.

* capstone2llvmir/x86: remove initializeRegistersParentMapCommon().

* capstone2llvmir: refactor.

* bin2llvmir/decoder: catch capstone2llvmir errors and exit.

* capstone2llvmir: provide API to query pseudo asm functions.

* capstone2llvmir: only one implementation of translatePseudoAsmGeneric().

* capstone2llvmir/powerpc: handle some crazy bcctrl (bdzctrl) variant.

* capstone2llvmir/x86: create Kx registers.

* capstone2llvmir/ppc: modify comment.
  • Loading branch information
PeterMatula authored Oct 29, 2018
1 parent 68f74bf commit c2ed626
Show file tree
Hide file tree
Showing 29 changed files with 3,569 additions and 2,473 deletions.
2 changes: 0 additions & 2 deletions include/retdec/bin2llvmir/optimizations/decoder/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,6 @@ class Decoder : public llvm::ModulePass
/// label is not in jump table).
std::map<utils::Address, std::set<llvm::SwitchInst*>> _switchTableStarts;

std::map<llvm::CallInst*, llvm::Instruction*> _pseudoCalls;

// We create helper BBs (without name and address) to handle MIPS
// likely branches. For convenience, we map them to real BBs they will
// eventually jump to.
Expand Down
7 changes: 7 additions & 0 deletions include/retdec/bin2llvmir/providers/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,12 @@ class Config
llvm::CallInst* isLlvmX87StorePseudoFunctionCall(llvm::Value* c);
llvm::CallInst* isLlvmX87LoadPseudoFunctionCall(llvm::Value* c);

// Assembly pseudo-functions.
//
// addPseudoAsmFunction() records f in the set of known pseudo assembly
// functions; isPseudoAsmFunction() reports whether f is in that set.
// isPseudoAsmFunctionCall() yields the call instruction when c is a call
// whose called function is in that set.
// NOTE(review): behavior for values that are not call instructions should
// be confirmed against the implementation in config.cpp.
void addPseudoAsmFunction(llvm::Function* f);
bool isPseudoAsmFunction(llvm::Function* f);
llvm::CallInst* isPseudoAsmFunctionCall(llvm::Value* c);

// Other
//
llvm::GlobalVariable* getGlobalDummy();
Expand Down Expand Up @@ -200,6 +206,7 @@ class Config
llvm::Function* _x87TagLoadFunction = nullptr; // i2 (i3)

std::map<IntrinsicFunctionCreatorPtr, llvm::Function*> _intrinsicFunctions;
std::set<llvm::Function*> _pseudoAsmFunctions;
};

class ConfigProvider
Expand Down
24 changes: 24 additions & 0 deletions include/retdec/capstone2llvmir/arm/arm_defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,28 @@ enum arm_reg_cpsr_flags
ARM_REG_CPSR_V,
};

/**
* The problem: ARM uses two sets of registers (two enums), ordinary registers
* (enum arm_reg) and system registers (enum arm_sysreg).
* Most system registers have enum numbers greater than 256 - these numbers do
* not overlap with ordinary register numbers.
* But 8 registers from arm_sysreg overlap with ordinary registers.
* These are SPSR and CPSR related registers.
* We cannot use these Capstone enums, since they would collide in our maps with
* ordinary registers.
* Moreover, these 8 registers denote flag registers and can be OR combined.
* Therefore, if we wanted to capture their full semantics, we would either have
* to create registers for all combinations (e.g. C, X, S, F, CX, CS, CF, XS,
* XF, SF, CXS, ...) and use the appropriate variant depending on the asm
* instruction as Capstone/IDA does (e.g. msr cpsr_fc, r6), or simulate this on
* a single register using bit setting (i.e. and/or operations).
* Instead, we do not model the individual flags - we create only two registers
* (one for SPSR, one for CPSR) and use them every time any flag is modified.
*/
enum arm_sysreg_extension
{
// Numbering continues right after ARM_REG_CPSR_V, the last enumerator of
// arm_reg_cpsr_flags.
ARM_SYSREG_SPSR = ARM_REG_CPSR_V + 1,
// Implicitly ARM_SYSREG_SPSR + 1.
ARM_SYSREG_CPSR,
};

#endif
53 changes: 53 additions & 0 deletions include/retdec/capstone2llvmir/capstone2llvmir.h
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,43 @@ class Capstone2LlvmIrTranslator
virtual ~Capstone2LlvmIrTranslator();
//
//==============================================================================
// Translator configuration methods.
//==============================================================================
//
/**
* Should the translator ignore unexpected operands encountered in
* Capstone instructions?
* True -> ignore -> try to recover from, or ignore, the problem.
* False -> don't ignore -> throw @c UnexpectedOperandsError.
*
* Default value: true.
*
* @param f New value of the flag.
*/
virtual void setIgnoreUnexpectedOperands(bool f) = 0;
/**
* Should the translator ignore unhandled instructions?
* True -> ignore.
* False -> don't ignore -> throw @c UnhandledInstructionError when
* an instruction without an implemented translation routine is
* encountered.
*
* Default value: true.
*
* @param f New value of the flag.
*/
virtual void setIgnoreUnhandledInstructions(bool f) = 0;
/**
* Should the translator generate pseudo assembly functions for
* instructions whose full semantics are not implemented?
* True -> generate.
* False -> don't generate.
*
* Default value: true.
*
* @param f New value of the flag.
*/
virtual void setGeneratePseudoAsmFunctions(bool f) = 0;

// Query methods for the translator configuration flags.
// NOTE(review): presumably each one returns the value last passed to the
// matching set...() method - confirm in the implementing class.
virtual bool isIgnoreUnexpectedOperands() const = 0;
virtual bool isIgnoreUnhandledInstructions() const = 0;
virtual bool isGeneratePseudoAsmFunctions() const = 0;
//
//==============================================================================
// Mode query & modification methods.
//==============================================================================
//
Expand Down Expand Up @@ -701,6 +738,22 @@ class Capstone2LlvmIrTranslator
* @c ARM_REG_INVALID, @c MIPS_REG_INVALID).
*/
virtual uint32_t getCapstoneRegister(llvm::GlobalVariable* gv) const = 0;

/**
* Is the passed LLVM function @p f a pseudo assembly function for an
* instruction whose full semantics are not implemented?
*/
virtual bool isPseudoAsmFunction(llvm::Function* f) const = 0;
/**
* Is the passed LLVM call @p c a call to any kind of pseudo assembly
* function for an instruction whose full semantics are not implemented?
*/
virtual bool isPseudoAsmFunctionCall(llvm::CallInst* c) const = 0;
/**
* Get all pseudo assembly functions for instructions whose full
* semantics are not implemented.
*
* @return Reference to the set of these functions.
*/
virtual const std::set<llvm::Function*>& getPseudoAsmFunctions() const = 0;
};

} // namespace capstone2llvmir
Expand Down
73 changes: 62 additions & 11 deletions include/retdec/capstone2llvmir/exceptions.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,18 @@ namespace capstone2llvmir {

/**
* Base class for all Capstone2LlvmIr errors.
* This class itself should never be thrown.
*/
class Capstone2LlvmIrBaseError : public std::exception
class BaseError : public std::exception
{
public:
virtual ~Capstone2LlvmIrBaseError();
virtual ~BaseError();
};

/**
* An exception class encapsulating Capstone errors.
* An exception class encapsulating all Capstone errors.
*/
class CapstoneError : public Capstone2LlvmIrBaseError
class CapstoneError : public BaseError
{
public:
CapstoneError(cs_err e);
Expand All @@ -45,7 +46,7 @@ class CapstoneError : public Capstone2LlvmIrBaseError
/**
* An exception class related to Capstone mode setting errors.
*/
class Capstone2LlvmIrModeError : public Capstone2LlvmIrBaseError
class ModeSettingError : public BaseError
{
public:
enum class eType
Expand All @@ -60,8 +61,8 @@ class Capstone2LlvmIrModeError : public Capstone2LlvmIrBaseError
};

public:
Capstone2LlvmIrModeError(cs_arch a, cs_mode m, eType t);
virtual ~Capstone2LlvmIrModeError();
ModeSettingError(cs_arch a, cs_mode m, eType t);
virtual ~ModeSettingError();

std::string getMessage() const;
virtual const char* what() const noexcept override;
Expand All @@ -73,13 +74,63 @@ class Capstone2LlvmIrModeError : public Capstone2LlvmIrBaseError
};

/**
* A general exception class for all Capstone2LlvmIr errors.
* An exception class thrown when unexpected operand(s) (number, type, etc.)
* is(are) encountered.
*
* These exceptions may be suppressed and/or ignored.
*/
class Capstone2LlvmIrError : public Capstone2LlvmIrBaseError
class UnexpectedOperandsError : public BaseError
{
public:
Capstone2LlvmIrError(const std::string& message);
virtual ~Capstone2LlvmIrError();
/**
* @param i Capstone instruction in which unexpected operand
* was encountered.
* @param comment Optional comment about the problem.
*/
UnexpectedOperandsError(cs_insn* i, const std::string& comment = "");
virtual ~UnexpectedOperandsError();

virtual const char* what() const noexcept override;

private:
cs_insn* _insn = nullptr;
std::string _comment;
};

/**
* An exception class thrown when an unhandled instruction is encountered.
*
* These exceptions may be suppressed and/or ignored. Not all instructions are
* handled, or will be handled in the future.
*/
class UnhandledInstructionError : public BaseError
{
public:
/**
* @param i Capstone instruction which is not handled.
* @param comment Optional comment about the problem.
*/
UnhandledInstructionError(cs_insn* i, const std::string& comment = "");
// Declared virtual explicitly, like the destructors of the other
// exception classes in this header.
virtual ~UnhandledInstructionError();

/**
* @return Description of the error.
*/
virtual const char* what() const noexcept override;

private:
// NOTE(review): raw, presumably non-owning pointer - the instruction
// would then have to outlive this exception object; confirm.
cs_insn* _insn = nullptr;
std::string _comment;
};

/**
* A generic exception class for miscellaneous Capstone2LlvmIr errors.
*
* These exceptions signal some operational problems in Capstone2LlvmIr library.
* They should not be ignored. They should be reported to RetDec developers.
*/
class GenericError : public BaseError
{
public:
GenericError(const std::string& message);
virtual ~GenericError();

virtual const char* what() const noexcept override;

Expand Down
6 changes: 3 additions & 3 deletions src/bin2llvmir/optimizations/decoder/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ bool Decoder::runCatcher()
{
return run();
}
catch (const Capstone2LlvmIrBaseError& e)
catch (const BaseError& e)
{
LOG << "[capstone2llvmir]: " << e.what() << std::endl;
return false;
std::cerr << "[capstone2llvmir]: " << e.what() << std::endl;
exit(1);
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/bin2llvmir/optimizations/decoder/decoder_init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1021,6 +1021,11 @@ void Decoder::initConfigFunctions()
cf->setSourceFileName(df->getSourceFileName());
}
}

for (auto* f : _c2l->getPseudoAsmFunctions())
{
_config->addPseudoAsmFunction(f);
}
}

} // namespace bin2llvmir
Expand Down
16 changes: 16 additions & 0 deletions src/bin2llvmir/providers/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -716,6 +716,22 @@ llvm::CallInst* Config::isLlvmAnyUncondBranchPseudoFunctionCall(llvm::Value* c)
return nullptr;
}

/**
 * Register @p f as a pseudo assembly function.
 * Adding a function that is already registered has no effect.
 */
void Config::addPseudoAsmFunction(llvm::Function* f)
{
	_pseudoAsmFunctions.emplace(f);
}

/**
 * @return @c true if @p f was registered via addPseudoAsmFunction(),
 *         @c false otherwise.
 */
bool Config::isPseudoAsmFunction(llvm::Function* f)
{
	const auto it = _pseudoAsmFunctions.find(f);
	return it != _pseudoAsmFunctions.end();
}

/**
 * Check whether @p c is a call to a registered pseudo assembly function.
 *
 * @param c Value to inspect; may be @c nullptr or any non-call value.
 * @return @p c cast to @c llvm::CallInst if it is a call instruction whose
 *         called function is a registered pseudo assembly function,
 *         @c nullptr otherwise.
 */
llvm::CallInst* Config::isPseudoAsmFunctionCall(llvm::Value* c)
{
	auto* call = dyn_cast_or_null<CallInst>(c);
	// dyn_cast_or_null yields nullptr for a null or non-call value; it must
	// not be dereferenced in that case.
	if (call == nullptr)
	{
		return nullptr;
	}
	return isPseudoAsmFunction(call->getCalledFunction()) ? call : nullptr;
}

/**
* Get crypto pattern information for address \p addr - fill \p name,
* \p description, and \p type, if there is a pattern on address.
Expand Down
Loading

0 comments on commit c2ed626

Please sign in to comment.