Skip to content

Commit

Permalink
Add check if plugin structure can be freed.
Browse files Browse the repository at this point in the history
Add combination of flags, to make usage easier in Rust.

Add rudimentary API for instruction word decoding.

Implement instruction word decoding for Hexagon.

Extract reading a new op into a helper function.

Enable CFG generation of instruction word archs

Invalidate pointer after iword was finished.

Generate iword by checking HexInsnContainer

Zero values of iword in fini

Add flag if a packet should be assumed as valid.

Set iword properties in separated function

Ignore aop->fail, since it can point to the next instr. within the packet.

Extend SetU

- Add a getter for length of SetU.
- Add a foreach macro.

Add helper functions to check if RzAnalysisOp is a jump or call.

Don't add duplicate nodes.

Don't add duplicated edges to graph.

Add track call and jump targets in a Set for iwords.

Decode entry node before adding it.

Add a test for iword CFG generation.

Fix CFG invalid node test.

Handle decoding of invalid iwords in CFG.

Add the set of call targets to a CFG node of iwords.

Revert "Add the set of call targets to a CFG node of iwords."

This reverts commit c149237.

Add instruction words as CFG nodes.

Distinguish between subtypes of different graph node types.

Add getter for log level

Degrade to warning

Add subtypes to single instructions in an iword CFG node.

Lower logging level

Check for edge duplicates when adding one.

Handle invalid instructions as EXIT nodes in a CFG (and remove duplicate code).

Revert log level increasing

Fix hash table init after rebase

Extend SetU

- Add a getter for length of SetU.
- Add a foreach macro.

Fix cgraph after SetU update

Add binding log function for plugins without varg support.

Use rz_io_read_at_mapped since it also reads bytes between mapped regions.

Fix memleaks

Remove diff

Remove check for legal NULL condition and handle it.

Fix signature.

Replace rz_io_read_at with rz_io_nread_at and add note for others.

Add check if function is classified as an input function.

Add missing return register role

Lower allowed buffer size.

Unify mapped reading from mem

Allow decoding if data is read from an unmapped region.

Remove dot from pattern

Add strict option for CFG generation.

It will omit nodes outside of the function detected by Rizin.

Add docs and assert

Fix rebase issues

Add a workaround for threads removing instructions too quickly from the packets.

Add a bunch of warnings

Add (breaking) tests for weird disassembly patterns.

Add return register roles

Mark jump nodes

Mark jumps in the CFG

Don't attempt iword decoding outside of map

Add CFG gen over function.

Fix: Don't add node outside of function to graph

Label jumps, tail calls and program exits in CFGs.

Fix tail calls of jumps without known target.

Bring enums in sync
  • Loading branch information
Rot127 committed Oct 18, 2024
1 parent f2a6d2a commit fdd0012
Show file tree
Hide file tree
Showing 31 changed files with 1,659 additions and 322 deletions.
20 changes: 19 additions & 1 deletion librz/arch/fcn.c
Original file line number Diff line number Diff line change
Expand Up @@ -2749,5 +2749,23 @@ RZ_API bool rz_analysis_function_is_malloc(const RzAnalysisFunction *fcn) {
rz_return_val_if_fail(fcn, false);
// TODO We need more metrics here. Just the name is pretty naive.
// E.g. we should compare it to signatures and other characterisitics.
return rz_regex_contains(".*\\.([mc]|(re))?alloc.*", fcn->name, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
return rz_regex_contains(".*([mc]|(re))?alloc.*", fcn->name, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
}

/**
 * \brief Determines if the given function returns unpredictable input data (e.g. by the user or peripherals).
 *
 * The current methods of detection (tested in order):
 * - Name matches regex ".*fread.*"
 *
 * \param fcn The function to test.
 *
 * \return true If the function \p fcn is considered an input function.
 * \return false Otherwise.
 */
RZ_API bool rz_analysis_function_is_input(const RzAnalysisFunction *fcn) {
	rz_return_val_if_fail(fcn, false);
	// TODO We need more metrics here. Just the name is pretty naive.
	// E.g. we should compare it to signatures and other characteristics.
	// The pattern has no literal dot on purpose, so names without a
	// prefix separator before "fread" match as well.
	return rz_regex_contains(".*fread.*", fcn->name, RZ_REGEX_ZERO_TERMINATED, RZ_REGEX_EXTENDED, RZ_REGEX_DEFAULT);
}
2 changes: 2 additions & 0 deletions librz/arch/isa/hexagon/hexagon.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,12 +427,14 @@ RZ_API const char *hex_get_reg_in_class(HexRegClass cls, int reg_num, bool get_a
int resolve_n_register(const int reg_num, const ut32 addr, const HexPkt *p) {
// .new values are documented in Programmers Reference Manual
if (reg_num <= 1 || reg_num >= 8) {
RZ_LOG_DEBUG("n_register reg_num out of range.\n");
return UT32_MAX;
}

ut8 ahead = (reg_num >> 1);
ut8 i = hexagon_get_pkt_index_of_addr(addr, p);
if (i == UT8_MAX) {
RZ_LOG_DEBUG("Could not get n_register instruction packet index.\n");
return UT32_MAX;
}

Expand Down
94 changes: 85 additions & 9 deletions librz/arch/isa/hexagon/hexagon_arch.c
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ RZ_API ut8 hexagon_get_pkt_index_of_addr(const ut32 addr, const HexPkt *p) {
}
++i;
}
RZ_LOG_WARN("Failed to find index in packet for %" PFMT32x, addr);
return UT8_MAX;
}

Expand All @@ -241,8 +242,6 @@ static void hex_clear_pkt(RZ_NONNULL HexPkt *p) {
p->hw_loop0_addr = 0;
p->hw_loop1_addr = 0;
p->pkt_addr = 0;
p->last_instr_present = false;
p->is_valid = false;
p->last_access = 0;
rz_list_purge(p->bin);
rz_pvector_clear(p->il_ops);
Expand All @@ -265,6 +264,7 @@ static HexPkt *hex_get_stale_pkt(HexState *state) {
stale_state_pkt = &state->pkts[i];
}
}
hex_clear_pkt(stale_state_pkt);
return stale_state_pkt;
}

Expand All @@ -291,6 +291,7 @@ RZ_API HexPkt *hex_get_pkt(RZ_BORROW HexState *state, const ut32 addr) {
}
}
}
RZ_LOG_DEBUG("Failed to get packet at 0x%" PFMT32x, addr);
return NULL;
}

Expand Down Expand Up @@ -352,6 +353,7 @@ static ut8 get_state_pkt_index(HexState *state, const HexPkt *p) {
return i;
}
}
RZ_LOG_WARN("Failed to find state packet index");
return UT8_MAX;
}

Expand Down Expand Up @@ -891,9 +893,7 @@ static void print_state_pkt(const HexState *state, st32 index, HexBufferAction a
* \return The pointer to the added instruction. Null if the instruction could not be copied.
*/
static HexInsnContainer *hex_add_hic_to_state(HexState *state, const HexInsnContainer *new_hic) {
if (!new_hic) {
return NULL;
}
rz_return_val_if_fail(state && new_hic, NULL);
bool add_to_pkt = false;
bool new_pkt = false;
bool write_to_stale_pkt = false;
Expand Down Expand Up @@ -1258,9 +1258,12 @@ static inline bool do_decoding_loop(ut64 current_addr, ut64 requested_addr, cons
* \param buf The buffer which stores the current opcode.
* \param addr The address of the current opcode.
* \param copy_result If set, it copies the result. Otherwise it only buffers it in the internal state.
*
* \return The instruction container decoded at \p addr, or NULL if seeking or decoding failed.
* Whether the instruction belongs to a _valid_ packet is reported via \p rz_reverse->pkt_fully_decoded.
*/
RZ_API void hexagon_reverse_opcode(HexReversedOpcode *rz_reverse, const ut64 addr, RzAsm *rz_asm, RzAnalysis *rz_analysis) {
rz_return_if_fail(rz_reverse);
RZ_API RZ_OWN HexInsnContainer *hexagon_reverse_opcode(HexReversedOpcode *rz_reverse, const ut64 addr, RzAsm *rz_asm, RzAnalysis *rz_analysis) {
rz_return_val_if_fail(rz_reverse, NULL);
HexState *state;
RzBuffer *buffer;
perform_hacks(&state, &buffer, &rz_asm, &rz_analysis, rz_reverse);
Expand All @@ -1270,7 +1273,7 @@ RZ_API void hexagon_reverse_opcode(HexReversedOpcode *rz_reverse, const ut64 add
// For bytes buffers (e.g. given in case of `rz-asm`) the address is not a valid seek, but distinct.
if (buffer->type == RZ_BUFFER_IO && rz_buf_seek(buffer, addr, RZ_BUF_SET) != addr) {
RZ_LOG_DEBUG("Could not seek to address: 0x%" PFMT64x ". Attempting to read out of mapped memory region?\n", addr);
return;
return NULL;
}

ut64 current_addr = get_pre_decoding_start(buffer, addr);
Expand Down Expand Up @@ -1309,10 +1312,83 @@ RZ_API void hexagon_reverse_opcode(HexReversedOpcode *rz_reverse, const ut64 add
if (!hic) {
RZ_LOG_DEBUG("Could not decode packet.\n");
rz_buf_free(buffer);
return;
return NULL;
}
HexPkt *p = hex_get_pkt(state, hic->addr);
rz_reverse->pkt_fully_decoded = p && p->is_valid;
copy_asm_ana_ops(state, rz_reverse, hic);
rz_buf_free(buffer);
return hic;
}

/**
 * \brief Derives instruction word property flags from an analysis op type.
 *
 * The op type is inspected twice: once with the hint bits masked out
 * (call / jump / return classification) and once with only the hint bits
 * (tail / conditional). Matching flags are OR-ed into \p iword->props, so
 * flags set by earlier instructions of the same word are kept.
 *
 * \param anaop_type The RzAnalysisOp type value of a single instruction.
 * \param iword The instruction word whose properties are updated.
 */
static void set_iword_properties(ut32 anaop_type, RzAnalysisInsnWord *iword) {
	rz_return_if_fail(iword);

	// Classification by base type (hint bits removed).
	switch (anaop_type & ~RZ_ANALYSIS_OP_HINT_MASK) {
	case RZ_ANALYSIS_OP_TYPE_RET:
		iword->props |= RZ_ANALYSIS_IWORD_RET;
		break;
	case RZ_ANALYSIS_OP_TYPE_JMP:
	case RZ_ANALYSIS_OP_TYPE_UJMP:
	case RZ_ANALYSIS_OP_TYPE_RJMP:
	case RZ_ANALYSIS_OP_TYPE_IJMP:
	case RZ_ANALYSIS_OP_TYPE_IRJMP:
	case RZ_ANALYSIS_OP_TYPE_CJMP:
	case RZ_ANALYSIS_OP_TYPE_RCJMP:
	case RZ_ANALYSIS_OP_TYPE_MJMP:
	case RZ_ANALYSIS_OP_TYPE_MCJMP:
	case RZ_ANALYSIS_OP_TYPE_UCJMP:
		iword->props |= RZ_ANALYSIS_IWORD_JUMP;
		break;
	case RZ_ANALYSIS_OP_TYPE_CALL:
	case RZ_ANALYSIS_OP_TYPE_UCALL:
	case RZ_ANALYSIS_OP_TYPE_RCALL:
	case RZ_ANALYSIS_OP_TYPE_ICALL:
	case RZ_ANALYSIS_OP_TYPE_IRCALL:
	case RZ_ANALYSIS_OP_TYPE_CCALL:
	case RZ_ANALYSIS_OP_TYPE_UCCALL:
		iword->props |= RZ_ANALYSIS_IWORD_CALL;
		break;
	default:
		break;
	}

	// Classification by hint bits only.
	// NOTE(review): this is an exact match on the masked value, so a type
	// carrying more than one hint bit at once sets neither flag — confirm
	// that this is intended.
	switch (anaop_type & RZ_ANALYSIS_OP_HINT_MASK) {
	case RZ_ANALYSIS_OP_TYPE_COND:
		iword->props |= RZ_ANALYSIS_IWORD_COND;
		break;
	case RZ_ANALYSIS_OP_TYPE_TAIL:
		iword->props |= RZ_ANALYSIS_IWORD_TAIL;
		break;
	default:
		break;
	}
}

/**
 * \brief Decodes the instruction packet starting at \p addr into \p iword.
 *
 * Instructions are decoded one after another until the decoder marks an
 * instruction as the last one of its packet. For every instruction the
 * analysis op, the disassembly text, the size and the known jump/call
 * targets are recorded in \p iword. If the last instruction is neither a
 * return nor a jump, the address following the word is added as jump target.
 *
 * \param analysis The RzAnalysis instance handed to the decoder.
 * \param rev The reversed opcode descriptor. \p rev->ana_op must be set, it receives each decoded op.
 * \param iword The instruction word to fill. Assumed to be freshly set up (empty) — TODO confirm with callers.
 * \param addr The address of the first instruction of the word.
 *
 * \return true If every instruction up to the end of the packet was decoded.
 * \return false If an argument is missing or an instruction could not be decoded.
 *         In the latter case \p iword may hold the partially decoded word.
 */
RZ_API bool hexagon_decode_iword(RzAnalysis *analysis, HexReversedOpcode *rev, RZ_OUT RzAnalysisInsnWord *iword, ut64 addr) {
	rz_return_val_if_fail(rev && rev->ana_op && iword, false);
	iword->addr = addr;
	// Byte offset of the instruction to decode next, relative to addr.
	ut64 offset = 0;
	HexInsnContainer *hic = NULL;
	do {
		// Step through the packet. Decoding addr itself over and over would
		// never reach the last instruction of a multi-instruction packet.
		hic = hexagon_reverse_opcode(rev, addr + offset, NULL, analysis);
		if (!hic) {
			// The decoder returns NULL if it cannot seek to or decode the address.
			RZ_LOG_DEBUG("Could not decode instruction word at 0x%" PFMT64x "\n", addr);
			return false;
		}
		// NOTE(review): the same rev->ana_op pointer is pushed for every
		// instruction, while iword->insns owns its elements — confirm whether
		// a per-instruction copy is required here.
		rz_pvector_push(iword->insns, rev->ana_op);
		rz_strbuf_appendf(iword->asm_str, "%s\n", hic->text);
		offset += 4;
		iword->size_bytes += 4;
		iword->size_bits += 32;

		set_iword_properties(rev->ana_op->type, iword);
		if (rev->ana_op->jump != UT64_MAX) {
			if (iword->props & RZ_ANALYSIS_IWORD_CALL) {
				rz_set_u_add(iword->call_targets, rev->ana_op->jump);
			} else {
				rz_set_u_add(iword->jump_targets, rev->ana_op->jump);
			}
		}
	} while (!hic->pkt_info.last_insn);

	// Fall through to the next instruction word unless the packet ends the flow.
	if (rev->ana_op->type != RZ_ANALYSIS_OP_TYPE_RET && !rz_analysis_op_is_jump(rev->ana_op)) {
		ut64 next_iword_addr = addr + iword->size_bytes;
		rz_set_u_add(iword->jump_targets, next_iword_addr);
	}
	return true;
}
3 changes: 2 additions & 1 deletion librz/arch/isa/hexagon/hexagon_arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,8 @@ RZ_API void hex_insn_container_free(RZ_NULLABLE HexInsnContainer *c);
RZ_API void hex_const_ext_free(RZ_NULLABLE HexConstExt *ce);
RZ_IPI RZ_OWN HexState *hexagon_state_new();
RZ_IPI void hexagon_state_fini(RZ_NULLABLE HexState *state);
RZ_API void hexagon_reverse_opcode(HexReversedOpcode *rz_reverse, const ut64 addr, RzAsm *rz_asm, RzAnalysis *rz_analysis);
RZ_API RZ_OWN HexInsnContainer *hexagon_reverse_opcode(HexReversedOpcode *rz_reverse, const ut64 addr, RzAsm *rz_asm, RzAnalysis *rz_analysis);
RZ_API bool hexagon_decode_iword(RzAnalysis *analysis, HexReversedOpcode *rev, RZ_OUT RzAnalysisInsnWord *iword, ut64 addr);
RZ_API ut8 hexagon_get_pkt_index_of_addr(const ut32 addr, const HexPkt *p);
RZ_API HexLoopAttr hex_get_loop_flag(const HexPkt *p);
RZ_API const HexOp *hex_isa_to_reg(const HexInsn *hi, const char isa_id, bool new_reg);
Expand Down
6 changes: 3 additions & 3 deletions librz/arch/isa/hexagon/hexagon_il.c
Original file line number Diff line number Diff line change
Expand Up @@ -208,11 +208,11 @@ RZ_IPI bool hex_shuffle_insns(RZ_INOUT HexPkt *p) {
}

static RzILOpEffect *hex_il_op_to_effect(const HexILOp *il_op, HexPkt *pkt) {
rz_return_val_if_fail(il_op && il_op->get_il_op, NULL);
rz_return_val_if_fail(il_op, NULL);
HexInsnPktBundle bundle = { 0 };
bundle.insn = (HexInsn *)il_op->hi;
bundle.pkt = pkt;
return il_op->get_il_op(&bundle);
return il_op->get_il_op ? il_op->get_il_op(&bundle) : EMPTY();
}

/**
Expand All @@ -228,7 +228,7 @@ static RZ_OWN RzILOpEffect *hex_pkt_to_il_seq(HexPkt *pkt) {
rz_pvector_clear(pkt->il_ops);
// We need at least the instruction op and the packet commit.
// So if there aren't at least two ops something went wrong.
RZ_LOG_WARN("Invalid il ops sequence! There should be at least two il ops per packet.\n");
RZ_LOG_DEBUG("Invalid il ops sequence! There should be at least two il ops per packet.\n");
return NULL;
}
RzILOpEffect *complete_seq = EMPTY();
Expand Down
49 changes: 49 additions & 0 deletions librz/arch/op.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-FileCopyrightText: 2010-2020 nibble <nibble.ds@gmail.com>
// SPDX-License-Identifier: LGPL-3.0-only

#include <rz_util/rz_set.h>
#include <rz_analysis.h>
#include <rz_util.h>
#include <rz_list.h>
Expand Down Expand Up @@ -691,3 +692,51 @@ RZ_API int rz_analysis_op_reg_delta(RzAnalysis *analysis, ut64 addr, const char
rz_analysis_op_fini(&op);
return delta;
}

/**
 * \brief Allocates a new, empty instruction word.
 *
 * The assembly string buffer, the instruction vector and both target sets
 * (jump and call) are allocated as well.
 *
 * \return The new instruction word, or NULL if any allocation failed.
 */
RZ_API RZ_OWN RzAnalysisInsnWord *rz_analysis_insn_word_new() {
	RzAnalysisInsnWord *iword = RZ_NEW0(RzAnalysisInsnWord);
	if (!iword) {
		return NULL;
	}
	iword->asm_str = rz_strbuf_new("");
	iword->insns = rz_pvector_new(rz_analysis_op_free);
	iword->jump_targets = rz_set_u_new();
	iword->call_targets = rz_set_u_new();
	// All four members are used by the decoders, so all of them must exist.
	if (!iword->asm_str || !iword->insns || !iword->jump_targets || !iword->call_targets) {
		rz_analysis_insn_word_free(iword);
		return NULL;
	}
	return iword;
}

/**
 * \brief Releases an instruction word and everything it owns.
 *
 * \param iword The instruction word to free. NULL is accepted and ignored.
 */
RZ_API void rz_analysis_insn_word_free(RZ_OWN RZ_NULLABLE RzAnalysisInsnWord *iword) {
	// Both calls are no-ops for NULL, so no separate guard is needed.
	rz_analysis_insn_word_fini(iword);
	free(iword);
}

/**
 * \brief Resets an instruction word to a freshly initialized, empty state.
 *
 * Previously held members are released first, then the assembly string
 * buffer, the instruction vector and both target sets are allocated anew.
 * If any allocation fails, the word is finalized again, which leaves it zeroed.
 *
 * NOTE(review): because the word is finalized first, \p iword presumably
 * has to be zeroed or previously initialized when passed in — confirm with callers.
 *
 * \param iword The instruction word to set up.
 */
RZ_API void rz_analysis_insn_word_setup(RZ_BORROW RZ_NONNULL RzAnalysisInsnWord *iword) {
	rz_return_if_fail(iword);
	rz_analysis_insn_word_fini(iword);
	iword->asm_str = rz_strbuf_new("");
	iword->insns = rz_pvector_new(rz_analysis_op_free);
	iword->jump_targets = rz_set_u_new();
	iword->call_targets = rz_set_u_new();
	// All four members are used by the decoders, so all of them must exist.
	if (!iword->asm_str || !iword->insns || !iword->jump_targets || !iword->call_targets) {
		rz_analysis_insn_word_fini(iword);
	}
}

/**
 * \brief Releases all members of an instruction word, but not the word itself.
 *
 * After the members are freed the whole struct is overwritten with zeros,
 * so no stale pointers remain in it.
 *
 * \param iword The instruction word to finalize. NULL is accepted and ignored.
 */
RZ_API void rz_analysis_insn_word_fini(RZ_OWN RZ_NULLABLE RzAnalysisInsnWord *iword) {
	if (iword) {
		rz_strbuf_free(iword->asm_str);
		rz_pvector_free(iword->insns);
		rz_set_u_free(iword->jump_targets);
		rz_set_u_free(iword->call_targets);
		rz_il_op_effect_free(iword->il_op);
		// Invalidate every pointer which was just freed.
		rz_mem_memzero(iword, sizeof(RzAnalysisInsnWord));
	}
}
4 changes: 4 additions & 0 deletions librz/arch/p/analysis/analysis_arm_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -2167,6 +2167,8 @@ static char *get_reg_profile(RzAnalysis *analysis) {
"=A1 x1\n"
"=A2 x2\n"
"=A3 x3\n"
"=R0 x0\n"
"=R1 x1\n"
"=ZF zf\n"
"=SF nf\n"
"=OF vf\n"
Expand Down Expand Up @@ -2397,6 +2399,8 @@ static char *get_reg_profile(RzAnalysis *analysis) {
"=A1 r1\n"
"=A2 r2\n"
"=A3 r3\n"
"=R0 r0\n"
"=R1 r1\n"
"=ZF zf\n"
"=SF nf\n"
"=OF vf\n"
Expand Down
13 changes: 13 additions & 0 deletions librz/arch/p/analysis/analysis_hexagon.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,18 @@ RZ_API int hexagon_v6_op(RzAnalysis *analysis, RzAnalysisOp *op, ut64 addr, cons
return HEX_INSN_SIZE;
}

/**
 * \brief Decodes the instruction word at \p addr and attaches its IL operation.
 *
 * Builds a reversed opcode descriptor in analysis mode over the given byte
 * buffer and hands it to hexagon_decode_iword(). On success the IL operation
 * for \p addr is stored in \p iword->il_op.
 *
 * \param a The RzAnalysis instance.
 * \param iword The instruction word to fill.
 * \param addr The address of the instruction word.
 * \param buf The bytes to decode from.
 * \param len The length of \p buf.
 * \param buf_off_iword Not read by this function.
 *
 * \return true If the instruction word was decoded.
 * \return false If an argument is missing or decoding failed.
 */
RZ_API bool rz_hexagon_decode_iword(RzAnalysis *a, RZ_OUT RzAnalysisInsnWord *iword, ut64 addr, const ut8 *buf, size_t len, size_t buf_off_iword) {
	rz_return_val_if_fail(a && iword && buf, false);

	// NOTE(review): the address of this stack object is handed to the decoder,
	// which pushes rev.ana_op into iword->insns; that vector is created with
	// rz_analysis_op_free as element destructor — confirm the ownership here.
	RzAnalysisOp aop = { 0 };
	HexReversedOpcode rev = {
		.action = HEXAGON_ANALYSIS,
		.ana_op = &aop,
		.asm_op = NULL,
		.state = NULL,
		.pkt_fully_decoded = false,
		.bytes_buf = buf,
		.bytes_buf_len = len
	};
	if (!hexagon_decode_iword(a, &rev, iword, addr)) {
		return false;
	}
	iword->il_op = hex_get_il_op(addr, true, rev.state);
	return true;
}

static RzAnalysisILConfig *rz_hexagon_il_config(RzAnalysis *a) {
rz_return_val_if_fail(a, NULL);
// Hacky getter for the plugin data until RzArch is implemented
Expand Down Expand Up @@ -749,4 +761,5 @@ RzAnalysisPlugin rz_analysis_plugin_hexagon = {
.esil = false,
.get_reg_profile = get_reg_profile,
.il_config = rz_hexagon_il_config,
.decode_iword = rz_hexagon_decode_iword,
};
3 changes: 3 additions & 0 deletions librz/arch/p/analysis/analysis_x86_cs.c
Original file line number Diff line number Diff line change
Expand Up @@ -3259,6 +3259,7 @@ static char *get_reg_profile(RzAnalysis *analysis) {
"=PC ip\n"
"=SP sp\n"
"=BP bp\n"
"=R0 ax\n"
"=A0 ax\n"
"=A1 bx\n"
"=A2 cx\n"
Expand Down Expand Up @@ -3314,6 +3315,7 @@ static char *get_reg_profile(RzAnalysis *analysis) {
"=PC eip\n"
"=SP esp\n"
"=BP ebp\n"
"=R0 eax\n"
"=A0 eax\n"
"=A1 ebx\n"
"=A2 ecx\n"
Expand Down Expand Up @@ -3450,6 +3452,7 @@ static char *get_reg_profile(RzAnalysis *analysis) {
"=PC rip\n"
"=SP rsp\n"
"=BP rbp\n"
"=R0 rax\n"
"=A0 rdi\n"
"=A1 rsi\n"
"=A2 rdx\n"
Expand Down
19 changes: 14 additions & 5 deletions librz/core/agraph.c
Original file line number Diff line number Diff line change
Expand Up @@ -3709,23 +3709,32 @@ RZ_API RZ_BORROW RzANode *rz_agraph_add_node_from_node_info(RZ_NONNULL const RzA
}
an->offset = info->def.offset;
break;
case RZ_GRAPH_NODE_TYPE_CFG: {
char *annotation = rz_graph_get_node_subtype_annotation(info->subtype, utf8);
case RZ_GRAPH_NODE_TYPE_CFG:
case RZ_GRAPH_NODE_TYPE_CFG_IWORD: {
char *annotation = NULL;
ut64 addr = 0;
if (info->type == RZ_GRAPH_NODE_TYPE_CFG) {
annotation = rz_graph_get_node_subtype_annotation_cfg(info->cfg.subtype, true, utf8);
addr = info->cfg.address;
} else {
annotation = rz_graph_get_node_subtype_annotation_cfg_iword(info->cfg_iword.subtype, true, utf8);
addr = info->cfg_iword.address;
}
rz_return_val_if_fail(annotation, NULL);
char *cfg_title = rz_str_appendf(NULL, "0x%" PFMT64x "%s", info->cfg.address, annotation);
char *cfg_title = rz_str_appendf(NULL, "0x%" PFMT64x "%s", addr, annotation);
rz_return_val_if_fail(cfg_title, NULL);
an = rz_agraph_add_node(g, cfg_title, "");
free(annotation);
free(cfg_title);
if (!an) {
return NULL;
}
an->offset = info->cfg.address;
an->offset = addr;
break;
}
case RZ_GRAPH_NODE_TYPE_ICFG:
rz_strf(title, "0x%" PFMT64x "%s", info->icfg.address,
info->subtype & RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC ? " (alloc)" : "");
info->icfg.subtype & RZ_GRAPH_NODE_SUBTYPE_ICFG_MALLOC ? " (alloc)" : "");
an = rz_agraph_add_node(g, title, "");
if (!an) {
return NULL;
Expand Down
Loading

0 comments on commit fdd0012

Please sign in to comment.