Skip to content

Commit

Permalink
Unification of for-loops with the same codegen, fixing of their (Viru…
Browse files Browse the repository at this point in the history
…sTotal#1701)

...short-circuit evaluation and usage of none with them.

`for .. in` and `for .. of` loops now use the same codegen for their
bytecode using 2 instructions `ITER_CONDITION` and `ITER_END`.
`ITER_CONDITION` evaluates each iteration whether the condition of the
loop was satisfied prematurely or whether the loop is still satisfiable,
so essentially making sure that loops are properly short-circuited.
`ITER_END` is used for final evaluation of the loop, whether it is
satisfied or not, after the iterator got exhausted or the loop was
short-circuited.

In order to be able to use single codegen for both iterators and string
sets, new iterator type was instroduced - `YR_ITERATOR_STRING_SET`. When
`ITER_START_STRING_SET` is executed, it is capable of consuming all
strings pushed on the stack and construct `YR_ITERATOR_STRING_SET`
object which can then be used as a regular iterator.
  • Loading branch information
metthal authored May 2, 2022
1 parent 8df8794 commit 4872f95
Show file tree
Hide file tree
Showing 11 changed files with 1,937 additions and 1,754 deletions.
131 changes: 131 additions & 0 deletions libyara/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,36 @@ static int iter_int_enum_next(YR_ITERATOR* self, YR_VALUE_STACK* stack)
return ERROR_SUCCESS;
}

static int iter_string_set_next(YR_ITERATOR* self, YR_VALUE_STACK* stack)
{
// Check that there's two available slots in the stack, one for the next
// item returned by the iterator and another one for the boolean that
// indicates if there are more items.
if (stack->sp + 1 >= stack->capacity)
return ERROR_EXEC_STACK_OVERFLOW;

// If the current index is equal or larger than array's length the iterator
// has reached the end of the array.
if (self->string_set_it.index >= self->string_set_it.count)
goto _stop_iter;

// Push the false value that indicates that the iterator is not exhausted.
stack->items[stack->sp++].i = 0;
stack->items[stack->sp++].s = self->string_set_it.strings[self->string_set_it.index];
self->string_set_it.index++;

return ERROR_SUCCESS;

_stop_iter:

// Push true for indicating the iterator has been exhausted.
stack->items[stack->sp++].i = 1;
// Push YR_UNDEFINED as a placeholder for the next item.
stack->items[stack->sp++].i = YR_UNDEFINED;

return ERROR_SUCCESS;
}

int yr_execute_code(YR_SCAN_CONTEXT* context)
{
YR_DEBUG_FPRINTF(2, stderr, "+ %s() {\n", __FUNCTION__);
Expand Down Expand Up @@ -513,6 +543,39 @@ int yr_execute_code(YR_SCAN_CONTEXT* context)
stop = (result != ERROR_SUCCESS);
break;

case OP_ITER_START_STRING_SET:
YR_DEBUG_FPRINTF(
2, stderr, "- case OP_ITER_START_STRING_SET: // %s()\n", __FUNCTION__);

pop(r1);

r3.p = yr_notebook_alloc(
it_notebook, sizeof(YR_ITERATOR) + sizeof(YR_STRING*) * (size_t) r1.i);

if (r3.p == NULL)
{
result = ERROR_INSUFFICIENT_MEMORY;
}
else
{
r3.it->string_set_it.count = r1.i;
r3.it->string_set_it.index = 0;
r3.it->next = iter_string_set_next;

for (int64_t i = r1.i; i > 0; i--)
{
pop(r2);
r3.it->string_set_it.strings[i - 1] = r2.s;
}

// One last pop of the UNDEFINED string
pop(r2);
push(r3);
}

stop = (result != ERROR_SUCCESS);
break;

case OP_ITER_NEXT:
YR_DEBUG_FPRINTF(
2, stderr, "- case OP_ITER_NEXT: // %s()\n", __FUNCTION__);
Expand All @@ -527,6 +590,74 @@ int yr_execute_code(YR_SCAN_CONTEXT* context)
stop = (result != ERROR_SUCCESS);
break;

case OP_ITER_CONDITION:
YR_DEBUG_FPRINTF(
2, stderr, "- case OP_ITER_CONDITION: // %s()\n", __FUNCTION__);

// Evaluate the iteration condition of the loop. This instruction
// evaluates to 1 if the loop should continue and 0 if it shouldn't
// (due to short-circuit evaluation).

pop(r2); // min. expression - all, any, none, integer
pop(r3); // number of true expressions
pop(r4); // last expression result

// In case of 'all' loop, end once we the body failed
if (is_undef(r2))
{
r1.i = r4.i != 0 ? 1 : 0;
}
// In case of 'none' loop, end once the body succeed
else if (r2.i == 0)
{
r1.i = r4.i != 1 ? 1 : 0;
}
// In case of other loops, end once we satified min. expr.
else
{
r1.i = r3.i + r4.i < r2.i ? 1 : 0;
}

// Push whether loop should continue and repush
// the last expression result
push(r1);
push(r4);
break;

case OP_ITER_END:
YR_DEBUG_FPRINTF(
2, stderr, "- case OP_ITER_END: // %s()\n", __FUNCTION__);

// Evaluate the whole loop. Whether it was successful or not
// and whether it satisfied it's quantifier.

pop(r2); // min. expression - all, any, none, integer
pop(r3); // number of true expressions
pop(r4); // number of total iterations

// If there was 0 iterations in total, it doesn't
// matter what other numbers show. We can't evaluate
// the loop as true.
if (r4.i == 0)
{
r1.i = 0;
}
else if (is_undef(r2))
{
r1.i = r3.i == r4.i ? 1 : 0;
}
else if (r2.i == 0)
{
r1.i = r3.i == 0 ? 1 : 0;
}
else
{
r1.i = r3.i >= r2.i ? 1 : 0;
}

push(r1);
break;

case OP_PUSH:
YR_DEBUG_FPRINTF(2, stderr, "- case OP_PUSH: // %s()\n", __FUNCTION__);
memcpy(&r1.i, ip, sizeof(uint64_t));
Expand Down
Loading

0 comments on commit 4872f95

Please sign in to comment.