Skip to content

Commit

Permalink
Implement notebook data structure and use it in Aho-Corasick automaton.
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Mar 4, 2020
1 parent 6f65926 commit e7b1801
Show file tree
Hide file tree
Showing 8 changed files with 207 additions and 36 deletions.
2 changes: 2 additions & 0 deletions bazel/yara.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def yara_library(name, defines=[], modules=[], modules_srcs=[],
"libyara/include/yara/macho.h",
"libyara/include/yara/mem.h",
"libyara/include/yara/modules.h",
"libyara/include/yara/notebook.h",
"libyara/include/yara/object.h",
"libyara/include/yara/parser.h",
"libyara/include/yara/pe.h",
Expand All @@ -160,6 +161,7 @@ def yara_library(name, defines=[], modules=[], modules_srcs=[],
"libyara/libyara.c",
"libyara/mem.c",
"libyara/modules.c",
"libyara/notebook.c",
"libyara/object.c",
"libyara/parser.c",
"libyara/proc.c",
Expand Down
2 changes: 2 additions & 0 deletions libyara/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ yarainclude_HEADERS = \
include/yara/limits.h \
include/yara/mem.h \
include/yara/modules.h \
include/yara/notebook.h \
include/yara/object.h \
include/yara/parser.h \
include/yara/proc.h \
Expand Down Expand Up @@ -175,6 +176,7 @@ libyara_la_SOURCES = \
libyara.c \
mem.c \
modules.c \
notebook.c \
object.c \
parser.c \
proc.c \
Expand Down
47 changes: 14 additions & 33 deletions libyara/ahocorasick.c
Original file line number Diff line number Diff line change
Expand Up @@ -227,22 +227,6 @@ static YR_AC_STATE* _yr_ac_state_create(
static int _yr_ac_state_destroy(
YR_AC_STATE* state)
{
YR_AC_MATCH_LIST_ENTRY* match = state->matches;

while (match != NULL)
{
YR_AC_MATCH_LIST_ENTRY* next = match->next;

if (match->xref > 0)
{
if (match->xref-- == 0)
yr_free(match);
}

match = next;
}


YR_AC_STATE* child_state = state->first_child;

while (child_state != NULL)
Expand All @@ -258,17 +242,6 @@ static int _yr_ac_state_destroy(
}


static void _yr_ac_incr_xrefs(
YR_AC_MATCH_LIST_ENTRY* match)
{
while (match != NULL)
{
match->xref++;
match = match->next;
}
}


//
// _yr_ac_create_failure_links
//
Expand Down Expand Up @@ -324,15 +297,16 @@ static int _yr_ac_create_failure_links(
if (match->backtrack > 0)
{
match->next = root_state->matches;
_yr_ac_incr_xrefs(root_state->matches);
}
}
else
{
// This state doesn't have any matches, its matches will be those
// in the root state, if any.
current_state->matches = root_state->matches;
_yr_ac_incr_xrefs(root_state->matches);
}

// Iterate over all the states that the current state can transition to.
transition_state = current_state->first_child;

while (transition_state != NULL)
Expand Down Expand Up @@ -363,7 +337,6 @@ static int _yr_ac_create_failure_links(
match->next = temp_state->matches;
}

_yr_ac_incr_xrefs(temp_state->matches);
break;
}
else
Expand Down Expand Up @@ -816,6 +789,13 @@ int yr_ac_automaton_create(
return ERROR_INSUFFICIENT_MEMORY;
}

FAIL_ON_ERROR_WITH_CLEANUP(yr_notebook_create(
sizeof(YR_AC_MATCH_TABLE_ENTRY) * 1024,
&new_automaton->matches_nb),
// cleanup
yr_free(new_automaton);
yr_free(root_state));

root_state->depth = 0;
root_state->matches = NULL;
root_state->failure = NULL;
Expand Down Expand Up @@ -846,6 +826,8 @@ int yr_ac_automaton_destroy(
{
_yr_ac_state_destroy(automaton->root);

yr_notebook_destroy(automaton->matches_nb);

yr_free(automaton->t_table);
yr_free(automaton->m_table);
yr_free(automaton->bitmask);
Expand Down Expand Up @@ -888,8 +870,8 @@ int yr_ac_add_string(
state = next_state;
}

YR_AC_MATCH_LIST_ENTRY* new_match = yr_malloc(
sizeof(struct YR_AC_MATCH_LIST_ENTRY));
YR_AC_MATCH_LIST_ENTRY* new_match = yr_notebook_alloc(
automaton->matches_nb, sizeof(struct YR_AC_MATCH_LIST_ENTRY));

if (new_match == NULL)
return ERROR_INSUFFICIENT_MEMORY;
Expand All @@ -898,7 +880,6 @@ int yr_ac_add_string(
new_match->string_idx = string_idx;
new_match->forward_code_ref = atom->forward_code_ref;
new_match->backward_code_ref = atom->backward_code_ref;
new_match->xref = 1;
new_match->ref = YR_ARENA_NULL_REF;

// Add newly created match to the list of matches for the state.
Expand Down
27 changes: 27 additions & 0 deletions libyara/include/yara/notebook.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
//
// Created by Victor Manuel Alvarez on 3/4/20.
//

#ifndef YR_NOTEBOOK_H
#define YR_NOTEBOOK_H

// Provides size_t, used in the prototypes below.
#include <stdlib.h>

// Opaque notebook handle. Users of this header only ever hold pointers to it;
// the structure itself is defined in notebook.c.
typedef struct YR_NOTEBOOK YR_NOTEBOOK;


// Creates a notebook whose pages hold page_size bytes of data each, and
// allocates its first page. On success the new notebook is stored in *pool
// and ERROR_SUCCESS is returned. If memory can't be allocated the function
// returns ERROR_INSUFFICIENT_MEMORY and *pool is left untouched.
int yr_notebook_create(
size_t page_size,
YR_NOTEBOOK** pool);


// Destroys a notebook, releasing its pages and the notebook itself. Buffers
// obtained from yr_notebook_alloc() must not be used after this call. The
// pool argument is dereferenced unconditionally, so it must not be NULL.
// Always returns ERROR_SUCCESS.
int yr_notebook_destroy(
YR_NOTEBOOK* pool);


// Returns a pointer to a buffer of the given size taken from the notebook's
// current page, creating a new page first if the current one doesn't have
// enough free space. The size can't be larger than the page size passed to
// yr_notebook_create() (this is checked with assert()). Returns NULL if a
// new page is required and it can't be allocated.
void* yr_notebook_alloc(
YR_NOTEBOOK* notebook,
size_t size);


#endif // YR_NOTEBOOK_H
9 changes: 6 additions & 3 deletions libyara/include/yara/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <yara/sizedstr.h>
#include <yara/stopwatch.h>
#include <yara/threading.h>
#include "notebook.h"


#define DECLARE_REFERENCE(type, name) \
Expand Down Expand Up @@ -527,7 +528,6 @@ struct YR_AC_MATCH_LIST_ENTRY
{
uint16_t backtrack;
uint32_t string_idx;
uint32_t xref;

YR_ARENA2_REF ref;
YR_ARENA2_REF forward_code_ref;
Expand Down Expand Up @@ -555,15 +555,18 @@ struct YR_AC_AUTOMATON
// details.
YR_AC_TRANSITION* t_table;

// Pointer to an array of YR_AC_MATCH_LIST_ENTRY* pointers. This array has the same
// number of entries than the transition table. If entry N in the transition
// Pointer to an array of YR_AC_MATCH_LIST_ENTRY* pointers. This array has the
// same number of entries as the transition table. If entry N in the transition
// table corresponds to an Aho-Corasick state, the N-th entry in the array
// points to the first item of the list of matches corresponding to that state.
// If entry N in the transition table does not correspond to a state, or the
// state doesn't have any match, the N-th entry in this array will be a NULL
// pointer.
YR_AC_MATCH_TABLE_ENTRY* m_table;

// Notebook where the YR_AC_MATCH_LIST_ENTRY structures will be allocated.
YR_NOTEBOOK* matches_nb;

// Pointer to the root Aho-Corasick state.
YR_AC_STATE* root;
};
Expand Down
154 changes: 154 additions & 0 deletions libyara/notebook.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#include <stdint.h>

/*
Copyright (c) 2020. The YARA Authors. All Rights Reserved.
Redistribution and use in source and binary forms, with or without modification,
are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation and/or
other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its contributors
may be used to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <assert.h>

#include <yara/error.h>
#include <yara/mem.h>
#include <yara/notebook.h>


// Forward declaration of YR_NOTEBOOK_PAGE.
typedef struct YR_NOTEBOOK_PAGE YR_NOTEBOOK_PAGE;


// A notebook is a data structure that can be used for allocating memory
// space in the same way malloc() would do. However, the buffers returned
// by yr_notebook_alloc() are backed by a larger buffer reserved by the notebook
// beforehand called a "page". The notebook fulfills the allocations performed
// via yr_notebook_alloc() with space taken from the current page, or creates
// a new page when necessary. It's recommended that the page size is at least
// 4x the size of the buffers you plan to allocate with yr_notebook_alloc().
//
// Buffers returned by yr_notebook_alloc() are never released individually,
// there's no function for freeing a single buffer. Once the notebook is
// destroyed its pages are freed, and consequently the buffers allocated via
// yr_notebook_alloc().
struct YR_NOTEBOOK
{
// Size in bytes of the data area of each page in the notebook. This is the
// value passed to yr_notebook_create() and the upper bound for the size of
// a single allocation.
size_t page_size;
// Head of the linked list of pages. yr_notebook_destroy() starts walking
// the list from here.
YR_NOTEBOOK_PAGE* page_list_head;
// Pointer to the page that is being filled, yr_notebook_alloc() takes the
// space for new buffers from this page.
YR_NOTEBOOK_PAGE* current_page;
};


// YR_NOTEBOOK_PAGE
//
// Header of a notebook page. A page is allocated as a single memory block of
// sizeof(YR_NOTEBOOK_PAGE) + page_size bytes: this header followed by the
// page's data area, from which yr_notebook_alloc() takes the buffers it
// returns.
struct YR_NOTEBOOK_PAGE
{
  // Amount of bytes in the page that are actually used.
  size_t used;
  // Pointer to next page, NULL for the last page in the list.
  struct YR_NOTEBOOK_PAGE* next;
  // Page's data. This is a C99 flexible array member (the standard
  // replacement for the zero-length array extension), so it doesn't
  // contribute to sizeof(YR_NOTEBOOK_PAGE); its actual length is determined
  // by the extra space requested when the page is allocated.
  uint8_t data[];
};



// Builds a new notebook together with its first page. Every page of the
// notebook, including those added later on demand, has room for page_size
// bytes of data.
//
// On success the notebook is returned through *pool and the result is
// ERROR_SUCCESS. If either the notebook or its first page can't be allocated
// the result is ERROR_INSUFFICIENT_MEMORY, nothing is leaked and *pool is not
// modified.
int yr_notebook_create(
    size_t page_size,
    YR_NOTEBOOK** pool)
{
  YR_NOTEBOOK* notebook = yr_malloc(sizeof(YR_NOTEBOOK));

  if (notebook == NULL)
    return ERROR_INSUFFICIENT_MEMORY;

  // The page header and its data area live in the same memory block.
  YR_NOTEBOOK_PAGE* first_page = yr_malloc(
      sizeof(YR_NOTEBOOK_PAGE) + page_size);

  if (first_page == NULL)
  {
    yr_free(notebook);
    return ERROR_INSUFFICIENT_MEMORY;
  }

  first_page->used = 0;
  first_page->next = NULL;

  // The first page is both the head of the list and the page being filled.
  notebook->page_size = page_size;
  notebook->page_list_head = first_page;
  notebook->current_page = first_page;

  *pool = notebook;

  return ERROR_SUCCESS;
}


// Releases a notebook: every page reachable from the head of the page list
// is freed, and then the notebook itself. Buffers handed out by
// yr_notebook_alloc() live inside the pages, so they must not be used after
// this call. Always returns ERROR_SUCCESS.
int yr_notebook_destroy(
    YR_NOTEBOOK* pool)
{
  YR_NOTEBOOK_PAGE* remaining = pool->page_list_head;

  while (remaining != NULL)
  {
    // Step to the following page before releasing the one being visited, its
    // "next" field can't be read once the page is freed.
    YR_NOTEBOOK_PAGE* doomed = remaining;

    remaining = remaining->next;
    yr_free(doomed);
  }

  yr_free(pool);

  return ERROR_SUCCESS;
}


// Allocates a buffer of the given size from the notebook. The space is taken
// from the page that is currently being filled; if that page doesn't have
// enough free space a new page is created and linked into the notebook's
// page list, so that yr_notebook_destroy() releases it together with the
// rest of the pages.
//
// The requested size can't be larger than the notebook's page size. This is
// enforced with assert() in debug builds; when assertions are compiled out
// the request is rejected by returning NULL instead of writing past the end
// of a page.
//
// Returns a pointer to the buffer, or NULL if the size is larger than a page
// or if a new page is needed and it can't be allocated. The buffer remains
// valid until the notebook is destroyed.
void* yr_notebook_alloc(
    YR_NOTEBOOK* notebook,
    size_t size)
{
  // The requested memory size can't be larger than a notebook's page.
  assert(size <= notebook->page_size);

  // With NDEBUG the assert above disappears; never hand out a buffer that
  // doesn't fit in a page.
  if (size > notebook->page_size)
    return NULL;

  // If the requested size doesn't fit in current page's free space, allocate
  // a new page.
  if (notebook->page_size - notebook->current_page->used < size)
  {
    YR_NOTEBOOK_PAGE* new_page = yr_malloc(
        sizeof(YR_NOTEBOOK_PAGE) + notebook->page_size);

    if (new_page == NULL)
      return NULL;

    new_page->used = 0;

    // Push the new page at the front of the page list. The list head must be
    // updated too: yr_notebook_destroy() walks the list starting at
    // page_list_head, and pages that are not reachable from it would never
    // be freed.
    new_page->next = notebook->page_list_head;
    notebook->page_list_head = new_page;
    notebook->current_page = new_page;
  }

  void* ptr = notebook->current_page->data + notebook->current_page->used;

  notebook->current_page->used += size;

  return ptr;
}
1 change: 1 addition & 0 deletions windows/vs2015/libyara/libyara.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@
<ClCompile Include="..\..\..\libyara\modules\pe\pe_utils.c" />
<ClCompile Include="..\..\..\libyara\modules\tests\tests.c" />
<ClCompile Include="..\..\..\libyara\modules\time\time.c" />
<ClCompile Include="..\..\..\libyara\notebook.c" />
<ClCompile Include="..\..\..\libyara\object.c" />
<ClCompile Include="..\..\..\libyara\parser.c" />
<ClCompile Include="..\..\..\libyara\proc.c" />
Expand Down
1 change: 1 addition & 0 deletions windows/vs2017/libyara/libyara.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@
<ClCompile Include="..\..\..\libyara\modules\pe\pe_utils.c" />
<ClCompile Include="..\..\..\libyara\modules\tests\tests.c" />
<ClCompile Include="..\..\..\libyara\modules\time\time.c" />
<ClCompile Include="..\..\..\libyara\notebook.c" />
<ClCompile Include="..\..\..\libyara\object.c" />
<ClCompile Include="..\..\..\libyara\parser.c" />
<ClCompile Include="..\..\..\libyara\proc.c" />
Expand Down

0 comments on commit e7b1801

Please sign in to comment.