diff --git a/BUILD.bazel b/BUILD.bazel index 6cf217e08a..21666869e2 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -78,6 +78,8 @@ yara_library( "pe", "tests", "time", + "console", + "string", ], modules_srcs = [ "libyara/modules/cuckoo/cuckoo.c", @@ -92,6 +94,8 @@ yara_library( "libyara/modules/pe/pe_utils.c", "libyara/modules/tests/tests.c", "libyara/modules/time/time.c", + "libyara/modules/console/console.c", + "libyara/modules/string/string.c", ], deps = [ "@jansson", diff --git a/Makefile.am b/Makefile.am index ba66dd3421..95060970ee 100644 --- a/Makefile.am +++ b/Makefile.am @@ -86,6 +86,8 @@ test_bitmask_SOURCES = tests/test-bitmask.c tests/util.c test_bitmask_LDADD = libyara/.libs/libyara.a test_math_SOURCES = tests/test-math.c tests/util.c test_math_LDADD = libyara/.libs/libyara.a +test_string_SOURCES = tests/test-string.c tests/util.c +test_string_LDADD = libyara/.libs/libyara.a test_stack_SOURCES = tests/test-stack.c tests/util.c test_stack_LDADD = libyara/.libs/libyara.a test_re_split_SOURCES = tests/test-re-split.c tests/util.c @@ -109,7 +111,8 @@ check_PROGRAMS = \ test-math \ test-stack \ test-re-split \ - test-async + test-async \ + test-string EXTRA_PROGRAMS = tests/mapper CLEANFILES = tests/mapper$(EXEEXT) diff --git a/docs/modules.rst b/docs/modules.rst index ed437d17a5..a0f8bbf5bc 100644 --- a/docs/modules.rst +++ b/docs/modules.rst @@ -21,6 +21,7 @@ modules in the :ref:`writing-modules` section. Dotnet Time Console + String diff --git a/docs/modules/math.rst b/docs/modules/math.rst index bd4c855c57..b2baf4fb35 100644 --- a/docs/modules/math.rst +++ b/docs/modules/math.rst @@ -184,27 +184,3 @@ file and create signatures based on those results. *Example: math.to_string(32, 16) == "20"* *Example: math.to_string(-1, 16) == "ffffffffffffffff"* - -.. c:function:: to_int(string) - - .. versionadded:: 4.3.0 - - Convert the given string to a signed integer. If the string starts with "0x" - it is treated as base 16. 
If the string starts with "0" it is treated base - 8. Leading '+' or '-' is also supported. - - *Example: math.to_int("1234") == 1234* - *Example: math.to_int("-10") == -10* - *Example: math.to_int("-010" == -8* - -.. c:function:: to_int(string, base) - - .. versionadded:: 4.3.0 - - Convert the given string, interpreted with the given base, to a signed - integer. Base must be 0 or between 2 and 32 inclusive. If it is zero then - the string will be intrepreted as base 16 if it starts with "0x" or as base - 8 if it starts with "0". Leading '+' or '-' is also supported. - - *Example: math.to_int("011", 8) == "9"* - *Example: math.to_int("-011", 0) == "-9"* diff --git a/docs/modules/pe.rst b/docs/modules/pe.rst index 939ba9dbce..f5a234250c 100644 --- a/docs/modules/pe.rst +++ b/docs/modules/pe.rst @@ -1373,6 +1373,42 @@ Reference *Example: pe.delayed_import_details[1].name == "library_name" +.. c:function:: import_rva(dll, function) + + .. versionadded:: 4.3.0 + + Function returning the RVA of an import that matches the DLL name and + function name. + + *Example: pe.import_rva("PtImageRW.dll", "ord4") == 254924* + +.. c:function:: import_rva(dll, ordinal) + + .. versionadded:: 4.3.0 + + Function returning the RVA of an import that matches the DLL name and + ordinal number. + + *Example: pe.import_rva("PtPDF417Decode.dll", 4) == 254948* + +.. c:function:: delayed_import_rva(dll, function) + + .. versionadded:: 4.3.0 + + Function returning the RVA of a delayed import that matches the DLL name and + function name. + + *Example: pe.delayed_import_rva("QDB.dll", "ord116") == 6110705* + +.. c:function:: delayed_import_rva(dll, ordinal) + + .. versionadded:: 4.3.0 + + Function returning the RVA of a delayed import that matches the DLL name and + ordinal number. + + *Example: pe.delayed_import_rva("QDB.dll", 116) == 6110705* + .. c:function:: locale(locale_identifier) .. 
versionadded:: 3.2.0 diff --git a/docs/modules/string.rst b/docs/modules/string.rst new file mode 100644 index 0000000000..2a2424b28c --- /dev/null +++ b/docs/modules/string.rst @@ -0,0 +1,46 @@ + +.. _string-module: + +############# +String module +############# + +.. versionadded:: 4.3.0 + +The String module provides functions for manipulating strings as returned by +modules. The strings referenced here are not YARA strings as defined in the +strings section of your rule. + +.. c:function:: to_int(string) + + .. versionadded:: 4.3.0 + + Convert the given string to a signed integer. If the string starts with "0x" + it is treated as base 16. If the string starts with "0" it is treated as base + 8. A leading '+' or '-' is also supported. + + *Example: string.to_int("1234") == 1234* + *Example: string.to_int("-10") == -10* + *Example: string.to_int("-010") == -8* + +.. c:function:: to_int(string, base) + + .. versionadded:: 4.3.0 + + Convert the given string, interpreted with the given base, to a signed + integer. Base must be 0 or between 2 and 36 inclusive. If it is zero then + the string will be interpreted as base 16 if it starts with "0x" or as base + 8 if it starts with "0". A leading '+' or '-' is also supported. + + *Example: string.to_int("011", 8) == 9* + *Example: string.to_int("-011", 0) == -9* + +.. c:function:: length(string) + + .. versionadded:: 4.3.0 + + Return the length of the string, which can be any sequence of bytes, NUL + bytes included. 
+ + *Example: string.length("AXS\x00ERS") == 7* + diff --git a/libyara/Makefile.am b/libyara/Makefile.am index 628194a02d..a608913ff5 100644 --- a/libyara/Makefile.am +++ b/libyara/Makefile.am @@ -48,6 +48,8 @@ MODULES += modules/pe/pe_utils.c MODULES += modules/console/console.c +MODULES += modules/string/string.c + if CUCKOO_MODULE MODULES += modules/cuckoo/cuckoo.c endif diff --git a/libyara/include/yara/pe.h b/libyara/include/yara/pe.h index 004e3c100b..096e75afb0 100644 --- a/libyara/include/yara/pe.h +++ b/libyara/include/yara/pe.h @@ -874,6 +874,9 @@ typedef struct _RICH_SIGNATURE #define RICH_DANS 0x536e6144 // "DanS" #define RICH_RICH 0x68636952 // "Rich" +#define PE_PAGE_SIZE 0x1000 +#define PE_SECTOR_SIZE 0x0200 + #pragma pack(pop) #endif diff --git a/libyara/modules/math/math.c b/libyara/modules/math/math.c index 9aa70b8942..98193c4a63 100644 --- a/libyara/modules/math/math.c +++ b/libyara/modules/math/math.c @@ -28,7 +28,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include -#include #include #include #include @@ -759,21 +758,6 @@ define_function(to_string_base) return_string(&str); } -define_function(to_int) -{ - char* s = string_argument(1); - int64_t result = strtoll(s, NULL, 0); - return_integer(result == 0 && errno ? YR_UNDEFINED : result); -} - -define_function(to_int_base) -{ - char* s = string_argument(1); - int64_t base = integer_argument(2); - int64_t result = strtoll(s, NULL, base); - return_integer(result == 0 && errno ? 
YR_UNDEFINED : result); -} - begin_declarations declare_float("MEAN_BYTES"); declare_function("in_range", "fff", "i", in_range); @@ -799,8 +783,6 @@ begin_declarations declare_function("mode", "", "i", mode_global); declare_function("to_string", "i", "s", to_string); declare_function("to_string", "ii", "s", to_string_base); - declare_function("to_int", "s", "i", to_int); - declare_function("to_int", "si", "i", to_int_base); end_declarations int module_initialize(YR_MODULE* module) diff --git a/libyara/modules/module_list b/libyara/modules/module_list index c612a54695..35e848aed2 100644 --- a/libyara/modules/module_list +++ b/libyara/modules/module_list @@ -4,6 +4,7 @@ MODULE(elf) MODULE(math) MODULE(time) MODULE(console) +MODULE(string) #ifdef DOTNET_MODULE MODULE(dotnet) diff --git a/libyara/modules/pe/pe.c b/libyara/modules/pe/pe.c index 0c4c832c79..61da53cc0f 100644 --- a/libyara/modules/pe/pe.c +++ b/libyara/modules/pe/pe.c @@ -2949,6 +2949,186 @@ define_function(imports_dll) return_integer(result); } +define_function(import_rva) +{ /* pe.import_rva(dll, function): walks import_details for a library whose name compares equal to argument 1 and a function whose name compares equal to argument 2, and returns that entry's "rva"; YR_UNDEFINED when nothing matches. */ + SIZED_STRING* in_dll_name = sized_string_argument(1); + SIZED_STRING* in_function_name = sized_string_argument(2); + + SIZED_STRING* dll_name; + SIZED_STRING* function_name; + YR_OBJECT* module = yr_module(); + PE* pe = (PE*) module->data; + + if (!pe) /* module->data is NULL */ + return_integer(YR_UNDEFINED); + + int64_t num_imports = yr_get_integer(pe->object, "number_of_imports"); + if (IS_UNDEFINED(num_imports)) + return_integer(YR_UNDEFINED); + + for (int i = 0; i < num_imports; i++) + { + dll_name = yr_get_string(module, "import_details[%i].library_name", i); + if (dll_name == NULL || IS_UNDEFINED(dll_name) || + ss_compare(in_dll_name, dll_name) != 0) /* NOTE(review): ss_compare looks like an exact comparison, so DLL-name case would matter - confirm this is intended */ + continue; + + int64_t num_functions = yr_get_integer( + module, "import_details[%i].number_of_functions", i); + if (IS_UNDEFINED(num_functions)) /* ends the whole lookup; later libraries are not examined */ + return_integer(YR_UNDEFINED); + + for (int j = 0; j < num_functions; j++) + { + function_name = yr_get_string( + module, 
"import_details[%i].functions[%i].name", i, j); + if (function_name == NULL || IS_UNDEFINED(function_name)) + continue; /* entry has no usable name: try the next function */ + + if (ss_compare(in_function_name, function_name) == 0) + return_integer(yr_get_integer( + module, "import_details[%i].functions[%i].rva", i, j)); + } + } + + return_integer(YR_UNDEFINED); +} + +define_function(import_rva_ordinal) +{ /* pe.import_rva(dll, ordinal): same walk over import_details as the name-based overload, but a function matches when its "ordinal" field equals argument 2; returns that entry's "rva", or YR_UNDEFINED when nothing matches. */ + SIZED_STRING* in_dll_name = sized_string_argument(1); + int64_t in_ordinal = integer_argument(2); + + SIZED_STRING* dll_name; + int64_t ordinal; + YR_OBJECT* module = yr_module(); + PE* pe = (PE*) module->data; + + if (!pe) /* module->data is NULL */ + return_integer(YR_UNDEFINED); + + int64_t num_imports = yr_get_integer(pe->object, "number_of_imports"); + if (IS_UNDEFINED(num_imports)) + return_integer(YR_UNDEFINED); + + for (int i = 0; i < num_imports; i++) + { + dll_name = yr_get_string(module, "import_details[%i].library_name", i); + if (dll_name == NULL || IS_UNDEFINED(dll_name) || + ss_compare(in_dll_name, dll_name) != 0) + continue; /* name unavailable or different: next library */ + + int64_t num_functions = yr_get_integer( + module, "import_details[%i].number_of_functions", i); + if (IS_UNDEFINED(num_functions)) /* ends the whole lookup; later libraries are not examined */ + return_integer(YR_UNDEFINED); + + for (int j = 0; j < num_functions; j++) + { + ordinal = yr_get_integer( + module, "import_details[%i].functions[%i].ordinal", i, j); + if (IS_UNDEFINED(ordinal)) + continue; /* entry carries no ordinal: try the next function */ + + if (ordinal == in_ordinal) + return_integer(yr_get_integer( + module, "import_details[%i].functions[%i].rva", i, j)); + } + } + + return_integer(YR_UNDEFINED); +} + +define_function(delayed_import_rva) +{ /* pe.delayed_import_rva(dll, function): name-based lookup over delayed_import_details (count read from number_of_delayed_imports); returns the matching entry's "rva", or YR_UNDEFINED when nothing matches. */ + SIZED_STRING* in_dll_name = sized_string_argument(1); + SIZED_STRING* in_function_name = sized_string_argument(2); + + SIZED_STRING* dll_name; + SIZED_STRING* function_name; + YR_OBJECT* module = yr_module(); + PE* pe = (PE*) module->data; + + if (!pe) /* module->data is NULL */ + return_integer(YR_UNDEFINED); + + int64_t num_imports = yr_get_integer(pe->object, "number_of_delayed_imports"); + if (IS_UNDEFINED(num_imports)) + return_integer(YR_UNDEFINED); + + for (int i = 0; i < 
num_imports; i++) + { + dll_name = yr_get_string(module, "delayed_import_details[%i].library_name", i); + if (dll_name == NULL || IS_UNDEFINED(dll_name) || + ss_compare(in_dll_name, dll_name) != 0) + continue; /* name unavailable or different: next library */ + + int64_t num_functions = yr_get_integer( + module, "delayed_import_details[%i].number_of_functions", i); + if (IS_UNDEFINED(num_functions)) /* ends the whole lookup; later libraries are not examined */ + return_integer(YR_UNDEFINED); + + for (int j = 0; j < num_functions; j++) + { + function_name = yr_get_string( + module, "delayed_import_details[%i].functions[%i].name", i, j); + if (function_name == NULL || IS_UNDEFINED(function_name)) + continue; /* entry has no usable name: try the next function */ + + if (ss_compare(in_function_name, function_name) == 0) + return_integer(yr_get_integer( + module, "delayed_import_details[%i].functions[%i].rva", i, j)); + } + } + + return_integer(YR_UNDEFINED); +} + +define_function(delayed_import_rva_ordinal) +{ /* pe.delayed_import_rva(dll, ordinal): ordinal-based lookup over delayed_import_details; returns the matching entry's "rva", or YR_UNDEFINED when nothing matches. */ + SIZED_STRING* in_dll_name = sized_string_argument(1); + int64_t in_ordinal = integer_argument(2); + + SIZED_STRING* dll_name; + int64_t ordinal; + YR_OBJECT* module = yr_module(); + PE* pe = (PE*) module->data; + + if (!pe) /* module->data is NULL */ + return_integer(YR_UNDEFINED); + + int64_t num_imports = yr_get_integer(pe->object, "number_of_delayed_imports"); + if (IS_UNDEFINED(num_imports)) + return_integer(YR_UNDEFINED); + + for (int i = 0; i < num_imports; i++) + { + dll_name = yr_get_string(module, "delayed_import_details[%i].library_name", i); + if (dll_name == NULL || IS_UNDEFINED(dll_name) || + ss_compare(in_dll_name, dll_name) != 0) + continue; /* name unavailable or different: next library */ + + int64_t num_functions = yr_get_integer( + module, "delayed_import_details[%i].number_of_functions", i); + if (IS_UNDEFINED(num_functions)) /* ends the whole lookup; later libraries are not examined */ + return_integer(YR_UNDEFINED); + + for (int j = 0; j < num_functions; j++) + { + ordinal = yr_get_integer( + module, "delayed_import_details[%i].functions[%i].ordinal", i, j); + if (IS_UNDEFINED(ordinal)) + continue; /* entry carries no ordinal: try the next function */ + + if (ordinal == in_ordinal) + return_integer(yr_get_integer( + module, "delayed_import_details[%i].functions[%i].rva", i, j)); + } + } + + 
return_integer(YR_UNDEFINED); +} + define_function(locale) { YR_OBJECT* module = yr_module(); @@ -3490,6 +3670,10 @@ begin_declarations declare_function("imports", "isi", "i", imports_ordinal); declare_function("imports", "is", "i", imports_dll); declare_function("imports", "irr", "i", imports_regex); + declare_function("import_rva", "ss", "i", import_rva); + declare_function("import_rva", "si", "i", import_rva_ordinal); + declare_function("delayed_import_rva", "ss", "i", delayed_import_rva); + declare_function("delayed_import_rva", "si", "i", delayed_import_rva_ordinal); declare_function("locale", "i", "i", locale); declare_function("language", "i", "i", language); declare_function("is_dll", "", "i", is_dll); diff --git a/libyara/modules/pe/pe_utils.c b/libyara/modules/pe/pe_utils.c index 3716411411..3db6774b8f 100644 --- a/libyara/modules/pe/pe_utils.c +++ b/libyara/modules/pe/pe_utils.c @@ -141,8 +141,13 @@ int64_t pe_rva_to_offset(PE* pe, uint64_t rva) lowest_section_rva = yr_le32toh(section->VirtualAddress); } + uint32_t virtualSize = yr_le32toh( + section->Misc.VirtualSize != 0 ? 
+ section->Misc.VirtualSize : + section->SizeOfRawData); + if (rva >= yr_le32toh(section->VirtualAddress) && - rva - yr_le32toh(section->VirtualAddress) < yr_le32toh(section->Misc.VirtualSize) && + rva - yr_le32toh(section->VirtualAddress) < virtualSize && section_rva <= yr_le32toh(section->VirtualAddress)) { // Round section_offset @@ -170,6 +175,11 @@ int64_t pe_rva_to_offset(PE* pe, uint64_t rva) if (rest) section_offset -= rest; } + + // For multi-section images, real pointer to raw data is aligned down to sector size + if (yr_le32toh(OptionalHeader(pe, SectionAlignment)) >= PE_PAGE_SIZE) + section_offset = section_offset & ~(PE_SECTOR_SIZE - 1); + } section++; diff --git a/libyara/modules/string/string.c b/libyara/modules/string/string.c new file mode 100644 index 0000000000..c6f6b17eb2 --- /dev/null +++ b/libyara/modules/string/string.c @@ -0,0 +1,88 @@ +/* +Copyright (c) 2014-2022. The YARA Authors. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation and/or +other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors +may be used to endorse or promote products derived from this software without +specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include +#include +#include +#include +#include + +#define MODULE_NAME string + +define_function(to_int) +{ + char* s = string_argument(1); + int64_t result = strtoll(s, NULL, 0); + return_integer(result == 0 && errno ? YR_UNDEFINED : result); +} + +define_function(to_int_base) +{ + char* s = string_argument(1); + int64_t base = integer_argument(2); + int64_t result = strtoll(s, NULL, base); + return_integer(result == 0 && errno ? YR_UNDEFINED : result); +} + +define_function(string_length) +{ + SIZED_STRING* s = sized_string_argument(1); + return_integer(s->length); +} + +begin_declarations + declare_function("to_int", "s", "i", to_int); + declare_function("to_int", "si", "i", to_int_base); + declare_function("length", "s", "i", string_length); +end_declarations + +int module_initialize(YR_MODULE* module) +{ + return ERROR_SUCCESS; +} + +int module_finalize(YR_MODULE* module) +{ + return ERROR_SUCCESS; +} + +int module_load( + YR_SCAN_CONTEXT* context, + YR_OBJECT* module_object, + void* module_data, + size_t module_data_size) +{ + return ERROR_SUCCESS; +} + +int module_unload(YR_OBJECT* module_object) +{ + return ERROR_SUCCESS; +} diff --git a/tests/BUILD.bazel b/tests/BUILD.bazel index c1ea88e535..4da4db7e96 100644 --- a/tests/BUILD.bazel +++ b/tests/BUILD.bazel @@ -183,6 +183,17 @@ cc_test( ], ) +cc_test( + name = "test_string", + srcs = ["test-string.c"], + copts = COPTS, + linkstatic = True, + deps = [ + ":util", + "@//:libyara", 
+ ], +) + cc_test( name = "test_stack", srcs = ["test-stack.c"], diff --git a/tests/data/c6f9709feccf42f2d9e22057182fe185f177fb9daaa2649b4669a24f2ee7e3ba_0h_410h b/tests/data/c6f9709feccf42f2d9e22057182fe185f177fb9daaa2649b4669a24f2ee7e3ba_0h_410h new file mode 100644 index 0000000000..606af93589 Binary files /dev/null and b/tests/data/c6f9709feccf42f2d9e22057182fe185f177fb9daaa2649b4669a24f2ee7e3ba_0h_410h differ diff --git a/tests/test-math.c b/tests/test-math.c index 6e4bb02403..89dcabcb8f 100644 --- a/tests/test-math.c +++ b/tests/test-math.c @@ -284,79 +284,6 @@ int main(int argc, char** argv) }", NULL); - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"1234\") == 1234 \ - }", - NULL); - - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"-1\") == -1 \ - }", - NULL); - - // Leading spaces and + are allowed. - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\" +1\") == 1 \ - }", - NULL); - - // Strings can be prefixed with 0x and will be interpreted as hexadecimal. - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"0x10\") == 16 \ - }", - NULL); - - // Strings prefixed with 0 will be interpreted as octal. - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"010\") == 8 \ - }", - NULL); - - // Strings that are only partially converted are still fine. - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"10A20\") == 10 \ - }", - NULL); - - assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"10\", 8) == 8 \ - }", - NULL); - - // Base 0 is a special case that tries to interpret the string by prefix, or - // default to decimal. We aren't doing anything special to get this, it is - // part of strtoll by default. 
- assert_true_rule( - "import \"math\" \ - rule test { \ - condition: \ - math.to_int(\"010\", 0) == 8 and \ - math.to_int(\"0x10\", 0) == 16 and \ - math.to_int(\"10\", 0) == 10 \ - }", - NULL); - yr_finalize(); YR_DEBUG_FPRINTF( diff --git a/tests/test-pe.c b/tests/test-pe.c index 2eaca1c848..4bb1b6b97a 100644 --- a/tests/test-pe.c +++ b/tests/test-pe.c @@ -873,6 +873,37 @@ int main(int argc, char** argv) }", "tests/data/pe_mingw"); + assert_true_rule_file( + "import \"pe\" \ + rule test { \ + condition: \ + pe.import_rva(\"PtImageRW.dll\", \"ord4\") == 254924 and \ + pe.import_rva(\"PtPDF417Decode.dll\", 4) == 254948 \ + }", + "tests/data/" + "ca21e1c32065352d352be6cde97f89c141d7737ea92434831f998080783d5386"); + + assert_true_rule_file( + "import \"pe\" \ + rule test { \ + condition: \ + pe.delayed_import_rva(\"QDB.dll\", \"ord116\") == \ + pe.delayed_import_rva(\"QDB.dll\", 116) \ + }", + "tests/data/" + "079a472d22290a94ebb212aa8015cdc8dd28a968c6b4d3b88acdd58ce2d3b885"); + + // The first 0x410 bytes of + // c6f9709feccf42f2d9e22057182fe185f177fb9daaa2649b4669a24f2ee7e3ba are enough + // to trigger the bug in https://github.com/VirusTotal/yara/pull/1561 + assert_true_rule_file( + "import \"pe\" \ + rule rva_to_offset_weird_sections { \ + condition: \ + pe.rva_to_offset(4096) == 1024 \ + }", + "tests/data/c6f9709feccf42f2d9e22057182fe185f177fb9daaa2649b4669a24f2ee7e3ba_0h_410h"); + yr_finalize(); YR_DEBUG_FPRINTF(