diff --git a/CHANGELOG.md b/CHANGELOG.md index 2de58a7b9..bc8910bd5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,21 @@ This is a list of notable changes to Hyperscan, in reverse chronological order. +## [5.2.0] 2019-07-12 +- Literal API: add new API `hs_compile_lit()` and `hs_compile_lit_multi()` to + process pure literal rule sets. The 2 literal APIs treat each expression text + in a literal sense without recognizing any regular grammars. +- Logical combination: add support for purely negative combinations, which + report match at EOD in case of no sub-expressions matched. +- Windows porting: support shared library (DLL) on Windows with available tools + hscheck, hsbench and hsdump. +- Bugfix for issue #148: fix uninitialized use of `scatter_unit_uX` due to + padding. +- Bugfix for issue #155: fix numerical result out of range error. +- Bugfix for issue #165: avoid corruption of pending combination report in + streaming mode. +- Bugfix for issue #174: fix scratch free issue when memory allocation fails. + ## [5.1.1] 2019-04-03 - Add extra detection and handling when invalid rose programs are triggered. - Bugfix for issue #136: fix CMake parsing of CPU architecure for GCC-9.
diff --git a/CMakeLists.txt b/CMakeLists.txt index d39953629..3801f994a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,8 +2,8 @@ cmake_minimum_required (VERSION 2.8.11) project (hyperscan C CXX) set (HS_MAJOR_VERSION 5) -set (HS_MINOR_VERSION 1) -set (HS_PATCH_VERSION 1) +set (HS_MINOR_VERSION 2) +set (HS_PATCH_VERSION 0) set (HS_VERSION ${HS_MAJOR_VERSION}.${HS_MINOR_VERSION}.${HS_PATCH_VERSION}) set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) @@ -31,6 +31,7 @@ else() endif() if(CMAKE_BUILD_TYPE MATCHES RELEASE|RELWITHDEBINFO|MINSIZEREL) + message(STATUS "using release build") set(RELEASE_BUILD TRUE) else() set(RELEASE_BUILD FALSE) @@ -109,11 +110,9 @@ option(BUILD_SHARED_LIBS "Build shared libs instead of static" OFF) option(BUILD_STATIC_AND_SHARED "Build shared libs as well as static" OFF) if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) - if (WIN32) - message(FATAL_ERROR "Windows DLLs currently not supported") - else() message(STATUS "Building shared libraries") - endif() +else() + message(STATUS "Building static libraries") endif() if (NOT BUILD_SHARED_LIBS) @@ -151,9 +150,6 @@ if(MSVC OR MSVC_IDE) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O3 /Qstd=c99 /Qrestrict /wd4267 /Qdiag-disable:remark") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 /Qstd=c++11 /Qrestrict /QxHost /wd4267 /wd4800 /Qdiag-disable:remark -DBOOST_DETAIL_NO_CONTAINER_FWD -D_SCL_SECURE_NO_WARNINGS") else() - # todo: change these as required - set(ARCH_C_FLAGS "/arch:AVX2") - set(ARCH_CXX_FLAGS "/arch:AVX2") set(MSVC_WARNS "/wd4101 /wd4146 /wd4172 /wd4200 /wd4244 /wd4267 /wd4307 /wd4334 /wd4805 /wd4996 -D_CRT_SECURE_NO_WARNINGS") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} /O2 ${MSVC_WARNS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /O2 ${MSVC_WARNS} /wd4800 -DBOOST_DETAIL_NO_CONTAINER_FWD") @@ -1298,12 +1294,14 @@ endif() if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) if (NOT FAT_RUNTIME) add_library(hs_runtime_shared SHARED src/hs_version.c - src/hs_valid_platform.c $) + 
src/hs_valid_platform.c $ + hs_runtime.def) else() add_library(hs_runtime_shared SHARED src/hs_version.c src/hs_valid_platform.c $ - ${RUNTIME_SHLIBS}) + ${RUNTIME_SHLIBS} + hs_runtime.def) endif() set_target_properties(hs_runtime_shared PROPERTIES VERSION ${LIB_VERSION} @@ -1349,7 +1347,7 @@ if (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS) ${RUNTIME_SHLIBS}) endif () - add_library(hs_shared SHARED ${hs_shared_SRCS}) + add_library(hs_shared SHARED ${hs_shared_SRCS} hs.def) add_dependencies(hs_shared ragel_Parser) set_target_properties(hs_shared PROPERTIES diff --git a/chimera/ch_compile.cpp b/chimera/ch_compile.cpp index 374bd7ad8..46536f312 100644 --- a/chimera/ch_compile.cpp +++ b/chimera/ch_compile.cpp @@ -322,7 +322,7 @@ PatternData::PatternData(const char *pattern, u32 flags, u32 idx, u32 id_in, ch_misc_free(info); u32 guardflags; - guardflags = (flags | HS_FLAG_PREFILTER) & ~HS_FLAG_SINGLEMATCH; + guardflags = flags | HS_FLAG_PREFILTER; guard = isHyperscanSupported(pattern, guardflags, platform); } else { // We can't even prefilter this pattern, so we're dependent on Big Dumb diff --git a/doc/dev-reference/compilation.rst b/doc/dev-reference/compilation.rst index 214f4abc4..5d2c70f79 100644 --- a/doc/dev-reference/compilation.rst +++ b/doc/dev-reference/compilation.rst @@ -54,6 +54,75 @@ version of Hyperscan used to scan with it. Hyperscan provides support for targeting a database at a particular CPU platform; see :ref:`instr_specialization` for details. +===================== +Compile Pure Literals +===================== + +Pure literal is a special case of regular expression. A character sequence is +regarded as a pure literal if and only if each character is read and +interpreted independently. No syntax association happens between any adjacent +characters. + +For example, given an expression written as :regexp:`/bc?/`. We could say it is +a regular expression, with the meaning that character ``b`` followed by nothing +or by one character ``c``.
From another view, we could also say it is a pure +literal expression, with the meaning that this is a character sequence of 3-byte +length, containing characters ``b``, ``c`` and ``?``. In the regular case, the +question mark character ``?`` has a particular syntax role called 0-1 quantifier, +which has a syntax association with the character ahead of it. Similar +characters exist in regular grammar, like ``[``, ``]``, ``(``, ``)``, ``{``, +``}``, ``-``, ``*``, ``+``, ``\``, ``|``, ``/``, ``:``, ``^``, ``.``, ``$``. +In the pure literal case, all these meta characters lose their extra meanings +and are treated as just common ASCII codes. + +Hyperscan was initially designed to process common regular expressions. It is +hence embedded with a complex parser to do comprehensive regular grammar +interpretation. Particularly, the identification of the above meta characters is the +basic step for the interpretation of far more complex regular grammars. + +However, in real cases, patterns may not always be regular expressions. They +could just be pure literals. Problems arise if the pure literals contain +regular meta characters. If they are fed directly into the traditional Hyperscan +compile API, all these meta characters will be interpreted in predefined ways, +which is unnecessary and gives results that are not what the user expects. To avoid +such misinterpretation by the traditional API, users have to preprocess these +literal patterns by converting the meta characters into some other formats: +either by adding a backslash ``\`` before certain meta characters, or by +converting all the characters into a hexadecimal representation. + +In ``v5.2.0``, Hyperscan introduces 2 new compile APIs for pure literal patterns: + +#. :c:func:`hs_compile_lit`: compiles a single pure literal into a pattern + database. + +#. :c:func:`hs_compile_lit_multi`: compiles an array of pure literals into a + pattern database.
All of the supplied patterns will be scanned for + concurrently at scan time, with user-supplied identifiers returned when they + match. + +These 2 APIs are designed for use cases where all patterns contained in the +target rule set are pure literals. Users can pass the initial pure literal +content directly into these APIs without worrying about writing regular meta +characters in their patterns. No preprocessing work is needed any more. + +For new APIs, the ``length`` of each literal pattern is a newly added parameter. +Hyperscan needs to locate the end position of the input expression via clearly +knowing each literal's length, not by simply identifying character ``\0`` of a +string. + +Supported flags: :c:member:`HS_FLAG_CASELESS`, :c:member:`HS_FLAG_MULTILINE`, +:c:member:`HS_FLAG_SINGLEMATCH`, :c:member:`HS_FLAG_SOM_LEFTMOST`. + +.. note:: We don't support literal compilation API with :ref:`extparam`. And + for runtime implementation, traditional runtime APIs can still be + used to match pure literal patterns. + +.. note:: If the target rule set contains at least one regular expression, + please use traditional compile APIs :c:func:`hs_compile`, + :c:func:`hs_compile_multi` and :c:func:`hs_compile_ext_multi`. + The new literal APIs introduced here are designed for rule sets + containing only pure literal expressions. 
+ *************** Pattern Support *************** diff --git a/hs.def b/hs.def new file mode 100644 index 000000000..28f7877c6 --- /dev/null +++ b/hs.def @@ -0,0 +1,43 @@ +; Hyperscan DLL export definitions + +LIBRARY hs + +EXPORTS + hs_alloc_scratch + hs_clone_scratch + hs_close_stream + hs_compile + hs_compile_ext_multi + hs_compile_multi + hs_compress_stream + hs_copy_stream + hs_database_info + hs_database_size + hs_deserialize_database + hs_deserialize_database_at + hs_expand_stream + hs_expression_ext_info + hs_expression_info + hs_free_compile_error + hs_free_database + hs_free_scratch + hs_open_stream + hs_populate_platform + hs_reset_and_copy_stream + hs_reset_and_expand_stream + hs_reset_stream + hs_scan + hs_scan_stream + hs_scan_vector + hs_scratch_size + hs_serialize_database + hs_serialized_database_info + hs_serialized_database_size + hs_set_allocator + hs_set_database_allocator + hs_set_misc_allocator + hs_set_scratch_allocator + hs_set_stream_allocator + hs_stream_size + hs_valid_platform + hs_version diff --git a/hs_runtime.def b/hs_runtime.def new file mode 100644 index 000000000..6c434bedb --- /dev/null +++ b/hs_runtime.def @@ -0,0 +1,36 @@ +; Hyperscan DLL export definitions + +LIBRARY hs_runtime + +EXPORTS + hs_alloc_scratch + hs_clone_scratch + hs_close_stream + hs_compress_stream + hs_copy_stream + hs_database_info + hs_database_size + hs_deserialize_database + hs_deserialize_database_at + hs_expand_stream + hs_free_database + hs_free_scratch + hs_open_stream + hs_reset_and_copy_stream + hs_reset_and_expand_stream + hs_reset_stream + hs_scan + hs_scan_stream + hs_scan_vector + hs_scratch_size + hs_serialize_database + hs_serialized_database_info + hs_serialized_database_size + hs_set_allocator + hs_set_database_allocator + hs_set_misc_allocator + hs_set_scratch_allocator + hs_set_stream_allocator + hs_stream_size + hs_valid_platform + hs_version \ No newline at end of file diff --git a/src/compiler/compiler.cpp b/src/compiler/compiler.cpp 
index a34eadd0f..3382ff421 100644 --- a/src/compiler/compiler.cpp +++ b/src/compiler/compiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -56,11 +56,13 @@ #include "parser/unsupported.h" #include "parser/utf8_validate.h" #include "rose/rose_build.h" +#include "rose/rose_internal.h" #include "som/slot_manager_dump.h" #include "util/bytecode_ptr.h" #include "util/compile_error.h" #include "util/target_info.h" #include "util/verify_types.h" +#include "util/ue2string.h" #include #include @@ -107,6 +109,46 @@ void validateExt(const hs_expr_ext &ext) { } +void ParsedLitExpression::parseLiteral(const char *expression, size_t len, + bool nocase) { + const char *c = expression; + for (size_t i = 0; i < len; i++) { + lit.push_back(*c, nocase); + c++; + } +} + +ParsedLitExpression::ParsedLitExpression(unsigned index_in, + const char *expression, + size_t expLength, unsigned flags, + ReportID report) + : expr(index_in, false, flags & HS_FLAG_SINGLEMATCH, false, false, + SOM_NONE, report, 0, MAX_OFFSET, 0, 0, 0, false) { + // For pure literal expression, below 'HS_FLAG_'s are unuseful: + // DOTALL/ALLOWEMPTY/UTF8/UCP/PREFILTER/COMBINATION/QUIET + + if (flags & ~HS_FLAG_ALL) { + DEBUG_PRINTF("Unrecognised flag, flags=%u.\n", flags); + throw CompileError("Unrecognised flag."); + } + + // FIXME: we disallow highlander + SOM, see UE-1850. + if ((flags & HS_FLAG_SINGLEMATCH) && (flags & HS_FLAG_SOM_LEFTMOST)) { + throw CompileError("HS_FLAG_SINGLEMATCH is not supported in " + "combination with HS_FLAG_SOM_LEFTMOST."); + } + + // Set SOM type. + if (flags & HS_FLAG_SOM_LEFTMOST) { + expr.som = SOM_LEFT; + } + + // Transfer expression text into ue2_literal. + bool nocase = flags & HS_FLAG_CASELESS ? 
true : false; + parseLiteral(expression, expLength, nocase); + +} + ParsedExpression::ParsedExpression(unsigned index_in, const char *expression, unsigned flags, ReportID report, const hs_expr_ext *ext) @@ -345,6 +387,49 @@ void addExpression(NG &ng, unsigned index, const char *expression, } } +void addLitExpression(NG &ng, unsigned index, const char *expression, + unsigned flags, const hs_expr_ext *ext, ReportID id, + size_t expLength) { + assert(expression); + const CompileContext &cc = ng.cc; + DEBUG_PRINTF("index=%u, id=%u, flags=%u, expr='%s', len='%zu'\n", index, + id, flags, expression, expLength); + + // Extended parameters are not supported for pure literal patterns. + if (ext && ext->flags != 0LLU) { + throw CompileError("Extended parameters are not supported for pure " + "literal matching API."); + } + + // Ensure that our pattern isn't too long; it may contain embedded NULs. + if (expLength > cc.grey.limitPatternLength) { + throw CompileError("Pattern length exceeds limit."); + } + + // filter out flags not supported by pure literal API. + u64a not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 | + HS_FLAG_UCP | HS_FLAG_PREFILTER | HS_FLAG_COMBINATION | + HS_FLAG_QUIET; + + if (flags & not_supported) { + throw CompileError("Only HS_FLAG_CASELESS, HS_FLAG_MULTILINE, " + "HS_FLAG_SINGLEMATCH and HS_FLAG_SOM_LEFTMOST are " + "supported in literal API."); + } + + // This expression must be a pure literal, we can build ue2_literal + // directly based on expression text. + ParsedLitExpression ple(index, expression, expLength, flags, id); + + // Feed the ue2_literal into Rose.
+ const auto &expr = ple.expr; + if (ng.addLiteral(ple.lit, expr.index, expr.report, expr.highlander, + expr.som, expr.quiet)) { + DEBUG_PRINTF("took pure literal\n"); + return; + } +} + static bytecode_ptr generateRoseEngine(NG &ng) { const u32 minWidth = @@ -416,10 +501,13 @@ hs_database_t *dbCreate(const char *in_bytecode, size_t len, u64a platform) { } -struct hs_database *build(NG &ng, unsigned int *length) { +struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag) { assert(length); auto rose = generateRoseEngine(ng); if (!rose) { throw CompileError("Unable to generate bytecode."); } + // Tag the engine only after the null check above has passed. + struct RoseEngine *roseHead = rose.get(); + roseHead->pureLiteral = pureFlag; diff --git a/src/compiler/compiler.h b/src/compiler/compiler.h index 60d7ca33c..b42cb1425 100644 --- a/src/compiler/compiler.h +++ b/src/compiler/compiler.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -38,6 +38,7 @@ #include "compiler/expression_info.h" #include "parser/Component.h" #include "util/noncopyable.h" +#include "util/ue2string.h" #include @@ -66,6 +67,22 @@ class ParsedExpression : noncopyable { std::unique_ptr component; }; + +/** \brief Class gathering together the pieces of a parsed lit-expression. */ +class ParsedLitExpression : noncopyable { +public: + ParsedLitExpression(unsigned index, const char *expression, + size_t expLength, unsigned flags, ReportID report); + + void parseLiteral(const char *expression, size_t len, bool nocase); + + /** \brief Expression information (from flags, extparam etc) */ + ExpressionInfo expr; + + /** \brief Format the lit-expression text into Hyperscan literal type. */ + ue2_literal lit; +}; + /** * \brief Class gathering together the pieces of an expression that has been * built into an NFA graph.
@@ -99,6 +116,10 @@ struct BuiltExpression { void addExpression(NG &ng, unsigned index, const char *expression, unsigned flags, const hs_expr_ext *ext, ReportID report); +void addLitExpression(NG &ng, unsigned index, const char *expression, + unsigned flags, const hs_expr_ext *ext, ReportID id, + size_t expLength); + /** * Build a Hyperscan database out of the expressions we've been given. A * fatal error will result in an exception being thrown. @@ -107,11 +128,13 @@ void addExpression(NG &ng, unsigned index, const char *expression, * The global NG object. * @param[out] length * The number of bytes occupied by the compiled structure. + * @param pureFlag + * The flag indicating invocation from literal API or not. * @return * The compiled structure. Should be deallocated with the * hs_database_free() function. */ -struct hs_database *build(NG &ng, unsigned int *length); +struct hs_database *build(NG &ng, unsigned int *length, u8 pureFlag); /** * Constructs an NFA graph from the given expression tree. 
diff --git a/src/dispatcher.c b/src/dispatcher.c index 70b82277a..a786b806d 100644 --- a/src/dispatcher.c +++ b/src/dispatcher.c @@ -51,7 +51,7 @@ } \ \ /* resolver */ \ - static void(*JOIN(resolve_, NAME)(void)) { \ + static RTYPE (*JOIN(resolve_, NAME)(void))(__VA_ARGS__) { \ if (check_avx512()) { \ return JOIN(avx512_, NAME); \ } \ diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp index 65c5020ef..fcfc08638 100644 --- a/src/fdr/fdr_compile.cpp +++ b/src/fdr/fdr_compile.cpp @@ -282,8 +282,8 @@ const array Scorer::count_lut{{ }}; const array Scorer::len_lut{{ - pow(0, -3.0), pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0), - pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}}; + 0, pow(1, -3.0), pow(2, -3.0), pow(3, -3.0), pow(4, -3.0), + pow(5, -3.0), pow(6, -3.0), pow(7, -3.0), pow(8, -3.0)}}; /** * Returns true if the two given literals should be placed in the same chunk as @@ -807,9 +807,6 @@ void findIncludedLits(vector &lits, for (size_t i = 0; i < cnt; i++) { u32 bucket1 = group[i].first; u32 id1 = group[i].second; - if (lits[id1].pure) { - continue; - } buildSquashMask(lits, id1, bucket1, i + 1, group, parent_map, exception_map); } diff --git a/src/fdr/fdr_confirm.h b/src/fdr/fdr_confirm.h index 9490df43f..a23082cc6 100644 --- a/src/fdr/fdr_confirm.h +++ b/src/fdr/fdr_confirm.h @@ -62,7 +62,6 @@ struct LitInfo { u8 size; u8 flags; //!< bitfield of flags from FDR_LIT_FLAG_* above. u8 next; - u8 pure; //!< The pass-on of pure flag from hwlmLiteral. 
}; #define FDRC_FLAG_NO_CONFIRM 1 diff --git a/src/fdr/fdr_confirm_compile.cpp b/src/fdr/fdr_confirm_compile.cpp index 3eab21b20..8e3690895 100644 --- a/src/fdr/fdr_confirm_compile.cpp +++ b/src/fdr/fdr_confirm_compile.cpp @@ -87,7 +87,6 @@ void fillLitInfo(const vector &lits, vector &tmpLitInfo, info.flags = flags; info.size = verify_u8(max(lit.msk.size(), lit.s.size())); info.groups = lit.groups; - info.pure = lit.pure; // these are built up assuming a LE machine CONF_TYPE msk = all_ones; diff --git a/src/fdr/fdr_confirm_runtime.h b/src/fdr/fdr_confirm_runtime.h index 67e0d692e..5a2164952 100644 --- a/src/fdr/fdr_confirm_runtime.h +++ b/src/fdr/fdr_confirm_runtime.h @@ -65,7 +65,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a u8 oldNext; // initialized in loop do { assert(ISALIGNED(li)); - scratch->pure = li->pure; if (unlikely((conf_key & li->msk) != li->v)) { goto out; @@ -100,7 +99,6 @@ void confWithBit(const struct FDRConfirm *fdrc, const struct FDR_Runtime_Args *a li++; } while (oldNext); scratch->fdr_conf = NULL; - scratch->pure = 0; } #endif diff --git a/src/hs.cpp b/src/hs.cpp index 329702d40..ab54105c5 100644 --- a/src/hs.cpp +++ b/src/hs.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -251,7 +251,7 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, ng.rm.logicalKeyRenumber(); unsigned length = 0; - struct hs_database *out = build(ng, &length); + struct hs_database *out = build(ng, &length, 0); assert(out); // should have thrown exception on error assert(length); @@ -281,6 +281,130 @@ hs_compile_multi_int(const char *const *expressions, const unsigned *flags, } } +hs_error_t +hs_compile_lit_multi_int(const char *const *expressions, const unsigned *flags, + const unsigned 
*ids, const hs_expr_ext *const *ext, + const size_t *lens, unsigned elements, unsigned mode, + const hs_platform_info_t *platform, hs_database_t **db, + hs_compile_error_t **comp_error, const Grey &g) { + // Check the args: note that it's OK for flags, ids or ext to be null. + if (!comp_error) { + if (db) { + *db = nullptr; + } + // nowhere to write the string, but we can still report an error code + return HS_COMPILER_ERROR; + } + if (!db) { + *comp_error = generateCompileError("Invalid parameter: db is NULL", -1); + return HS_COMPILER_ERROR; + } + if (!expressions) { + *db = nullptr; + *comp_error + = generateCompileError("Invalid parameter: expressions is NULL", + -1); + return HS_COMPILER_ERROR; + } + if (!lens) { + *db = nullptr; + *comp_error = generateCompileError("Invalid parameter: len is NULL", -1); + return HS_COMPILER_ERROR; + } + if (elements == 0) { + *db = nullptr; + *comp_error = generateCompileError("Invalid parameter: elements is zero", -1); + return HS_COMPILER_ERROR; + } + +#if defined(FAT_RUNTIME) + if (!check_ssse3()) { + *db = nullptr; + *comp_error = generateCompileError("Unsupported architecture", -1); + return HS_ARCH_ERROR; + } +#endif + + if (!checkMode(mode, comp_error)) { + *db = nullptr; + assert(*comp_error); // set by checkMode. + return HS_COMPILER_ERROR; + } + + if (!checkPlatform(platform, comp_error)) { + *db = nullptr; + assert(*comp_error); // set by checkPlattform. + return HS_COMPILER_ERROR; + } + + if (elements > g.limitPatternCount) { + *db = nullptr; + *comp_error = generateCompileError("Number of patterns too large", -1); + return HS_COMPILER_ERROR; + } + + // This function is simply a wrapper around both the parser and compiler + bool isStreaming = mode & (HS_MODE_STREAM | HS_MODE_VECTORED); + bool isVectored = mode & HS_MODE_VECTORED; + unsigned somPrecision = getSomPrecision(mode); + + target_t target_info = platform ? 
target_t(*platform) + : get_current_target(); + + try { + CompileContext cc(isStreaming, isVectored, target_info, g); + NG ng(cc, elements, somPrecision); + + for (unsigned int i = 0; i < elements; i++) { + // Add this expression to the compiler + try { + addLitExpression(ng, i, expressions[i], flags ? flags[i] : 0, + ext ? ext[i] : nullptr, ids ? ids[i] : 0, + lens[i]); + } catch (CompileError &e) { + /* Caught a parse error; + * throw it upstream as a CompileError with a specific index */ + e.setExpressionIndex(i); + throw; /* do not slice */ + } + } + + // Check sub-expression ids + ng.rm.pl.validateSubIDs(ids, expressions, flags, elements); + // Renumber and assign lkey to reports + ng.rm.logicalKeyRenumber(); + + unsigned length = 0; + struct hs_database *out = build(ng, &length, 1); + + assert(out); //should have thrown exception on error + assert(length); + + *db = out; + *comp_error = nullptr; + + return HS_SUCCESS; + } + catch (const CompileError &e) { + // Compiler error occurred + *db = nullptr; + *comp_error = generateCompileError(e.reason, + e.hasIndex ? (int)e.index : -1); + return HS_COMPILER_ERROR; + } + catch (const std::bad_alloc &) { + *db = nullptr; + *comp_error = const_cast(&hs_enomem); + return HS_COMPILER_ERROR; + } + catch (...) 
{ + assert(!"Internal errror, unexpected exception"); + *db = nullptr; + *comp_error = const_cast(&hs_einternal); + return HS_COMPILER_ERROR; + } +} + } // namespace ue2 extern "C" HS_PUBLIC_API @@ -326,6 +450,41 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char * const *expressions, platform, db, error, Grey()); } +extern "C" HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags, + const size_t len, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error) { + if (expression == nullptr) { + *db = nullptr; + *error = generateCompileError("Invalid parameter: expression is NULL", + -1); + return HS_COMPILER_ERROR; + } + + unsigned id = 0; // single expressions get zero as an ID + const hs_expr_ext * const *ext = nullptr; // unused for this call. + + return hs_compile_lit_multi_int(&expression, &flags, &id, ext, &len, 1, + mode, platform, db, error, Grey()); +} + +extern "C" HS_PUBLIC_API +hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions, + const unsigned *flags, + const unsigned *ids, + const size_t *lens, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error) { + const hs_expr_ext * const *ext = nullptr; // unused for this call. 
+ return hs_compile_lit_multi_int(expressions, flags, ids, ext, lens, + elements, mode, platform, db, error, + Grey()); +} + static hs_error_t hs_expression_info_int(const char *expression, unsigned int flags, const hs_expr_ext_t *ext, unsigned int mode, diff --git a/src/hs_compile.h b/src/hs_compile.h index c8dcfdf21..4c372ffe0 100644 --- a/src/hs_compile.h +++ b/src/hs_compile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -323,6 +323,10 @@ typedef struct hs_expr_ext { * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. * * @param mode * Compiler mode flags that affect the database as a whole. One of @ref @@ -392,6 +396,10 @@ hs_error_t HS_CDECL hs_compile(const char *expression, unsigned int flags, * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. * * @param ids * An array of integers specifying the ID number to be associated with the @@ -472,6 +480,10 @@ hs_error_t HS_CDECL hs_compile_multi(const char *const *expressions, * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. 
+ * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. * * @param ids * An array of integers specifying the ID number to be associated with the @@ -527,6 +539,165 @@ hs_error_t HS_CDECL hs_compile_ext_multi(const char *const *expressions, const hs_platform_info_t *platform, hs_database_t **db, hs_compile_error_t **error); +/** + * The basic pure literal expression compiler. + * + * This is the function call with which a pure literal expression (not a + * common regular expression) is compiled into a Hyperscan database which + * can be passed to the runtime functions (such as @ref hs_scan(), + * @ref hs_open_stream(), etc.) + * + * @param expression + * The literal expression to parse. Note that this string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @p flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a + * flags. Meanwhile, the string content shall be fully parsed in a literal + * sense without any regular grammars. For example, the @p expression + * `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?` + * here doesn't mean 0 or 1 quantifier under regular semantics. + * + * @param flags + * Flags which modify the behaviour of the expression. Multiple flags may + * be used by ORing them together. Compared to @ref hs_compile(), fewer + * valid values are provided: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the + * expression per stream.
+ * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found. + * + * @param len + * The length of the text content of the pure literal expression. As the + * text content indicated by @p expression is treated as single character + * one by one, the special terminating character `\0` should be allowed + * to appear in expression, and not treated as a terminator for a string. + * Thus, the end of a pure literal expression cannot be indicated by + * identifying `\0`, but by counting to the expression length. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. + * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. + * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. 
+ */ +hs_error_t HS_CDECL hs_compile_lit(const char *expression, unsigned flags, + const size_t len, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); +/** + * The multiple pure literal expression compiler. + * + * This is the function call with which a set of pure literal expressions is + * compiled into a database which can be passed to the runtime functions (such + * as @ref hs_scan(), @ref hs_open_stream(), etc.) Each expression can be + * labelled with a unique integer which is passed into the match callback to + * identify the pattern that has matched. + * + * @param expressions + * Array of literal expressions to parse. Note that each string must + * represent ONLY the pattern to be matched, with no delimiters or flags; + * any global flags should be specified with the @p flags argument. For + * example, the expression `/abc?def/i` should be compiled by providing + * `abc?def` as the @p expression, and @ref HS_FLAG_CASELESS as the @a + * flags. Meanwhile, the string content shall be fully parsed in a literal + * sense without any regular grammars. For example, the @p expression + * `abc?` simply means a char sequence of `a`, `b`, `c`, and `?`. The `?` + * here doesn't mean 0 or 1 quantifier under regular semantics. + * + * @param flags + * Array of flags which modify the behaviour of each expression. Multiple + * flags may be used by ORing them together. Specifying the NULL pointer + * in place of an array will set the flags value for all patterns to zero. + * Compared to @ref hs_compile_multi(), fewer valid values are provided: + * - HS_FLAG_CASELESS - Matching will be performed case-insensitively. + * - HS_FLAG_MULTILINE - `^` and `$` anchors match any newlines in data. + * - HS_FLAG_SINGLEMATCH - Only one match will be generated for the + * expression per stream. + * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset + * when a match is found.
+ * + * @param ids + * An array of integers specifying the ID number to be associated with the + * corresponding pattern in the expressions array. Specifying the NULL + * pointer in place of an array will set the ID value for all patterns to + * zero. + * + * @param lens + * Array of lengths of the text content of each pure literal expression. + * As the text of each entry in @p expressions is treated as a plain + * sequence of characters, the character `\0` is allowed to appear inside + * an expression and is not treated as a string terminator. The end of + * each expression is therefore determined by its length in this array, + * not by a terminating `\0`. + * + * @param elements + * The number of elements in the input arrays. + * + * @param mode + * Compiler mode flags that affect the database as a whole. One of @ref + * HS_MODE_STREAM or @ref HS_MODE_BLOCK or @ref HS_MODE_VECTORED must be + * supplied, to select between the generation of a streaming, block or + * vectored database. In addition, other flags (beginning with HS_MODE_) + * may be supplied to enable specific features. See @ref HS_MODE_FLAG for + * more details. + * + * @param platform + * If not NULL, the platform structure is used to determine the target + * platform for the database. If NULL, a database suitable for running + * on the current host platform is produced. + * + * @param db + * On success, a pointer to the generated database will be returned in + * this parameter, or NULL on failure. The caller is responsible for + * deallocating the buffer using the @ref hs_free_database() function. + * + * @param error + * If the compile fails, a pointer to a @ref hs_compile_error_t will be + * returned, providing details of the error condition. The caller is + * responsible for deallocating the buffer using the @ref + * hs_free_compile_error() function. 
+ * + * @return + * @ref HS_SUCCESS is returned on successful compilation; @ref + * HS_COMPILER_ERROR on failure, with details provided in the error + * parameter. + */ +hs_error_t HS_CDECL hs_compile_lit_multi(const char * const *expressions, + const unsigned *flags, + const unsigned *ids, + const size_t *lens, + unsigned elements, unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **error); + /** * Free an error structure generated by @ref hs_compile(), @ref * hs_compile_multi() or @ref hs_compile_ext_multi(). @@ -579,6 +750,10 @@ hs_error_t HS_CDECL hs_free_compile_error(hs_compile_error_t *error); * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. * * @param info * On success, a pointer to the pattern information will be returned in @@ -641,6 +816,10 @@ hs_error_t HS_CDECL hs_expression_info(const char *expression, * - HS_FLAG_PREFILTER - Compile pattern in prefiltering mode. * - HS_FLAG_SOM_LEFTMOST - Report the leftmost start of match offset * when a match is found. + * - HS_FLAG_COMBINATION - Parse the expression in logical combination + * syntax. + * - HS_FLAG_QUIET - Ignore match reporting for this expression. Used for + * the sub-expressions in logical combinations. 
* * @param ext * A pointer to a filled @ref hs_expr_ext_t structure that defines diff --git a/src/hs_internal.h b/src/hs_internal.h index 2a00fa2f9..adf07b22c 100644 --- a/src/hs_internal.h +++ b/src/hs_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -52,6 +52,17 @@ hs_error_t hs_compile_multi_int(const char *const *expressions, hs_database_t **db, hs_compile_error_t **comp_error, const Grey &g); +/** \brief Internal use only: takes a Grey argument so that we can use it in + * tools. */ +hs_error_t hs_compile_lit_multi_int(const char *const *expressions, + const unsigned *flags, const unsigned *ids, + const hs_expr_ext *const *ext, + const size_t *lens, unsigned elements, + unsigned mode, + const hs_platform_info_t *platform, + hs_database_t **db, + hs_compile_error_t **comp_error, + const Grey &g); } // namespace ue2 extern "C" diff --git a/src/hwlm/hwlm_literal.cpp b/src/hwlm/hwlm_literal.cpp index b257dfb07..692f7c6c0 100644 --- a/src/hwlm/hwlm_literal.cpp +++ b/src/hwlm/hwlm_literal.cpp @@ -83,10 +83,9 @@ bool maskIsConsistent(const std::string &s, bool nocase, const vector &msk, * \ref HWLM_MASKLEN. 
*/ hwlmLiteral::hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, u32 id_in, hwlm_group_t groups_in, - const vector &msk_in, const vector &cmp_in, - bool pure_in) + const vector &msk_in, const vector &cmp_in) : s(s_in), id(id_in), nocase(nocase_in), noruns(noruns_in), - groups(groups_in), msk(msk_in), cmp(cmp_in), pure(pure_in) { + groups(groups_in), msk(msk_in), cmp(cmp_in) { assert(s.size() <= HWLM_LITERAL_MAX_LEN); assert(msk.size() <= HWLM_MASKLEN); assert(msk.size() == cmp.size()); diff --git a/src/hwlm/hwlm_literal.h b/src/hwlm/hwlm_literal.h index 72a57f944..598de8147 100644 --- a/src/hwlm/hwlm_literal.h +++ b/src/hwlm/hwlm_literal.h @@ -113,16 +113,13 @@ struct hwlmLiteral { */ std::vector cmp; - bool pure; //!< \brief The pass-on of pure flag from LitFragment. - /** \brief Complete constructor, takes group information and msk/cmp. * * This constructor takes a msk/cmp pair. Both must be vectors of length <= * \ref HWLM_MASKLEN. */ hwlmLiteral(const std::string &s_in, bool nocase_in, bool noruns_in, u32 id_in, hwlm_group_t groups_in, - const std::vector &msk_in, const std::vector &cmp_in, - bool pure_in = false); + const std::vector &msk_in, const std::vector &cmp_in); /** \brief Simple constructor: no group information, no msk/cmp. 
* diff --git a/src/parser/logical_combination.cpp b/src/parser/logical_combination.cpp index b78390b07..49e060c98 100644 --- a/src/parser/logical_combination.cpp +++ b/src/parser/logical_combination.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Intel Corporation + * Copyright (c) 2018-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -254,44 +254,6 @@ void popOperator(vector &op_stack, vector &subid_stack, op_stack.pop_back(); } -static -char getValue(const vector &lv, u32 ckey) { - if (ckey & LOGICAL_OP_BIT) { - return lv[ckey & ~LOGICAL_OP_BIT]; - } else { - return 0; - } -} - -static -bool hasMatchFromPurelyNegative(const vector &tree, - u32 start, u32 result) { - vector lv(tree.size()); - assert(start <= result); - for (u32 i = start; i <= result; i++) { - assert(i & LOGICAL_OP_BIT); - const LogicalOp &op = tree[i & ~LOGICAL_OP_BIT]; - assert(i == op.id); - switch (op.op) { - case LOGICAL_OP_NOT: - lv[op.id & ~LOGICAL_OP_BIT] = !getValue(lv, op.ro); - break; - case LOGICAL_OP_AND: - lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) & - getValue(lv, op.ro); - break; - case LOGICAL_OP_OR: - lv[op.id & ~LOGICAL_OP_BIT] = getValue(lv, op.lo) | - getValue(lv, op.ro); - break; - default: - assert(0); - break; - } - } - return lv[result & ~LOGICAL_OP_BIT]; -} - void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, u32 ekey, u64a min_offset, u64a max_offset) { @@ -366,9 +328,6 @@ void ParsedLogical::parseLogicalCombination(unsigned id, const char *logical, if (lkey_start == INVALID_LKEY) { throw CompileError("No logical operation."); } - if (hasMatchFromPurelyNegative(logicalTree, lkey_start, lkey_result)) { - throw CompileError("Has match from purely negative sub-expressions."); - } combinationInfoAdd(ckey, id, ekey, lkey_start, lkey_result, min_offset, max_offset); } diff --git a/src/parser/shortcut_literal.cpp 
b/src/parser/shortcut_literal.cpp index d08bab3c0..a5d67f30d 100644 --- a/src/parser/shortcut_literal.cpp +++ b/src/parser/shortcut_literal.cpp @@ -185,7 +185,6 @@ bool shortcutLiteral(NG &ng, const ParsedExpression &pe) { return false; } - vis.lit.set_pure(); const ue2_literal &lit = vis.lit; if (lit.empty()) { diff --git a/src/report.h b/src/report.h index a2e2d0f3d..b35f4c052 100644 --- a/src/report.h +++ b/src/report.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -222,6 +222,58 @@ char isLogicalCombination(const struct RoseEngine *rose, char *lvec, return getLogicalVal(rose, lvec, result); } +/** \brief Returns 1 if combination matches when no sub-expression matches. */ +static really_inline +char isPurelyNegativeMatch(const struct RoseEngine *rose, char *lvec, + u32 start, u32 result) { + const struct LogicalOp *logicalTree = (const struct LogicalOp *) + ((const char *)rose + rose->logicalTreeOffset); + assert(start >= rose->lkeyCount); + assert(start <= result); + assert(result < rose->lkeyCount + rose->lopCount); + for (u32 i = start; i <= result; i++) { + const struct LogicalOp *op = logicalTree + (i - rose->lkeyCount); + assert(i == op->id); + assert(op->op <= LAST_LOGICAL_OP); + switch ((enum LogicalOpType)op->op) { + case LOGICAL_OP_NOT: + if ((op->ro < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->ro)) { + // sub-expression not negative + return 0; + } + setLogicalVal(rose, lvec, op->id, + !getLogicalVal(rose, lvec, op->ro)); + break; + case LOGICAL_OP_AND: + if (((op->lo < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->lo)) || + ((op->ro < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->ro))) { + // sub-expression not negative + return 0; + } + setLogicalVal(rose, lvec, op->id, + getLogicalVal(rose, lvec, op->lo) & + 
getLogicalVal(rose, lvec, op->ro)); // && + break; + case LOGICAL_OP_OR: + if (((op->lo < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->lo)) || + ((op->ro < rose->lkeyCount) && + getLogicalVal(rose, lvec, op->ro))) { + // sub-expression not negative + return 0; + } + setLogicalVal(rose, lvec, op->id, + getLogicalVal(rose, lvec, op->lo) | + getLogicalVal(rose, lvec, op->ro)); // || + break; + } + } + return getLogicalVal(rose, lvec, result); +} + /** \brief Clear all keys in the logical vector. */ static really_inline void clearLvec(const struct RoseEngine *rose, char *lvec, char *cvec) { diff --git a/src/rose/block.c b/src/rose/block.c index a32113f4b..b3f424cb7 100644 --- a/src/rose/block.c +++ b/src/rose/block.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: diff --git a/src/rose/match.c b/src/rose/match.c index 192b4709d..84d3b1fdc 100644 --- a/src/rose/match.c +++ b/src/rose/match.c @@ -238,10 +238,10 @@ hwlmcb_rv_t roseProcessMatchInline(const struct RoseEngine *t, assert(id && id < t->size); // id is an offset into bytecode const u64a som = 0; const u8 flags = 0; - if (!scratch->pure) { - return roseRunProgram(t, scratch, id, som, end, flags); - } else { + if (t->pureLiteral) { return roseRunProgram_l(t, scratch, id, som, end, flags); + } else { + return roseRunProgram(t, scratch, id, som, end, flags); } } @@ -591,6 +591,23 @@ int roseRunFlushCombProgram(const struct RoseEngine *rose, return MO_CONTINUE_MATCHING; } +/** + * \brief Execute last flush combination program. + * + * Returns MO_HALT_MATCHING if the stream is exhausted or the user has + * instructed us to halt, or MO_CONTINUE_MATCHING otherwise. 
+ */ +int roseRunLastFlushCombProgram(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a end) { + hwlmcb_rv_t rv = roseRunProgram(rose, scratch, + rose->lastFlushCombProgramOffset, + 0, end, 0); + if (rv == HWLM_TERMINATE_MATCHING) { + return MO_HALT_MATCHING; + } + return MO_CONTINUE_MATCHING; +} + int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { struct hs_scratch *scratch = context; assert(scratch && scratch->magic == SCRATCH_MAGIC); @@ -602,8 +619,12 @@ int roseReportAdaptor(u64a start, u64a end, ReportID id, void *context) { // Our match ID is the program offset. const u32 program = id; const u8 flags = ROSE_PROG_FLAG_SKIP_MPV_CATCHUP; - hwlmcb_rv_t rv = - roseRunProgram(rose, scratch, program, start, end, flags); + hwlmcb_rv_t rv; + if (rose->pureLiteral) { + rv = roseRunProgram_l(rose, scratch, program, start, end, flags); + } else { + rv = roseRunProgram(rose, scratch, program, start, end, flags); + } if (rv == HWLM_TERMINATE_MATCHING) { return MO_HALT_MATCHING; } diff --git a/src/rose/program_runtime.c b/src/rose/program_runtime.c index 7f5150e03..0f2d1083b 100644 --- a/src/rose/program_runtime.c +++ b/src/rose/program_runtime.c @@ -480,6 +480,25 @@ hwlmcb_rv_t roseReport(const struct RoseEngine *t, struct hs_scratch *scratch, return roseHaltIfExhausted(t, scratch); } +static rose_inline +hwlmcb_rv_t roseReportComb(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end, + ReportID onmatch, s32 offset_adjust, u32 ekey) { + DEBUG_PRINTF("firing callback onmatch=%u, end=%llu\n", onmatch, end); + + int cb_rv = roseDeliverReport(end, onmatch, offset_adjust, scratch, ekey); + if (cb_rv == MO_HALT_MATCHING) { + DEBUG_PRINTF("termination requested\n"); + return HWLM_TERMINATE_MATCHING; + } + + if (ekey == INVALID_EKEY || cb_rv == ROSE_CONTINUE_MATCHING_NO_EXHAUST) { + return HWLM_CONTINUE_MATCHING; + } + + return roseHaltIfExhausted(t, scratch); +} + /* catches up engines enough to ensure any earlier mpv triggers 
are enqueued * and then adds the trigger to the mpv queue. */ static rose_inline @@ -1866,8 +1885,8 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, } DEBUG_PRINTF("Logical Combination Passed!\n"); - if (roseReport(t, scratch, end, ci->id, 0, - ci->ekey) == HWLM_TERMINATE_MATCHING) { + if (roseReportComb(t, scratch, end, ci->id, 0, + ci->ekey) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; } } @@ -1875,6 +1894,49 @@ hwlmcb_rv_t flushActiveCombinations(const struct RoseEngine *t, return HWLM_CONTINUE_MATCHING; } +static rose_inline +hwlmcb_rv_t checkPurelyNegatives(const struct RoseEngine *t, + struct hs_scratch *scratch, u64a end) { + for (u32 i = 0; i < t->ckeyCount; i++) { + const struct CombInfo *combInfoMap = (const struct CombInfo *) + ((const char *)t + t->combInfoMapOffset); + const struct CombInfo *ci = combInfoMap + i; + if ((ci->min_offset != 0) && (end < ci->min_offset)) { + DEBUG_PRINTF("halt: before min_offset=%llu\n", ci->min_offset); + continue; + } + if ((ci->max_offset != MAX_OFFSET) && (end > ci->max_offset)) { + DEBUG_PRINTF("halt: after max_offset=%llu\n", ci->max_offset); + continue; + } + + DEBUG_PRINTF("check ekey %u\n", ci->ekey); + if (ci->ekey != INVALID_EKEY) { + assert(ci->ekey < t->ekeyCount); + const char *evec = scratch->core_info.exhaustionVector; + if (isExhausted(t, evec, ci->ekey)) { + DEBUG_PRINTF("ekey %u already set, match is exhausted\n", + ci->ekey); + continue; + } + } + + DEBUG_PRINTF("check ckey %u purely negative\n", i); + char *lvec = scratch->core_info.logicalVector; + if (!isPurelyNegativeMatch(t, lvec, ci->start, ci->result)) { + DEBUG_PRINTF("Logical Combination from purely negative Failed!\n"); + continue; + } + + DEBUG_PRINTF("Logical Combination from purely negative Passed!\n"); + if (roseReportComb(t, scratch, end, ci->id, 0, + ci->ekey) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + return HWLM_CONTINUE_MATCHING; +} + #if !defined(_WIN32) #define 
PROGRAM_CASE(name) \ case ROSE_INSTR_##name: { \ @@ -2004,7 +2066,8 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, &&LABEL_ROSE_INSTR_SET_LOGICAL, &&LABEL_ROSE_INSTR_SET_COMBINATION, &&LABEL_ROSE_INSTR_FLUSH_COMBINATION, - &&LABEL_ROSE_INSTR_SET_EXHAUST + &&LABEL_ROSE_INSTR_SET_EXHAUST, + &&LABEL_ROSE_INSTR_LAST_FLUSH_COMBINATION }; #endif @@ -2772,6 +2835,19 @@ hwlmcb_rv_t roseRunProgram(const struct RoseEngine *t, } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(LAST_FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + if (checkPurelyNegatives(t, scratch, end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + PROGRAM_NEXT_INSTRUCTION + default: { assert(0); // unreachable scratch->core_info.status |= STATUS_ERROR; @@ -2808,6 +2884,7 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, assert(programOffset >= sizeof(struct RoseEngine)); assert(programOffset < t->size); + const char in_catchup = prog_flags & ROSE_PROG_FLAG_IN_CATCHUP; const char from_mpv = prog_flags & ROSE_PROG_FLAG_FROM_MPV; const char *pc_base = getByOffset(t, programOffset); @@ -2835,6 +2912,56 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CHECK_GROUPS) { + DEBUG_PRINTF("groups=0x%llx, checking instr groups=0x%llx\n", + tctxt->groups, ri->groups); + if (!(ri->groups & tctxt->groups)) { + DEBUG_PRINTF("halt: no groups are set\n"); + return HWLM_CONTINUE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_MASK) { + struct core_info *ci = &scratch->core_info; + if (!roseCheckMask(ci, ri->and_mask, ri->cmp_mask, + ri->neg_mask, ri->offset, end)) { + DEBUG_PRINTF("failed mask check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_MASK_32) { + 
struct core_info *ci = &scratch->core_info; + if (!roseCheckMask32(ci, ri->and_mask, ri->cmp_mask, + ri->neg_mask, ri->offset, end)) { + assert(ri->fail_jump); + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(CHECK_BYTE) { + const struct core_info *ci = &scratch->core_info; + if (!roseCheckByte(ci, ri->and_mask, ri->cmp_mask, + ri->negation, ri->offset, end)) { + DEBUG_PRINTF("failed byte check\n"); + assert(ri->fail_jump); // must progress + pc += ri->fail_jump; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + + L_PROGRAM_CASE(PUSH_DELAYED) { + rosePushDelayedMatch(t, scratch, ri->delay, ri->index, end); + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(CATCH_UP) { if (roseCatchUpTo(t, scratch, end) == HWLM_TERMINATE_MATCHING) { return HWLM_TERMINATE_MATCHING; @@ -2891,6 +3018,17 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(REPORT_CHAIN) { + // Note: sequence points updated inside this function. 
+ if (roseCatchUpAndHandleChainMatch( + t, scratch, ri->event, ri->top_squash_distance, end, + in_catchup) == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + work_done = 1; + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(REPORT) { updateSeqPoint(tctxt, end, from_mpv); if (roseReport(t, scratch, end, ri->onmatch, ri->offset_adjust, @@ -3041,6 +3179,24 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(INCLUDED_JUMP) { + if (scratch->fdr_conf) { + // squash the bucket of included literal + u8 shift = scratch->fdr_conf_offset & ~7U; + u64a mask = ((~(u64a)ri->squash) << shift); + *(scratch->fdr_conf) &= mask; + + pc = getByOffset(t, ri->child_offset); + pc_base = pc; + programOffset = (const u8 *)pc_base -(const u8 *)t; + DEBUG_PRINTF("pc_base %p pc %p child_offset %u squash %u\n", + pc_base, pc, ri->child_offset, ri->squash); + work_done = 0; + L_PROGRAM_NEXT_INSTRUCTION_JUMP + } + } + L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(SET_LOGICAL) { DEBUG_PRINTF("set logical value of lkey %u, offset_adjust=%d\n", ri->lkey, ri->offset_adjust); @@ -3082,6 +3238,19 @@ hwlmcb_rv_t roseRunProgram_l(const struct RoseEngine *t, } L_PROGRAM_NEXT_INSTRUCTION + L_PROGRAM_CASE(LAST_FLUSH_COMBINATION) { + assert(end >= tctxt->lastCombMatchOffset); + if (flushActiveCombinations(t, scratch) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + if (checkPurelyNegatives(t, scratch, end) + == HWLM_TERMINATE_MATCHING) { + return HWLM_TERMINATE_MATCHING; + } + } + L_PROGRAM_NEXT_INSTRUCTION + default: { assert(0); // unreachable scratch->core_info.status |= STATUS_ERROR; diff --git a/src/rose/rose.h b/src/rose/rose.h index c2b682f6b..409b70028 100644 --- a/src/rose/rose.h +++ b/src/rose/rose.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are 
permitted provided that the following conditions are met: @@ -56,4 +56,7 @@ int roseRunBoundaryProgram(const struct RoseEngine *rose, u32 program, int roseRunFlushCombProgram(const struct RoseEngine *rose, struct hs_scratch *scratch, u64a end); +int roseRunLastFlushCombProgram(const struct RoseEngine *rose, + struct hs_scratch *scratch, u64a end); + #endif // ROSE_H diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp index 0ef20f213..5cbb5c848 100644 --- a/src/rose/rose_build_bytecode.cpp +++ b/src/rose/rose_build_bytecode.cpp @@ -2843,34 +2843,9 @@ vector groupByFragment(const RoseBuildImpl &build) { DEBUG_PRINTF("fragment candidate: lit_id=%u %s\n", lit_id, dumpString(lit.s).c_str()); - - /** 0:/xxabcdefgh/ */ - /** 1:/yyabcdefgh/ */ - /** 2:/yyabcdefgh.+/ */ - // Above 3 patterns should firstly convert into RoseLiteralMap with - // 2 elements ("xxabcdefgh" and "yyabcdefgh"), then convert into - // LitFragment with 1 element ("abcdefgh"). Special care should be - // taken to handle the 'pure' flag during the conversion. 
- - rose_literal_id lit_frag = getFragment(lit); - auto it = frag_info.find(lit_frag); - if (it != frag_info.end()) { - if (!lit_frag.s.get_pure() && it->first.s.get_pure()) { - struct FragmentInfo f_info = it->second; - f_info.lit_ids.push_back(lit_id); - f_info.groups |= groups; - frag_info.erase(it->first); - frag_info.emplace(lit_frag, f_info); - } else { - it->second.lit_ids.push_back(lit_id); - it->second.groups |= groups; - } - } else { - struct FragmentInfo f_info; - f_info.lit_ids.push_back(lit_id); - f_info.groups |= groups; - frag_info.emplace(lit_frag, f_info); - } + auto &fi = frag_info[getFragment(lit)]; + fi.lit_ids.push_back(lit_id); + fi.groups |= groups; } for (auto &m : frag_info) { @@ -3370,6 +3345,15 @@ RoseProgram makeFlushCombProgram(const RoseEngine &t) { return program; } +static +RoseProgram makeLastFlushCombProgram(const RoseEngine &t) { + RoseProgram program; + if (t.ckeyCount) { + addLastFlushCombinationProgram(program); + } + return program; +} + static u32 history_required(const rose_literal_id &key) { if (key.msk.size() < key.s.length()) { @@ -3740,6 +3724,10 @@ bytecode_ptr RoseBuildImpl::buildFinalEngine(u32 minWidth) { auto flushComb_prog = makeFlushCombProgram(proto); proto.flushCombProgramOffset = writeProgram(bc, move(flushComb_prog)); + auto lastFlushComb_prog = makeLastFlushCombProgram(proto); + proto.lastFlushCombProgramOffset = + writeProgram(bc, move(lastFlushComb_prog)); + // Build anchored matcher. 
auto atable = buildAnchoredMatcher(*this, fragments, anchored_dfas); if (atable) { diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp index 2eb7bb51b..8999daef2 100644 --- a/src/rose/rose_build_dump.cpp +++ b/src/rose/rose_build_dump.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -1486,6 +1486,9 @@ void dumpProgram(ofstream &os, const RoseEngine *t, const char *pc) { } PROGRAM_NEXT_INSTRUCTION + PROGRAM_CASE(LAST_FLUSH_COMBINATION) {} + PROGRAM_NEXT_INSTRUCTION + default: os << " UNKNOWN (code " << int{code} << ")" << endl; os << " " << endl; @@ -1557,6 +1560,25 @@ void dumpRoseFlushCombPrograms(const RoseEngine *t, const string &filename) { os.close(); } +static +void dumpRoseLastFlushCombPrograms(const RoseEngine *t, + const string &filename) { + ofstream os(filename); + const char *base = (const char *)t; + + if (t->lastFlushCombProgramOffset) { + os << "Last Flush Combination Program @ " + << t->lastFlushCombProgramOffset + << ":" << endl; + dumpProgram(os, t, base + t->lastFlushCombProgramOffset); + os << endl; + } else { + os << "" << endl; + } + + os.close(); +} + static void dumpRoseReportPrograms(const RoseEngine *t, const string &filename) { ofstream os(filename); @@ -2249,6 +2271,8 @@ void roseDumpPrograms(const vector &fragments, const RoseEngine *t, dumpRoseLitPrograms(fragments, t, base + "/rose_lit_programs.txt"); dumpRoseEodPrograms(t, base + "/rose_eod_programs.txt"); dumpRoseFlushCombPrograms(t, base + "/rose_flush_comb_programs.txt"); + dumpRoseLastFlushCombPrograms(t, + base + "/rose_last_flush_comb_programs.txt"); dumpRoseReportPrograms(t, base + "/rose_report_programs.txt"); dumpRoseAnchoredPrograms(t, base + "/rose_anchored_programs.txt"); dumpRoseDelayPrograms(t, base + "/rose_delay_programs.txt"); 
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h index fe48da4c0..7780848b1 100644 --- a/src/rose/rose_build_impl.h +++ b/src/rose/rose_build_impl.h @@ -340,14 +340,7 @@ class RoseLiteralMap { std::pair insert(const rose_literal_id &lit) { auto it = lits_index.find(lit); if (it != lits_index.end()) { - u32 idx = it->second; - auto &l = lits.at(idx); - if (!lit.s.get_pure() && l.s.get_pure()) { - lits_index.erase(l); - l.s.unset_pure(); - lits_index.emplace(l, idx); - } - return {idx, false}; + return {it->second, false}; } u32 id = verify_u32(lits.size()); lits.push_back(lit); diff --git a/src/rose/rose_build_instructions.cpp b/src/rose/rose_build_instructions.cpp index 2fe534559..c503f7311 100644 --- a/src/rose/rose_build_instructions.cpp +++ b/src/rose/rose_build_instructions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, Intel Corporation + * Copyright (c) 2017-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -48,6 +48,7 @@ RoseInstrMatcherEod::~RoseInstrMatcherEod() = default; RoseInstrEnd::~RoseInstrEnd() = default; RoseInstrClearWorkDone::~RoseInstrClearWorkDone() = default; RoseInstrFlushCombination::~RoseInstrFlushCombination() = default; +RoseInstrLastFlushCombination::~RoseInstrLastFlushCombination() = default; using OffsetMap = RoseInstruction::OffsetMap; diff --git a/src/rose/rose_build_instructions.h b/src/rose/rose_build_instructions.h index 61e6d7a65..306a4166c 100644 --- a/src/rose/rose_build_instructions.h +++ b/src/rose/rose_build_instructions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018, Intel Corporation + * Copyright (c) 2017-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -2206,6 +2206,14 @@ class RoseInstrFlushCombination ~RoseInstrFlushCombination() override; }; 
+class RoseInstrLastFlushCombination + : public RoseInstrBaseTrivial { +public: + ~RoseInstrLastFlushCombination() override; +}; + class RoseInstrSetExhaust : public RoseInstrBaseNoTargets()); } +void addLastFlushCombinationProgram(RoseProgram &program) { + program.add_before_end(make_unique()); +} + static void makeRoleCheckLeftfix(const RoseBuildImpl &build, const map &leftfix_info, diff --git a/src/rose/rose_build_program.h b/src/rose/rose_build_program.h index 8c8c37ed9..7d781f319 100644 --- a/src/rose/rose_build_program.h +++ b/src/rose/rose_build_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -188,6 +188,7 @@ void addEnginesEodProgram(u32 eodNfaIterOffset, RoseProgram &program); void addSuffixesEodProgram(RoseProgram &program); void addMatcherEodProgram(RoseProgram &program); void addFlushCombinationProgram(RoseProgram &program); +void addLastFlushCombinationProgram(RoseProgram &program); static constexpr u32 INVALID_QUEUE = ~0U; diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h index 386b035ca..7bd6779c3 100644 --- a/src/rose/rose_internal.h +++ b/src/rose/rose_internal.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -328,6 +328,7 @@ struct RoseBoundaryReports { * nfas). Rose nfa info table can distinguish the cases. 
*/ struct RoseEngine { + u8 pureLiteral; /* Indicator of pure literal API */ u8 noFloatingRoots; /* only need to run the anchored table if something * matched in the anchored table */ u8 requiresEodCheck; /* stuff happens at eod time */ @@ -426,6 +427,8 @@ struct RoseEngine { u32 eodProgramOffset; //!< EOD program, otherwise 0. u32 flushCombProgramOffset; /**< FlushCombination program, otherwise 0 */ + u32 lastFlushCombProgramOffset; /**< LastFlushCombination program, + * otherwise 0 */ u32 lastByteHistoryIterOffset; // if non-zero diff --git a/src/rose/rose_program.h b/src/rose/rose_program.h index 7feee04fe..e5485476b 100644 --- a/src/rose/rose_program.h +++ b/src/rose/rose_program.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -201,7 +201,14 @@ enum RoseInstructionCode { /** \brief Mark as exhausted instead of report while quiet. */ ROSE_INSTR_SET_EXHAUST, - LAST_ROSE_INSTRUCTION = ROSE_INSTR_SET_EXHAUST //!< Sentinel. + /** + * \brief Calculate any combination's logical value if none of its + * sub-expression matches until EOD, then check if compliant with any + * logical constraints. + */ + ROSE_INSTR_LAST_FLUSH_COMBINATION, + + LAST_ROSE_INSTRUCTION = ROSE_INSTR_LAST_FLUSH_COMBINATION //!< Sentinel. }; struct ROSE_STRUCT_END { @@ -674,4 +681,8 @@ struct ROSE_STRUCT_SET_EXHAUST { u8 code; //!< From enum RoseInstructionCode. u32 ekey; //!< Exhaustion key. }; + +struct ROSE_STRUCT_LAST_FLUSH_COMBINATION { + u8 code; //!< From enum RoseInstructionCode. 
+}; #endif // ROSE_ROSE_PROGRAM_H diff --git a/src/runtime.c b/src/runtime.c index cfcd0f7c8..a3659348c 100644 --- a/src/runtime.c +++ b/src/runtime.c @@ -141,7 +141,6 @@ void populateCoreInfo(struct hs_scratch *s, const struct RoseEngine *rose, s->deduper.current_report_offset = ~0ULL; s->deduper.som_log_dirty = 1; /* som logs have not been cleared */ s->fdr_conf = NULL; - s->pure = 0; // Rose program execution (used for some report paths) depends on these // values being initialised. @@ -455,8 +454,9 @@ hs_error_t HS_CDECL hs_scan(const hs_database_t *db, const char *data, return HS_UNKNOWN_ERROR; } - if (rose->flushCombProgramOffset) { - if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + if (rose->lastFlushCombProgramOffset) { + if (roseRunLastFlushCombProgram(rose, scratch, length) + == MO_HALT_MATCHING) { if (unlikely(internal_matching_error(scratch))) { unmarkScratchInUse(scratch); return HS_UNKNOWN_ERROR; @@ -651,7 +651,9 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, scratch->core_info.logicalVector = state + rose->stateOffsets.logicalVec; scratch->core_info.combVector = state + rose->stateOffsets.combVec; - scratch->tctxt.lastCombMatchOffset = id->offset; + if (!id->offset) { + scratch->tctxt.lastCombMatchOffset = id->offset; + } } if (rose->somLocationCount) { @@ -698,8 +700,9 @@ void report_eod_matches(hs_stream_t *id, hs_scratch_t *scratch, } } - if (rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(rose, scratch, ~0ULL) == MO_HALT_MATCHING) { + if (rose->lastFlushCombProgramOffset && !told_to_stop_matching(scratch)) { + if (roseRunLastFlushCombProgram(rose, scratch, id->offset) + == MO_HALT_MATCHING) { DEBUG_PRINTF("told to stop matching\n"); scratch->core_info.status |= STATUS_TERMINATED; } @@ -906,7 +909,9 @@ hs_error_t hs_scan_stream_internal(hs_stream_t *id, const char *data, scratch->core_info.logicalVector = state + rose->stateOffsets.logicalVec; 
scratch->core_info.combVector = state + rose->stateOffsets.combVec; - scratch->tctxt.lastCombMatchOffset = id->offset; + if (!id->offset) { + scratch->tctxt.lastCombMatchOffset = id->offset; + } } assert(scratch->core_info.hlen <= id->offset && scratch->core_info.hlen <= rose->historyRequired); @@ -1013,18 +1018,6 @@ hs_error_t HS_CDECL hs_close_stream(hs_stream_t *id, hs_scratch_t *scratch, unmarkScratchInUse(scratch); } - if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) - == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; - if (unlikely(internal_matching_error(scratch))) { - unmarkScratchInUse(scratch); - return HS_UNKNOWN_ERROR; - } - unmarkScratchInUse(scratch); - } - } - hs_stream_free(id); return HS_SUCCESS; @@ -1054,18 +1047,6 @@ hs_error_t HS_CDECL hs_reset_stream(hs_stream_t *id, UNUSED unsigned int flags, unmarkScratchInUse(scratch); } - if (id->rose->flushCombProgramOffset && !told_to_stop_matching(scratch)) { - if (roseRunFlushCombProgram(id->rose, scratch, ~0ULL) - == MO_HALT_MATCHING) { - scratch->core_info.status |= STATUS_TERMINATED; - if (unlikely(internal_matching_error(scratch))) { - unmarkScratchInUse(scratch); - return HS_UNKNOWN_ERROR; - } - unmarkScratchInUse(scratch); - } - } - // history already initialised init_stream(id, id->rose, 0); diff --git a/src/scratch.c b/src/scratch.c index c23b5b3c3..25991e2bb 100644 --- a/src/scratch.c +++ b/src/scratch.c @@ -137,7 +137,6 @@ hs_error_t alloc_scratch(const hs_scratch_t *proto, hs_scratch_t **scratch) { s->scratchSize = alloc_size; s->scratch_alloc = (char *)s_tmp; s->fdr_conf = NULL; - s->pure = 0; // each of these is at an offset from the previous char *current = (char *)s + sizeof(*s); @@ -280,7 +279,9 @@ hs_error_t HS_CDECL hs_alloc_scratch(const hs_database_t *db, hs_error_t proto_ret = hs_check_alloc(proto_tmp); if (proto_ret != HS_SUCCESS) { hs_scratch_free(proto_tmp); - 
hs_scratch_free(*scratch); + if (*scratch) { + hs_scratch_free((*scratch)->scratch_alloc); + } *scratch = NULL; return proto_ret; } diff --git a/src/scratch.h b/src/scratch.h index e2e8039a1..1256f7aba 100644 --- a/src/scratch.h +++ b/src/scratch.h @@ -211,7 +211,6 @@ struct ALIGN_CL_DIRECTIVE hs_scratch { u64a *fdr_conf; /**< FDR confirm value */ u8 fdr_conf_offset; /**< offset where FDR/Teddy front end matches * in buffer */ - u8 pure; /**< indicator of pure-literal or cutting-literal */ }; /* array of fatbit ptr; TODO: why not an array of fatbits? */ diff --git a/src/util/multibit_build.cpp b/src/util/multibit_build.cpp index ad6a0d6a6..67bb9ec70 100644 --- a/src/util/multibit_build.cpp +++ b/src/util/multibit_build.cpp @@ -192,11 +192,11 @@ vector mmbBuildSparseIterator(const vector &bits, template static void add_scatter(vector *out, u32 offset, u64a mask) { - T su; + out->emplace_back(); + T &su = out->back(); memset(&su, 0, sizeof(su)); su.offset = offset; su.val = mask; - out->push_back(su); DEBUG_PRINTF("add %llu at offset %u\n", mask, offset); } diff --git a/src/util/ue2string.cpp b/src/util/ue2string.cpp index 98b007d4a..50b2bbcc8 100644 --- a/src/util/ue2string.cpp +++ b/src/util/ue2string.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -283,7 +283,6 @@ ue2_literal &ue2_literal::erase(size_type pos, size_type n) { } void ue2_literal::push_back(char c, bool nc) { - assert(!nc || ourisalpha(c)); if (nc) { c = mytoupper(c); } diff --git a/src/util/ue2string.h b/src/util/ue2string.h index 1ce51b2f1..0aa846896 100644 --- a/src/util/ue2string.h +++ b/src/util/ue2string.h @@ -211,17 +211,10 @@ struct ue2_literal : totally_ordered { size_t hash() const; - void set_pure() { pure = true; } - void unset_pure() { pure = false; } - bool get_pure() 
const { return pure; } - - /* TODO: consider existing member functions possibly related with pure. */ - private: friend const_iterator; std::string s; boost::dynamic_bitset<> nocase; - bool pure = false; /**< born from cutting or not (pure literal). */ }; /// Return a reversed copy of this literal. diff --git a/tools/hsbench/CMakeLists.txt b/tools/hsbench/CMakeLists.txt index 465081a8b..bbceda41c 100644 --- a/tools/hsbench/CMakeLists.txt +++ b/tools/hsbench/CMakeLists.txt @@ -56,10 +56,7 @@ if (BUILD_CHIMERA) engine_pcre.cpp engine_pcre.h ) -endif() - -add_executable(hsbench ${hsbench_SOURCES}) -if (BUILD_CHIMERA) + add_executable(hsbench ${hsbench_SOURCES}) include_directories(${PCRE_INCLUDE_DIRS}) if(NOT WIN32) target_link_libraries(hsbench hs chimera ${PCRE_LDFLAGS} databaseutil @@ -69,6 +66,11 @@ if (BUILD_CHIMERA) expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT}) endif() else() + if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)) + add_executable(hsbench ${hsbench_SOURCES} $ $) + else() + add_executable(hsbench ${hsbench_SOURCES}) + endif() target_link_libraries(hsbench hs databaseutil expressionutil ${SQLITE3_LDFLAGS} ${CMAKE_THREAD_LIBS_INIT}) endif() diff --git a/tools/hsbench/common.h b/tools/hsbench/common.h index 820cad7c3..7c2c8f9d9 100644 --- a/tools/hsbench/common.h +++ b/tools/hsbench/common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -41,6 +41,7 @@ extern unsigned int somPrecisionMode; extern bool forceEditDistance; extern unsigned editDistance; extern bool printCompressSize; +extern bool useLiteralApi; /** Structure for the result of a single complete scan. 
*/ struct ResultEntry { diff --git a/tools/hsbench/engine_hyperscan.cpp b/tools/hsbench/engine_hyperscan.cpp index 3390c2638..c1f1e8c49 100644 --- a/tools/hsbench/engine_hyperscan.cpp +++ b/tools/hsbench/engine_hyperscan.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -411,22 +411,30 @@ buildEngineHyperscan(const ExpressionMap &expressions, ScanMode scan_mode, ext_ptr[i] = &ext[i]; } - Timer timer; - timer.start(); - hs_compile_error_t *compile_err; + Timer timer; -#ifndef RELEASE_BUILD - err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(), - ext_ptr.data(), count, full_mode, nullptr, - &db, &compile_err, grey); -#else - err = hs_compile_ext_multi(patterns.data(), flags.data(), ids.data(), - ext_ptr.data(), count, full_mode, nullptr, - &db, &compile_err); -#endif + if (useLiteralApi) { + // Pattern length computation should be done before timer start. 
+ vector lens(count); + for (unsigned int i = 0; i < count; i++) { + lens[i] = strlen(patterns[i]); + } + timer.start(); + err = hs_compile_lit_multi_int(patterns.data(), flags.data(), + ids.data(), ext_ptr.data(), + lens.data(), count, full_mode, + nullptr, &db, &compile_err, grey); + timer.complete(); + } else { + timer.start(); + err = hs_compile_multi_int(patterns.data(), flags.data(), + ids.data(), ext_ptr.data(), count, + full_mode, nullptr, &db, &compile_err, + grey); + timer.complete(); + } - timer.complete(); compileSecs = timer.seconds(); peakMemorySize = getPeakHeap(); diff --git a/tools/hsbench/main.cpp b/tools/hsbench/main.cpp index de9fde07d..8e85d7aea 100644 --- a/tools/hsbench/main.cpp +++ b/tools/hsbench/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018, Intel Corporation + * Copyright (c) 2016-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,6 +87,7 @@ unsigned int somPrecisionMode = HS_MODE_SOM_HORIZON_LARGE; bool forceEditDistance = false; unsigned editDistance = 0; bool printCompressSize = false; +bool useLiteralApi = false; // Globals local to this file. 
static bool compressStream = false; @@ -218,6 +219,7 @@ void usage(const char *error) { printf(" --per-scan Display per-scan Mbit/sec results.\n"); printf(" --echo-matches Display all matches that occur during scan.\n"); printf(" --sql-out FILE Output sqlite db.\n"); + printf(" --literal-on Use Hyperscan pure literal matching.\n"); printf(" -S NAME Signature set name (for sqlite db).\n"); printf("\n\n"); @@ -250,6 +252,7 @@ void processArgs(int argc, char *argv[], vector &sigSets, int do_echo_matches = 0; int do_sql_output = 0; int option_index = 0; + int literalFlag = 0; vector sigFiles; static struct option longopts[] = { @@ -257,6 +260,7 @@ void processArgs(int argc, char *argv[], vector &sigSets, {"echo-matches", no_argument, &do_echo_matches, 1}, {"compress-stream", no_argument, &do_compress, 1}, {"sql-out", required_argument, &do_sql_output, 1}, + {"literal-on", no_argument, &literalFlag, 1}, {nullptr, 0, nullptr, 0} }; @@ -463,6 +467,8 @@ void processArgs(int argc, char *argv[], vector &sigSets, loadSignatureList(file, sigs); sigSets.emplace_back(file, move(sigs)); } + + useLiteralApi = (bool)literalFlag; } /** Start the global timer. 
*/ diff --git a/tools/hscheck/CMakeLists.txt b/tools/hscheck/CMakeLists.txt index 8f45765a8..2ae06137c 100644 --- a/tools/hscheck/CMakeLists.txt +++ b/tools/hscheck/CMakeLists.txt @@ -16,7 +16,11 @@ if (BUILD_CHIMERA) target_link_libraries(hscheck hs chimera pcre expressionutil) endif() else() - add_executable(hscheck ${hscheck_SOURCES}) + if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)) + add_executable(hscheck ${hscheck_SOURCES} $ $) + else() + add_executable(hscheck ${hscheck_SOURCES}) + endif() if(NOT WIN32) target_link_libraries(hscheck hs expressionutil pthread) else() diff --git a/tools/hscheck/main.cpp b/tools/hscheck/main.cpp index 595c8b84f..9cfe73dff 100644 --- a/tools/hscheck/main.cpp +++ b/tools/hscheck/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -92,6 +92,7 @@ bool g_allSignatures = false; bool g_forceEditDistance = false; bool build_sigs = false; bool check_logical = false; +bool use_literal_api = false; unsigned int g_signature; unsigned int g_editDistance; unsigned int globalFlags = 0; @@ -322,11 +323,26 @@ void checkExpression(UNUSED void *threadarg) { #if !defined(RELEASE_BUILD) // This variant is available in non-release builds and allows us to // modify greybox settings. 
- err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1, mode, - nullptr, &db, &compile_err, *g_grey); + if (use_literal_api) { + size_t len = strlen(regexp); + err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp, + &len, 1, mode, nullptr, &db, + &compile_err, *g_grey); + } else { + err = hs_compile_multi_int(&regexp, &flags, nullptr, &extp, 1, + mode, nullptr, &db, &compile_err, + *g_grey); + } #else - err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1, mode, - nullptr, &db, &compile_err); + if (use_literal_api) { + size_t len = strlen(regexp); + err = hs_compile_lit_multi_int(&regexp, &flags, nullptr, &extp, + &len, 1, mode, nullptr, &db, + &compile_err, *g_grey); + } else { + err = hs_compile_ext_multi(&regexp, &flags, nullptr, &extp, 1, + mode, nullptr, &db, &compile_err); + } #endif if (err == HS_SUCCESS) { @@ -381,6 +397,11 @@ void checkLogicalExpression(UNUSED void *threadarg) { ExprExtMap::const_iterator it; while (getNextLogicalExpression(it)) { + if (use_literal_api) { + recordSuccess(g_exprMap, it->first); + continue; + } + const ParsedExpr &comb = it->second; vector subIds; @@ -470,6 +491,7 @@ void usage() { << " -h Display this help." << endl << " -B Build signature set." << endl << " -C Check logical combinations (default: off)." << endl + << " --literal-on Processing pure literals, no need to check." 
<< endl << endl; } @@ -477,9 +499,15 @@ static void processArgs(int argc, char *argv[], UNUSED unique_ptr &grey) { const char options[] = "e:E:s:z:hHLNV8G:T:BC"; bool signatureSet = false; + int literalFlag = 0; + + static struct option longopts[] = { + {"literal-on", no_argument, &literalFlag, 1}, + {nullptr, 0, nullptr, 0} + }; for (;;) { - int c = getopt_long(argc, argv, options, nullptr, nullptr); + int c = getopt_long(argc, argv, options, longopts, nullptr); if (c < 0) { break; } @@ -539,6 +567,9 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr &grey) { case 'C': check_logical = true; break; + case 0: + case 1: + break; default: usage(); exit(1); @@ -564,6 +595,8 @@ void processArgs(int argc, char *argv[], UNUSED unique_ptr &grey) { usage(); exit(1); } + + use_literal_api = (bool)literalFlag; } static diff --git a/tools/hscollider/GraphTruth.cpp b/tools/hscollider/GraphTruth.cpp index b4b3f809b..0b67b11c5 100644 --- a/tools/hscollider/GraphTruth.cpp +++ b/tools/hscollider/GraphTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -299,6 +299,46 @@ char isLogicalCombination(vector &lv, const vector &comb, return lv[result]; } +/** \brief Returns 1 if combination matches when no sub-expression matches. 
*/ +static +char isPurelyNegativeMatch(vector &lv, const vector &comb, + size_t lkeyCount, unsigned start, unsigned result) { + assert(start <= result); + for (unsigned i = start; i <= result; i++) { + const LogicalOp &op = comb[i - lkeyCount]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + if ((op.ro < lkeyCount) && lv[op.ro]) { + // sub-expression not negative + return 0; + } + lv[op.id] = !lv[op.ro]; + break; + case LOGICAL_OP_AND: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] & lv[op.ro]; // && + break; + case LOGICAL_OP_OR: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] | lv[op.ro]; // || + break; + default: + assert(0); + break; + } + } + return lv[result]; +} + bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi, const string &buffer, ResultSet &rs, string &error) { if (cngi.quiet) { @@ -359,6 +399,13 @@ bool GraphTruth::run(unsigned, const CompiledNG &cng, const CNGInfo &cngi, } } } + if (isPurelyNegativeMatch(lv, comb, m_lkey.size(), + li.start, li.result)) { + u64a to = buffer.length(); + if ((to >= cngi.min_offset) && (to <= cngi.max_offset)) { + rs.addMatch(0, to); + } + } return true; } diff --git a/tools/hscollider/GroundTruth.cpp b/tools/hscollider/GroundTruth.cpp index fe038c818..f30a8f5eb 100644 --- a/tools/hscollider/GroundTruth.cpp +++ b/tools/hscollider/GroundTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -43,6 +43,7 @@ #include "parser/Parser.h" #include "parser/parse_error.h" #include "util/make_unique.h" +#include "util/string_util.h" #include 
"util/unicode_def.h" #include "util/unordered.h" @@ -111,6 +112,15 @@ bool decodeExprPcre(string &expr, unsigned *flags, bool *highlander, return false; } + if (use_literal_api) { + // filter out flags not supported by pure literal API. + u32 not_supported = HS_FLAG_DOTALL | HS_FLAG_ALLOWEMPTY | HS_FLAG_UTF8 | + HS_FLAG_UCP | HS_FLAG_PREFILTER; + hs_flags &= ~not_supported; + force_utf8 = false; + force_prefilter = false; + } + expr.swap(regex); if (!getPcreFlags(hs_flags, flags, highlander, prefilter, som, @@ -260,9 +270,29 @@ GroundTruth::compile(unsigned id, bool no_callouts) { throw PcreCompileFailure("Unable to decode flags."); } + // When hyperscan literal api is on, transfer the regex string into hex. + if (use_literal_api && !combination) { + unsigned char *pat + = reinterpret_cast(const_cast(re.c_str())); + char *str = makeHex(pat, re.length()); + if (!str) { + throw PcreCompileFailure("makeHex() malloc failure."); + } + re.assign(str); + free(str); + } + // filter out flags not supported by PCRE u64a supported = HS_EXT_FLAG_MIN_OFFSET | HS_EXT_FLAG_MAX_OFFSET | HS_EXT_FLAG_MIN_LENGTH; + if (use_literal_api) { + ext.flags &= 0ULL; + ext.min_offset = 0; + ext.max_offset = MAX_OFFSET; + ext.min_length = 0; + ext.edit_distance = 0; + ext.hamming_distance = 0; + } if (ext.flags & ~supported) { // edit distance is a known unsupported flag, so just throw a soft error if (ext.flags & HS_EXT_FLAG_EDIT_DISTANCE) { @@ -314,7 +344,6 @@ GroundTruth::compile(unsigned id, bool no_callouts) { return compiled; } - compiled->bytecode = pcre_compile2(re.c_str(), flags, &errcode, &errptr, &errloc, nullptr); @@ -557,6 +586,46 @@ char isLogicalCombination(vector &lv, const vector &comb, return lv[result]; } +/** \brief Returns 1 if combination matches when no sub-expression matches. 
*/ +static +char isPurelyNegativeMatch(vector &lv, const vector &comb, + size_t lkeyCount, unsigned start, unsigned result) { + assert(start <= result); + for (unsigned i = start; i <= result; i++) { + const LogicalOp &op = comb[i - lkeyCount]; + assert(i == op.id); + switch (op.op) { + case LOGICAL_OP_NOT: + if ((op.ro < lkeyCount) && lv[op.ro]) { + // sub-expression not negative + return 0; + } + lv[op.id] = !lv[op.ro]; + break; + case LOGICAL_OP_AND: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] & lv[op.ro]; // && + break; + case LOGICAL_OP_OR: + if (((op.lo < lkeyCount) && lv[op.lo]) || + ((op.ro < lkeyCount) && lv[op.ro])) { + // sub-expression not negative + return 0; + } + lv[op.id] = lv[op.lo] | lv[op.ro]; // || + break; + default: + assert(0); + break; + } + } + return lv[result]; +} + bool GroundTruth::run(unsigned, const CompiledPcre &compiled, const string &buffer, ResultSet &rs, string &error) { if (compiled.quiet) { @@ -616,6 +685,13 @@ bool GroundTruth::run(unsigned, const CompiledPcre &compiled, } } } + if (isPurelyNegativeMatch(lv, comb, m_lkey.size(), + li.start, li.result)) { + u64a to = buffer.length(); + if ((to >= compiled.min_offset) && (to <= compiled.max_offset)) { + rs.addMatch(0, to); + } + } return true; } diff --git a/tools/hscollider/NfaGeneratedCorpora.cpp b/tools/hscollider/NfaGeneratedCorpora.cpp index b7c77ee15..66ae270be 100644 --- a/tools/hscollider/NfaGeneratedCorpora.cpp +++ b/tools/hscollider/NfaGeneratedCorpora.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -32,6 +32,7 @@ #include "ng_corpus_generator.h" #include "NfaGeneratedCorpora.h" #include "ExpressionParser.h" +#include "common.h" #include 
"grey.h" #include "hs_compile.h" @@ -44,6 +45,7 @@ #include "util/compile_context.h" #include "util/compile_error.h" #include "util/report_manager.h" +#include "util/string_util.h" #include "util/target_info.h" #include @@ -80,6 +82,18 @@ void NfaGeneratedCorpora::generate(unsigned id, vector &data) { throw CorpusFailure("Expression could not be read: " + i->second); } + // When hyperscan literal api is on, transfer the regex string into hex. + if (use_literal_api && !(hs_flags & HS_FLAG_COMBINATION)) { + unsigned char *pat + = reinterpret_cast(const_cast(re.c_str())); + char *str = makeHex(pat, re.length()); + if (!str) { + throw CorpusFailure("makeHex() malloc failure."); + } + re.assign(str); + free(str); + } + // Combination's corpus is consist of sub-expressions' corpuses. if (hs_flags & HS_FLAG_COMBINATION) { ParsedLogical pl; diff --git a/tools/hscollider/UltimateTruth.cpp b/tools/hscollider/UltimateTruth.cpp index c37e39ba3..038fbf777 100644 --- a/tools/hscollider/UltimateTruth.cpp +++ b/tools/hscollider/UltimateTruth.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -925,11 +925,22 @@ compileHyperscan(vector &patterns, vector &flags, const unsigned count = patterns.size(); hs_database_t *db = nullptr; hs_compile_error_t *compile_err; + hs_error_t err; - hs_error_t err = hs_compile_multi_int(&patterns[0], &flags[0], - &idsvec[0], ext.c_array(), count, - mode, platform, &db, - &compile_err, grey); + if (use_literal_api) { + // Compute length of each pattern. 
+ vector lens(count); + for (unsigned int i = 0; i < count; i++) { + lens[i] = strlen(patterns[i]); + } + err = hs_compile_lit_multi_int(&patterns[0], &flags[0], &idsvec[0], + ext.c_array(), &lens[0], count, mode, + platform, &db, &compile_err, grey); + } else { + err = hs_compile_multi_int(&patterns[0], &flags[0], &idsvec[0], + ext.c_array(), count, mode, platform, &db, + &compile_err, grey); + } if (err != HS_SUCCESS) { error = compile_err->message; diff --git a/tools/hscollider/args.cpp b/tools/hscollider/args.cpp index 3b515027f..2eb510e00 100644 --- a/tools/hscollider/args.cpp +++ b/tools/hscollider/args.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -116,6 +116,7 @@ void usage(const char *name, const char *error) { printf(" --abort-on-fail Abort, rather than exit, on failure.\n"); printf(" --no-signal-handler Do not handle handle signals (to generate " "backtraces).\n"); + printf(" --literal-on Use Hyperscan pure literal matching.\n"); printf("\n"); printf("Memory and resource control options:\n"); printf("\n"); @@ -174,6 +175,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop, int mangleScratch = 0; int compressFlag = 0; int compressResetFlag = 0; + int literalFlag = 0; static const struct option longopts[] = { {"copy-scratch", 0, &copyScratch, 1}, {"copy-stream", 0, &copyStream, 1}, @@ -187,6 +189,7 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop, {"compress-expand", 0, &compressFlag, 1}, {"compress-reset-expand", 0, &compressResetFlag, 1}, {"no-groups", 0, &no_groups, 1}, + {"literal-on", 0, &literalFlag, 1}, {nullptr, 0, nullptr, 0}}; for (;;) { @@ -589,4 +592,5 @@ void processArgs(int argc, char *argv[], CorpusProperties &corpus_gen_prop, use_mangle_scratch = (bool) mangleScratch; 
use_compress_expand = (bool)compressFlag; use_compress_reset_expand = (bool)compressResetFlag; + use_literal_api = (bool)literalFlag; } diff --git a/tools/hscollider/common.h b/tools/hscollider/common.h index d9a0144cc..67e488c00 100644 --- a/tools/hscollider/common.h +++ b/tools/hscollider/common.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,6 +82,7 @@ extern bool use_copy_stream; extern bool use_mangle_scratch; extern bool use_compress_expand; extern bool use_compress_reset_expand; +extern bool use_literal_api; extern int abort_on_failure; extern int no_signal_handler; extern bool force_edit_distance; diff --git a/tools/hscollider/main.cpp b/tools/hscollider/main.cpp index 18d7a016d..afa6ef5a9 100644 --- a/tools/hscollider/main.cpp +++ b/tools/hscollider/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -118,6 +118,7 @@ bool use_copy_stream = false; bool use_mangle_scratch = false; bool use_compress_expand = false; bool use_compress_reset_expand = false; +bool use_literal_api = false; int abort_on_failure = 0; int no_signal_handler = 0; size_t max_scan_queue_len = 25000; diff --git a/tools/hscollider/sig.cpp b/tools/hscollider/sig.cpp index dc8151400..7d580e410 100644 --- a/tools/hscollider/sig.cpp +++ b/tools/hscollider/sig.cpp @@ -42,7 +42,10 @@ #ifdef HAVE_BACKTRACE #include -#include +#endif + +#ifdef HAVE_UNISTD_H +#include // for _exit #endif #define BACKTRACE_BUFFER_SIZE 200 diff --git a/tools/hsdump/CMakeLists.txt b/tools/hsdump/CMakeLists.txt index 4350b0f6d..0466d5720 100644 --- a/tools/hsdump/CMakeLists.txt +++ 
b/tools/hsdump/CMakeLists.txt @@ -10,6 +10,10 @@ include_directories(${PROJECT_SOURCE_DIR}/util) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${EXTRA_C_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") -add_executable(hsdump main.cpp) +if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)) + add_executable(hsdump main.cpp $ $) +else() + add_executable(hsdump main.cpp) +endif() target_link_libraries(hsdump hs expressionutil crosscompileutil) diff --git a/tools/hsdump/main.cpp b/tools/hsdump/main.cpp index 3221d1b69..75db1c4f3 100644 --- a/tools/hsdump/main.cpp +++ b/tools/hsdump/main.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2018, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -106,6 +106,8 @@ bool dump_intermediate = true; bool force_edit_distance = false; u32 edit_distance = 0; +int use_literal_api = 0; + } // namespace // Usage statement. 
@@ -139,6 +141,7 @@ void usage(const char *name, const char *error) { printf(" -8 Force UTF8 mode on all patterns.\n"); printf(" -L Apply HS_FLAG_SOM_LEFTMOST to all patterns.\n"); printf(" --prefilter Apply HS_FLAG_PREFILTER to all patterns.\n"); + printf(" --literal-on Use Hyperscan pure literal matching API.\n"); printf("\n"); printf("Example:\n"); printf("$ %s -e pattern.file -s sigfile\n", name); @@ -163,6 +166,7 @@ void processArgs(int argc, char *argv[], Grey &grey) { {"utf8", no_argument, nullptr, '8'}, {"prefilter", no_argument, &force_prefilter, 1}, {"som-width", required_argument, nullptr, 'd'}, + {"literal-on", no_argument, &use_literal_api, 1}, {nullptr, 0, nullptr, 0} }; @@ -501,9 +505,23 @@ unsigned int dumpDataMulti(const vector &patterns, hs_database_t *db = nullptr; hs_compile_error_t *compile_err; - hs_error_t err = hs_compile_multi_int( - patterns.data(), flags.data(), ids.data(), ext.c_array(), - patterns.size(), mode, plat_info.get(), &db, &compile_err, grey); + hs_error_t err; + const size_t count = patterns.size(); + if (use_literal_api) { + // Compute length of each pattern. 
+ vector lens(count); + for (unsigned int i = 0; i < count; i++) { + lens[i] = strlen(patterns[i]); + } + err = hs_compile_lit_multi_int(patterns.data(), flags.data(), + ids.data(), ext.c_array(), lens.data(), + count, mode, plat_info.get(), &db, + &compile_err, grey); + } else { + err = hs_compile_multi_int(patterns.data(), flags.data(), ids.data(), + ext.c_array(), count, mode, plat_info.get(), + &db, &compile_err, grey); + } if (err != HS_SUCCESS) { if (compile_err && compile_err->message) { diff --git a/unit/CMakeLists.txt b/unit/CMakeLists.txt index 32e014508..b0706fa8e 100644 --- a/unit/CMakeLists.txt +++ b/unit/CMakeLists.txt @@ -129,7 +129,11 @@ set(unit_internal_SOURCES internal/main.cpp ) +if(WIN32 AND (BUILD_STATIC_AND_SHARED OR BUILD_SHARED_LIBS)) +add_executable(unit-internal ${unit_internal_SOURCES} $ $) +else() add_executable(unit-internal ${unit_internal_SOURCES}) +endif() set_target_properties(unit-internal PROPERTIES COMPILE_FLAGS "${HS_CXX_FLAGS}") target_link_libraries(unit-internal hs corpusomatic) endif(NOT (RELEASE_BUILD OR FAT_RUNTIME)) diff --git a/unit/hyperscan/bad_patterns.txt b/unit/hyperscan/bad_patterns.txt index 6d4283dac..c4a9f13c4 100644 --- a/unit/hyperscan/bad_patterns.txt +++ b/unit/hyperscan/bad_patterns.txt @@ -155,11 +155,6 @@ 158:/141 & (142|!143) )| 144/C #Not enough left parentheses at index 17. 159:/1234567890 & (142|!143 )/C #Expression id too large at index 10. 160:/141 & (142|!143 )|/C #Not enough operand at index 18. -161:/!141/C #Has match from purely negative sub-expressions. -162:/!141 | 142 | 143/C #Has match from purely negative sub-expressions. -163:/!141 & !142 & !143/C #Has match from purely negative sub-expressions. -164:/(141 | !142 & !143)/C #Has match from purely negative sub-expressions. -165:/!(141 | 142 | 143)/C #Has match from purely negative sub-expressions. -166:/141/C #No logical operation. -167:/119 & 121/C #Unknown sub-expression id. -168:/166 & 167/C #Unknown sub-expression id. 
+161:/141/C #No logical operation. +162:/119 & 121/C #Unknown sub-expression id. +163:/166 & 167/C #Unknown sub-expression id. diff --git a/unit/hyperscan/logical_combination.cpp b/unit/hyperscan/logical_combination.cpp index 169de333b..9558948fb 100644 --- a/unit/hyperscan/logical_combination.cpp +++ b/unit/hyperscan/logical_combination.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018, Intel Corporation + * Copyright (c) 2018-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -694,3 +694,229 @@ TEST(LogicalCombination, MultiCombQuietUniSub5) { err = hs_free_scratch(scratch); ASSERT_EQ(HS_SUCCESS, err); } + +TEST(LogicalCombination, SingleCombPurelyNegative6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION}; + unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(1U, c.matches.size()); + ASSERT_EQ(MatchRecord(53, 1002), c.matches[0]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, SingleCombQuietPurelyNegative6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = 
"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "(!201 | 202 & 203) & (!204 | 205)"}; + unsigned flags[] = {0, 0, 0, 0, 0, HS_FLAG_COMBINATION | HS_FLAG_QUIET}; + unsigned ids[] = {201, 202, 203, 204, 205, 1002}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 6, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(0U, c.matches.size()); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombPurelyNegativeUniSub6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-----------------------------------------------" + "xxxfedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-----------------------------------------------" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "------------------------------------------"; + const char *expr[] = {"abc", "def", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "cba", "fed", "google.*cn", + "haystacks{4,8}", "ijkl[oOp]", "cab", "fee", + "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]", + "(101 & 102 & 103) | (!104 & !105)", + "(!201 | 202 & 203) & (!204 | 205)", + "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + 302, 303, 304, 305, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, 
flags, ids, 18, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(3U, c.matches.size()); + ASSERT_EQ(MatchRecord(106, 202), c.matches[0]); + ASSERT_EQ(MatchRecord(106, 1002), c.matches[1]); + ASSERT_EQ(MatchRecord(300, 1001), c.matches[2]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombPurelyNegativeUniSubEOD6) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-----------------------------------------------" + "xdefedxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-----------------------------------------------" + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + "-------------------------------------defed"; + const char *expr[] = {"abc", "defed", "foobar.*gh", "teakettle{4,10}", + "ijkl[mMn]", "cba", "fed", "google.*cn", + "haystacks{4,8}", "ijkl[oOp]", "cab", "fee", + "goobar.*jp", "shockwave{4,6}", "ijkl[rRs]", + "(101 & 102 & 103) | (!104 & !105)", + "(!201 | 202 & 203) & (!204 | 205)", + "((301 | 302) & 303) & (304 | 305)"}; + unsigned flags[] = {0, 0, 0, 0, 0, 0, HS_FLAG_MULTILINE, + 0, 0, 0, 0, 0, 0, 0, 0, + HS_FLAG_COMBINATION, HS_FLAG_COMBINATION, + HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 103, 104, 105, 201, 202, 203, 204, 205, 301, + 302, 303, 304, 305, 1001, 1002, 1003}; + hs_error_t err = hs_compile_multi(expr, flags, ids, 18, HS_MODE_NOSTREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = 
hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + c.halt = 0; + err = hs_scan(db, data.c_str(), data.size(), 0, scratch, record_cb, + (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_EQ(8U, c.matches.size()); + ASSERT_EQ(MatchRecord(106, 102), c.matches[0]); + ASSERT_EQ(MatchRecord(106, 202), c.matches[1]); + ASSERT_EQ(MatchRecord(106, 1001), c.matches[2]); + ASSERT_EQ(MatchRecord(106, 1002), c.matches[3]); + ASSERT_EQ(MatchRecord(300, 102), c.matches[4]); + ASSERT_EQ(MatchRecord(300, 202), c.matches[5]); + ASSERT_EQ(MatchRecord(300, 1001), c.matches[6]); + ASSERT_EQ(MatchRecord(300, 1002), c.matches[7]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} + +TEST(LogicalCombination, MultiCombStream1) { + hs_database_t *db = nullptr; + hs_compile_error_t *compile_err = nullptr; + CallBackContext c; + string data[] = {"xxxxxxxabcxxxxxxxdefxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxghixxxxxxxxxxxabcxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxdefxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxyzxxxxxxxxxxxxxxxxxxxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxghixxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", + "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxzxy", + "z"}; + const char *expr[] = {"abc", "def", "xyz", "zxyz", + "101 & 102", "201 & !202"}; + unsigned flags[] = {0, 0, 0, 0, HS_FLAG_COMBINATION, HS_FLAG_COMBINATION}; + unsigned ids[] = {101, 102, 201, 202, 1001, 1002}; + hs_error_t err = 
hs_compile_multi(expr, flags, ids, 6, HS_MODE_STREAM, + nullptr, &db, &compile_err); + + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(db != nullptr); + + hs_scratch_t *scratch = nullptr; + err = hs_alloc_scratch(db, &scratch); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(scratch != nullptr); + + hs_stream_t *stream = nullptr; + err = hs_open_stream(db, 0, &stream); + ASSERT_EQ(HS_SUCCESS, err); + ASSERT_TRUE(stream != nullptr); + + c.halt = 0; + int i; + for (i = 0; i < 11; i++) { + err = hs_scan_stream(stream, data[i].c_str(), data[i].size(), 0, + scratch, record_cb, (void *)&c); + ASSERT_EQ(HS_SUCCESS, err); + } + err = hs_close_stream(stream, scratch, dummy_cb, nullptr); + ASSERT_EQ(HS_SUCCESS, err); + + ASSERT_EQ(11U, c.matches.size()); + ASSERT_EQ(MatchRecord(10, 101), c.matches[0]); + ASSERT_EQ(MatchRecord(20, 102), c.matches[1]); + ASSERT_EQ(MatchRecord(20, 1001), c.matches[2]); + ASSERT_EQ(MatchRecord(109, 101), c.matches[3]); + ASSERT_EQ(MatchRecord(109, 1001), c.matches[4]); + ASSERT_EQ(MatchRecord(171, 102), c.matches[5]); + ASSERT_EQ(MatchRecord(171, 1001), c.matches[6]); + ASSERT_EQ(MatchRecord(247, 201), c.matches[7]); + ASSERT_EQ(MatchRecord(247, 1002), c.matches[8]); + ASSERT_EQ(MatchRecord(761, 201), c.matches[9]); + ASSERT_EQ(MatchRecord(761, 202), c.matches[10]); + + hs_free_database(db); + err = hs_free_scratch(scratch); + ASSERT_EQ(HS_SUCCESS, err); +} diff --git a/unit/internal/uniform_ops.cpp b/unit/internal/uniform_ops.cpp index 10defdbd3..7d394e02a 100644 --- a/unit/internal/uniform_ops.cpp +++ b/unit/internal/uniform_ops.cpp @@ -75,7 +75,7 @@ TEST(Uniform, loadstore_u16) { TEST(Uniform, loadstore_u32) { for (int i = 0; i < 32; i++) { - u32 in = 1 << i; + u32 in = 1U << i; const char *cin = (const char *)(&in); u32 out = load_u32(cin); EXPECT_EQ(in, out); @@ -106,7 +106,7 @@ TEST(Uniform, loadstore_m128) { } in; for (int i = 0; i < 128; i++) { memset(&in, 0, sizeof(in)); - in.words[i/32] = 1 << (i % 32); + in.words[i/32] = 1U << (i % 32); const 
char *cin = (const char *)(&in); m128 out = load_m128(cin); EXPECT_EQ(0, memcmp(&out, &in, sizeof(out))); @@ -124,7 +124,7 @@ TEST(Uniform, loadstore_m256) { } in; for (int i = 0; i < 256; i++) { memset(&in, 0, sizeof(in)); - in.words[i/32] = 1 << (i % 32); + in.words[i/32] = 1U << (i % 32); const char *cin = (const char *)(&in); m256 out = load_m256(cin); EXPECT_EQ(0, memcmp(&out, &in, sizeof(out))); @@ -142,7 +142,7 @@ TEST(Uniform, loadstore_m512) { } in; for (int i = 0; i < 512; i++) { memset(&in, 0, sizeof(in)); - in.words[i/32] = 1 << (i % 32); + in.words[i/32] = 1U << (i % 32); const char *cin = (const char *)(&in); m512 out = load_m512(cin); EXPECT_EQ(0, memcmp(&out, &in, sizeof(out))); diff --git a/util/expression_path.h b/util/expression_path.h index 3075b4d42..ac4ca97da 100644 --- a/util/expression_path.h +++ b/util/expression_path.h @@ -56,9 +56,8 @@ std::string inferExpressionPath(const std::string &sigFile) { // POSIX variant. // dirname() may modify its argument, so we must make a copy. - std::vector path(sigFile.size() + 1); - memcpy(path.data(), sigFile.c_str(), sigFile.size()); - path[sigFile.size()] = 0; // ensure null termination. + std::vector path(sigFile.begin(), sigFile.end()); + path.push_back(0); // ensure null termination. std::string rv = dirname(path.data()); #else diff --git a/util/string_util.h b/util/string_util.h index 658eb7043..b44586ea7 100644 --- a/util/string_util.h +++ b/util/string_util.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015, Intel Corporation + * Copyright (c) 2015-2019, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -127,4 +127,18 @@ void prettyPrintRange(std::ostream &out, it_t begin, it_t end) { } } +// Transfer given string into a hex-escaped pattern. 
+//
+// Every input byte is rendered as the four characters "\xNN" (backslash, 'x',
+// two lowercase hex digits), so the result holds exactly patlen * 4 characters
+// followed by a terminating NUL. An empty input yields an empty string.
+//
+// pat     bytes to encode; pat[0] .. pat[patlen - 1] are read.
+// patlen  number of bytes in pat.
+//
+// Returns a buffer obtained from malloc(), which the caller releases with
+// free(), or nullptr if the allocation fails.
+static really_inline
+char *makeHex(const unsigned char *pat, unsigned patlen) {
+    // Widen before multiplying: evaluated in unsigned arithmetic, patlen * 4
+    // wraps for very long inputs (2^30 bytes and up when unsigned is 32 bits
+    // wide), which would leave the buffer too small for the loop below.
+    const size_t hexlen = (size_t)patlen * 4;
+    char *hexbuf = (char *)malloc(hexlen + 1);
+    if (!hexbuf) {
+        return nullptr;
+    }
+
+    char *buf = hexbuf;
+    for (unsigned i = 0; i < patlen; i++, buf += 4) {
+        // A size of 5 covers "\xNN" plus the NUL that snprintf appends; the
+        // next iteration overwrites that NUL with its own backslash.
+        snprintf(buf, 5, "\\x%02x", (unsigned)pat[i]);
+    }
+    hexbuf[hexlen] = '\0'; // also terminates the result when patlen is 0
+    return hexbuf;
+}
+
 #endif // STRING_UTIL_H