From c06d5e1c148bbc3b4fc8ab47f903b8e1de0dcc1a Mon Sep 17 00:00:00 2001 From: "Hong, Yang A" Date: Wed, 19 Dec 2018 17:49:09 +0800 Subject: [PATCH] DFA state compression: 16-bit wide and sherman co-exist --- src/grey.cpp | 4 +- src/grey.h | 3 +- src/nfa/accel_dfa_build_strat.h | 9 +- src/nfa/goughcompile.cpp | 1 + src/nfa/mcclellan.c | 216 +++++++++--- src/nfa/mcclellan_common_impl.h | 107 +++++- src/nfa/mcclellan_internal.h | 51 +++ src/nfa/mcclellancompile.cpp | 569 ++++++++++++++++++++++++++++++-- src/nfa/mcclellancompile.h | 3 +- src/nfa/shengcompile.h | 3 +- 10 files changed, 894 insertions(+), 72 deletions(-) diff --git a/src/grey.cpp b/src/grey.cpp index 3762a4975..fa8da2b49 100644 --- a/src/grey.cpp +++ b/src/grey.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,6 +82,7 @@ Grey::Grey(void) : onlyOneOutfix(false), allowShermanStates(true), allowMcClellan8(true), + allowWideStates(true), // enable wide state for McClellan8 highlanderPruneDFA(true), minimizeDFA(true), accelerateDFA(true), @@ -251,6 +252,7 @@ void applyGreyOverrides(Grey *g, const string &s) { G_UPDATE(onlyOneOutfix); G_UPDATE(allowShermanStates); G_UPDATE(allowMcClellan8); + G_UPDATE(allowWideStates); G_UPDATE(highlanderPruneDFA); G_UPDATE(minimizeDFA); G_UPDATE(accelerateDFA); diff --git a/src/grey.h b/src/grey.h index 34c62918d..ed2f845a4 100644 --- a/src/grey.h +++ b/src/grey.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -87,6 +87,7 @@ struct Grey { bool allowShermanStates; bool allowMcClellan8; + bool allowWideStates; // enable wide state for McClellan8 
bool highlanderPruneDFA; bool minimizeDFA; diff --git a/src/nfa/accel_dfa_build_strat.h b/src/nfa/accel_dfa_build_strat.h index 881892ed4..53a6f35b3 100644 --- a/src/nfa/accel_dfa_build_strat.h +++ b/src/nfa/accel_dfa_build_strat.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -40,6 +40,11 @@ namespace ue2 { class ReportManager; struct Grey; +enum DfaType { + McClellan, + Sheng, + Gough +}; class accel_dfa_build_strat : public dfa_build_strat { public: @@ -53,6 +58,8 @@ class accel_dfa_build_strat : public dfa_build_strat { virtual void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out); virtual std::map getAccelInfo(const Grey &grey); + virtual DfaType getType() const = 0; + private: bool only_accel_init; }; diff --git a/src/nfa/goughcompile.cpp b/src/nfa/goughcompile.cpp index 3f1614dd1..d41c6f423 100644 --- a/src/nfa/goughcompile.cpp +++ b/src/nfa/goughcompile.cpp @@ -91,6 +91,7 @@ class gough_build_strat : public mcclellan_build_strat { void buildAccel(dstate_id_t this_idx, const AccelScheme &info, void *accel_out) override; u32 max_allowed_offset_accel() const override { return 0; } + DfaType getType() const override { return Gough; } raw_som_dfa &rdfa; const GoughGraph ≫ diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c index ceedb9db5..1521de5bc 100644 --- a/src/nfa/mcclellan.c +++ b/src/nfa/mcclellan.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -167,9 +167,68 @@ u32 doNormal16(const struct mcclellan *m, const u8 **c_inout, const u8 *end, } static really_inline -char mcclellanExec16_i(const struct 
mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **c_final, enum MatchMode mode) { +u32 doNormalWide16(const struct mcclellan *m, const u8 **c_inout, + const u8 *end, u32 s, char *qstate, u16 *offset, + char do_accel, enum MatchMode mode) { + const u8 *c = *c_inout; + + u32 wide_limit = m->wide_limit; + const char *wide_base + = (const char *)m - sizeof(struct NFA) + m->wide_offset; + + const u16 *succ_table + = (const u16 *)((const char *)m + sizeof(struct mcclellan)); + assert(ISALIGNED_N(succ_table, 2)); + u32 sherman_base = m->sherman_limit; + const char *sherman_base_offset + = (const char *)m - sizeof(struct NFA) + m->sherman_offset; + u32 as = m->alphaShift; + + s &= STATE_MASK; + + while (c < end && s) { + u8 cprime = m->remap[*c]; + DEBUG_PRINTF("c: %02hhx '%c' cp:%02hhx (s=%u) &c: %p\n", *c, + ourisprint(*c) ? *c : '?', cprime, s, c); + + if (unlikely(s >= wide_limit)) { + const char *wide_entry + = findWideEntry16(m, wide_base, wide_limit, s); + DEBUG_PRINTF("doing wide head (%u)\n", s); + s = doWide16(wide_entry, &c, end, m->remap, (u16 *)&s, qstate, + offset); + } else if (s >= sherman_base) { + const char *sherman_state + = findShermanState(m, sherman_base_offset, sherman_base, s); + DEBUG_PRINTF("doing sherman (%u)\n", s); + s = doSherman16(sherman_state, cprime, succ_table, as); + } else { + DEBUG_PRINTF("doing normal\n"); + s = succ_table[(s << as) + cprime]; + } + + DEBUG_PRINTF("s: %u (%u)\n", s, s & STATE_MASK); + c++; + + if (do_accel && (s & ACCEL_FLAG)) { + break; + } + if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { + break; + } + + s &= STATE_MASK; + } + + *c_inout = c; + return s; +} + +static really_inline +char mcclellanExec16_i(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, NfaCallback cb, + void *ctxt, char single, const u8 **c_final, + enum MatchMode mode) { assert(ISALIGNED_N(state, 2)); if (!len) { if (mode == 
STOP_AT_MATCH) { @@ -179,6 +238,7 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, } u32 s = *state; + u16 offset = 0; const u8 *c = buf; const u8 *c_end = buf + len; const struct mstate_aux *aux @@ -207,7 +267,12 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, goto exit; } - s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, min_accel_offset, s, qstate, &offset, 0, + mode); + } else { + s = doNormal16(m, &c, min_accel_offset, s, 0, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -259,7 +324,11 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, } } - s = doNormal16(m, &c, c_end, s, 1, mode); + if (unlikely(m->has_wide)) { + s = doNormalWide16(m, &c, c_end, s, qstate, &offset, 1, mode); + } else { + s = doNormal16(m, &c, c_end, s, 1, mode); + } if (mode != NO_MATCHES && (s & ACCEPT_FLAG)) { if (mode == STOP_AT_MATCH) { @@ -297,44 +366,47 @@ char mcclellanExec16_i(const struct mcclellan *m, u32 *state, const u8 *buf, } static never_inline -char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, CALLBACK_OUTPUT); +char mcclellanExec16_i_cb(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, CALLBACK_OUTPUT); } static never_inline -char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, 
ctxt, single, - final_point, STOP_AT_MATCH); +char mcclellanExec16_i_sam(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, STOP_AT_MATCH); } static never_inline -char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point) { - return mcclellanExec16_i(m, state, buf, len, offAdj, cb, ctxt, single, - final_point, NO_MATCHES); +char mcclellanExec16_i_nm(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point) { + return mcclellanExec16_i(m, state, qstate, buf, len, offAdj, cb, ctxt, + single, final_point, NO_MATCHES); } static really_inline -char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, const u8 *buf, - size_t len, u64a offAdj, NfaCallback cb, void *ctxt, - char single, const u8 **final_point, - enum MatchMode mode) { +char mcclellanExec16_i_ni(const struct mcclellan *m, u32 *state, char *qstate, + const u8 *buf, size_t len, u64a offAdj, + NfaCallback cb, void *ctxt, char single, + const u8 **final_point, enum MatchMode mode) { if (mode == CALLBACK_OUTPUT) { - return mcclellanExec16_i_cb(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_cb(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else if (mode == STOP_AT_MATCH) { - return mcclellanExec16_i_sam(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return mcclellanExec16_i_sam(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } else { assert(mode == NO_MATCHES); - return mcclellanExec16_i_nm(m, state, buf, len, offAdj, cb, ctxt, - single, final_point); + return 
mcclellanExec16_i_nm(m, state, qstate, buf, len, offAdj, cb, + ctxt, single, final_point); } } @@ -540,6 +612,10 @@ char mcclellanCheckEOD(const struct NFA *nfa, u32 s, u64a offset, const struct mcclellan *m = getImplNfa(nfa); const struct mstate_aux *aux = get_aux(m, s); + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_CONTINUE_MATCHING; + } + if (!aux->accept_eod) { return MO_CONTINUE_MATCHING; } @@ -612,9 +688,9 @@ char nfaExecMcClellan16_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, /* do main buffer region */ const u8 *final_look; - char rv = mcclellanExec16_i_ni(m, &s, cur_buf + sp, local_ep - sp, - offset + sp, cb, context, single, - &final_look, mode); + char rv = mcclellanExec16_i_ni(m, &s, q->state, cur_buf + sp, + local_ep - sp, offset + sp, cb, context, + single, &final_look, mode); if (rv == MO_DEAD) { *(u16 *)q->state = 0; return MO_DEAD; @@ -684,12 +760,16 @@ char nfaExecMcClellan16_Bi(const struct NFA *n, u64a offset, const u8 *buffer, const struct mcclellan *m = getImplNfa(n); u32 s = m->start_anchored; - if (mcclellanExec16_i(m, &s, buffer, length, offset, cb, context, single, - NULL, CALLBACK_OUTPUT) + if (mcclellanExec16_i(m, &s, NULL, buffer, length, offset, cb, context, + single, NULL, CALLBACK_OUTPUT) == MO_DEAD) { return s ? 
MO_ALIVE : MO_DEAD; } + if (m->has_wide == 1 && s >= m->wide_limit) { + return MO_ALIVE; + } + const struct mstate_aux *aux = get_aux(m, s); if (aux->accept_eod) { @@ -768,6 +848,7 @@ char nfaExecMcClellan8_Q2i(const struct NFA *n, u64a offset, const u8 *buffer, char rv = mcclellanExec8_i_ni(m, &s, cur_buf + sp, local_ep - sp, offset + sp, cb, context, single, &final_look, mode); + if (rv == MO_HALT_MATCHING) { *(u8 *)q->state = 0; return MO_DEAD; @@ -1016,7 +1097,8 @@ char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report, u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return mcclellanHasAccept(m, get_aux(m, s), report); + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : mcclellanHasAccept(m, get_aux(m, s), report); } char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { @@ -1026,7 +1108,8 @@ char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) { u16 s = *(u16 *)q->state; DEBUG_PRINTF("checking accepts for %hu\n", s); - return !!get_aux(m, s)->accept; + return (m->has_wide == 1 && s >= m->wide_limit) ? + 0 : !!get_aux(m, s)->accept; } char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) { @@ -1111,6 +1194,12 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset, void *state, UNUSED u8 key) { const struct mcclellan *m = getImplNfa(nfa); u16 s = offset ? m->start_floating : m->start_anchored; + + // reset the wide-state relative offset; stream state may be unaligned + if (m->has_wide) { + unaligned_store_u16((u16 *)state + 1, 0); + } + if (s) { unaligned_store_u16(state, s); return 1; @@ -1140,14 +1229,24 @@ void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state, const u8 *buf, char top, size_t start_off, size_t len, NfaCallback cb, void *ctxt) { const struct mcclellan *m = getImplNfa(nfa); + u32 s; + + if (top) { + s = m->start_anchored; - u32 s = top ? 
m->start_anchored : unaligned_load_u16(state); + // reset the wide-state relative offset; stream state may be unaligned + if (m->has_wide) { + unaligned_store_u16((u16 *)state + 1, 0); + } + } else { + s = unaligned_load_u16(state); + } if (m->flags & MCCLELLAN_FLAG_SINGLE) { - mcclellanExec16_i(m, &s, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 1, NULL, CALLBACK_OUTPUT); } else { - mcclellanExec16_i(m, &s, buf + start_off, len - start_off, + mcclellanExec16_i(m, &s, state, buf + start_off, len - start_off, start_off, cb, ctxt, 0, NULL, CALLBACK_OUTPUT); } @@ -1178,9 +1277,16 @@ char nfaExecMcClellan8_queueInitState(UNUSED const struct NFA *nfa, char nfaExecMcClellan16_queueInitState(UNUSED const struct NFA *nfa, struct mq *q) { - assert(nfa->scratchStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); assert(ISALIGNED_N(q->state, 2)); *(u16 *)q->state = 0; + + // new byte + if (m->has_wide) { + *((u16 *)q->state + 1) = 0; + } return 0; } @@ -1206,21 +1312,39 @@ char nfaExecMcClellan8_expandState(UNUSED const struct NFA *nfa, void *dest, char nfaExecMcClellan16_queueCompressState(UNUSED const struct NFA *nfa, const struct mq *q, UNUSED s64a loc) { + const struct mcclellan *m = getImplNfa(nfa); void *dest = q->streamState; const void *src = q->state; - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? 
nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(src, 2)); unaligned_store_u16(dest, *(const u16 *)(src)); + + // also save the wide-state relative offset; dest may be unaligned + if (m->has_wide) { + unaligned_store_u16((u16 *)dest + 1, *((const u16 *)src + 1)); + } return 0; } char nfaExecMcClellan16_expandState(UNUSED const struct NFA *nfa, void *dest, const void *src, UNUSED u64a offset, UNUSED u8 key) { - assert(nfa->scratchStateSize == 2); - assert(nfa->streamStateSize == 2); + const struct mcclellan *m = getImplNfa(nfa); + assert(m->has_wide == 1 ? nfa->scratchStateSize == 4 + : nfa->scratchStateSize == 2); + assert(m->has_wide == 1 ? nfa->streamStateSize == 4 + : nfa->streamStateSize == 2); + assert(ISALIGNED_N(dest, 2)); *(u16 *)dest = unaligned_load_u16(src); + + // also restore the wide-state relative offset; src may be unaligned + if (m->has_wide) { + *((u16 *)dest + 1) = unaligned_load_u16((const u16 *)src + 1); + } return 0; } diff --git a/src/nfa/mcclellan_common_impl.h b/src/nfa/mcclellan_common_impl.h index be1307159..b6af672d9 100644 --- a/src/nfa/mcclellan_common_impl.h +++ b/src/nfa/mcclellan_common_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2016, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -82,3 +82,108 @@ u32 doSherman16(const char *sherman_state, u8 cprime, const u16 *succ_table, u32 daddy = *(const u16 *)(sherman_state + SHERMAN_DADDY_OFFSET); return succ_table[(daddy << as) + cprime]; } + +static really_inline +u16 doWide16(const char *wide_entry, const u8 **c_inout, const u8 *end, + const u8 *remap, const u16 *s, char *qstate, u16 *offset) { + // Internal relative offset after the last visit of the wide state. 
+ if (qstate != NULL) { // stream mode + *offset = unaligned_load_u16(qstate + 2); + } + + u8 successful = 0; + const u8 *c = *c_inout; + u32 len_c = end - c; + + u16 width = *(const u16 *)(wide_entry + WIDE_WIDTH_OFFSET); + assert(width >= 8); + const u8 *symbols = (const u8 *)(wide_entry + WIDE_SYMBOL_OFFSET16); + const u16 *trans = (const u16 *)(wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + + assert(*offset < width); + u16 len_w = width - *offset; + const u8 *sym = symbols + *offset; + + char tmp[16]; + u16 pos = 0; + + if (*offset == 0 && remap[*c] != *sym) { + goto normal; + } + + // both in (16, +oo). + while (len_w >= 16 && len_c >= 16) { + m128 str_w = loadu128(sym); + for (size_t i = 0; i < 16; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadu128(tmp); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + assert(pos <= 16); + + if (pos < 16) { + goto normal; + } + + sym += 16; + c += 16; + len_w -= 16; + len_c -= 16; + } + + pos = 0; + // at least one in (0, 16). + u32 loadLength_w = MIN(len_w, 16); + u32 loadLength_c = MIN(len_c, 16); + m128 str_w = loadbytes128(sym, loadLength_w); + for (size_t i = 0; i < loadLength_c; i++) { + tmp[i] = remap[*(c + i)]; + } + m128 str_c = loadbytes128(tmp, loadLength_c); + + u32 z = movemask128(eq128(str_w, str_c)); + pos = ctz32(~z); + + pos = MIN(pos, MIN(loadLength_w, loadLength_c)); + + if (loadLength_w <= loadLength_c) { + assert(pos <= loadLength_w); + // successful matching. + if (pos == loadLength_w) { + c -= 1; + successful = 1; + } + // failure, do nothing. + } else { + assert(pos <= loadLength_c); + // successful partial matching. + if (pos == loadLength_c) { + c -= 1; + goto partial; + } + // failure, do nothing. + } + +normal: + *offset = 0; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return successful ? 
*trans : *(trans + 1 + remap[*c]); + +partial: + *offset = sym - symbols + pos; + if (qstate != NULL) { + // Internal relative offset. + unaligned_store_u16(qstate + 2, *offset); + } + c += pos; + *c_inout = c; + return *s; +} diff --git a/src/nfa/mcclellan_internal.h b/src/nfa/mcclellan_internal.h index 5289b074c..0981f99ee 100644 --- a/src/nfa/mcclellan_internal.h +++ b/src/nfa/mcclellan_internal.h @@ -50,6 +50,16 @@ extern "C" #define SHERMAN_CHARS_OFFSET 4 #define SHERMAN_STATES_OFFSET(sso_len) (4 + (sso_len)) +#define WIDE_STATE 2 +#define WIDE_ENTRY_OFFSET8(weo_pos) (2 + (weo_pos)) +#define WIDE_ENTRY_OFFSET16(weo_pos) (3 + (weo_pos)) + +#define WIDE_WIDTH_OFFSET 0 +#define WIDE_SYMBOL_OFFSET8 1 +#define WIDE_TRANSITION_OFFSET8(wto_width) (1 + (wto_width)) +#define WIDE_SYMBOL_OFFSET16 2 +#define WIDE_TRANSITION_OFFSET16(wto_width) (2 + (wto_width)) + struct report_list { u32 count; ReportID report[]; @@ -79,13 +89,17 @@ struct mcclellan { u16 accel_limit_8; /**< 8 bit, lowest accelerable state */ u16 accept_limit_8; /**< 8 bit, lowest accept state */ u16 sherman_limit; /**< lowest sherman state */ + u16 wide_limit; /**< 8/16 bit, lowest wide head state */ u8 alphaShift; u8 flags; u8 has_accel; /**< 1 iff there are any accel plans */ + u8 has_wide; /**< 1 iff there exists any wide state */ u8 remap[256]; /**< remaps characters to a smaller alphabet */ ReportID arb_report; /**< one of the accepts that this dfa may raise */ u32 accel_offset; /**< offset of accel structures from start of McClellan */ u32 haig_offset; /**< reserved for use by Haig, relative to start of NFA */ + u32 wide_offset; /**< offset of the wide state entries to the start of the + * nfa structure */ }; static really_inline @@ -106,6 +120,43 @@ char *findMutableShermanState(char *sherman_base_offset, u16 sherman_base, return sherman_base_offset + SHERMAN_FIXED_SIZE * (s - sherman_base); } +static really_inline +const char *findWideEntry8(UNUSED const struct mcclellan *m, + const char 
*wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET8((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +const char *findWideEntry16(UNUSED const struct mcclellan *m, + const char *wide_base, u32 wide_limit, u32 s) { + UNUSED u8 type = *(const u8 *)wide_base; + assert(type == WIDE_STATE); + const u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + const char *rv = wide_base + entry_offset; + assert(rv < (const char *)m + m->length - sizeof(struct NFA)); + return rv; +} + +static really_inline +char *findMutableWideEntry16(char *wide_base, u32 wide_limit, u32 s) { + u32 entry_offset + = *(const u32 *)(wide_base + + WIDE_ENTRY_OFFSET16((s - wide_limit) * sizeof(u32))); + + return wide_base + entry_offset; +} + #ifdef __cplusplus } #endif diff --git a/src/nfa/mcclellancompile.cpp b/src/nfa/mcclellancompile.cpp index 8e3a744cb..db142f862 100644 --- a/src/nfa/mcclellancompile.cpp +++ b/src/nfa/mcclellancompile.cpp @@ -56,13 +56,19 @@ #include #include #include +#include #include #include #include +#include "mcclellandump.h" +#include "util/dump_util.h" +#include "util/dump_charclass.h" + using namespace std; using boost::adaptors::map_keys; +using boost::dynamic_bitset; #define ACCEL_DFA_MAX_OFFSET_DEPTH 4 @@ -82,6 +88,8 @@ namespace /* anon */ { struct dstate_extra { u16 daddytaken = 0; bool shermanState = false; + bool wideState = false; + bool wideHead = false; }; struct dfa_info { @@ -89,6 +97,8 @@ struct dfa_info { raw_dfa &raw; vector &states; vector extra; + vector> wide_state_chain; + vector> wide_symbol_chain; const u16 alpha_size; /* including special symbols */ const array &alpha_remap; const u16 impl_alpha_size; @@ -112,6 +122,14 @@ struct 
dfa_info { return extra[raw_id].shermanState; } + bool is_widestate(dstate_id_t raw_id) const { + return extra[raw_id].wideState; + } + + bool is_widehead(dstate_id_t raw_id) const { + return extra[raw_id].wideHead; + } + size_t size(void) const { return states.size(); } }; @@ -124,6 +142,35 @@ u8 dfa_info::getAlphaShift() const { } } +struct state_prev_info { + vector> prev_vec; + explicit state_prev_info(size_t alpha_size) : prev_vec(alpha_size) {} +}; + +struct DfaPrevInfo { + u16 impl_alpha_size; + u16 state_num; + vector states; + set accepts; + + explicit DfaPrevInfo(raw_dfa &rdfa); +}; + +DfaPrevInfo::DfaPrevInfo(raw_dfa &rdfa) + : impl_alpha_size(rdfa.getImplAlphaSize()), state_num(rdfa.states.size()), + states(state_num, state_prev_info(impl_alpha_size)){ + for (size_t i = 0; i < states.size(); i++) { + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + dstate_id_t curr = rdfa.states[i].next[sym]; + states[curr].prev_vec[sym].push_back(i); + } + if (!rdfa.states[i].reports.empty() + || !rdfa.states[i].reports_eod.empty()) { + DEBUG_PRINTF("accept raw state: %ld\n", i); + accepts.insert(i); + } + } +} } // namespace static @@ -151,6 +198,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (size_t j = 0; j < alphaSize; j++) { size_t c_prime = (i << alphaShift) + j; + // wide state has no aux structure. + if (m->has_wide && succ_table[c_prime] >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_table[c_prime]); if (aux->accept) { @@ -165,7 +217,8 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { /* handle the sherman states */ char *sherman_base_offset = (char *)n + m->sherman_offset; - for (u16 j = m->sherman_limit; j < m->state_count; j++) { + u16 sherman_ceil = m->has_wide == 1 ? 
m->wide_limit : m->state_count; + for (u16 j = m->sherman_limit; j < sherman_ceil; j++) { char *sherman_cur = findMutableShermanState(sherman_base_offset, m->sherman_limit, j); assert(*(sherman_cur + SHERMAN_TYPE_OFFSET) == SHERMAN_STATE); @@ -174,6 +227,11 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { for (u8 i = 0; i < len; i++) { u16 succ_i = unaligned_load_u16((u8 *)&succs[i]); + // wide state has no aux structure. + if (m->has_wide && succ_i >= m->wide_limit) { + continue; + } + mstate_aux *aux = getAux(n, succ_i); if (aux->accept) { @@ -187,6 +245,53 @@ void markEdges(NFA *n, u16 *succ_table, const dfa_info &info) { unaligned_store_u16((u8 *)&succs[i], succ_i); } } + + /* handle the wide states */ + if (m->has_wide) { + u32 wide_limit = m->wide_limit; + char *wide_base = (char *)n + m->wide_offset; + assert(*wide_base == WIDE_STATE); + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + // traverse over wide head states. + for (u16 j = wide_limit; j < wide_limit + wide_number; j++) { + char *wide_cur + = findMutableWideEntry16(wide_base, wide_limit, j); + u16 width = *(const u16 *)(wide_cur + WIDE_WIDTH_OFFSET); + u16 *trans = (u16 *)(wide_cur + WIDE_TRANSITION_OFFSET16(width)); + + // check successful transition + u16 next = unaligned_load_u16((u8 *)trans); + // wide targets have no aux; still fall through to the failure table. + if (next < wide_limit) { + mstate_aux *aux = getAux(n, next); + if (aux->accept) { + next |= ACCEPT_FLAG; + } + if (aux->accel_offset) { + next |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)trans, next); + } + trans ++; + + // check failure transition + for (symbol_t k = 0; k < alphaSize; k++) { + u16 next_k = unaligned_load_u16((u8 *)&trans[k]); + // wide state has no aux structure. + if (next_k >= wide_limit) { + continue; + } + mstate_aux *aux_k = getAux(n, next_k); + if (aux_k->accept) { + next_k |= ACCEPT_FLAG; + } + if (aux_k->accel_offset) { + next_k |= ACCEL_FLAG; + } + unaligned_store_u16((u8 *)&trans[k], next_k); + } + } + } } u32 mcclellan_build_strat::max_allowed_offset_accel() const { @@ -232,6 +333,19 @@ void 
populateBasicInfo(size_t state_size, const dfa_info &info, m->start_anchored = info.implId(info.raw.start_anchored); m->start_floating = info.implId(info.raw.start_floating); m->has_accel = accel_count ? 1 : 0; + m->has_wide = info.wide_state_chain.size() > 0 ? 1 : 0; + + if (state_size == sizeof(u8) && m->has_wide == 1) { + // allocate 1 more byte for wide state use. + nfa->scratchStateSize += sizeof(u8); + nfa->streamStateSize += sizeof(u8); + } + + if (state_size == sizeof(u16) && m->has_wide == 1) { + // allocate 2 more bytes for wide state use. + nfa->scratchStateSize += sizeof(u16); + nfa->streamStateSize += sizeof(u16); + } if (single) { m->flags |= MCCLELLAN_FLAG_SINGLE; @@ -404,6 +518,23 @@ size_t calcShermanRegionSize(const dfa_info &info) { return ROUNDUP_16(rv); } +static +size_t calcWideRegionSize(const dfa_info &info) { + if (info.wide_state_chain.empty()) { + return 0; + } + + // wide info header + size_t rv = info.wide_symbol_chain.size() * sizeof(u32) + 3; + + // wide info body + for (const auto &chain : info.wide_symbol_chain) { + rv += chain.size() + (info.impl_alpha_size + 1) * sizeof(u16) + 2; + } + + return ROUNDUP_16(rv); +} + static void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, const vector &reports, const vector &reports_eod, @@ -418,42 +549,60 @@ void fillInAux(mstate_aux *aux, dstate_id_t i, const dfa_info &info, /* returns false on error */ static -bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base) { +bool allocateFSN16(dfa_info &info, dstate_id_t *sherman_base, + dstate_id_t *wide_limit) { info.states[0].impl_id = 0; /* dead is always 0 */ vector norm; vector sherm; + vector wideHead; + vector wideState; if (info.size() > (1 << 16)) { DEBUG_PRINTF("too many states\n"); - *sherman_base = 0; + *wide_limit = 0; return false; } for (u32 i = 1; i < info.size(); i++) { - if (info.is_sherman(i)) { + if (info.is_widehead(i)) { + wideHead.push_back(i); + } else if (info.is_widestate(i)) { + 
wideState.push_back(i); + } else if (info.is_sherman(i)) { sherm.push_back(i); } else { norm.push_back(i); } } - dstate_id_t next_norm = 1; + dstate_id_t next = 1; for (const dstate_id_t &s : norm) { - info.states[s].impl_id = next_norm++; + DEBUG_PRINTF("[norm] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; } - *sherman_base = next_norm; - dstate_id_t next_sherman = next_norm; - + *sherman_base = next; for (const dstate_id_t &s : sherm) { - info.states[s].impl_id = next_sherman++; + DEBUG_PRINTF("[sherm] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + *wide_limit = next; + for (const dstate_id_t &s : wideHead) { + DEBUG_PRINTF("[widehead] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; + } + + for (const dstate_id_t &s : wideState) { + DEBUG_PRINTF("[wide] mapping state %u to %u\n", s, next); + info.states[s].impl_id = next++; } /* Check to see if we haven't over allocated our states */ - DEBUG_PRINTF("next sherman %u masked %u\n", next_sherman, - (dstate_id_t)(next_sherman & STATE_MASK)); - return (next_sherman - 1) == ((next_sherman - 1) & STATE_MASK); + DEBUG_PRINTF("next sherman %u masked %u\n", next, + (dstate_id_t)(next & STATE_MASK)); + return (next - 1) == ((next - 1) & STATE_MASK); } static @@ -470,12 +619,16 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, assert(alphaShift <= 8); u16 count_real_states; - if (!allocateFSN16(info, &count_real_states)) { + u16 wide_limit; + if (!allocateFSN16(info, &count_real_states, &wide_limit)) { DEBUG_PRINTF("failed to allocate state numbers, %zu states total\n", info.size()); return nullptr; } + DEBUG_PRINTF("count_real_states: %d\n", count_real_states); + DEBUG_PRINTF("non_wide_states: %d\n", wide_limit); + auto ri = info.strat.gatherReports(reports, reports_eod, &single, &arb); map accel_escape_info = info.strat.getAccelInfo(cc.grey); @@ -483,7 +636,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const 
CompileContext &cc, size_t tran_size = (1 << info.getAlphaShift()) * sizeof(u16) * count_real_states; - size_t aux_size = sizeof(mstate_aux) * info.size(); + size_t aux_size = sizeof(mstate_aux) * wide_limit; size_t aux_offset = ROUNDUP_16(sizeof(NFA) + sizeof(mcclellan) + tran_size); size_t accel_size = info.strat.accelSize() * accel_escape_info.size(); @@ -491,12 +644,24 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, + ri->getReportListSize(), 32); size_t sherman_offset = ROUNDUP_16(accel_offset + accel_size); size_t sherman_size = calcShermanRegionSize(info); - - size_t total_size = sherman_offset + sherman_size; + size_t wide_offset = ROUNDUP_16(sherman_offset + sherman_size); + size_t wide_size = calcWideRegionSize(info); + size_t total_size = wide_offset + wide_size; accel_offset -= sizeof(NFA); /* adj accel offset to be relative to m */ assert(ISALIGNED_N(accel_offset, alignof(union AccelAux))); + DEBUG_PRINTF("aux_offset %zu\n", aux_offset); + DEBUG_PRINTF("aux_size %zu\n", aux_size); + DEBUG_PRINTF("rl size %u\n", ri->getReportListSize()); + DEBUG_PRINTF("accel_offset %zu\n", accel_offset + sizeof(NFA)); + DEBUG_PRINTF("accel_size %zu\n", accel_size); + DEBUG_PRINTF("sherman_offset %zu\n", sherman_offset); + DEBUG_PRINTF("sherman_size %zu\n", sherman_size); + DEBUG_PRINTF("wide_offset %zu\n", wide_offset); + DEBUG_PRINTF("wide_size %zu\n", wide_size); + DEBUG_PRINTF("total_size %zu\n", total_size); + auto nfa = make_zeroed_bytecode_ptr(total_size); char *nfa_base = (char *)nfa.get(); @@ -511,6 +676,9 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *aux = (mstate_aux *)(nfa_base + aux_offset); mcclellan *m = (mcclellan *)getMutableImplNfa(nfa.get()); + m->wide_limit = wide_limit; + m->wide_offset = wide_offset; + /* copy in the mc header information */ m->sherman_offset = sherman_offset; m->sherman_end = total_size; @@ -518,7 +686,7 @@ bytecode_ptr mcclellanCompile16(dfa_info &info, const 
CompileContext &cc, /* do normal states */ for (size_t i = 0; i < info.size(); i++) { - if (info.is_sherman(i)) { + if (info.is_sherman(i) || info.is_widestate(i)) { continue; } @@ -556,6 +724,7 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, mstate_aux *this_aux = getAux(nfa.get(), fs); assert(fs >= count_real_states); + assert(fs < wide_limit); char *curr_sherman_entry = sherman_table + (fs - m->sherman_limit) * SHERMAN_FIXED_SIZE; @@ -599,6 +768,70 @@ bytecode_ptr<NFA> mcclellanCompile16(dfa_info &info, const CompileContext &cc, } } + if (!info.wide_state_chain.empty()) { + /* do wide states using info */ + u16 wide_number = verify_u16(info.wide_symbol_chain.size()); + char *wide_base = nfa_base + m->wide_offset; + assert(ISALIGNED_16(wide_base)); + + char *wide_top = wide_base; + *(u8 *)(wide_top++) = WIDE_STATE; + *(u16 *)(wide_top) = wide_number; + wide_top += 2; + + char *curr_wide_entry = wide_top + wide_number * sizeof(u32); + u32 *wide_offset_list = (u32 *)wide_top; + + /* get the order of writing wide states */ + vector<size_t> order(wide_number); + for (size_t i = 0; i < wide_number; i++) { + dstate_id_t head = info.wide_state_chain[i].front(); + size_t pos = info.implId(head) - m->wide_limit; + order[pos] = i; + } + + for (size_t i : order) { + vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; + vector<symbol_t> &symbol_chain = info.wide_symbol_chain[i]; + + u16 width = verify_u16(symbol_chain.size()); + *(u16 *)(curr_wide_entry + WIDE_WIDTH_OFFSET) = width; + u8 *chars = (u8 *)(curr_wide_entry + WIDE_SYMBOL_OFFSET16); + + // store wide state symbol chain + for (size_t j = 0; j < width; j++) { + *(chars++) = verify_u8(symbol_chain[j]); + } + + // store wide state transition table + u16 *trans = (u16 *)(curr_wide_entry + + WIDE_TRANSITION_OFFSET16(width)); + dstate_id_t tail = state_chain[width - 1]; + symbol_t last = symbol_chain[width -1]; + dstate_id_t tran = info.states[tail].next[last]; + // 1. 
successful transition + *trans++ = info.implId(tran); + // 2. failure transition + for (size_t j = 0; verify_u16(j) < width - 1; j++) { + if (symbol_chain[j] != last) { + tran = info.states[state_chain[j]].next[last]; + } + } + for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { + if (sym != last) { + *trans++ = info.implId(info.states[tail].next[sym]); + } + else { + *trans++ = info.implId(tran); + } + } + + *wide_offset_list++ = verify_u32(curr_wide_entry - wide_base); + + curr_wide_entry = (char *)trans; + } + } + markEdges(nfa.get(), succ_table, info); if (accel_states && nfa) { @@ -844,12 +1077,16 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, if (trust_daddy_states) { // Use the daddy already set for this state so long as it isn't already // a Sherman state. - if (!info.is_sherman(currState.daddy)) { + dstate_id_t daddy = currState.daddy; + if (!info.is_sherman(daddy) && !info.is_widestate(daddy)) { hinted.insert(currState.daddy); } else { // Fall back to granddaddy, which has already been processed (due // to BFS ordering) and cannot be a Sherman state. dstate_id_t granddaddy = info.states[currState.daddy].daddy; + if (info.is_widestate(granddaddy)) { + return; + } assert(!info.is_sherman(granddaddy)); hinted.insert(granddaddy); } @@ -861,7 +1098,7 @@ void find_better_daddy(dfa_info &info, dstate_id_t curr_id, bool using8bit, assert(donor < curr_id); u32 score = 0; - if (info.is_sherman(donor)) { + if (info.is_sherman(donor) || info.is_widestate(donor)) { continue; } @@ -934,6 +1171,290 @@ bool is_cyclic_near(const raw_dfa &raw, dstate_id_t root) { return false; } +/* \brief Test for only-one-predecessor property. 
*/ +static +bool check_property1(const DfaPrevInfo &info, const u16 impl_alpha_size, + const dstate_id_t curr_id, dstate_id_t &prev_id, + symbol_t &prev_sym) { + u32 num_prev = 0; + bool test_p1 = false; + + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + num_prev += info.states[curr_id].prev_vec[sym].size(); + DEBUG_PRINTF("Check symbol: %u, with its vector size: %lu\n", sym, + info.states[curr_id].prev_vec[sym].size()); + if (num_prev == 1 && !test_p1) { + test_p1 = true; + prev_id = info.states[curr_id].prev_vec[sym].front(); //[0] for sure??? + prev_sym = sym; + } + } + + return num_prev == 1; +} + +/* \brief Test for same-failure-action property. */ +static +bool check_property2(const raw_dfa &rdfa, const u16 impl_alpha_size, + const dstate_id_t curr_id, const dstate_id_t prev_id, + const symbol_t curr_sym, const symbol_t prev_sym) { + const dstate &prevState = rdfa.states[prev_id]; + const dstate &currState = rdfa.states[curr_id]; + + // Compare transition tables between currState and prevState. + u16 score = 0; + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + if (currState.next[sym] == prevState.next[sym] + && sym != curr_sym && sym != prev_sym) { + score++; + } + } + DEBUG_PRINTF("(Score: %u/%u)\n", score, impl_alpha_size); + + // 2 cases. 
+ if (curr_sym != prev_sym && score >= impl_alpha_size - 2 + && currState.next[prev_sym] == prevState.next[curr_sym]) { + return true; + } else if (curr_sym == prev_sym && score == impl_alpha_size - 1) { + return true; + } + return false; +} + +/* \brief Check whether adding current prev_id will generate a circle.*/ +static +bool check_circle(const DfaPrevInfo &info, const u16 impl_alpha_size, + const vector<dstate_id_t> &chain, const dstate_id_t id) { + const vector<vector<dstate_id_t>> &prev_vec = info.states[id].prev_vec; + const dstate_id_t tail = chain.front(); + for (symbol_t sym = 0; sym < impl_alpha_size; sym++) { + auto iter = find(prev_vec[sym].begin(), prev_vec[sym].end(), tail); + if (iter != prev_vec[sym].end()) { + // Tail is one of id's predecessors, forming a circle. + return true; + } + } + return false; +} + +/* \brief Returns a chain of state ids and symbols. */ +static +dstate_id_t find_chain_candidate(const raw_dfa &rdfa, const DfaPrevInfo &info, + const dstate_id_t curr_id, + const symbol_t curr_sym, + vector<dstate_id_t> &temp_chain) { + //Record current id first. + temp_chain.push_back(curr_id); + + const u16 size = info.impl_alpha_size; + + // Stop when entering root cloud. + if (rdfa.start_anchored != DEAD_STATE + && is_cyclic_near(rdfa, rdfa.start_anchored) + && curr_id < size) { + return curr_id; + } + if (rdfa.start_floating != DEAD_STATE + && curr_id >= rdfa.start_floating + && curr_id < rdfa.start_floating + size * 3) { + return curr_id; + } + + // Stop when reaching anchored or floating. + if (curr_id == rdfa.start_anchored || curr_id == rdfa.start_floating) { + return curr_id; + } + + dstate_id_t prev_id = 0; + symbol_t prev_sym = ALPHABET_SIZE; + + // Check the only-one-predecessor property. + if (!check_property1(info, size, curr_id, prev_id, prev_sym)) { + return curr_id; + } + assert(prev_id != 0 && prev_sym != ALPHABET_SIZE); + DEBUG_PRINTF("(P1 test passed.)\n"); + + // Circle testing for the prev_id that passes the P1 test. 
+ if (check_circle(info, size, temp_chain, prev_id)) { + DEBUG_PRINTF("(A circle is found.)\n"); + return curr_id; + } + + // Check the same-failure-action property. + if (!check_property2(rdfa, size, curr_id, prev_id, curr_sym, prev_sym)) { + return curr_id; + } + DEBUG_PRINTF("(P2 test passed.)\n"); + + if (!rdfa.states[prev_id].reports.empty() + || !rdfa.states[prev_id].reports_eod.empty()) { + return curr_id; + } else { + return find_chain_candidate(rdfa, info, prev_id, prev_sym, temp_chain); + } +} + +/* \brief Always store the non-extensible chains found till now. */ +static +bool store_chain_longest(vector<vector<dstate_id_t>> &candidate_chain, + vector<dstate_id_t> &temp_chain, + dynamic_bitset<> &added, bool head_is_new) { + dstate_id_t head = temp_chain.front(); + u16 length = temp_chain.size(); + + if (head_is_new) { + DEBUG_PRINTF("This is a new chain!\n"); + + // Add this new chain and get it marked. + candidate_chain.push_back(temp_chain); + + for (auto &id : temp_chain) { + DEBUG_PRINTF("(Marking s%u ...)\n", id); + added.set(id); + } + + return true; + } + + DEBUG_PRINTF("This is a longer chain!\n"); + assert(!candidate_chain.empty()); + + auto chain = find_if(candidate_chain.begin(), candidate_chain.end(), + [&](const vector<dstate_id_t> &it) { + return it.front() == head; + }); + + // Not a valid head, just do nothing and return. + if (chain == candidate_chain.end()) { + return false; + } + + u16 len = chain->size(); + + if (length > len) { + // Find out the branch node first. + size_t piv = 0; + for (; piv < length; piv++) { + if ((*chain)[piv] != temp_chain[piv]) { + break; + } + } + + for (size_t j = piv + 1; j < length; j++) { + DEBUG_PRINTF("(Marking s%u (new branch) ...)\n", temp_chain[j]); + added.set(temp_chain[j]); + } + + // Unmark old unuseful nodes. 
+ // (Except the tail node, which is in working queue) + for (size_t j = piv + 1; j < verify_u16(len - 1); j++) { + DEBUG_PRINTF("(UnMarking s%u (old branch)...)\n", (*chain)[j]); + added.reset((*chain)[j]); + } + + chain->assign(temp_chain.begin(), temp_chain.end()); + } + + return false; +} + +/* \brief Generate wide_symbol_chain from wide_state_chain. */ +static +void generate_symbol_chain(dfa_info &info, vector<symbol_t> &chain_tail) { + raw_dfa &rdfa = info.raw; + assert(chain_tail.size() == info.wide_state_chain.size()); + + for (size_t i = 0; i < info.wide_state_chain.size(); i++) { + vector<dstate_id_t> &state_chain = info.wide_state_chain[i]; + vector<symbol_t> symbol_chain; + + info.extra[state_chain[0]].wideHead = true; + size_t width = state_chain.size() - 1; + + for (size_t j = 0; j < width; j++) { + dstate_id_t curr_id = state_chain[j]; + dstate_id_t next_id = state_chain[j + 1]; + + // The last state of the chain doesn't belong to a wide state. + info.extra[curr_id].wideState = true; + + // The tail symbol comes from vector chain_tail; + if (j == width - 1) { + symbol_chain.push_back(chain_tail[i]); + } else { + for (symbol_t sym = 0; sym < info.impl_alpha_size; sym++) { + if (rdfa.states[curr_id].next[sym] == next_id) { + symbol_chain.push_back(sym); + break; + } + } + } + } + + info.wide_symbol_chain.push_back(symbol_chain); + } +} + +/* \brief Find potential regions of states to be packed into wide states. 
*/ +static +void find_wide_state(dfa_info &info) { + DfaPrevInfo dinfo(info.raw); + queue<dstate_id_t> work_queue; + + dynamic_bitset<> added(info.raw.states.size()); + for (auto it : dinfo.accepts) { + work_queue.push(it); + added.set(it); + } + + vector<symbol_t> chain_tail; + while (!work_queue.empty()) { + dstate_id_t curr_id = work_queue.front(); + work_queue.pop(); + DEBUG_PRINTF("Newly popped state: s%u\n", curr_id); + + for (symbol_t sym = 0; sym < dinfo.impl_alpha_size; sym++) { + for (auto info_it : dinfo.states[curr_id].prev_vec[sym]) { + if (added.test(info_it)) { + DEBUG_PRINTF("(s%u already marked.)\n", info_it); + continue; + } + + vector<dstate_id_t> temp_chain; + // Head is a state failing the test of the chain. + dstate_id_t head = find_chain_candidate(info.raw, dinfo, + info_it, sym, + temp_chain); + + // A candidate chain should contain 8 substates at least. + if (temp_chain.size() < 8) { + DEBUG_PRINTF("(Not enough substates, continue.)\n"); + continue; + } + + bool head_is_new = !added.test(head); + if (head_is_new) { + added.set(head); + work_queue.push(head); + DEBUG_PRINTF("Newly pushed state: s%u\n", head); + } + + reverse(temp_chain.begin(), temp_chain.end()); + temp_chain.push_back(curr_id); + + assert(head > 0 && head == temp_chain.front()); + if (store_chain_longest(info.wide_state_chain, temp_chain, + added, head_is_new)) { + chain_tail.push_back(sym); + } + } + } + } + + generate_symbol_chain(info, chain_tail); +} + bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, const CompileContext &cc, bool trust_daddy_states, @@ -952,11 +1473,19 @@ bytecode_ptr<NFA> mcclellanCompile_i(raw_dfa &raw, accel_dfa_build_strat &strat, bytecode_ptr<NFA> nfa; if (!using8bit) { + if (cc.grey.allowWideStates && strat.getType() == McClellan + && !is_triggered(raw.kind)) { + find_wide_state(info); + } + u16 total_daddy = 0; bool any_cyclic_near_anchored_state = is_cyclic_near(raw, raw.start_anchored); for (u32 i = 0; i < info.size(); i++) { + if (info.is_widestate(i)) { + 
continue; + } find_better_daddy(info, i, using8bit, any_cyclic_near_anchored_state, trust_daddy_states, cc.grey); diff --git a/src/nfa/mcclellancompile.h b/src/nfa/mcclellancompile.h index ce63fbbfa..73cb9fd77 100644 --- a/src/nfa/mcclellancompile.h +++ b/src/nfa/mcclellancompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2015-2017, Intel Corporation + * Copyright (c) 2015-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -60,6 +60,7 @@ class mcclellan_build_strat : public accel_dfa_build_strat { u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; + DfaType getType() const override { return McClellan; } private: raw_dfa &rdfa; diff --git a/src/nfa/shengcompile.h b/src/nfa/shengcompile.h index 2fe1e3569..d795b3623 100644 --- a/src/nfa/shengcompile.h +++ b/src/nfa/shengcompile.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2017, Intel Corporation + * Copyright (c) 2016-2018, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -61,6 +61,7 @@ class sheng_build_strat : public accel_dfa_build_strat { u32 max_allowed_offset_accel() const override; u32 max_stop_char() const override; u32 max_floating_stop_char() const override; + DfaType getType() const override { return Sheng; } private: raw_dfa &rdfa;