forked from VectorCamp/vectorscan
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathch_database.h
158 lines (138 loc) · 5.61 KB
/
ch_database.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
/*
* Copyright (c) 2018, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of Intel Corporation nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
/** \file
* \brief Runtime code for ch_database manipulation.
*/
#ifndef CH_DATABASE_H_
#define CH_DATABASE_H_
#ifdef __cplusplus
extern "C"
{
#endif
#define PCRE_STATIC
#include <pcre.h>
#include "ch_compile.h" // for CH_MODE_ flags
#include "ue2common.h"
#include "hs_version.h"
#include "hs.h"
#define CH_DB_MAGIC 0xdedededeU //!< Magic number stored in \ref ch_database
/** \brief Main Chimera database header.
 *
 * Sits at the very start of a Chimera database. The \ref bytecode member is a
 * byte offset measured from the address of this struct (see
 * ch_get_bytecode()).
 */
struct ch_database {
u32 magic; //!< must be \ref CH_DB_MAGIC
u32 version; //!< release version; hydbIsValid() requires HS_VERSION_32BIT
u32 length; //!< total allocated length in bytes
u32 reserved0; //!< unused
u32 reserved1; //!< unused
u32 bytecode; //!< offset of the bytecode, relative to db start
u32 padding[16]; //!< padding for alignment of rest of bytecode
char bytes[]; //!< flexible array member marking the start of trailing data
};
/** \brief Chimera bytecode header, which follows the \ref ch_database and is
 * always 64-byte aligned.
 *
 * All *Offset members are byte offsets measured from the address of this
 * struct; the accessor functions in this header resolve them to pointers.
 */
struct ch_bytecode {
u32 length; //!< length of bytecode including this header struct
u32 flags; //!< whole-database flags (CHIMERA_FLAG_NO_MULTIMATCH,
//!< CHIMERA_FLAG_GROUPS)
u32 patternCount; //!< total number of patterns
u32 activeSize; //!< size of mmbit to store active pattern ids
u32 databaseOffset; //!< offset for database following \ref ch_bytecode
//!< header
u32 patternOffset; //!< points to an array of u32 offsets, each pointing to
//!< a \ref ch_pattern
u32 unguardedOffset; //!< offset of a list of unguarded pattern indices
u32 unguardedCount; //!< number of unguarded patterns
u32 maxCaptureGroups; //!< max number of capture groups used by any pattern
};
/** \brief Per-pattern header.
 *
 * struct is followed in bytecode by:
 * 1. pcre bytecode (always present)
 * 2. pcre study data (sometimes)
 *
 * Instances are located through the offset array referenced by
 * ch_bytecode::patternOffset (see getPattern()).
 */
struct ch_pattern {
u32 id; //!< pattern ID to report to the user
u32 flags; //!< per-pattern flags (e.g. \ref CHIMERA_PATTERN_FLAG_UTF8)
u32 maxWidth; //!< maximum width of a match, or UINT_MAX for inf.
u32 minWidth; //!< minimum width of a match.
u32 fixedWidth;//!< pattern has fixed width.
u32 studyOffset; //!< offset relative to struct start of study data,
//!< or zero if there is none
u32 length; //!< length of struct plus pcre bytecode and study data
pcre_extra extra; //!< pcre_extra struct, used to store study data ptr for
//!< the currently-running pcre at runtime.
};
/** \brief Locate the bytecode region of a Chimera database.
 *
 * Adds the \c bytecode byte offset stored in the header to the address of the
 * header itself.
 *
 * \param db Database header; must not be NULL (asserted).
 * \return Pointer to the start of the bytecode. Only 16-byte alignment is
 *         asserted here, even though \ref ch_bytecode is documented as being
 *         64-byte aligned.
 */
static really_inline
const void *ch_get_bytecode(const struct ch_database *db) {
    assert(db);

    const char *base = (const char *)db;
    const void *code = base + db->bytecode;

    assert(ISALIGNED_16(code));
    return code;
}
struct hs_database;

/** \brief Locate the Hyperscan database embedded in Chimera bytecode.
 *
 * Resolves ch_bytecode::databaseOffset, a byte offset from the start of the
 * bytecode header, into a pointer.
 *
 * \param db Bytecode header; must not be NULL (asserted).
 * \return Pointer to the embedded \c hs_database; asserted to satisfy
 *         ISALIGNED_CL.
 */
static really_inline
const struct hs_database *getHyperscanDatabase(const struct ch_bytecode *db) {
    assert(db);

    const struct hs_database *hs_db =
        (const struct hs_database *)((const char *)db + db->databaseOffset);

    assert(ISALIGNED_CL(hs_db));
    return hs_db;
}
/** \brief Locate the list of unguarded pattern indices.
 *
 * Resolves ch_bytecode::unguardedOffset, a byte offset from the start of the
 * bytecode header, into a pointer. The number of entries is held separately
 * in ch_bytecode::unguardedCount.
 *
 * \param db Bytecode header; must not be NULL (asserted).
 * \return Pointer to the first index; asserted to be aligned to sizeof(u32).
 */
static really_inline
const u32 *getUnguarded(const struct ch_bytecode *db) {
    assert(db);

    const u32 *list = (const u32 *)((const char *)db + db->unguardedOffset);

    assert(ISALIGNED_N(list, sizeof(u32)));
    return list;
}
/** \brief Fetch the header of the i-th pattern.
 *
 * ch_bytecode::patternOffset locates an array of u32 offsets; entry \p i of
 * that array is itself a byte offset from the start of the bytecode header to
 * the pattern's \ref ch_pattern.
 *
 * \param db Bytecode header; must not be NULL (asserted).
 * \param i  Pattern index; asserted to be less than ch_bytecode::patternCount.
 * \return Pointer to the requested \ref ch_pattern. The stored offset is
 *         asserted to lie within ch_bytecode::length.
 */
static really_inline
const struct ch_pattern *getPattern(const struct ch_bytecode *db, u32 i) {
    assert(db);
    assert(i < db->patternCount);

    const char *base = (const char *)db;
    const u32 *offsets = (const u32 *)(base + db->patternOffset);

    assert(offsets[i] < db->length);
    return (const struct ch_pattern *)(base + offsets[i]);
}
/** \brief Sanity-check a Chimera database header.
 *
 * \param hydb Database header to validate; may be NULL.
 * \return \c CH_INVALID if \p hydb is NULL or its magic number is not
 *         \ref CH_DB_MAGIC; \c CH_DB_VERSION_ERROR if its version differs
 *         from \c HS_VERSION_32BIT; \c CH_SUCCESS otherwise.
 */
static really_inline
ch_error_t hydbIsValid(const struct ch_database *hydb) {
    /* Handle NULL on its own: the magic-mismatch diagnostic below reads
     * hydb->magic, which must never be evaluated for a NULL pointer. */
    if (!hydb) {
        DEBUG_PRINTF("null database pointer\n");
        return CH_INVALID;
    }

    if (hydb->magic != CH_DB_MAGIC) {
        DEBUG_PRINTF("bad magic (%u != %u)\n", hydb->magic, CH_DB_MAGIC);
        return CH_INVALID;
    }

    if (hydb->version != HS_VERSION_32BIT) {
        DEBUG_PRINTF("bad version\n");
        return CH_DB_VERSION_ERROR;
    }

    return CH_SUCCESS;
}
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* CH_DATABASE_H_ */