Skip to content

Commit

Permalink
released 4.1.0
Browse files Browse the repository at this point in the history
fix #118 fix #284 fix #285 fix #286 fix #287 fix #289 and other additions and improvements
  • Loading branch information
genivia-inc committed Sep 17, 2023
1 parent 040ded3 commit 4af28c8
Show file tree
Hide file tree
Showing 259 changed files with 19,185 additions and 19,041 deletions.
234 changes: 71 additions & 163 deletions README.md

Large diffs are not rendered by default.

Binary file modified bin/win32/ugrep.exe
Binary file not shown.
Binary file modified bin/win64/ugrep.exe
Binary file not shown.
24 changes: 12 additions & 12 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for ugrep 3.12.
# Generated by GNU Autoconf 2.71 for ugrep 4.1.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
Expand All @@ -10,7 +10,7 @@
# This configure script is free software; the Free Software Foundation
# gives unlimited permission to copy, distribute and modify it.
#
# Copyright (C) 2019-2022 Robert van Engelen, Genivia Inc.
# Copyright (C) 2019-2023 Robert van Engelen, Genivia Inc.
## -------------------- ##
## M4sh Initialization. ##
## -------------------- ##
Expand Down Expand Up @@ -609,8 +609,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ugrep'
PACKAGE_TARNAME='ugrep'
PACKAGE_VERSION='3.12'
PACKAGE_STRING='ugrep 3.12'
PACKAGE_VERSION='4.1'
PACKAGE_STRING='ugrep 4.1'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1349,7 +1349,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ugrep 3.12 to adapt to many kinds of systems.
\`configure' configures ugrep 4.1 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1420,7 +1420,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ugrep 3.12:";;
short | recursive ) echo "Configuration of ugrep 4.1:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1562,14 +1562,14 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ugrep configure 3.12
ugrep configure 4.1
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
This configure script is free software; the Free Software Foundation
gives unlimited permission to copy, distribute and modify it.
Copyright (C) 2019-2022 Robert van Engelen, Genivia Inc.
Copyright (C) 2019-2023 Robert van Engelen, Genivia Inc.
_ACEOF
exit
fi
Expand Down Expand Up @@ -2099,7 +2099,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ugrep $as_me 3.12, which was
It was created by ugrep $as_me 4.1, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
Expand Down Expand Up @@ -3586,7 +3586,7 @@ fi

# Define the identity of the package.
PACKAGE='ugrep'
VERSION='3.12'
VERSION='4.1'


printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
Expand Down Expand Up @@ -9358,7 +9358,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ugrep $as_me 3.12, which was
This file was extended by ugrep $as_me 4.1, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -9426,7 +9426,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
ugrep config.status 3.12
ugrep config.status 4.1
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
Expand Down
4 changes: 2 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
AC_INIT([ugrep],[3.12])
AC_INIT([ugrep],[4.1])
AM_INIT_AUTOMAKE([foreign])
AC_CONFIG_HEADERS([config.h])
AC_COPYRIGHT([Copyright (C) 2019-2022 Robert van Engelen, Genivia Inc.])
AC_COPYRIGHT([Copyright (C) 2019-2023 Robert van Engelen, Genivia Inc.])

AC_CONFIG_MACRO_DIR([m4])

Expand Down
9 changes: 1 addition & 8 deletions include/reflex/absmatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -1183,17 +1183,10 @@ class AbstractMatcher {
set_current(txt_ - buf_);
return true;
}
if (eof_)
break;
pos_ = cur_ = end_;
txt_ = buf_ + end_;
(void)grow();
end_ += get(buf_ + end_, blk_ > 0 ? blk_ : max_ - end_ - 1);
if (pos_ >= end_ && !wrap())
{
eof_ = true;
if (peek_more() == EOF)
break;
}
}
set_current(end_);
return false;
Expand Down
8 changes: 4 additions & 4 deletions include/reflex/boostmatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ class BoostMatcher : public PatternMatcher<boost::regex> {
public:
/// Convert a regex to an acceptable form, given the specified regex library signature `"[decls:]escapes[?+]"`, see reflex::convert.
template<typename T>
static std::string convert(T regex, convert_flag_type flags = convert_flag::none)
static std::string convert(T regex, convert_flag_type flags = convert_flag::none, bool *multiline = NULL)
{
return reflex::convert(regex, "imPRsx!#<>=&'(0123456789:abcdefghklnrstuvwxzABCDHLNQSUWZ0123456789<>?+", flags);
return reflex::convert(regex, "imPRsx!#<>=&'(0123456789:abcdefghklnrstuvwxzABCDHLNQSUWZ0123456789<>?+", flags, multiline);
}
/// Default constructor.
BoostMatcher()
Expand Down Expand Up @@ -368,9 +368,9 @@ class BoostPosixMatcher : public BoostMatcher {
public:
/// Convert a regex to an acceptable form, given the specified regex library signature `"[decls:]escapes[?+]"`, see reflex::convert.
template<typename T>
static std::string convert(T regex, convert_flag_type flags = convert_flag::none)
static std::string convert(T regex, convert_flag_type flags = convert_flag::none, bool *multiline = NULL)
{
return reflex::convert(regex, "imsx!#<=:abcdefghlnrstuvwxzABDHLNQSUWZ0<>", flags);
return reflex::convert(regex, "imsx!#<=:abcdefghlnrstuvwxzABDHLNQSUWZ0<>", flags, multiline);
}
/// Default constructor.
BoostPosixMatcher() : BoostMatcher()
Expand Down
6 changes: 4 additions & 2 deletions include/reflex/convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ namespace convert_flag {
const convert_flag_type freespace = 0x0100; ///< convert regex by removing spacing, same as `(?x)`
const convert_flag_type notnewline = 0x0200; ///< inverted character classes and \s do not match newline `\n`
const convert_flag_type permissive = 0x0400; ///< convert Unicode to compact UTF-8 patterns, permits some invalid UTF-8 sequences
const convert_flag_type closing = 0x8000; ///< permit matching ) when it has no opening (
const convert_flag_type closing = 0x8000; ///< permit matching ) literally when it has no opening (
}

/// @brief Returns the converted regex string given a regex library signature and conversion flags, throws regex_error.
Expand Down Expand Up @@ -162,16 +162,18 @@ std::string convert(
const char *pattern, ///< regex string pattern to convert
const char *signature, ///< regex library signature
convert_flag_type flags = convert_flag::none, ///< conversion flags
bool *multiline = NULL, ///< set to true if pattern may be multiline
const std::map<std::string,std::string> *macros = NULL) ///< {name} macros to expand
;

inline std::string convert(
const std::string& pattern,
const char *signature,
convert_flag_type flags = convert_flag::none,
bool *multiline = NULL,
const std::map<std::string,std::string> *macros = NULL)
{
return convert(pattern.c_str(), signature, flags, macros);
return convert(pattern.c_str(), signature, flags, multiline, macros);
}

} // namespace reflex
Expand Down
72 changes: 42 additions & 30 deletions include/reflex/fuzzymatcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,9 @@ class FuzzyMatcher : public Matcher {
bpt.sub = false;
bpt.pc1 += !bpt.alt;
}
else if (ins_)
else if (del_)
{
// try inserting a pattern char that should have matched a deleted char in the text
// try inserting a pattern char in the text to match a missing char in the text
DBGLOG("Delete: jump to %u at pos %zu char %d (0x%x)", jump, pos_, c1, c1);
bpt.sub = bpt.alt;
++bpt.pc1;
Expand Down Expand Up @@ -610,11 +610,11 @@ class FuzzyMatcher : public Matcher {
}
else
{
if (Pattern::is_opcode_halt(opcode))
break;
if (c1 == EOF)
break;
c1 = get();
if (Pattern::is_opcode_halt(opcode))
break;
DBGLOG("Get: c1 = %d (0x%x) at pos %zu", c1, c1, pos_ - 1);
if (bin_ || (c1 & 0xC0) != 0x80 || c1 == EOF)
{
Expand Down Expand Up @@ -666,9 +666,13 @@ class FuzzyMatcher : public Matcher {
jump = Pattern::index_of(opcode);
if (jump == 0)
{
// loop back to start state after only one char matched (one transition) but w/o full match, then optimize
if (cap_ == 0 && pos_ == cur_ + 1 && method == Const::FIND)
cur_ = pos_; // set cur_ to move forward from cur_ + 1 with FIND advance()
// loop back to start state w/o full match: advance to avoid backtracking
if (cap_ == 0 && pos_ > cur_ && method == Const::FIND)
{
// use bit_[] to check each char in buf_[cur_+1..pos_-1] if it is a starting char, if not then increase cur_
while (++cur_ < pos_ && (pat_->bit_[static_cast<uint8_t>(buf_[cur_])] & 1))
continue;
}
}
else if (jump >= Pattern::Const::LONG)
{
Expand All @@ -684,37 +688,45 @@ class FuzzyMatcher : public Matcher {
// match, i.e. cap_ > 0?
if (method == Const::MATCH)
{
// exit fuzzy loop if fuzzy match succeeds till end of input
// exit fuzzy loop if fuzzy match succeeds till end of input when insertions are allowed
if (cap_ > 0)
{
if (c1 == EOF)
break;
while (err_ < max_)
if (c1 != EOF && ins_)
{
c1 = get();
if (c1 == EOF)
break;
// skip one (multibyte) char
if (!bin_ && c1 >= 0xC0)
// text insertions are allowed
while (err_ < max_)
{
int n = (c1 >= 0xE0) + (c1 >= 0xF0);
while (n-- >= 0)
if ((c1 = get()) == EOF)
break;
++err_;
c1 = get();
// reached the end?
if (c1 == EOF)
break;
// skip one (multibyte) char
if (!bin_ && c1 >= 0xC0)
{
int n = (c1 >= 0xE0) + (c1 >= 0xF0);
while (n-- >= 0)
if ((c1 = get()) == EOF)
break;
}
}
++err_;
}
if (at_end())
if (c1 == EOF || ins_)
{
DBGLOG("Match pos = %zu", pos_);
set_current(pos_);
break;
// reached the end?
if (at_end())
{
DBGLOG("Match pos = %zu", pos_);
set_current(pos_);
break;
}
}
cap_ = 0;
}
}
else
{
// exit fuzzy loop if match or first char mismatched
// exit fuzzy loop if match or if first char mismatched
if (cap_ > 0 || pos_ == static_cast<size_t>(txt_ + len_ - buf_ + 1))
break;
}
Expand All @@ -729,7 +741,7 @@ class FuzzyMatcher : public Matcher {
if (stack == 0 || bpt_[stack - 1].pc0 != pc0)
{
point(bpt_[stack++], pc0, len0, false, c1 == EOF);
DBGLOG("Point[%u] at %zu EOF", stack - 1, pc0 - pat_->opc_);
DBGLOG("Point[%u] at %zu pos %zu (\\0|\\nEOF)", stack - 1, pc0 - pat_->opc_, pos_ - 1);
}
}
else
Expand Down Expand Up @@ -801,7 +813,7 @@ class FuzzyMatcher : public Matcher {
}
}
}
// if fuzzy matched with errors then perform a second pass ahead of this match to check for an exact match
// if fuzzy find/split with errors then perform a second pass ahead of this match to check for an exact match
if (cap_ > 0 && err_ > 0 && !sst.use && (method == Const::FIND || method == Const::SPLIT))
{
// this part is based on advance() in matcher.cpp, limited to advancing ahead until one of the first pattern char(s) matches, excluding \n
Expand Down Expand Up @@ -1076,8 +1088,8 @@ class FuzzyMatcher : public Matcher {
std::vector<BacktrackPoint> bpt_; ///< vector of backtrack points, max_ size
uint8_t max_; ///< max errors
uint8_t err_; ///< accumulated edit distance (not guaranteed minimal)
bool ins_; ///< fuzzy match permits inserted chars (extra chars)
bool del_; ///< fuzzy match permits deleted chars (missing chars)
bool ins_; ///< fuzzy match permits inserted chars (extra chars in the input)
bool del_; ///< fuzzy match permits deleted chars (missing chars in the input)
bool sub_; ///< fuzzy match permits substituted chars
bool bin_; ///< fuzzy match bytes, not UTF-8 multibyte encodings
};
Expand Down
4 changes: 2 additions & 2 deletions include/reflex/matcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ class Matcher : public PatternMatcher<reflex::Pattern> {
public:
/// Convert a regex to an acceptable form, given the specified regex library signature `"[decls:]escapes[?+]"`, see reflex::convert.
template<typename T>
static std::string convert(T regex, convert_flag_type flags = convert_flag::none)
static std::string convert(T regex, convert_flag_type flags = convert_flag::none, bool *multiline = NULL)
{
return reflex::convert(regex, "imsx#=^:abcdefhijklnrstuvwxzABDHLNQSUW<>?", flags);
return reflex::convert(regex, "imsx#=^:abcdefhijklnrstuvwxzABDHLNQSUW<>?", flags, multiline);
}
/// Default constructor.
Matcher() : PatternMatcher<reflex::Pattern>()
Expand Down
6 changes: 5 additions & 1 deletion include/reflex/pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -820,19 +820,23 @@ class Pattern {
/// Meta characters: symbolic codes for the boundary conditions a pattern can test (word, line, buffer and indent boundaries).
/// The codes are consecutive starting at 0x100, which is above every 8-bit value; their numeric order matters, see META_IND and META_DED.
enum Meta {
META_MIN = 0x100, ///< lowest meta code, one below the first boundary code META_NWB
// word boundaries
META_NWB = 0x101, ///< non-word boundary at begin `\Bx`
META_NWE = 0x102, ///< non-word boundary at end `x\B`
META_BWB = 0x103, ///< begin of word at begin `\<x` where \bx=(\<|\>)x
META_EWB = 0x104, ///< end of word at begin `\>x`
META_BWE = 0x105, ///< begin of word at end `x\<` where x\b=x(\<|\>)
META_EWE = 0x106, ///< end of word at end `x\>`
// line and buffer boundaries
META_BOL = 0x107, ///< begin of line `^`
META_EOL = 0x108, ///< end of line `$`
META_BOB = 0x109, ///< begin of buffer `\A`
META_EOB = 0x10A, ///< end of buffer `\Z`
// indent boundaries
META_UND = 0x10B, ///< undent boundary `\k`
META_IND = 0x10C, ///< indent boundary `\i` (must be one but the largest META code)
META_DED = 0x10D, ///< dedent boundary `\j` (must be the largest META code, i.e. the last boundary code before META_MAX)
// end of boundaries
META_MAX ///< max meta characters: has no explicit value, so it is META_DED + 1 (0x10E) and must stay the last enumerator
};
/// Initialize the pattern at construction.
Expand Down Expand Up @@ -1142,7 +1146,7 @@ class Pattern {
{
return ((h << 3) ^ b) & (Const::HASH - 1);
}
/// file indexing hash 0 <= indexhash() < 65536, must be additive: indexhash(x,b+1) = indexhash(x,b)+1 modulo 2^16
/// file indexing hash 0 <= indexhash() < 65536, must be additive: indexhash(x,b+1) = indexhash(x,b)+1 modulo 2^16.
static inline Hash indexhash(Hash h, uint8_t b)
{
  // 64*h - 3*h + b, i.e. 61*h + b: the byte b enters with coefficient 1,
  // so incrementing b by one increments the hash by one
  return (h << 6) - 3 * h + b;
}
8 changes: 4 additions & 4 deletions include/reflex/pcre2matcher.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ class PCRE2Matcher : public PatternMatcher<std::string> {
public:
/// Convert a regex to an acceptable form, given the specified regex library signature `"[decls:]escapes[?+]"`, see reflex::convert.
template<typename T>
static std::string convert(T regex, convert_flag_type flags = convert_flag::none)
static std::string convert(T regex, convert_flag_type flags = convert_flag::none, bool *multiline = NULL)
{
return reflex::convert(regex, "imPRsx!#<>=&|'(0123456789*:abcdefghnrstvwxzABCDGHKNQRSVWXZ0123456789?+", flags);
return reflex::convert(regex, "imPRsx!#<>=&|'(0123456789*:abcdefghnrstvwxzABCDGHKNQRSVWXZ0123456789?+", flags, multiline);
}
/// Default constructor.
PCRE2Matcher()
Expand Down Expand Up @@ -458,9 +458,9 @@ class PCRE2UTFMatcher : public PCRE2Matcher {
public:
/// Convert a regex to an acceptable form, given the specified regex library signature `"[decls:]escapes[?+]"`, see reflex::convert.
template<typename T>
static std::string convert(T regex, convert_flag_type flags = convert_flag::none)
static std::string convert(T regex, convert_flag_type flags = convert_flag::none, bool *multiline = NULL)
{
return reflex::convert(regex, "imPRsx!#<>=&|'(0123456789*:abcdefghknprstvwxzABCDGHKNPQRSVWXZ0123456789?+", flags);
return reflex::convert(regex, "imPRsx!#<>=&|'(0123456789*:abcdefghknprstvwxzABCDGHKNPQRSVWXZ0123456789?+", flags, multiline);
}
/// Default constructor.
PCRE2UTFMatcher() : PCRE2Matcher()
Expand Down
Loading

0 comments on commit 4af28c8

Please sign in to comment.