Skip to content

Commit

Permalink
released 3.12.7
Browse files Browse the repository at this point in the history
Like GNU grep, an unpaired ) in a regex is matched literally when option -Y is used, and also when ugrep is run through its grep and egrep aliases
  • Loading branch information
genivia-inc committed Aug 14, 2023
1 parent 8f2895d commit c361d55
Show file tree
Hide file tree
Showing 12 changed files with 75 additions and 51 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,12 @@ The ugrep tools include the following powerful commands:
Development roadmap
-------------------

- my highest priority is testing and quality assurance to continue to make sure ugrep has no bugs and is 100% reliable, my nightmare would be something like ripgrep's bugs (that I actually uncovered in benchmarking ugrep)
- listen to users to continue to improve ugrep with new and updated features
- my highest priority is testing and quality assurance to continue to make sure ugrep has no bugs and is reliable
- make ugrep even faster, see [my latest blog article](https://www.genivia.com/ugrep.html) demonstrating with a reproducible benchmark that ugrep beats GNU grep and ripgrep in terms of raw performance
- listen to users to continue to improve ugrep with new and updated features
- share reproducible performance data with the community
- improve the interactive TUI with a split screen
- add file indexing to accelerate cold search performance, see the [ugrep-indexer](https://github.com/Genivia/ugrep-indexer) for details on a new indexing method that I call a *monotonic indexer*
- add file indexing to speed up cold search performance, see [ugrep-indexer](https://github.com/Genivia/ugrep-indexer)

Overview
--------
Expand Down Expand Up @@ -5358,7 +5358,7 @@ in markdown:



ugrep 3.12.6 August 6, 2023 UGREP(1)
ugrep 3.12.7 August 14, 2023 UGREP(1)

🔝 [Back to table of contents](#toc)

Expand Down
Binary file modified bin/win32/ugrep.exe
Binary file not shown.
Binary file modified bin/win64/ugrep.exe
Binary file not shown.
3 changes: 2 additions & 1 deletion include/reflex/convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ namespace convert_flag {
const convert_flag_type dotall = 0x0080; ///< convert `.` (dot) to match all, same as `(?s)`
const convert_flag_type freespace = 0x0100; ///< convert regex by removing spacing, same as `(?x)`
const convert_flag_type notnewline = 0x0200; ///< inverted character classes and \s do not match newline `\n`
const convert_flag_type permissive = 0x0400; ///< convert Unicode to compact UTF-8 patterns and DFA, permits some invalid UTF-8 sequences
const convert_flag_type permissive = 0x0400; ///< convert Unicode to compact UTF-8 patterns, permits some invalid UTF-8 sequences
const convert_flag_type closing = 0x8000; ///< permit matching ) when it has no opening (
}

/// @brief Returns the converted regex string given a regex library signature and conversion flags, throws regex_error.
Expand Down
6 changes: 3 additions & 3 deletions include/reflex/pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -803,15 +803,15 @@ class Pattern {
Option() : b(), h(), e(), f(), i(), m(), n(), o(), p(), q(), r(), s(), w(), x(), z() { }
bool b; ///< disable escapes in bracket lists
bool h; ///< construct indexing hash finite state automaton
Char e; ///< escape character, or > 255 for none, '\\' default
std::vector<std::string> f; ///< output to files
Char e; ///< escape character, or > 255 for none, a backslash by default
std::vector<std::string> f; ///< output the patterns and/or DFA to file(s)
bool i; ///< case insensitive mode, also `(?i:X)`
bool m; ///< multi-line mode, also `(?m:X)`
std::string n; ///< pattern name (for use in generated code)
bool o; ///< generate optimized FSM code for option f
bool p; ///< with option f also output predict match array for fast search with find()
bool q; ///< enable "X" quotation of verbatim content, also `(?q:X)`
bool r; ///< raise syntax errors
bool r; ///< raise syntax errors as exceptions
bool s; ///< single-line mode (dotall mode), also `(?s:X)`
bool w; ///< write error message to stderr
bool x; ///< free-spacing mode, also `(?x:X)`
Expand Down
40 changes: 26 additions & 14 deletions lib/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1750,19 +1750,35 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
else
{
if (lev == 1)
throw regex_error(regex_error::mismatched_parens, pattern, pos);
if (beg)
{
if (!(flags & convert_flag::closing))
throw regex_error(regex_error::mismatched_parens, pattern, pos);
if (!(flags & convert_flag::basic) || bre)
{
// translate a closing ) to \) when it has no opening (
regex.append(&pattern[loc], pos - loc).push_back('\\');
loc = pos;
anc = false;
beg = false;
}
}
else if (beg)
{
throw regex_error(regex_error::empty_expression, pattern, pos);
if (lap == lev)
}
else
{
// lex lookahead: translate ) to ))
regex.append(&pattern[loc], pos - loc).push_back(')');
loc = pos;
lap = 0;
if (lap == lev)
{
// lex lookahead: translate ) to ))
regex.append(&pattern[loc], pos - loc).push_back(')');
loc = pos;
lap = 0;
}
// terminate (?isx:...)
mod[lev].clear();
--lev;
}
// terminate (?isx:...)
mod[lev].clear();
--lev;
}
break;
case '|':
Expand Down Expand Up @@ -2005,10 +2021,6 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
anc = false;
beg = false;
}
else
{
throw regex_error(regex_error::mismatched_braces, pattern, pos);
}
break;
case '#':
if ((flags & convert_flag::lex) && (flags & convert_flag::freespace))
Expand Down
39 changes: 21 additions & 18 deletions lib/pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -493,17 +493,17 @@ void Pattern::init_options(const char *options)
opt_.x = true;
break;
case 'z':
for (const char *t = s += (s[1] == '='); *s != ';' && *s != '\0'; ++t)
for (const char *t = s += (s[1] == '='); *s != ';' && *s != '\0'; ++t)
{
if (std::isspace(*t) || *t == ';' || *t == '\0')
{
if (std::isspace(*t) || *t == ';' || *t == '\0')
{
if (t > s + 1)
opt_.z = std::string(s + 1, t - s - 1);
s = t;
}
if (t > s + 1)
opt_.z = std::string(s + 1, t - s - 1);
s = t;
}
--s;
break;
}
--s;
break;
case 'f':
case 'n':
for (const char *t = s += (s[1] == '='); *s != ';' && *s != '\0'; ++t)
Expand Down Expand Up @@ -596,7 +596,10 @@ void Pattern::parse(
Char c = at(end);
if (c == '\0' || c == '|')
break;
if (c == '.' || c == '^' || c == '$' || c == '(' || c == ')' || c == '[' || c == '{' || c == '?' || c == '*' || c == '+')
if (c == '.' || c == '^' || c == '$' ||
c == '(' || c == '[' || c == '{' ||
c == '?' || c == '*' || c == '+' ||
c == ')')
{
end = loc;
break;
Expand Down Expand Up @@ -1115,6 +1118,10 @@ void Pattern::parse3(
lazyset.clear();
}
}
else if (at(loc) == '\0')
{
error(regex_error::mismatched_braces, loc);
}
else
{
error(regex_error::invalid_repeat, loc);
Expand Down Expand Up @@ -1381,17 +1388,13 @@ void Pattern::parse4(
{
error(begin ? regex_error::empty_expression : regex_error::mismatched_parens, loc++);
}
else if (c == '}')
{
error(regex_error::mismatched_braces, loc++);
}
else if (c != '\0' && c != '|' && c != '?' && c != '*' && c != '+')
{
pos_add(firstpos, loc);
pos_add(lastpos, loc);
nullable = false;
if (c == opt_.e)
(void)parse_esc(loc);
c = parse_esc(loc);
else
++loc;
}
Expand Down Expand Up @@ -4060,8 +4063,8 @@ bool Pattern::match_hfa(const uint8_t *indexed, size_t size) const
{
if (!has_hfa())
return false;
HFA::VisitSet visit[2]; // we alternate and swap two visit bitsets, to produce a new one from the previous
bool accept = false; // a flag to indicate that we reached an accept (or dead) state, i.e. a possible match is found
HFA::VisitSet visit[2]; // we alternate two state visit bitsets, to produce a new one from the previous
bool accept = false; // a flag to indicate that we reached an accept (= dead) state, i.e. a possible match is found
for (size_t level = 0; level < HFA::MAX_DEPTH && !accept; ++level)
if (!match_hfa_transitions(level, hfa_.hashes[level], indexed, size, visit[level & 1], visit[~level & 1], accept))
return false;
Expand Down Expand Up @@ -4098,7 +4101,7 @@ bool Pattern::match_hfa_transitions(size_t level, const HFA::Hashes& hashes, con
{
HFA::States::const_iterator state = hfa_.states.find(next->first);
if (state == hfa_.states.end() || state->second.empty())
return accept = true; // reached an accepting (dead) state (dead means accept in HFA)
return accept = true; // reached an accepting (= dead) state (dead means accept in HFA)
const HFA::StateSet::const_iterator index_end = state->second.end();
for (HFA::StateSet::const_iterator index = state->second.begin(); index != index_end; ++index)
next_visit.set(*index, true);
Expand Down
2 changes: 1 addition & 1 deletion man/ugrep.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH UGREP "1" "August 06, 2023" "ugrep 3.12.6" "User Commands"
.TH UGREP "1" "August 14, 2023" "ugrep 3.12.7" "User Commands"
.SH NAME
\fBugrep\fR, \fBug\fR -- file pattern searcher
.SH SYNOPSIS
Expand Down
19 changes: 12 additions & 7 deletions src/query.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -452,14 +452,14 @@ void Query::redraw()
Screen::put( 9, 0, "\033[7mM-Left\033[m \033[7mM-Right\033[m pan ½ page");
Screen::put(10, 0, "\033[7mM-Up\033[m \033[7mM-Down\033[m scroll ½ pg");
#endif
Screen::put(11, 0, "");
Screen::put(12, 0, "\033[7mHome\033[m \033[7mEnd\033[m begin/end of line");
Screen::put(11, 0, "\033[7mHome\033[m \033[7mEnd\033[m of pattern");
Screen::put(12, 0, "");
Screen::put(13, 0, "\033[7m^K\033[m delete after cursor");
Screen::put(14, 0, "\033[7m^L\033[m refresh screen");
Screen::put(15, 0, "\033[7m^Q\033[m quick exit and output");
Screen::put(16, 0, "\033[7m^R\033[m or \033[7mF4\033[m restore bookmark");
Screen::put(17, 0, "\033[7m^S\033[m scroll to next file/dir");
Screen::put(18, 0, "\033[7m^T\033[m toggle colors on/off");
Screen::put(18, 0, "\033[7m^T\033[m toggle colors");
Screen::put(19, 0, "\033[7m^U\033[m delete before cursor");
Screen::put(20, 0, "\033[7m^V\033[m verbatim character");
Screen::put(21, 0, "\033[7m^W\033[m scroll back one file/dir");
Expand Down Expand Up @@ -502,9 +502,9 @@ void Query::redraw()
if (!message_)
{
#ifdef WITH_MACOS_META_KEY
Screen::put(0, 0, "\033[7mF1\033[m help and options: \033[7m^\033[m=\033[7mCtrl\033[m \033[7mS-\033[m=\033[7mShift\033[m \033[7mM-\033[m=\033[7mAlt\033[m/\033[7mOption\033[m or use \033[7m^O\033[m+key");
Screen::put(0, 0, " HELP AND OPTIONS \033[7m^\033[m=Ctrl \033[7mS-\033[m=Shift \033[7mM-\033[m=Alt/Option or \033[7m^O\033[m+key");
#else
Screen::put(0, 0, "\033[7mF1\033[m help and options: \033[7m^\033[m=\033[7mCtrl\033[m \033[7mS-\033[m=\033[7mShift\033[m \033[7mM-\033[m=\033[7mAlt\033[m or use \033[7m^O\033[m+key");
Screen::put(0, 0, " HELP AND OPTIONS \033[7m^\033[m=Ctrl \033[7mS-\033[m=Shift \033[7mM-\033[m=Alt or \033[7m^O\033[m+key");
#endif
}

Expand Down Expand Up @@ -750,6 +750,8 @@ void Query::query_ui()
eof_ = true;
buflen_ = 0;

Screen::clear();

// if -e PATTERN specified, collect patterns on the line to edit
if (!flag_regexp.empty())
{
Expand Down Expand Up @@ -787,8 +789,11 @@ void Query::query_ui()

move(len_);
}

Screen::clear();
else
{
// display a brief help message
message("press CTRL-Z or F1 for help");
}

set_prompt();

Expand Down
2 changes: 1 addition & 1 deletion src/query.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@

// the max time that a message (to confirm a command) is shown at the query line, in steps of 100ms
#ifndef QUERY_MESSAGE_DELAY
#define QUERY_MESSAGE_DELAY 12
#define QUERY_MESSAGE_DELAY 15
#endif

class Query {
Expand Down
5 changes: 4 additions & 1 deletion src/ugrep.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7601,9 +7601,12 @@ void ugrep()
// reflex::Matcher options
std::string matcher_options;

// -Y: permit empty pattern matches
// -Y: permit empty pattern matches and match a closing ) literally when it has no opening (
if (flag_empty)
{
convert_flags |= reflex::convert_flag::closing;
matcher_options.push_back('N');
}

// -w: match whole words, i.e. make \< and \> match only left side and right side, respectively
if (flag_word_regexp)
Expand Down
2 changes: 1 addition & 1 deletion src/ugrep.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
#define UGREP_HPP

// ugrep version
#define UGREP_VERSION "3.12.6"
#define UGREP_VERSION "3.12.7"

// disable mmap because, with the file reading speed improvements made since 3.0.0, mmap is almost always slower
#define WITH_NO_MMAP
Expand Down

0 comments on commit c361d55

Please sign in to comment.