Skip to content

Commit

Permalink
released 4.3.0
Browse files Browse the repository at this point in the history
  • Loading branch information
genivia-inc committed Oct 7, 2023
1 parent 0c6678c commit 295382b
Show file tree
Hide file tree
Showing 34 changed files with 1,976 additions and 1,483 deletions.
142 changes: 75 additions & 67 deletions README.md

Large diffs are not rendered by default.

Binary file modified bin/win32/ugrep.exe
Binary file not shown.
Binary file modified bin/win64/ugrep.exe
Binary file not shown.
20 changes: 10 additions & 10 deletions configure
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#! /bin/sh
# Guess values for system-dependent variables and create Makefiles.
# Generated by GNU Autoconf 2.71 for ugrep 4.1.
# Generated by GNU Autoconf 2.71 for ugrep 4.3.
#
#
# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation,
Expand Down Expand Up @@ -609,8 +609,8 @@ MAKEFLAGS=
# Identity of this package.
PACKAGE_NAME='ugrep'
PACKAGE_TARNAME='ugrep'
PACKAGE_VERSION='4.1'
PACKAGE_STRING='ugrep 4.1'
PACKAGE_VERSION='4.3'
PACKAGE_STRING='ugrep 4.3'
PACKAGE_BUGREPORT=''
PACKAGE_URL=''

Expand Down Expand Up @@ -1349,7 +1349,7 @@ if test "$ac_init_help" = "long"; then
# Omit some internal or obsolete options to make the list less imposing.
# This message is too long to be a string in the A/UX 3.1 sh.
cat <<_ACEOF
\`configure' configures ugrep 4.1 to adapt to many kinds of systems.
\`configure' configures ugrep 4.3 to adapt to many kinds of systems.
Usage: $0 [OPTION]... [VAR=VALUE]...
Expand Down Expand Up @@ -1420,7 +1420,7 @@ fi

if test -n "$ac_init_help"; then
case $ac_init_help in
short | recursive ) echo "Configuration of ugrep 4.1:";;
short | recursive ) echo "Configuration of ugrep 4.3:";;
esac
cat <<\_ACEOF
Expand Down Expand Up @@ -1562,7 +1562,7 @@ fi
test -n "$ac_init_help" && exit $ac_status
if $ac_init_version; then
cat <<\_ACEOF
ugrep configure 4.1
ugrep configure 4.3
generated by GNU Autoconf 2.71
Copyright (C) 2021 Free Software Foundation, Inc.
Expand Down Expand Up @@ -2099,7 +2099,7 @@ cat >config.log <<_ACEOF
This file contains any messages produced by compilers while
running configure, to aid debugging if configure makes a mistake.
It was created by ugrep $as_me 4.1, which was
It was created by ugrep $as_me 4.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
$ $0$ac_configure_args_raw
Expand Down Expand Up @@ -3586,7 +3586,7 @@ fi

# Define the identity of the package.
PACKAGE='ugrep'
VERSION='4.1'
VERSION='4.3'


printf "%s\n" "#define PACKAGE \"$PACKAGE\"" >>confdefs.h
Expand Down Expand Up @@ -9358,7 +9358,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
# report actual input values of CONFIG_FILES etc. instead of their
# values after options handling.
ac_log="
This file was extended by ugrep $as_me 4.1, which was
This file was extended by ugrep $as_me 4.3, which was
generated by GNU Autoconf 2.71. Invocation command line was
CONFIG_FILES = $CONFIG_FILES
Expand Down Expand Up @@ -9426,7 +9426,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
ac_cs_config='$ac_cs_config_escaped'
ac_cs_version="\\
ugrep config.status 4.1
ugrep config.status 4.3
configured by $0, generated by GNU Autoconf 2.71,
with options \\"\$ac_cs_config\\"
Expand Down
2 changes: 1 addition & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
AC_INIT([ugrep],[4.1])
AC_INIT([ugrep],[4.3])
AM_INIT_AUTOMAKE([foreign])
AC_CONFIG_HEADERS([config.h])
AC_COPYRIGHT([Copyright (C) 2019-2023 Robert van Engelen, Genivia Inc.])
Expand Down
24 changes: 24 additions & 0 deletions include/reflex/convert.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
#include <reflex/error.h>
#include <string>
#include <map>
#include <cstring>

#if (defined(__WIN32__) || defined(_WIN32) || defined(WIN32) || defined(_WIN64) || defined(__BORLANDC__)) && !defined(__CYGWIN__) && !defined(__MINGW32__) && !defined(__MINGW64__)
# pragma warning( disable : 4290 )
Expand Down Expand Up @@ -176,6 +177,29 @@ inline std::string convert(
return convert(pattern.c_str(), signature, flags, multiline, macros);
}

/// Check if a modifier character is listed in the modifier part of a signature.
/// @param signature signature string of the form "modifiers:escapes", may be NULL
/// @param modchar modifier character to look for
/// @returns true if modchar occurs in front of the first ':' of the signature,
///          false if signature is NULL or has no ':'
inline bool supports_modifier(
    const char *signature,
    int modchar)
{
  if (signature == NULL)
    return false;
  // a signature without a ':' separator lists no modifiers
  const char *colon = std::strchr(signature, ':');
  if (colon == NULL)
    return false;
  // only search the characters in front of the ':' separator
  return std::memchr(signature, modchar, static_cast<std::size_t>(colon - signature)) != NULL;
}

/// Check if an escape character is listed in the escapes part of a signature.
/// @param signature signature string of the form "modifiers:escapes", may be NULL
/// @param escape escape character to look for
/// @returns true if escape occurs at or after the first ':' of the signature, or
///          anywhere in the signature when it has no ':'; false if signature is
///          NULL or escape is the NUL character
inline bool supports_escape(
    const char *signature,
    int escape)
{
  if (signature == NULL)
    return false;
  // strchr() matches the string terminator when searching for NUL, which would
  // report NUL as a supported escape for every signature
  if (static_cast<char>(escape) == '\0')
    return false;
  const char *escapes = std::strchr(signature, ':');
  return std::strchr(escapes != NULL ? escapes : signature, escape) != NULL;
}

} // namespace reflex

#endif
11 changes: 8 additions & 3 deletions include/reflex/pattern.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,9 @@ class Pattern {
opc_(NULL),
nop_(0),
fsm_(NULL)
{ }
{
init(NULL);
}
/// Construct a pattern object given a regex string.
explicit Pattern(
const char *regex,
Expand Down Expand Up @@ -613,12 +615,14 @@ class Pattern {
{
clear();
}
/// delete the tree DFA.
/// delete the tree DFA and reset to the initial state.
void clear()
{
for (List::iterator i = list.begin(); i != list.end(); ++i)
delete[] *i;
list.clear();
tree = NULL;
next = ALLOC;
}
/// return the root of the tree.
Node *root()
Expand Down Expand Up @@ -707,12 +711,13 @@ class Pattern {
{
clear();
}
/// delete DFA
/// delete DFA and reset to initial state.
void clear()
{
for (List::iterator i = list.begin(); i != list.end(); ++i)
delete[] *i;
list.clear();
next = ALLOC;
}
#ifdef WITH_TREE_DFA
/// new DFA state.
Expand Down
44 changes: 19 additions & 25 deletions lib/convert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,23 +219,6 @@ inline bool is_modified(const std::map<size_t,std::string>& mod, int c)
return false;
}

/// Check if modifier character c is listed in front of the first ':' of the signature.
/// Returns false when signature is NULL or has no ':' separator.
inline bool supports_modifier(const char *signature, int c)
{
  // guard against a NULL signature, as supports_escape() does, since strchr(NULL, ...) is undefined
  if (signature == NULL)
    return false;
  const char *escapes = std::strchr(signature, ':');
  if (escapes == NULL)
    return false;
  // the modifier must occur in front of the ':' separator
  const char *s = std::strchr(signature, c);
  return s != NULL && s < escapes;
}

/// Check if an escape character is listed at or after the first ':' of the signature,
/// or anywhere in the signature when it has no ':'.
/// Returns false when signature is NULL or escape is the NUL character.
inline bool supports_escape(const char *signature, int escape)
{
  if (!signature)
    return false;
  // strchr() matches the string terminator when searching for NUL, which would
  // report NUL as a supported escape for every signature
  if (static_cast<char>(escape) == '\0')
    return false;
  const char *escapes = std::strchr(signature, ':');
  return std::strchr(escapes != NULL ? escapes : signature, escape) != NULL;
}

inline int hex_or_octal_escape(const char *signature)
{
if (supports_escape(signature, 'x'))
Expand Down Expand Up @@ -1601,11 +1584,11 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
if (!is_modified(mod, 'u') || !supports_escape(signature, 'X'))
{
#ifndef WITH_UTF8_UNRESTRICTED
// translate \X to match any valid UTF-8
regex.append(&pattern[loc], pos - loc - 1).append(par).append("[\\x00-\\xff]|[\\xc2-\\xdf][\\x80-\\xbf]|\\xe0[\\xa0-\\xbf][\\x80-\\xbf]|[\\xe1-\\xec][\\x80-\\xbf][\\x80-\\xbf]|\\xed[\\x80-\\x9f][\\x80-\\xbf]|[\\xee\\xef][\\x80-\\xbf][\\x80-\\xbf]|\\xf0[\\x90-\\xbf][\\x80-\\xbf][\\x80-\\xbf]|[\\xf1-\\xf3][\\x80-\\xbf][\\x80-\\xbf][\\x80-\\xbf]|\\xf4[\\x80-\\x8f][\\x80-\\xbf][\\x80-\\xbf]").push_back(')');
// translate \X to match any valid UTF-8 even beyond
regex.append(&pattern[loc], pos - loc - 1).append(par).append("[\\x00-\\x7f]|[\\xc2-\\xdf][\\x80-\\xbf]|\\xe0[\\xa0-\\xbf][\\x80-\\xbf]|[\\xe1-\\xec][\\x80-\\xbf][\\x80-\\xbf]|\\xed[\\x80-\\x9f][\\x80-\\xbf]|[\\xee\\xef][\\x80-\\xbf][\\x80-\\xbf]|\\xf0[\\x90-\\xbf][\\x80-\\xbf][\\x80-\\xbf]|[\\xf1-\\xf3][\\x80-\\xbf][\\x80-\\xbf][\\x80-\\xbf]|\\xf4[\\x80-\\x8f][\\x80-\\xbf][\\x80-\\xbf]").push_back(')');
#else
// translate \X to match any UTF-8 encoding, including malformed UTF-8 with overruns
regex.append(&pattern[loc], pos - loc - 1).append(par).append("[\\x00-\\xff]|[\\xc0-\\xff][\\x80-\\xbf]+").push_back(')');
// translate \X to match any valid UTF-8 encoding (including overruns)
regex.append(&pattern[loc], pos - loc - 1).append(par).append("[\\x00-\\x7f]|[\\xc2-\\xf4][\\x80-\\xbf]+").push_back(')');
#endif
loc = pos + 1;
}
Expand Down Expand Up @@ -2099,11 +2082,22 @@ std::string convert(const char *pattern, const char *signature, convert_flag_typ
}
else if (!supports_escape(signature, 'p'))
{
// \p is not supported: this indicates that . is non-Unicode
if (supports_modifier(signature, 's') || supports_escape(signature, '.'))
regex.append(&pattern[loc], pos - loc).append(par).append(".[\\x80-\\xbf]*)");
// \p is not supported: this indicates that . matches as non-Unicode, but we want to match Unicode
if ((flags & convert_flag::lex))
{
// lex . matches any byte (including invalid UTF-8) and any Unicode character e.g. in a catch-all-else rule
if (supports_modifier(signature, 's') || supports_escape(signature, '.'))
regex.append(&pattern[loc], pos - loc).append(par).append(".[\\x80-\\xbf]*)");
else
regex.append(&pattern[loc], pos - loc).append(par).append("[^\\n][\\x80-\\xbf]*)");
}
else
regex.append(&pattern[loc], pos - loc).append(par).append("[^\\n][\\x80-\\xbf]*)");
{
if (is_modified(mod, 's'))
regex.append(&pattern[loc], pos - loc).append(par).append("[^\\x80-\\xbf][\\x80-\\xbf]*)");
else
regex.append(&pattern[loc], pos - loc).append(par).append("[^\\n\\x80-\\xbf][\\x80-\\xbf]*)");
}
loc = pos + 1;
}
}
Expand Down
30 changes: 16 additions & 14 deletions man/ugrep.1
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
.TH UGREP "1" "September 22, 2023" "ugrep 4.2.0" "User Commands"
.TH UGREP "1" "October 07, 2023" "ugrep 4.3.0" "User Commands"
.SH NAME
\fBugrep\fR, \fBug\fR -- file pattern searcher
.SH SYNOPSIS
Expand Down Expand Up @@ -245,7 +245,7 @@ options specified in the configuration FILE are parsed first,
followed by the remaining options specified on the command line.
.TP
\fB\-\-confirm\fR
Confirm actions in \fB\-Q\fR query mode. The default is confirm.
Confirm actions in \fB\-Q\fR query TUI. The default is confirm.
.TP
\fB\-\-cpp\fR
Output file matches in C++. See also options \fB\-\-format\fR and \fB\-u\fR.
Expand All @@ -272,7 +272,7 @@ each directory, recursively, following symbolic links. This is
equivalent to the \fB\-R\fR option.
.TP
\fB\-\-delay\fR=\fIDELAY\fR
Set the default \fB\-Q\fR response delay (nonzero). Default is 5.
Set the default \fB\-Q\fR key response delay. Default is 3 for 300ms.
.TP
\fB\-\-depth\fR=[\fIMIN\fR,][\fIMAX\fR], \fB\-1\fR, \fB\-2\fR, \fB\-3\fR, ... \fB\-9\fR, \fB\-\-10\fR, \fB\-\-11\fR, \fB\-\-12\fR, ...
Restrict recursive searches from MIN to MAX directory levels deep,
Expand Down Expand Up @@ -580,7 +580,7 @@ Force output to be line buffered instead of block buffered.
.TP
\fB\-\-lines\fR
Apply Boolean queries to match lines, the opposite of \fB\-\-files\fR.
This is the default Boolean query mode to match specific lines.
This is the default Boolean mode to match specific lines.
.TP
\fB\-M\fR \fIMAGIC\fR, \fB\-\-file\-magic\fR=\fIMAGIC\fR
Only files matching the signature pattern MAGIC are searched. The
Expand Down Expand Up @@ -675,20 +675,19 @@ symbolic links are specified on the command line.
.TP
\fB\-\-pager\fR[=\fICOMMAND\fR]
When output is sent to the terminal, uses COMMAND to page through
the output. COMMAND defaults to environment variable $PAGER when
defined or `less'. Enables \fB\-\-heading\fR and
\fB\-\-line\-buffered\fR.
the output. COMMAND defaults to environment variable PAGER when
defined or `less'. Enables \fB\-\-heading\fR and \fB\-\-line\-buffered\fR.
.TP
\fB\-\-pretty\fR
When output is sent to a terminal, enables \fB\-\-color\fR, \fB\-\-heading\fR, \fB\-n\fR,
\fB\-\-sort\fR, \fB\-\-tree\fR and \fB\-T\fR when not explicitly disabled.
.TP
\fB\-Q\fR[=\fIDELAY\fR], \fB\-\-query\fR[=\fIDELAY\fR]
Query mode: user interface to perform interactive searches. This
mode requires an ANSI capable terminal. An optional DELAY argument
may be specified to reduce or increase the response time to execute
Query mode: start a TUI to perform interactive searches. This mode
requires an ANSI capable terminal. An optional DELAY argument may
be specified to reduce or increase the response time to execute
searches after the last key press, in increments of 100ms, where
the default is 5 (0.5s delay). No whitespace may be given between
the default is 3 (300ms delay). No whitespace may be given between
\fB\-Q\fR and its argument DELAY. Initial patterns may be specified with
\fB\-e\fR \fIPATTERN\fR, i.e. a PATTERN argument requires option \fB\-e\fR. Press F1
or CTRL\-Z to view the help screen. Press F2 or CTRL\-Y to invoke a
Expand All @@ -698,8 +697,8 @@ environment variable PAGER when defined, or EDITOR. Press Tab and
Shift\-Tab to navigate directories and to select a file to search.
Press Enter to select lines to output. Press ALT\-l for option \fB\-l\fR
to list files, ALT\-n for \fB\-n\fR, etc. Non\-option commands include
ALT\-] to increase fuzziness and ALT\-} to increase context. Enables
\fB\-\-heading\fR. See also options \fB\-\-confirm\fR and \fB\-\-view\fR.
ALT\-] to increase context and ALT\-} to increase fuzziness. See
also options \fB\-\-confirm\fR, \fB\-\-delay\fR, \fB\-\-split\fR and \fB\-\-view\fR.
.TP
\fB\-q\fR, \fB\-\-quiet\fR, \fB\-\-silent\fR
Quiet mode: suppress all output. Only search a file until a match
Expand Down Expand Up @@ -740,6 +739,9 @@ number, byte offset and the matched line. The default is a colon
(`:'), a plus (`+') for additional matches on the same line, and a
bar (`|') for multi\-line pattern matches.
.TP
\fB\-\-split\fR
Split the \fB\-Q\fR query TUI screen on startup.
.TP
\fB\-\-sort\fR[=\fIKEY\fR]
Displays matching files in the order specified by KEY in recursive
searches. Normally the ug command sorts by name whereas the ugrep
Expand Down Expand Up @@ -819,7 +821,7 @@ Selected lines are those not matching any of the specified
patterns.
.TP
\fB\-\-view\fR[=\fICOMMAND\fR]
Use COMMAND to view/edit a file in query mode when pressing CTRL\-Y.
Use COMMAND to view/edit a file in \fB\-Q\fR query TUI by pressing CTRL\-Y.
.TP
\fB\-W\fR, \fB\-\-with\-hex\fR
Output binary matches in hexadecimal, leaving text matches alone.
Expand Down
5 changes: 3 additions & 2 deletions src/flag.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,11 @@ extern bool flag_only_line_number;
extern bool flag_only_matching;
extern bool flag_perl_regexp;
extern bool flag_pretty;
extern bool flag_query;
extern bool flag_quiet;
extern bool flag_sort_rev; // internal flag
extern bool flag_stdin;
extern bool flag_split;
extern bool flag_stdin; // internal flag
extern bool flag_tty_term; // internal flag
extern bool flag_usage_warnings; // internal flag
extern bool flag_word_regexp;
Expand Down Expand Up @@ -162,7 +164,6 @@ extern size_t flag_min_line;
extern size_t flag_min_magic;
extern size_t flag_min_steal;
extern size_t flag_not_magic;
extern size_t flag_query;
extern size_t flag_tabs;
extern size_t flag_width;
extern size_t flag_zmax;
Expand Down
Loading

0 comments on commit 295382b

Please sign in to comment.