From 79b43cfb96ec60739c9465bb6b9b44c4f3a09779 Mon Sep 17 00:00:00 2001 From: fyodor Date: Sun, 11 Sep 2005 09:22:55 +0000 Subject: [PATCH] Upgraded from libpcre 4.3 to 6.3 - not tested on Windows yet --- libpcre/AUTHORS | 21 +- libpcre/COPYING | 54 - libpcre/LICENCE | 78 +- libpcre/Makefile.in | 210 +- libpcre/NEWS | 123 - libpcre/NMAP_MODIFICATIONS | 15 +- libpcre/NON-UNIX-USE | 192 +- libpcre/README | 297 +- libpcre/RunTest.in | 135 - libpcre/aclocal.m4 | 3337 -------- libpcre/config.guess | 197 +- libpcre/config.in | 41 +- libpcre/config.sub | 134 +- libpcre/configure | 324 +- libpcre/configure.ac | 64 +- libpcre/dftables.c | 168 +- libpcre/ltmain.sh | 5069 ------------- libpcre/pcre-config.in | 59 - libpcre/pcre.c | 7596 ------------------- libpcre/pcre.def | 22 - libpcre/pcre.h | 258 + libpcre/pcre.in | 192 +- libpcre/{chartables.c => pcre_chartables.c} | 8 +- libpcre/pcre_compile.c | 5059 ++++++++++++ libpcre/pcre_config.c | 112 + libpcre/pcre_dfa_exec.c | 1922 +++++ libpcre/pcre_exec.c | 3632 +++++++++ libpcre/pcre_fullinfo.c | 149 + libpcre/{get.c => pcre_get.c} | 63 +- libpcre/pcre_globals.c | 69 + libpcre/pcre_info.c | 89 + libpcre/{internal.h => pcre_internal.h} | 689 +- libpcre/{maketables.c => pcre_maketables.c} | 75 +- libpcre/{printint.c => pcre_printint.c} | 237 +- libpcre/pcre_refcount.c | 77 + libpcre/{study.c => pcre_study.c} | 138 +- libpcre/pcre_tables.c | 129 + libpcre/pcre_try_flipped.c | 132 + libpcre/pcre_version.c | 61 + libpcre/pcre_win.h | 184 - libpcre/pcre_xclass.c | 121 + libpcre/pcreposix.c | 209 +- libpcre/pcreposix.h | 39 +- libpcre/perltest | 211 - libpcre/ucp.h | 60 + 45 files changed, 14200 insertions(+), 17851 deletions(-) delete mode 100644 libpcre/COPYING delete mode 100644 libpcre/NEWS delete mode 100755 libpcre/RunTest.in delete mode 100644 libpcre/aclocal.m4 delete mode 100644 libpcre/ltmain.sh delete mode 100644 libpcre/pcre-config.in delete mode 100644 libpcre/pcre.c delete mode 100644 libpcre/pcre.def create mode 100644 libpcre/pcre.h rename libpcre/{chartables.c => pcre_chartables.c} (96%) create mode 100644 libpcre/pcre_compile.c create mode 100644 libpcre/pcre_config.c create mode 100644 libpcre/pcre_dfa_exec.c create mode 100644 libpcre/pcre_exec.c create mode 100644 libpcre/pcre_fullinfo.c rename libpcre/{get.c => pcre_get.c} (85%) create mode 100644 libpcre/pcre_globals.c create mode 100644 libpcre/pcre_info.c rename libpcre/{internal.h => pcre_internal.h} (50%) rename libpcre/{maketables.c => pcre_maketables.c} (56%) rename libpcre/{printint.c => pcre_printint.c} (54%) create mode 100644 libpcre/pcre_refcount.c rename libpcre/{study.c => pcre_study.c} (73%) create mode 100644 libpcre/pcre_tables.c create mode 100644 libpcre/pcre_try_flipped.c create mode 100644 libpcre/pcre_version.c delete mode 100644 libpcre/pcre_win.h create mode 100644 libpcre/pcre_xclass.c delete mode 100755 libpcre/perltest create mode 100644 libpcre/ucp.h diff --git a/libpcre/AUTHORS b/libpcre/AUTHORS index 832dddca45..33df90f8df 100644 --- a/libpcre/AUTHORS +++ b/libpcre/AUTHORS @@ -1,6 +1,23 @@ -Written by: Philip Hazel +THE MAIN PCRE LIBRARY +--------------------- + +Written by: Philip Hazel +Email local part: ph10 +Email domain: cam.ac.uk University of Cambridge Computing Service, Cambridge, England. Phone: +44 1223 334714. -Copyright (c) 1997-2001 University of Cambridge +Copyright (c) 1997-2005 University of Cambridge +All rights reserved + + +THE C++ WRAPPER LIBRARY +----------------------- + +Written by: Google Inc. + +Copyright (c) 2005 Google Inc +All rights reserved + +#### diff --git a/libpcre/COPYING b/libpcre/COPYING deleted file mode 100644 index 8d680612c4..0000000000 --- a/libpcre/COPYING +++ /dev/null @@ -1,54 +0,0 @@ -PCRE LICENCE ------------- - -PCRE is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. - -Written by: Philip Hazel - -University of Cambridge Computing Service, -Cambridge, England. Phone: +44 1223 334714. - -Copyright (c) 1997-2001 University of Cambridge - -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. In practice, this means that if you use - PCRE in software that you distribute to others, commercially or - otherwise, you must put a sentence like this - - Regular expression support is provided by the PCRE library package, - which is open source software, written by Philip Hazel, and copyright - by the University of Cambridge, England. - - somewhere reasonably visible in your documentation and in any relevant - files or online help data or similar. A reference to the ftp site for - the source, that is, to - - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/ - - should also be given in the documentation. However, this condition is not - intended to apply to whole chains of software. If package A includes PCRE, - it must acknowledge it, but if package B is software that includes package - A, the condition is not imposed on package B (unless it uses PCRE - independently). - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL), - then the terms of that licence shall supersede any condition above with - which it is incompatible. - -The documentation for PCRE, supplied in the "doc" directory, is distributed -under the same terms as the software itself. - -End diff --git a/libpcre/LICENCE b/libpcre/LICENCE index 8d680612c4..e8eb0d9370 100644 --- a/libpcre/LICENCE +++ b/libpcre/LICENCE @@ -4,51 +4,65 @@ PCRE LICENCE PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel +Release 6 of PCRE is distributed under the terms of the "BSD" licence, as +specified below. The documentation for PCRE, supplied in the "doc" +directory, is distributed under the same terms as the software itself. + +The basic library functions are written in C and are freestanding. Also +included in the distribution is a set of C++ wrapper functions. + + +THE BASIC LIBRARY FUNCTIONS +--------------------------- + +Written by: Philip Hazel +Email local part: ph10 +Email domain: cam.ac.uk University of Cambridge Computing Service, Cambridge, England. Phone: +44 1223 334714. -Copyright (c) 1997-2001 University of Cambridge +Copyright (c) 1997-2005 University of Cambridge +All rights reserved. + -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: +THE C++ WRAPPER FUNCTIONS +------------------------- -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +Contributed by: Google Inc. -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. In practice, this means that if you use - PCRE in software that you distribute to others, commercially or - otherwise, you must put a sentence like this +Copyright (c) 2005, Google Inc. +All rights reserved. - Regular expression support is provided by the PCRE library package, - which is open source software, written by Philip Hazel, and copyright - by the University of Cambridge, England. - somewhere reasonably visible in your documentation and in any relevant - files or online help data or similar. A reference to the ftp site for - the source, that is, to +THE "BSD" LICENCE +----------------- - ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: - should also be given in the documentation. However, this condition is not - intended to apply to whole chains of software. If package A includes PCRE, - it must acknowledge it, but if package B is software that includes package - A, the condition is not imposed on package B (unless it uses PCRE - independently). + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), or Lesser General Purpose Licence (LGPL), - then the terms of that licence shall supersede any condition above with - which it is incompatible. + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. -The documentation for PCRE, supplied in the "doc" directory, is distributed -under the same terms as the software itself. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. End diff --git a/libpcre/Makefile.in b/libpcre/Makefile.in index 6a97fe9b7c..fe1b601c3f 100644 --- a/libpcre/Makefile.in +++ b/libpcre/Makefile.in @@ -1,14 +1,33 @@ # Makefile.in for PCRE (Perl-Compatible Regular Expression) library. + # Edited substantially by fyodor@insecure.org to remove libtool dependency # and strip it to just building the library. +############################################################################# + +# PCRE is developed on a Unix system. I do not use Windows or Macs, and know +# nothing about building software on them. Although the code of PCRE should +# be very portable, the building system in this Makefile is designed for Unix +# systems. However, there are features that have been supplied to me by various +# people that should make it work on MinGW and Cygwin systems. + # This setting enables Unix-style directory scanning in pcregrep, triggered # by the -f option. Maybe one day someone will add code for other systems. PCREGREP_OSTYPE=-DIS_UNIX +############################################################################# + + +# Libtool places .o files in the .libs directory; this can mean that "make" +# thinks is it not up-to-date when in fact it is. This setting helps when +# GNU "make" is being used. It presumably does no harm in other cases. + +VPATH=.libs + + #---------------------------------------------------------------------------# # The following lines are modified by "configure" to insert data that it is # # given in its arguments, or which it finds out for itself. # @@ -49,19 +68,36 @@ OBJEXT = @OBJEXT@ BUILD_EXEEXT = @BUILD_EXEEXT@ BUILD_OBJEXT = @BUILD_OBJEXT@ +# POSIX_OBJ and POSIX_LOBJ are either set empty, or to the names of the +# POSIX object files. + +POSIX_OBJ = @POSIX_OBJ@ +POSIX_LOBJ = @POSIX_LOBJ@ + # The compiler, C flags, preprocessor flags, etc CC = @CC@ AR = ar RANLIB = @RANLIB@ +CXX = @CXX@ CFLAGS = @CFLAGS@ +CXXFLAGS = @CXXFLAGS@ +LDFLAGS = @LDFLAGS@ + CC_FOR_BUILD = @CC_FOR_BUILD@ CFLAGS_FOR_BUILD = @CFLAGS_FOR_BUILD@ -UTF8 = @UTF8@ +CXX_FOR_BUILD = @CXX_FOR_BUILD@ +CXXFLAGS_FOR_BUILD = @CXXFLAGS_FOR_BUILD@ +LDFLAGS_FOR_BUILD = $(LDFLAGS) + +UCP = @UCP@ NEWLINE = @NEWLINE@ POSIX_MALLOC_THRESHOLD = @POSIX_MALLOC_THRESHOLD@ LINK_SIZE = @LINK_SIZE@ -MATCH_LIMIT= @MATCH_LIMIT@ +MATCH_LIMIT = @MATCH_LIMIT@ +NO_RECURSE = @NO_RECURSE@ +EBCDIC = @EBCDIC@ + INCLS = -I. -I$(top_srcdir) DEFS = $(NEWLINE) $(LINK_SIZE) $(MATCH_LIMIT) TARGET=libpcre.a @@ -75,58 +111,150 @@ PCREPOSIXLIBVERSION = @PCRE_POSIXLIB_VERSION@ ############################################################################## -OBJ = maketables.@OBJEXT@ get.@OBJEXT@ study.@OBJEXT@ pcre.@OBJEXT@ @POSIX_OBJ@ -$(TARGET): $(OBJ) - -rm -f libpcre.a - $(AR) cr $@ $(OBJ) - $(RANLIB) $@ +OBJ = pcre_chartables.@OBJEXT@ \ + pcre_compile.@OBJEXT@ \ + pcre_config.@OBJEXT@ \ + pcre_dfa_exec.@OBJEXT@ \ + pcre_exec.@OBJEXT@ \ + pcre_fullinfo.@OBJEXT@ \ + pcre_get.@OBJEXT@ \ + pcre_globals.@OBJEXT@ \ + pcre_info.@OBJEXT@ \ + pcre_maketables.@OBJEXT@ \ + pcre_printint.@OBJEXT@ \ + pcre_refcount.@OBJEXT@ \ + pcre_study.@OBJEXT@ \ + pcre_tables.@OBJEXT@ \ + pcre_try_flipped.@OBJEXT@ \ + pcre_version.@OBJEXT@ \ + pcre_xclass.@OBJEXT@ \ + $(POSIX_OBJ) all: $(TARGET) -pcre.@OBJEXT@: $(top_srcdir)/chartables.c $(top_srcdir)/pcre.c \ - $(top_srcdir)/internal.h $(top_srcdir)/printint.c \ - pcre.h config.h Makefile - $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(UTF8) $(POSIX_MALLOC_THRESHOLD) $(top_srcdir)/pcre.c - -pcreposix.@OBJEXT@: $(top_srcdir)/pcreposix.c $(top_srcdir)/pcreposix.h \ - $(top_srcdir)/internal.h pcre.h config.h Makefile - $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) $(top_srcdir)/pcreposix.c - -maketables.@OBJEXT@: $(top_srcdir)/maketables.c $(top_srcdir)/internal.h \ - pcre.h config.h Makefile - $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(top_srcdir)/maketables.c - -get.@OBJEXT@: $(top_srcdir)/get.c $(top_srcdir)/internal.h \ +# Note that files generated by ./configure and by dftables are in the current +# directory, not the source directory. + +pcre_chartables.@OBJEXT@: pcre_chartables.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) \ + $(POSIX_MALLOC_THRESHOLD) pcre_chartables.c + +pcre_compile.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_compile.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_compile.c + +pcre_config.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_config.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_config.c + +pcre_dfa_exec.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_dfa_exec.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_dfa_exec.c + +pcre_exec.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_exec.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_exec.c + +pcre_fullinfo.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_fullinfo.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_fullinfo.c + +pcre_get.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_get.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_get.c + +pcre_globals.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_globals.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_globals.c + +pcre_info.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_info.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_info.c + +pcre_maketables.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_maketables.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_maketables.c + +pcre_printint.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_printint.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_printint.c + +pcre_refcount.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_refcount.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_refcount.c + +pcre_study.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_study.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_study.c + +pcre_tables.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_tables.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_tables.c + +pcre_try_flipped.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_try_flipped.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_try_flipped.c + +pcre_version.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_version.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_version.c + +pcre_xclass.@OBJEXT@: Makefile config.h pcre.h \ + $(top_srcdir)/pcre_internal.h $(top_srcdir)/pcre_xclass.c + $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(POSIX_MALLOC_THRESHOLD) \ + $(top_srcdir)/pcre_xclass.c + +pcreposix.@OBJEXT@: $(top_srcdir)/pcreposix.c $(top_srcdir)/pcreposix.h \ + $(top_srcdir)/pcre_internal.h pcre.h config.h Makefile + @$(LTCOMPILE) $(POSIX_MALLOC_THRESHOLD) $(top_srcdir)/pcreposix.c + +$(TARGET): $(OBJ) + -rm -f libpcre.a + $(AR) cr $@ $(OBJ) + $(RANLIB) $@ + +# An auxiliary program makes the default character table source. This is put +# in the current directory, NOT the $top_srcdir directory. + +pcre_chartables.c: dftables@BUILD_EXEEXT@ + ./dftables@BUILD_EXEEXT@ pcre_chartables.c + +dftables.@BUILD_OBJEXT@: $(top_srcdir)/dftables.c \ + $(top_srcdir)/pcre_maketables.c $(top_srcdir)/pcre_internal.h \ pcre.h config.h Makefile - $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(top_srcdir)/get.c - -study.@OBJEXT@: $(top_srcdir)/study.c $(top_srcdir)/internal.h \ - pcre.h config.h Makefile - $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) $(UTF8) $(top_srcdir)/study.c - -# An auxiliary program makes the default character table source - -$(top_srcdir)/chartables.c: dftables - ./dftables >$(top_srcdir)/chartables.c - -dftables.@BUILD_OBJEXT@: $(top_srcdir)/dftables.c $(top_srcdir)/maketables.c \ - $(top_srcdir)/internal.h pcre.h config.h Makefile $(CC) -c $(CFLAGS) $(INCLS) $(DEFS) -I. $(top_srcdir)/dftables.c -dftables: dftables.@BUILD_OBJEXT@ - $(CC) $(CFLAGS) -o dftables dftables.@OBJEXT@ +dftables@BUILD_EXEEXT@: dftables.@BUILD_OBJEXT@ + $(CC) $(CFLAGS) -o dftables dftables.@OBJEXT@ -# We deliberately omit dftables and chartables.c from 'make clean'; once made -# chartables.c shouldn't change, and if people have edited the tables by hand, -# you don't want to throw them away. +# We deliberately omit dftables and pcre_chartables.c from 'make clean'; once +# made pcre_chartables.c shouldn't change, and if people have edited the tables +# by hand, you don't want to throw them away. -clean:; -rm -rf *.@OBJEXT@ *.lo *.a *.la .libs pcretest@EXEEXT@ pcregrep@EXEEXT@ testtry +clean:; -rm -rf *.@OBJEXT@ *.lo *.a *.la .libs pcretest@EXEEXT@ pcre_stringpiece_unittest@EXEEXT@ pcrecpp_unittest@EXEEXT@ pcre_scanner_unittest@EXEEXT@ pcregrep@EXEEXT@ testtry # But "make distclean" should get back to a virgin distribution distclean: clean - -rm -f chartables.c libtool pcre-config pcre.h \ + -rm -f pcre_chartables.c libtool pcre-config libpcre.pc \ + pcre.h pcre_stringpiece.h pcrecpp.h \ + dftables@EXEEXT@ RunGrepTest RunTest \ Makefile config.h config.status config.log config.cache # End diff --git a/libpcre/NEWS b/libpcre/NEWS deleted file mode 100644 index e620b2d727..0000000000 --- a/libpcre/NEWS +++ /dev/null @@ -1,123 +0,0 @@ -News about PCRE releases ------------------------- - -Release 4.0 17-Feb-03 ---------------------- - -There have been a lot of changes for the 4.0 release, adding additional -functionality and mending bugs. Below is a list of the highlights of the new -functionality. For full details of these features, please consult the -documentation. For a complete list of changes, see the ChangeLog file. - -1. Support for Perl's \Q...\E escapes. - -2. "Possessive quantifiers" ?+, *+, ++, and {,}+ which come from Sun's Java -package. They provide some syntactic sugar for simple cases of "atomic -grouping". - -3. Support for the \G assertion. It is true when the current matching position -is at the start point of the match. - -4. A new feature that provides some of the functionality that Perl provides -with (?{...}). The facility is termed a "callout". The way it is done in PCRE -is for the caller to provide an optional function, by setting pcre_callout to -its entry point. To get the function called, the regex must include (?C) at -appropriate points. - -5. Support for recursive calls to individual subpatterns. This makes it really -easy to get totally confused. - -6. Support for named subpatterns. The Python syntax (?P...) is used to -name a group. - -7. Several extensions to UTF-8 support; it is now fairly complete. There is an -option for pcregrep to make it operate in UTF-8 mode. - -8. The single man page has been split into a number of separate man pages. -These also give rise to individual HTML pages which are put in a separate -directory. There is an index.html page that lists them all. Some hyperlinking -between the pages has been installed. - - -Release 3.5 15-Aug-01 ---------------------- - -1. The configuring system has been upgraded to use later versions of autoconf -and libtool. By default it builds both a shared and a static library if the OS -supports it. You can use --disable-shared or --disable-static on the configure -command if you want only one of them. - -2. The pcretest utility is now installed along with pcregrep because it is -useful for users (to test regexs) and by doing this, it automatically gets -relinked by libtool. The documentation has been turned into a man page, so -there are now .1, .txt, and .html versions in /doc. - -3. Upgrades to pcregrep: - (i) Added long-form option names like gnu grep. - (ii) Added --help to list all options with an explanatory phrase. - (iii) Added -r, --recursive to recurse into sub-directories. - (iv) Added -f, --file to read patterns from a file. - -4. Added --enable-newline-is-cr and --enable-newline-is-lf to the configure -script, to force use of CR or LF instead of \n in the source. On non-Unix -systems, the value can be set in config.h. - -5. The limit of 200 on non-capturing parentheses is a _nesting_ limit, not an -absolute limit. Changed the text of the error message to make this clear, and -likewise updated the man page. - -6. The limit of 99 on the number of capturing subpatterns has been removed. -The new limit is 65535, which I hope will not be a "real" limit. - - -Release 3.3 01-Aug-00 ---------------------- - -There is some support for UTF-8 character strings. This is incomplete and -experimental. The documentation describes what is and what is not implemented. -Otherwise, this is just a bug-fixing release. - - -Release 3.0 01-Feb-00 ---------------------- - -1. A "configure" script is now used to configure PCRE for Unix systems. It -builds a Makefile, a config.h file, and the pcre-config script. - -2. PCRE is built as a shared library by default. - -3. There is support for POSIX classes such as [:alpha:]. - -5. There is an experimental recursion feature. - ----------------------------------------------------------------------------- - IMPORTANT FOR THOSE UPGRADING FROM VERSIONS BEFORE 2.00 - -Please note that there has been a change in the API such that a larger -ovector is required at matching time, to provide some additional workspace. -The new man page has details. This change was necessary in order to support -some of the new functionality in Perl 5.005. - - IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.00 - -Another (I hope this is the last!) change has been made to the API for the -pcre_compile() function. An additional argument has been added to make it -possible to pass over a pointer to character tables built in the current -locale by pcre_maketables(). To use the default tables, this new arguement -should be passed as NULL. - - IMPORTANT FOR THOSE UPGRADING FROM VERSION 2.05 - -Yet another (and again I hope this really is the last) change has been made -to the API for the pcre_exec() function. An additional argument has been -added to make it possible to start the match other than at the start of the -subject string. This is important if there are lookbehinds. The new man -page has the details, but you just want to convert existing programs, all -you need to do is to stick in a new fifth argument to pcre_exec(), with a -value of zero. For example, change - - pcre_exec(pattern, extra, subject, length, options, ovec, ovecsize) -to - pcre_exec(pattern, extra, subject, length, 0, options, ovec, ovecsize) - -**** diff --git a/libpcre/NMAP_MODIFICATIONS b/libpcre/NMAP_MODIFICATIONS index c956a9b437..81783620a9 100644 --- a/libpcre/NMAP_MODIFICATIONS +++ b/libpcre/NMAP_MODIFICATIONS @@ -7,7 +7,7 @@ o rm -rf doc testdata pcretest.c pcregrep.c pcredemo.c ChangeLog o Renamed configure.in to configure.ac -o Removed Runtest, Runtest.in +o Removed RunGrepTest.in, Runtest.in o Removed pcretest and pcregrep from the Makefile.in all target: @@ -29,5 +29,16 @@ o Stripped down Makefile and configure.ac substantially to remove worries about building libpcre.a (which is now build in the libpcre directory rather than in libpcre/.libs ). +o removed libpcre.pc.in -o Regenerated configure script by running 'aclocal; autoconf' \ No newline at end of file +o Regenerated configure script by running 'aclocal; autoconf' + +o Get rid of C++ wrapper, which included this step: + rm -f pcrecpp.cc pcrecpp_unittest.cc pcre_scanner.cc pcre_scanner_unittest.cc pcre_stringpiece.cc pcre_stringpiece_unittest.cc pcrecpp.h pcrecpp.h.in pcre_scanner.h pcre_stringpiece.h pcre_stringpiece.h.in + +o Removed COPYING file as there is already a LICENSE file with exactly + the same contents. + +o Removed pcre_ucp_findchar.c and ucptable.c + +o Added this NMAP_MODIFICATIONS file diff --git a/libpcre/NON-UNIX-USE b/libpcre/NON-UNIX-USE index a000c29d41..b7fe4301a6 100644 --- a/libpcre/NON-UNIX-USE +++ b/libpcre/NON-UNIX-USE @@ -1,12 +1,25 @@ Compiling PCRE on non-Unix systems ---------------------------------- -See below for comments on Cygwin or MinGW usage. +See below for comments on Cygwin or MinGW and OpenVMS usage. I (Philip Hazel) +have no knowledge of Windows or VMS sytems and how their libraries work. The +items in the PCRE Makefile that relate to anything other than Unix-like systems +have been contributed by PCRE users. There are some other comments and files in +the Contrib directory on the ftp site that you may find useful. See -If you want to compile PCRE for a non-Unix system, note that it consists -entirely of code written in Standard C, and so should compile successfully -on any machine with a Standard C compiler and library, using normal compiling -commands to do the following: + ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/Contrib + +If you want to compile PCRE for a non-Unix system (or perhaps, more strictly, +for a system that does not support "configure" and "make" files), note that +PCRE consists entirely of code written in Standard C, and so should compile +successfully on any system that has a Standard C compiler and library. + + +GENERIC INSTRUCTIONS FOR THE C LIBRARY + +The following are generic comments about building PCRE. The interspersed +indented commands are suggestions from Mark Tetrode as to which commands you +might use on a Windows system to build a static library. (1) Copy or rename the file config.in as config.h, and change the macros that define HAVE_STRERROR and HAVE_MEMMOVE to define them as 1 rather than 0. @@ -16,30 +29,116 @@ particular, if you want to force a specific value for newline, you can define the NEWLINE macro. The default is to use '\n', thereby using whatever value your compiler gives to '\n'. + rem Mark Tetrode's commands + copy config.in config.h + rem Use write, because notepad cannot handle UNIX files. Change values. + write config.h + (2) Copy or rename the file pcre.in as pcre.h, and change the macro definitions for PCRE_MAJOR, PCRE_MINOR, and PCRE_DATE near its start to the values set in configure.in. + rem Mark Tetrode's commands + copy pcre.in pcre.h + rem Read values from configure.in + write configure.in + rem Change values + write pcre.h + (3) Compile dftables.c as a stand-alone program, and then run it with -the standard output sent to chartables.c. This generates a set of standard -character tables. +the single argument "chartables.c". This generates a set of standard +character tables and writes them to that file. + + rem Mark Tetrode's commands + rem Compile & run + cl -DSUPPORT_UTF8 -DSUPPORT_UCP dftables.c + dftables.exe chartables.c -(4) Compile maketables.c, get.c, study.c and pcre.c and link them all -together into an object library in whichever form your system keeps such -libraries. This is the pcre library (chartables.c is included by means of an -#include directive). +(4) Compile the following source files: -(5) Similarly, compile pcreposix.c and link it as the pcreposix library. + pcre_chartables.c + pcre_compile.c + pcre_config.c + pcre_dfa_exec.c + pcre_exec.c + pcre_fullinfo.c + pcre_get.c + pcre_globals.c + pcre_info.c + pcre_maketables.c + pcre_ord2utf8.c + pcre_printint.c + pcre_refcount.c + pcre_study.c + pcre_tables.c + pcre_try_flipped.c + pcre_ucp_findchar.c + pcre_valid_utf8.c + pcre_version.c + pcre_xclass.c + +and link them all together into an object library in whichever form your system +keeps such libraries. This is the pcre C library. If your system has static and +shared libraries, you may have to do this once for each type. + + rem These comments are out-of-date, referring to a previous release which + rem had fewer source files. Replace with the file names from above. + rem Mark Tetrode's commands, for a static library + rem Compile & lib + cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c maketables.c get.c study.c pcre.c + lib /OUT:pcre.lib maketables.obj get.obj study.obj pcre.obj + +(5) Similarly, compile pcreposix.c and link it (on its own) as the pcreposix +library. + + rem Mark Tetrode's commands, for a static library + rem Compile & lib + cl -DSUPPORT_UTF8 -DSUPPORT_UCP -DPOSIX_MALLOC_THRESHOLD=10 /c pcreposix.c + lib /OUT:pcreposix.lib pcreposix.obj (6) Compile the test program pcretest.c. This needs the functions in the pcre and pcreposix libraries when linking. + rem Mark Tetrode's commands + rem compile & link + cl /F0x400000 pcretest.c pcre.lib pcreposix.lib + (7) Run pcretest on the testinput files in the testdata directory, and check that the output matches the corresponding testoutput files. You must use the -i option when checking testinput2. Note that the supplied files are in Unix format, with just LF characters as line terminators. You may need to edit them to change this if your system uses a different convention. + rem Mark Tetrode's commands + pcretest testdata\testinput1 testdata\myoutput1 + windiff testdata\testoutput1 testdata\myoutput1 + pcretest -i testdata\testinput2 testdata\myoutput2 + windiff testdata\testoutput2 testdata\myoutput2 + pcretest testdata\testinput3 testdata\myoutput3 + windiff testdata\testoutput3 testdata\myoutput3 + pcretest testdata\testinput4 testdata\myoutput4 + windiff testdata\testoutput4 testdata\myoutput4 + pcretest testdata\testinput5 testdata\myoutput5 + windiff testdata\testoutput5 testdata\myoutput5 + pcretest testdata\testinput6 testdata\myoutput6 + windiff testdata\testoutput6 testdata\myoutput6 + +Note that there are now three more tests (7, 8, 9) that did not exist when Mark +wrote those comments. The test the new pcre_dfa_exec() function. + + +THE C++ WRAPPER FUNCTIONS + +The PCRE distribution now contains some C++ wrapper functions and tests, +contributed by Google Inc. On a system that can use "configure" and "make", +the functions are automatically built into a library called pcrecpp. It should +be straightforward to compile the .cc files manually on other systems. The +files called xxx_unittest.cc are test programs for each of the corresponding +xxx.cc files. + + +FURTHER REMARKS + If you have a system without "configure" but where you can use a Makefile, edit Makefile.in to create Makefile, substituting suitable values for the variables at the head of the file. @@ -79,7 +178,7 @@ These are some further comments about Win32 builds from Mark Evans. They were contributed before Fred Cox's changes were made, so it is possible that they may no longer be relevant. -The documentation for Win32 builds is a bit shy. Under MSVC6 I +"The documentation for Win32 builds is a bit shy. Under MSVC6 I followed their instructions to the letter, but there were still some things missing. @@ -89,7 +188,7 @@ some things missing. (2) Missing some #ifdefs relating to the function pointers pcre_malloc and pcre_free. See my solution below. (The stubs - may not be mandatory but they made me feel better.) + may not be mandatory but they made me feel better.)" ========================= #ifdef _WIN32 @@ -110,4 +209,69 @@ void (*pcre_free)(void *) = free; #endif ========================= + +BUILDING PCRE ON OPENVMS + +Dan Mooney sent the following comments about building PCRE on OpenVMS. They +relate to an older version of PCRE that used fewer source files, so the exact +commands will need changing. See the current list of source files above. + +"It was quite easy to compile and link the library. I don't have a formal +make file but the attached file [reproduced below] contains the OpenVMS DCL +commands I used to build the library. I had to add #define +POSIX_MALLOC_THRESHOLD 10 to pcre.h since it was not defined anywhere. + +The library was built on: +O/S: HP OpenVMS v7.3-1 +Compiler: Compaq C v6.5-001-48BCD +Linker: vA13-01 + +The test results did not match 100% due to the issues you mention in your +documentation regarding isprint(), iscntrl(), isgraph() and ispunct(). I +modified some of the character tables temporarily and was able to get the +results to match. Tests using the fr locale did not match since I don't have +that locale loaded. The study size was always reported to be 3 less than the +value in the standard test output files." + +========================= +$! This DCL procedure builds PCRE on OpenVMS +$! +$! I followed the instructions in the non-unix-use file in the distribution. +$! +$ COMPILE == "CC/LIST/NOMEMBER_ALIGNMENT/PREFIX_LIBRARY_ENTRIES=ALL_ENTRIES +$ COMPILE DFTABLES.C +$ LINK/EXE=DFTABLES.EXE DFTABLES.OBJ +$ RUN DFTABLES.EXE/OUTPUT=CHARTABLES.C +$ COMPILE MAKETABLES.C +$ COMPILE GET.C +$ COMPILE STUDY.C +$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol +$! did not seem to be defined anywhere. +$! I edited pcre.h and added #DEFINE SUPPORT_UTF8 to enable UTF8 support. +$ COMPILE PCRE.C +$ LIB/CREATE PCRE MAKETABLES.OBJ, GET.OBJ, STUDY.OBJ, PCRE.OBJ +$! I had to set POSIX_MALLOC_THRESHOLD to 10 in PCRE.H since the symbol +$! did not seem to be defined anywhere. +$ COMPILE PCREPOSIX.C +$ LIB/CREATE PCREPOSIX PCREPOSIX.OBJ +$ COMPILE PCRETEST.C +$ LINK/EXE=PCRETEST.EXE PCRETEST.OBJ, PCRE/LIB, PCREPOSIX/LIB +$! C programs that want access to command line arguments must be +$! defined as a symbol +$ PCRETEST :== "$ SYS$ROADSUSERS:[DMOONEY.REGEXP]PCRETEST.EXE" +$! Arguments must be enclosed in quotes. +$ PCRETEST "-C" +$! Test results: +$! +$! The test results did not match 100%. The functions isprint(), iscntrl(), +$! isgraph() and ispunct() on OpenVMS must not produce the same results +$! as the system that built the test output files provided with the +$! distribution. +$! +$! The study size did not match and was always 3 less on OpenVMS. +$! +$! Locale could not be set to fr +$! +========================= + **** diff --git a/libpcre/README b/libpcre/README index 6cef77c7e8..2ede6fc2e1 100644 --- a/libpcre/README +++ b/libpcre/README @@ -7,14 +7,49 @@ The latest release of PCRE is always available from Please read the NEWS file if you are upgrading from a previous release. -PCRE has its own native API, but a set of "wrapper" functions that are based on -the POSIX API are also supplied in the library libpcreposix. Note that this -just provides a POSIX calling interface to PCRE: the regular expressions -themselves still follow Perl syntax and semantics. The header file -for the POSIX-style functions is called pcreposix.h. The official POSIX name is -regex.h, but I didn't want to risk possible problems with existing files of -that name by distributing it that way. To use it with an existing program that -uses the POSIX API, it will have to be renamed or pointed at by a link. + +The PCRE APIs +------------- + +PCRE is written in C, and it has its own API. The distribution now includes a +set of C++ wrapper functions, courtesy of Google Inc. (see the pcrecpp man page +for details). + +Also included are a set of C wrapper functions that are based on the POSIX +API. These end up in the library called libpcreposix. Note that this just +provides a POSIX calling interface to PCRE: the regular expressions themselves +still follow Perl syntax and semantics. The header file for the POSIX-style +functions is called pcreposix.h. The official POSIX name is regex.h, but I +didn't want to risk possible problems with existing files of that name by +distributing it that way. To use it with an existing program that uses the +POSIX API, it will have to be renamed or pointed at by a link. + +If you are using the POSIX interface to PCRE and there is already a POSIX regex +library installed on your system, you must take care when linking programs to +ensure that they link with PCRE's libpcreposix library. Otherwise they may pick +up the "real" POSIX functions of the same name. + + +Documentation for PCRE +---------------------- + +If you install PCRE in the normal way, you will end up with an installed set of +man pages whose names all start with "pcre". The one that is called "pcre" +lists all the others. In addition to these man pages, the PCRE documentation is +supplied in two other forms; however, as there is no standard place to install +them, they are left in the doc directory of the unpacked source distribution. +These forms are: + + 1. Files called doc/pcre.txt, doc/pcregrep.txt, and doc/pcretest.txt. The + first of these is a concatenation of the text forms of all the section 3 + man pages except those that summarize individual functions. The other two + are the text forms of the section 1 man pages for the pcregrep and + pcretest commands. Text forms are provided for ease of scanning with text + editors or similar tools. + + 2. A subdirectory called doc/html contains all the documentation in HTML + form, hyperlinked in various ways, and rooted in a file called + doc/index.html. Contributions by users of PCRE @@ -41,7 +76,7 @@ INSTALL. Most commonly, people build PCRE within its own distribution directory, and in this case, on many systems, just running "./configure" is sufficient, but the -usual methods of changing standard defaults are available. For example, +usual methods of changing standard defaults are available. For example: CFLAGS='-O2 -Wall' ./configure --prefix=/opt/local @@ -59,11 +94,23 @@ cd /build/pcre/pcre-xxx There are some optional features that can be included or omitted from the PCRE library. You can read more about them in the pcrebuild man page. +. If you want to suppress the building of the C++ wrapper library, you can add + --disable-cpp to the "configure" command. Otherwise, when "configure" is run, + will try to find a C++ compiler and C++ header files, and if it succeeds, it + will try to build the C++ wrapper. + . If you want to make use of the support for UTF-8 character strings in PCRE, you must add --enable-utf8 to the "configure" command. Without it, the code for handling UTF-8 is not included in the library. (Even when included, it still has to be enabled by an option at run time.) +. If, in addition to support for UTF-8 character strings, you want to include + support for the \P, \p, and \X sequences that recognize Unicode character + properties, you must add --enable-unicode-properties to the "configure" + command. This adds about 90K to the size of the library (in the form of a + property table); only the basic two-letter properties such as Lu are + supported. + . You can build PCRE to recognized CR or NL as the newline character, instead of whatever your compiler uses for "\n", by adding --newline-is-cr or --newline-is-nl to the "configure" command, respectively. Only do this if you @@ -78,7 +125,7 @@ library. You can read more about them in the pcrebuild man page. on the "configure" command. -. PCRE has a counter which can be set to limit the amount of resources it uses. +. PCRE has a counter that can be set to limit the amount of resources it uses. If the limit is exceeded during a match, the match fails. The default is ten million. You can change the default by setting, for example, @@ -96,51 +143,91 @@ library. You can read more about them in the pcrebuild man page. is a representation of the compiled pattern, and this changes with the link size. -The "configure" script builds five files: +. You can build PCRE so that its internal match() function that is called from + pcre_exec() does not call itself recursively. Instead, it uses blocks of data + from the heap via special functions pcre_stack_malloc() and pcre_stack_free() + to save data that would otherwise be saved on the stack. To build PCRE like + this, use + + --disable-stack-for-recursion + on the "configure" command. PCRE runs more slowly in this mode, but it may be + necessary in environments with limited stack sizes. This applies only to the + pcre_exec() function; it does not apply to pcre_dfa_exec(), which does not + use deeply nested recursion. + +The "configure" script builds eight files for the basic C library: + +. pcre.h is the header file for C programs that call PCRE +. Makefile is the makefile that builds the library +. config.h contains build-time configuration options for the library +. pcre-config is a script that shows the settings of "configure" options +. libpcre.pc is data for the pkg-config command . libtool is a script that builds shared and/or static libraries -. Makefile is built by copying Makefile.in and making substitutions. -. config.h is built by copying config.in and making substitutions. -. pcre-config is built by copying pcre-config.in and making substitutions. -. RunTest is a script for running tests +. RunTest is a script for running tests on the library +. RunGrepTest is a script for running tests on the pcregrep command + +In addition, if a C++ compiler is found, the following are also built: + +. pcrecpp.h is the header file for programs that call PCRE via the C++ wrapper +. pcre_stringpiece.h is the header for the C++ "stringpiece" functions -Once "configure" has run, you can run "make". It builds two libraries called +The "configure" script also creates config.status, which is an executable +script that can be run to recreate the configuration, and config.log, which +contains compiler output from tests that "configure" runs. + +Once "configure" has run, you can run "make". It builds two libraries, called libpcre and libpcreposix, a test program called pcretest, and the pcregrep -command. You can use "make install" to copy these, the public header files -pcre.h and pcreposix.h, and the man pages to appropriate live directories on -your system, in the normal way. +command. If a C++ compiler was found on your system, it also builds the C++ +wrapper library, which is called libpcrecpp, and some test programs called +pcrecpp_unittest, pcre_scanner_unittest, and pcre_stringpiece_unittest. + +The command "make test" runs all the appropriate tests. Details of the PCRE +tests are given in a separate section of this document, below. + +You can use "make install" to copy the libraries, the public header files +pcre.h, pcreposix.h, pcrecpp.h, and pcre_stringpiece.h (the last two only if +the C++ wrapper was built), and the man pages to appropriate live directories +on your system, in the normal way. + +If you want to remove PCRE from your system, you can run "make uninstall". +This removes all the files that "make install" installed. However, it does not +remove any directories, because these are often shared with other programs. + + +Retrieving configuration information on Unix-like systems +--------------------------------------------------------- Running "make install" also installs the command pcre-config, which can be used to recall information about the PCRE configuration and installation. For -example, +example: pcre-config --version prints the version number, and - pcre-config --libs + pcre-config --libs outputs information about where the library is installed. This command can be included in makefiles for programs that use PCRE, saving the programmer from having to remember too many details. +The pkg-config command is another system for saving and retrieving information +about installed libraries. Instead of separate commands for each library, a +single command is used. For example: -Cross-compiling PCRE on a Unix-like system ------------------------------------------- + pkg-config --cflags pcre -PCRE needs to compile and run an auxiliary program as part of the building -process. Obviously, if the real compilation is for some other system, it can't -use the same CC and CFLAGS values when it is doing this. For cross compilation, -therefore, you must set CC_FOR_BUILD to the local host's compiler, and you can -set flags in CFLAGS_FOR_BUILD if you need to. +The data is held in *.pc files that are installed in a directory called +pkgconfig. Shared libraries on Unix-like systems ------------------------------------- -The default distribution builds PCRE as two shared libraries and two static -libraries, as long as the operating system supports shared libraries. Shared -library support relies on the "libtool" script which is built as part of the +The default distribution builds PCRE as shared libraries and static libraries, +as long as the operating system supports shared libraries. Shared library +support relies on the "libtool" script which is built as part of the "configure" process. The libtool script is used to compile and link both shared and static @@ -153,7 +240,7 @@ installed themselves. However, the versions left in the source directory still use the uninstalled libraries. To build PCRE using static libraries only you must use --disable-shared when -configuring it. For example +configuring it. For example: ./configure --prefix=/usr/gnu --disable-shared @@ -169,17 +256,21 @@ order to cross-compile PCRE for some other host. However, during the building process, the dftables.c source file is compiled *and run* on the local host, in order to generate the default character tables (the chartables.c file). It therefore needs to be compiled with the local compiler, not the cross compiler. -You can do this by specifying HOST_CC (and if necessary HOST_CFLAGS) when -calling the "configure" command. If they are not specified, they default to the -values of CC and CFLAGS. +You can do this by specifying CC_FOR_BUILD (and if necessary CFLAGS_FOR_BUILD; +there are also CXX_FOR_BUILD and CXXFLAGS_FOR_BUILD for the C++ wrapper) +when calling the "configure" command. If they are not specified, they default +to the values of CC and CFLAGS. Building on non-Unix systems ---------------------------- -For a non-Unix system, read the comments in the file NON-UNIX-USE. PCRE has -been compiled on Windows systems and on Macintoshes, but I don't know the -details because I don't use those systems. It should be straightforward to +For a non-Unix system, read the comments in the file NON-UNIX-USE, though if +the system supports the use of "configure" and "make" you may be able to build +PCRE in the same way as for Unix systems. + +PCRE has been compiled on Windows systems and on Macintoshes, but I don't know +the details because I don't use those systems. It should be straightforward to build PCRE on any system that has a Standard C compiler, because it uses only Standard C functions. @@ -188,15 +279,21 @@ Testing PCRE ------------ To test PCRE on a Unix system, run the RunTest script that is created by the -configuring process. (This can also be run by "make runtest", "make check", or -"make test".) For other systems, see the instruction in NON-UNIX-USE. - -The script runs the pcretest test program (which is documented in its own man -page) on each of the testinput files (in the testdata directory) in turn, -and compares the output with the contents of the corresponding testoutput file. -A file called testtry is used to hold the output from pcretest. To run pcretest -on just one of the test files, give its number as an argument to RunTest, for -example: +configuring process. There is also a script called RunGrepTest that tests the +options of the pcregrep command. If the C++ wrapper library is build, three +test programs called pcrecpp_unittest, pcre_scanner_unittest, and +pcre_stringpiece_unittest are provided. + +Both the scripts and all the program tests are run if you obey "make runtest", +"make check", or "make test". For other systems, see the instructions in +NON-UNIX-USE. + +The RunTest script runs the pcretest test program (which is documented in its +own man page) on each of the testinput files (in the testdata directory) in +turn, and compares the output with the contents of the corresponding testoutput +file. A file called testtry is used to hold the main output from pcretest +(testsavedregex is also used as a working file). To run pcretest on just one of +the test files, give its number as an argument to RunTest, for example: RunTest 2 @@ -222,13 +319,13 @@ bug in PCRE. The third set of tests checks pcre_maketables(), the facility for building a set of character tables for a specific locale and using them instead of the -default tables. The tests make use of the "fr" (French) locale. Before running -the test, the script checks for the presence of this locale by running the -"locale" command. If that command fails, or if it doesn't include "fr" in the -list of available locales, the third test cannot be run, and a comment is -output to say why. If running this test produces instances of the error +default tables. The tests make use of the "fr_FR" (French) locale. Before +running the test, the script checks for the presence of this locale by running +the "locale" command. If that command fails, or if it doesn't include "fr_FR" +in the list of available locales, the third test cannot be run, and a comment +is output to say why. If running this test produces instances of the error - ** Failed to set locale "fr" + ** Failed to set locale "fr_FR" in the comparison output, it means that locale is not available on your system, despite being listed by "locale". This does not mean that PCRE is broken. @@ -239,19 +336,28 @@ running "configure". This file can be also fed directly to the perltest script, provided you are running Perl 5.8 or higher. (For Perl 5.6, a small patch, commented in the script, can be be used.) -The fifth and final file tests error handling with UTF-8 encoding, and internal -UTF-8 features of PCRE that are not relevant to Perl. +The fifth test checks error handling with UTF-8 encoding, and internal UTF-8 +features of PCRE that are not relevant to Perl. + +The sixth and test checks the support for Unicode character properties. It it +not run automatically unless PCRE is built with Unicode property support. To to +this you must set --enable-unicode-properties when running "configure". + +The seventh, eighth, and ninth tests check the pcre_dfa_exec() alternative +matching function, in non-UTF-8 mode, UTF-8 mode, and UTF-8 mode with Unicode +property support, respectively. The eighth and ninth tests are not run +automatically unless PCRE is build with the relevant support. Character tables ---------------- -PCRE uses four tables for manipulating and identifying characters. The final -argument of the pcre_compile() function is a pointer to a block of memory -containing the concatenated tables. A call to pcre_maketables() can be used to -generate a set of tables in the current locale. If the final argument for -pcre_compile() is passed as NULL, a set of default tables that is built into -the binary is used. +PCRE uses four tables for manipulating and identifying characters whose values +are less than 256. The final argument of the pcre_compile() function is a +pointer to a block of memory containing the concatenated tables. A call to +pcre_maketables() can be used to generate a set of tables in the current +locale. If the final argument for pcre_compile() is passed as NULL, a set of +default tables that is built into the binary is used. The source file called chartables.c contains the default set of tables. This is not supplied in the distribution, but is built by the program dftables @@ -291,18 +397,47 @@ The distribution should contain the following files: headers: dftables.c auxiliary program for building chartables.c - get.c ) - maketables.c ) - study.c ) source of - pcre.c ) the functions + pcreposix.c ) - printint.c ) + pcre_compile.c ) + pcre_config.c ) + pcre_dfa_exec.c ) + pcre_exec.c ) + pcre_fullinfo.c ) + pcre_get.c ) sources for the functions in the library, + pcre_globals.c ) and some internal functions that they use + pcre_info.c ) + pcre_maketables.c ) + pcre_ord2utf8.c ) + pcre_printint.c ) + pcre_study.c ) + pcre_tables.c ) + pcre_try_flipped.c ) + pcre_ucp_findchar.c ) + pcre_valid_utf8.c ) + pcre_version.c ) + pcre_xclass.c ) + + ucp_findchar.c ) + ucp.h ) source for the code that is used for + ucpinternal.h ) Unicode property handling + ucptable.c ) + ucptypetable.c ) + pcre.in "source" for the header for the external API; pcre.h is built from this by "configure" pcreposix.h header for the external POSIX wrapper API - internal.h header for internal use + pcre_internal.h header for internal use config.in template for config.h, which is built by configure + pcrecpp.h.in "source" for the header file for the C++ wrapper + pcrecpp.cc ) + pcre_scanner.cc ) source for the C++ wrapper library + + pcre_stringpiece.h.in "source" for pcre_stringpiece.h, the header for the + C++ stringpiece functions + pcre_stringpiece.cc source for the C++ stringpiece functions + (B) Auxiliary files: AUTHORS information about the author of PCRE @@ -315,6 +450,7 @@ The distribution should contain the following files: NON-UNIX-USE notes on building PCRE on non-Unix systems README this file RunTest.in template for a Unix shell script for running tests + RunGrepTest.in template for a Unix shell script for pcregrep tests config.guess ) files used by libtool, config.sub ) used only when building a shared library configure a configuring shell script (built by autoconf) @@ -327,31 +463,32 @@ The distribution should contain the following files: doc/pcretest.txt plain text documentation of test program doc/perltest.txt plain text documentation of Perl test program install-sh a shell script for installing files + libpcre.pc.in "source" for libpcre.pc for pkg-config ltmain.sh file used to build a libtool script + mkinstalldirs script for making install directories pcretest.c comprehensive test program pcredemo.c simple demonstration of coding calls to PCRE perltest Perl test program pcregrep.c source of a grep utility that uses PCRE pcre-config.in source of script which retains PCRE information - testdata/testinput1 test data, compatible with Perl - testdata/testinput2 test data for error messages and non-Perl things - testdata/testinput3 test data for locale-specific tests - testdata/testinput4 test data for UTF-8 tests compatible with Perl - testdata/testinput5 test data for other UTF-8 tests - testdata/testoutput1 test results corresponding to testinput1 - testdata/testoutput2 test results corresponding to testinput2 - testdata/testoutput3 test results corresponding to testinput3 - testdata/testoutput4 test results corresponding to testinput4 - testdata/testoutput5 test results corresponding to testinput5 + pcrecpp_unittest.c ) + pcre_scanner_unittest.c ) test programs for the C++ wrapper + pcre_stringpiece_unittest.c ) + testdata/testinput* test data for main library tests + testdata/testoutput* expected test results + testdata/grep* input and output for pcregrep tests (C) Auxiliary files for Win32 DLL - dll.mk + libpcre.def + libpcreposix.def pcre.def (D) Auxiliary file for VPASCAL makevp.bat -Philip Hazel -February 2003 +Philip Hazel +Email local part: ph10 +Email domain: cam.ac.uk +August 2005 diff --git a/libpcre/RunTest.in b/libpcre/RunTest.in deleted file mode 100755 index dd5a02fb60..0000000000 --- a/libpcre/RunTest.in +++ /dev/null @@ -1,135 +0,0 @@ -#! /bin/sh - -# This file is generated by configure from RunTest.in. Make any changes -# to that file. - -# Run PCRE tests - -cf=diff -testdata=@top_srcdir@/testdata - -# Select which tests to run; if no selection, run all - -do1=no -do2=no -do3=no -do4=no -do5=no - -while [ $# -gt 0 ] ; do - case $1 in - 1) do1=yes;; - 2) do2=yes;; - 3) do3=yes;; - 4) do4=yes;; - 5) do5=yes;; - *) echo "Unknown test number $1"; exit 1;; - esac - shift -done - -if [ "@UTF8@" = "" ] ; then - if [ $do4 = yes ] ; then - echo "Can't run test 4 because UFT8 support is not configured" - exit 1 - fi - if [ $do5 = yes ] ; then - echo "Can't run test 5 because UFT8 support is not configured" - exit 1 - fi -fi - -if [ $do1 = no -a $do2 = no -a $do3 = no -a $do4 = no -a\ - $do5 = no ] ; then - do1=yes - do2=yes - do3=yes - if [ "@UTF8@" != "" ] ; then do4=yes; fi - if [ "@UTF8@" != "" ] ; then do5=yes; fi -fi - -# Primary test, Perl-compatible - -if [ $do1 = yes ] ; then - echo "Testing main functionality (Perl compatible)" - ./pcretest $testdata/testinput1 testtry - if [ $? = 0 ] ; then - $cf testtry $testdata/testoutput1 - if [ $? != 0 ] ; then exit 1; fi - echo " " - else exit 1 - fi -fi - -# PCRE tests that are not Perl-compatible - API & error tests, mostly - -if [ $do2 = yes ] ; then - echo "Testing API and error handling (not Perl compatible)" - ./pcretest -i $testdata/testinput2 testtry - if [ $? = 0 ] ; then - $cf testtry $testdata/testoutput2 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi -fi - -if [ $do1 = yes -a $do2 = yes ] ; then - echo " " - echo "The two main tests ran OK" - echo " " -fi - -# Locale-specific tests, provided the "fr" locale is available - -if [ $do3 = yes ] ; then - locale -a | grep '^fr$' >/dev/null - if [ $? -eq 0 ] ; then - echo "Testing locale-specific features (using 'fr' locale)" - ./pcretest $testdata/testinput3 testtry - if [ $? = 0 ] ; then - $cf testtry $testdata/testoutput3 - if [ $? != 0 ] ; then - echo " " - echo "Locale test did not run entirely successfully." - echo "This usually means that there is a problem with the locale" - echo "settings rather than a bug in PCRE." - else - echo "Locale test ran OK" - fi - echo " " - else exit 1 - fi - else - echo "Cannot test locale-specific features - 'fr' locale not found," - echo "or the \"locale\" command is not available to check for it." - echo " " - fi -fi - -# Additional tests for UTF8 support - -if [ $do4 = yes ] ; then - echo "Testing UTF-8 support (Perl compatible)" - ./pcretest $testdata/testinput4 testtry - if [ $? = 0 ] ; then - $cf testtry $testdata/testoutput4 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi - echo "UTF8 test ran OK" - echo " " -fi - -if [ $do5 = yes ] ; then - echo "Testing API and internals for UTF-8 support (not Perl compatible)" - ./pcretest $testdata/testinput5 testtry - if [ $? = 0 ] ; then - $cf testtry $testdata/testoutput5 - if [ $? != 0 ] ; then exit 1; fi - else exit 1 - fi - echo "UTF8 internals test ran OK" - echo " " -fi - -# End diff --git a/libpcre/aclocal.m4 b/libpcre/aclocal.m4 deleted file mode 100644 index c586d32449..0000000000 --- a/libpcre/aclocal.m4 +++ /dev/null @@ -1,3337 +0,0 @@ -# aclocal.m4 generated automatically by aclocal 1.6.2 -*- Autoconf -*- - -# Copyright 1996, 1997, 1998, 1999, 2000, 2001, 2002 -# Free Software Foundation, Inc. -# This file is free software; the Free Software Foundation -# gives unlimited permission to copy and/or distribute it, -# with or without modifications, as long as this notice is preserved. - -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY, to the extent permitted by law; without -# even the implied warranty of MERCHANTABILITY or FITNESS FOR A -# PARTICULAR PURPOSE. - -# libtool.m4 - Configure libtool for the host system. -*-Shell-script-*- - -# serial 46 AC_PROG_LIBTOOL -AC_DEFUN([AC_PROG_LIBTOOL], -[AC_REQUIRE([AC_LIBTOOL_SETUP])dnl - -# This can be used to rebuild libtool when needed -LIBTOOL_DEPS="$ac_aux_dir/ltmain.sh" - -# Always use our own libtool. -LIBTOOL='$(SHELL) $(top_builddir)/libtool' -AC_SUBST(LIBTOOL)dnl - -# Prevent multiple expansion -define([AC_PROG_LIBTOOL], []) -]) - -AC_DEFUN([AC_LIBTOOL_SETUP], -[AC_PREREQ(2.13)dnl -AC_REQUIRE([AC_ENABLE_SHARED])dnl -AC_REQUIRE([AC_ENABLE_STATIC])dnl -AC_REQUIRE([AC_ENABLE_FAST_INSTALL])dnl -AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -AC_REQUIRE([AC_PROG_CC])dnl -AC_REQUIRE([AC_PROG_LD])dnl -AC_REQUIRE([AC_PROG_LD_RELOAD_FLAG])dnl -AC_REQUIRE([AC_PROG_NM])dnl -AC_REQUIRE([AC_PROG_LN_S])dnl -AC_REQUIRE([AC_DEPLIBS_CHECK_METHOD])dnl -AC_REQUIRE([AC_OBJEXT])dnl -AC_REQUIRE([AC_EXEEXT])dnl -dnl - -_LT_AC_PROG_ECHO_BACKSLASH -# Only perform the check for file, if the check method requires it -case $deplibs_check_method in -file_magic*) - if test "$file_magic_cmd" = '$MAGIC_CMD'; then - AC_PATH_MAGIC - fi - ;; -esac - -AC_CHECK_TOOL(RANLIB, ranlib, :) -AC_CHECK_TOOL(STRIP, strip, :) - -ifdef([AC_PROVIDE_AC_LIBTOOL_DLOPEN], enable_dlopen=yes, enable_dlopen=no) -ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL], -enable_win32_dll=yes, enable_win32_dll=no) - -AC_ARG_ENABLE(libtool-lock, - [ --disable-libtool-lock avoid locking (might break parallel builds)]) -test "x$enable_libtool_lock" != xno && enable_libtool_lock=yes - -# Some flags need to be propagated to the compiler or linker for good -# libtool support. -case $host in -*-*-irix6*) - # Find out which ABI we are using. - echo '[#]line __oline__ "configure"' > conftest.$ac_ext - if AC_TRY_EVAL(ac_compile); then - case `/usr/bin/file conftest.$ac_objext` in - *32-bit*) - LD="${LD-ld} -32" - ;; - *N32*) - LD="${LD-ld} -n32" - ;; - *64-bit*) - LD="${LD-ld} -64" - ;; - esac - fi - rm -rf conftest* - ;; - -*-*-sco3.2v5*) - # On SCO OpenServer 5, we need -belf to get full-featured binaries. - SAVE_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -belf" - AC_CACHE_CHECK([whether the C compiler needs -belf], lt_cv_cc_needs_belf, - [AC_LANG_SAVE - AC_LANG_C - AC_TRY_LINK([],[],[lt_cv_cc_needs_belf=yes],[lt_cv_cc_needs_belf=no]) - AC_LANG_RESTORE]) - if test x"$lt_cv_cc_needs_belf" != x"yes"; then - # this is probably gcc 2.8.0, egcs 1.0 or newer; no need for -belf - CFLAGS="$SAVE_CFLAGS" - fi - ;; - -ifdef([AC_PROVIDE_AC_LIBTOOL_WIN32_DLL], -[*-*-cygwin* | *-*-mingw* | *-*-pw32*) - AC_CHECK_TOOL(DLLTOOL, dlltool, false) - AC_CHECK_TOOL(AS, as, false) - AC_CHECK_TOOL(OBJDUMP, objdump, false) - - # recent cygwin and mingw systems supply a stub DllMain which the user - # can override, but on older systems we have to supply one - AC_CACHE_CHECK([if libtool should supply DllMain function], lt_cv_need_dllmain, - [AC_TRY_LINK([], - [extern int __attribute__((__stdcall__)) DllMain(void*, int, void*); - DllMain (0, 0, 0);], - [lt_cv_need_dllmain=no],[lt_cv_need_dllmain=yes])]) - - case $host/$CC in - *-*-cygwin*/gcc*-mno-cygwin*|*-*-mingw*) - # old mingw systems require "-dll" to link a DLL, while more recent ones - # require "-mdll" - SAVE_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -mdll" - AC_CACHE_CHECK([how to link DLLs], lt_cv_cc_dll_switch, - [AC_TRY_LINK([], [], [lt_cv_cc_dll_switch=-mdll],[lt_cv_cc_dll_switch=-dll])]) - CFLAGS="$SAVE_CFLAGS" ;; - *-*-cygwin* | *-*-pw32*) - # cygwin systems need to pass --dll to the linker, and not link - # crt.o which will require a WinMain@16 definition. - lt_cv_cc_dll_switch="-Wl,--dll -nostartfiles" ;; - esac - ;; - ]) -esac - -_LT_AC_LTCONFIG_HACK - -]) - -# _LT_AC_CHECK_DLFCN -# -------------------- -AC_DEFUN(_LT_AC_CHECK_DLFCN, -[AC_CHECK_HEADERS(dlfcn.h) -])# _LT_AC_CHECK_DLFCN - -# AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE -# --------------------------------- -AC_DEFUN([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE], -[AC_REQUIRE([AC_CANONICAL_HOST]) -AC_REQUIRE([AC_PROG_NM]) -AC_REQUIRE([AC_OBJEXT]) -# Check for command to grab the raw symbol name followed by C symbol from nm. -AC_MSG_CHECKING([command to parse $NM output]) -AC_CACHE_VAL([lt_cv_sys_global_symbol_pipe], [dnl - -# These are sane defaults that work on at least a few old systems. -# [They come from Ultrix. What could be older than Ultrix?!! ;)] - -# Character class describing NM global symbol codes. -[symcode='[BCDEGRST]'] - -# Regexp to match symbols that can be accessed directly from C. -[sympat='\([_A-Za-z][_A-Za-z0-9]*\)'] - -# Transform the above into a raw symbol and a C symbol. -symxfrm='\1 \2\3 \3' - -# Transform an extracted symbol line into a proper C declaration -lt_cv_global_symbol_to_cdecl="sed -n -e 's/^. .* \(.*\)$/extern char \1;/p'" - -# Define system-specific variables. -case $host_os in -aix*) - [symcode='[BCDT]'] - ;; -cygwin* | mingw* | pw32*) - [symcode='[ABCDGISTW]'] - ;; -hpux*) # Its linker distinguishes data from code symbols - lt_cv_global_symbol_to_cdecl="sed -n -e 's/^T .* \(.*\)$/extern char \1();/p' -e 's/^$symcode* .* \(.*\)$/extern char \1;/p'" - ;; -irix*) - [symcode='[BCDEGRST]'] - ;; -solaris* | sysv5*) - [symcode='[BDT]'] - ;; -sysv4) - [symcode='[DFNSTU]'] - ;; -esac - -# Handle CRLF in mingw tool chain -opt_cr= -case $host_os in -mingw*) - opt_cr=`echo 'x\{0,1\}' | tr x '\015'` # option cr in regexp - ;; -esac - -# If we're using GNU nm, then use its standard symbol codes. -if $NM -V 2>&1 | egrep '(GNU|with BFD)' > /dev/null; then - [symcode='[ABCDGISTW]'] -fi - -# Try without a prefix undercore, then with it. -for ac_symprfx in "" "_"; do - - # Write the raw and C identifiers. -[lt_cv_sys_global_symbol_pipe="sed -n -e 's/^.*[ ]\($symcode$symcode*\)[ ][ ]*\($ac_symprfx\)$sympat$opt_cr$/$symxfrm/p'"] - - # Check to see that the pipe works correctly. - pipe_works=no - rm -f conftest* - cat > conftest.$ac_ext < $nlist) && test -s "$nlist"; then - # Try sorting and uniquifying the output. - if sort "$nlist" | uniq > "$nlist"T; then - mv -f "$nlist"T "$nlist" - else - rm -f "$nlist"T - fi - - # Make sure that we snagged all the symbols we need. - if egrep ' nm_test_var$' "$nlist" >/dev/null; then - if egrep ' nm_test_func$' "$nlist" >/dev/null; then - cat < conftest.$ac_ext -#ifdef __cplusplus -extern "C" { -#endif - -EOF - # Now generate the symbol file. - eval "$lt_cv_global_symbol_to_cdecl"' < "$nlist" >> conftest.$ac_ext' - - cat <> conftest.$ac_ext -#if defined (__STDC__) && __STDC__ -# define lt_ptr_t void * -#else -# define lt_ptr_t char * -# define const -#endif - -/* The mapping between symbol names and symbols. */ -const struct { - const char *name; - lt_ptr_t address; -} -[lt_preloaded_symbols[] =] -{ -EOF - sed "s/^$symcode$symcode* \(.*\) \(.*\)$/ {\"\2\", (lt_ptr_t) \&\2},/" < "$nlist" >> conftest.$ac_ext - cat <<\EOF >> conftest.$ac_ext - {0, (lt_ptr_t) 0} -}; - -#ifdef __cplusplus -} -#endif -EOF - # Now try linking the two files. - mv conftest.$ac_objext conftstm.$ac_objext - save_LIBS="$LIBS" - save_CFLAGS="$CFLAGS" - LIBS="conftstm.$ac_objext" - CFLAGS="$CFLAGS$no_builtin_flag" - if AC_TRY_EVAL(ac_link) && test -s conftest; then - pipe_works=yes - fi - LIBS="$save_LIBS" - CFLAGS="$save_CFLAGS" - else - echo "cannot find nm_test_func in $nlist" >&AC_FD_CC - fi - else - echo "cannot find nm_test_var in $nlist" >&AC_FD_CC - fi - else - echo "cannot run $lt_cv_sys_global_symbol_pipe" >&AC_FD_CC - fi - else - echo "$progname: failed program was:" >&AC_FD_CC - cat conftest.$ac_ext >&5 - fi - rm -f conftest* conftst* - - # Do not use the global_symbol_pipe unless it works. - if test "$pipe_works" = yes; then - break - else - lt_cv_sys_global_symbol_pipe= - fi -done -]) -global_symbol_pipe="$lt_cv_sys_global_symbol_pipe" -if test -z "$lt_cv_sys_global_symbol_pipe"; then - global_symbol_to_cdecl= -else - global_symbol_to_cdecl="$lt_cv_global_symbol_to_cdecl" -fi -if test -z "$global_symbol_pipe$global_symbol_to_cdecl"; then - AC_MSG_RESULT(failed) -else - AC_MSG_RESULT(ok) -fi -]) # AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE - -# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR -# --------------------------------- -AC_DEFUN([_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR], -[# Find the correct PATH separator. Usually this is `:', but -# DJGPP uses `;' like DOS. -if test "X${PATH_SEPARATOR+set}" != Xset; then - UNAME=${UNAME-`uname 2>/dev/null`} - case X$UNAME in - *-DOS) lt_cv_sys_path_separator=';' ;; - *) lt_cv_sys_path_separator=':' ;; - esac -fi -])# _LT_AC_LIBTOOL_SYS_PATH_SEPARATOR - -# _LT_AC_PROG_ECHO_BACKSLASH -# -------------------------- -# Add some code to the start of the generated configure script which -# will find an echo command which doesn;t interpret backslashes. -AC_DEFUN([_LT_AC_PROG_ECHO_BACKSLASH], -[ifdef([AC_DIVERSION_NOTICE], [AC_DIVERT_PUSH(AC_DIVERSION_NOTICE)], - [AC_DIVERT_PUSH(NOTICE)]) -_LT_AC_LIBTOOL_SYS_PATH_SEPARATOR - -# Check that we are running under the correct shell. -SHELL=${CONFIG_SHELL-/bin/sh} - -case X$ECHO in -X*--fallback-echo) - # Remove one level of quotation (which was required for Make). - ECHO=`echo "$ECHO" | sed 's,\\\\\[$]\\[$]0,'[$]0','` - ;; -esac - -echo=${ECHO-echo} -if test "X[$]1" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. - shift -elif test "X[$]1" = X--fallback-echo; then - # Avoid inline document here, it may be left over - : -elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then - # Yippee, $echo works! - : -else - # Restart under the correct shell. - exec $SHELL "[$]0" --no-reexec ${1+"[$]@"} -fi - -if test "X[$]1" = X--fallback-echo; then - # used as fallback echo - shift - cat </dev/null && - echo_test_string="`eval $cmd`" && - (test "X$echo_test_string" = "X$echo_test_string") 2>/dev/null - then - break - fi - done -fi - -if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && - echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && - test "X$echo_testing_string" = "X$echo_test_string"; then - : -else - # The Solaris, AIX, and Digital Unix default echo programs unquote - # backslashes. This makes it impossible to quote backslashes using - # echo "$something" | sed 's/\\/\\\\/g' - # - # So, first we look for a working echo in the user's PATH. - - IFS="${IFS= }"; save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR}" - for dir in $PATH /usr/ucb; do - if (test -f $dir/echo || test -f $dir/echo$ac_exeext) && - test "X`($dir/echo '\t') 2>/dev/null`" = 'X\t' && - echo_testing_string=`($dir/echo "$echo_test_string") 2>/dev/null` && - test "X$echo_testing_string" = "X$echo_test_string"; then - echo="$dir/echo" - break - fi - done - IFS="$save_ifs" - - if test "X$echo" = Xecho; then - # We didn't find a better echo, so look for alternatives. - if test "X`(print -r '\t') 2>/dev/null`" = 'X\t' && - echo_testing_string=`(print -r "$echo_test_string") 2>/dev/null` && - test "X$echo_testing_string" = "X$echo_test_string"; then - # This shell has a builtin print -r that does the trick. - echo='print -r' - elif (test -f /bin/ksh || test -f /bin/ksh$ac_exeext) && - test "X$CONFIG_SHELL" != X/bin/ksh; then - # If we have ksh, try running configure again with it. - ORIGINAL_CONFIG_SHELL=${CONFIG_SHELL-/bin/sh} - export ORIGINAL_CONFIG_SHELL - CONFIG_SHELL=/bin/ksh - export CONFIG_SHELL - exec $CONFIG_SHELL "[$]0" --no-reexec ${1+"[$]@"} - else - # Try using printf. - echo='printf %s\n' - if test "X`($echo '\t') 2>/dev/null`" = 'X\t' && - echo_testing_string=`($echo "$echo_test_string") 2>/dev/null` && - test "X$echo_testing_string" = "X$echo_test_string"; then - # Cool, printf works - : - elif echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && - test "X$echo_testing_string" = 'X\t' && - echo_testing_string=`($ORIGINAL_CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && - test "X$echo_testing_string" = "X$echo_test_string"; then - CONFIG_SHELL=$ORIGINAL_CONFIG_SHELL - export CONFIG_SHELL - SHELL="$CONFIG_SHELL" - export SHELL - echo="$CONFIG_SHELL [$]0 --fallback-echo" - elif echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo '\t') 2>/dev/null` && - test "X$echo_testing_string" = 'X\t' && - echo_testing_string=`($CONFIG_SHELL "[$]0" --fallback-echo "$echo_test_string") 2>/dev/null` && - test "X$echo_testing_string" = "X$echo_test_string"; then - echo="$CONFIG_SHELL [$]0 --fallback-echo" - else - # maybe with a smaller string... - prev=: - - for cmd in 'echo test' 'sed 2q "[$]0"' 'sed 10q "[$]0"' 'sed 20q "[$]0"' 'sed 50q "[$]0"'; do - if (test "X$echo_test_string" = "X`eval $cmd`") 2>/dev/null - then - break - fi - prev="$cmd" - done - - if test "$prev" != 'sed 50q "[$]0"'; then - echo_test_string=`eval $prev` - export echo_test_string - exec ${ORIGINAL_CONFIG_SHELL-${CONFIG_SHELL-/bin/sh}} "[$]0" ${1+"[$]@"} - else - # Oops. We lost completely, so just stick with echo. - echo=echo - fi - fi - fi - fi -fi -fi - -# Copy echo and quote the copy suitably for passing to libtool from -# the Makefile, instead of quoting the original, which is used later. -ECHO=$echo -if test "X$ECHO" = "X$CONFIG_SHELL [$]0 --fallback-echo"; then - ECHO="$CONFIG_SHELL \\\$\[$]0 --fallback-echo" -fi - -AC_SUBST(ECHO) -AC_DIVERT_POP -])# _LT_AC_PROG_ECHO_BACKSLASH - -# _LT_AC_TRY_DLOPEN_SELF (ACTION-IF-TRUE, ACTION-IF-TRUE-W-USCORE, -# ACTION-IF-FALSE, ACTION-IF-CROSS-COMPILING) -# ------------------------------------------------------------------ -AC_DEFUN(_LT_AC_TRY_DLOPEN_SELF, -[if test "$cross_compiling" = yes; then : - [$4] -else - AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl - lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 - lt_status=$lt_dlunknown - cat > conftest.$ac_ext < -#endif - -#include - -#ifdef RTLD_GLOBAL -# define LT_DLGLOBAL RTLD_GLOBAL -#else -# ifdef DL_GLOBAL -# define LT_DLGLOBAL DL_GLOBAL -# else -# define LT_DLGLOBAL 0 -# endif -#endif - -/* We may have to define LT_DLLAZY_OR_NOW in the command line if we - find out it does not work in some platform. */ -#ifndef LT_DLLAZY_OR_NOW -# ifdef RTLD_LAZY -# define LT_DLLAZY_OR_NOW RTLD_LAZY -# else -# ifdef DL_LAZY -# define LT_DLLAZY_OR_NOW DL_LAZY -# else -# ifdef RTLD_NOW -# define LT_DLLAZY_OR_NOW RTLD_NOW -# else -# ifdef DL_NOW -# define LT_DLLAZY_OR_NOW DL_NOW -# else -# define LT_DLLAZY_OR_NOW 0 -# endif -# endif -# endif -# endif -#endif - -#ifdef __cplusplus -extern "C" void exit (int); -#endif - -void fnord() { int i=42;} -int main () -{ - void *self = dlopen (0, LT_DLGLOBAL|LT_DLLAZY_OR_NOW); - int status = $lt_dlunknown; - - if (self) - { - if (dlsym (self,"fnord")) status = $lt_dlno_uscore; - else if (dlsym( self,"_fnord")) status = $lt_dlneed_uscore; - /* dlclose (self); */ - } - - exit (status); -}] -EOF - if AC_TRY_EVAL(ac_link) && test -s conftest${ac_exeext} 2>/dev/null; then - (./conftest; exit; ) 2>/dev/null - lt_status=$? - case x$lt_status in - x$lt_dlno_uscore) $1 ;; - x$lt_dlneed_uscore) $2 ;; - x$lt_unknown|x*) $3 ;; - esac - else : - # compilation failed - $3 - fi -fi -rm -fr conftest* -])# _LT_AC_TRY_DLOPEN_SELF - -# AC_LIBTOOL_DLOPEN_SELF -# ------------------- -AC_DEFUN(AC_LIBTOOL_DLOPEN_SELF, -[if test "x$enable_dlopen" != xyes; then - enable_dlopen=unknown - enable_dlopen_self=unknown - enable_dlopen_self_static=unknown -else - lt_cv_dlopen=no - lt_cv_dlopen_libs= - - case $host_os in - beos*) - lt_cv_dlopen="load_add_on" - lt_cv_dlopen_libs= - lt_cv_dlopen_self=yes - ;; - - cygwin* | mingw* | pw32*) - lt_cv_dlopen="LoadLibrary" - lt_cv_dlopen_libs= - ;; - - *) - AC_CHECK_LIB(dl, dlopen, [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-ldl"], - [AC_CHECK_FUNC(dlopen, lt_cv_dlopen="dlopen", - [AC_CHECK_FUNC(shl_load, lt_cv_dlopen="shl_load", - [AC_CHECK_LIB(svld, dlopen, - [lt_cv_dlopen="dlopen" lt_cv_dlopen_libs="-lsvld"], - [AC_CHECK_LIB(dld, shl_load, - [lt_cv_dlopen="dld_link" lt_cv_dlopen_libs="-dld"]) - ]) - ]) - ]) - ]) - ;; - esac - - if test "x$lt_cv_dlopen" != xno; then - enable_dlopen=yes - else - enable_dlopen=no - fi - - case $lt_cv_dlopen in - dlopen) - save_CPPFLAGS="$CPPFLAGS" - AC_REQUIRE([_LT_AC_CHECK_DLFCN])dnl - test "x$ac_cv_header_dlfcn_h" = xyes && CPPFLAGS="$CPPFLAGS -DHAVE_DLFCN_H" - - save_LDFLAGS="$LDFLAGS" - eval LDFLAGS=\"\$LDFLAGS $export_dynamic_flag_spec\" - - save_LIBS="$LIBS" - LIBS="$lt_cv_dlopen_libs $LIBS" - - AC_CACHE_CHECK([whether a program can dlopen itself], - lt_cv_dlopen_self, [dnl - _LT_AC_TRY_DLOPEN_SELF( - lt_cv_dlopen_self=yes, lt_cv_dlopen_self=yes, - lt_cv_dlopen_self=no, lt_cv_dlopen_self=cross) - ]) - - if test "x$lt_cv_dlopen_self" = xyes; then - LDFLAGS="$LDFLAGS $link_static_flag" - AC_CACHE_CHECK([whether a statically linked program can dlopen itself], - lt_cv_dlopen_self_static, [dnl - _LT_AC_TRY_DLOPEN_SELF( - lt_cv_dlopen_self_static=yes, lt_cv_dlopen_self_static=yes, - lt_cv_dlopen_self_static=no, lt_cv_dlopen_self_static=cross) - ]) - fi - - CPPFLAGS="$save_CPPFLAGS" - LDFLAGS="$save_LDFLAGS" - LIBS="$save_LIBS" - ;; - esac - - case $lt_cv_dlopen_self in - yes|no) enable_dlopen_self=$lt_cv_dlopen_self ;; - *) enable_dlopen_self=unknown ;; - esac - - case $lt_cv_dlopen_self_static in - yes|no) enable_dlopen_self_static=$lt_cv_dlopen_self_static ;; - *) enable_dlopen_self_static=unknown ;; - esac -fi -])# AC_LIBTOOL_DLOPEN_SELF - -AC_DEFUN([_LT_AC_LTCONFIG_HACK], -[AC_REQUIRE([AC_LIBTOOL_SYS_GLOBAL_SYMBOL_PIPE])dnl -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -Xsed='sed -e s/^X//' -[sed_quote_subst='s/\([\\"\\`$\\\\]\)/\\\1/g'] - -# Same as above, but do not quote variable references. -[double_quote_subst='s/\([\\"\\`\\\\]\)/\\\1/g'] - -# Sed substitution to delay expansion of an escaped shell variable in a -# double_quote_subst'ed string. -delay_variable_subst='s/\\\\\\\\\\\$/\\\\\\$/g' - -# Constants: -rm="rm -f" - -# Global variables: -default_ofile=libtool -can_build_shared=yes - -# All known linkers require a `.a' archive for static linking (except M$VC, -# which needs '.lib'). -libext=a -ltmain="$ac_aux_dir/ltmain.sh" -ofile="$default_ofile" -with_gnu_ld="$lt_cv_prog_gnu_ld" -need_locks="$enable_libtool_lock" - -old_CC="$CC" -old_CFLAGS="$CFLAGS" - -# Set sane defaults for various variables -test -z "$AR" && AR=ar -test -z "$AR_FLAGS" && AR_FLAGS=cru -test -z "$AS" && AS=as -test -z "$CC" && CC=cc -test -z "$DLLTOOL" && DLLTOOL=dlltool -test -z "$LD" && LD=ld -test -z "$LN_S" && LN_S="ln -s" -test -z "$MAGIC_CMD" && MAGIC_CMD=file -test -z "$NM" && NM=nm -test -z "$OBJDUMP" && OBJDUMP=objdump -test -z "$RANLIB" && RANLIB=: -test -z "$STRIP" && STRIP=: -test -z "$ac_objext" && ac_objext=o - -if test x"$host" != x"$build"; then - ac_tool_prefix=${host_alias}- -else - ac_tool_prefix= -fi - -# Transform linux* to *-*-linux-gnu*, to support old configure scripts. -case $host_os in -linux-gnu*) ;; -linux*) host=`echo $host | sed 's/^\(.*-.*-linux\)\(.*\)$/\1-gnu\2/'` -esac - -case $host_os in -aix3*) - # AIX sometimes has problems with the GCC collect2 program. For some - # reason, if we set the COLLECT_NAMES environment variable, the problems - # vanish in a puff of smoke. - if test "X${COLLECT_NAMES+set}" != Xset; then - COLLECT_NAMES= - export COLLECT_NAMES - fi - ;; -esac - -# Determine commands to create old-style static archives. -old_archive_cmds='$AR $AR_FLAGS $oldlib$oldobjs$old_deplibs' -old_postinstall_cmds='chmod 644 $oldlib' -old_postuninstall_cmds= - -if test -n "$RANLIB"; then - old_archive_cmds="$old_archive_cmds~\$RANLIB \$oldlib" - old_postinstall_cmds="\$RANLIB \$oldlib~$old_postinstall_cmds" -fi - -# Allow CC to be a program name with arguments. -set dummy $CC -compiler="[$]2" - -AC_MSG_CHECKING([for objdir]) -rm -f .libs 2>/dev/null -mkdir .libs 2>/dev/null -if test -d .libs; then - objdir=.libs -else - # MS-DOS does not allow filenames that begin with a dot. - objdir=_libs -fi -rmdir .libs 2>/dev/null -AC_MSG_RESULT($objdir) - - -AC_ARG_WITH(pic, -[ --with-pic try to use only PIC/non-PIC objects [default=use both]], -pic_mode="$withval", pic_mode=default) -test -z "$pic_mode" && pic_mode=default - -# We assume here that the value for lt_cv_prog_cc_pic will not be cached -# in isolation, and that seeing it set (from the cache) indicates that -# the associated values are set (in the cache) correctly too. -AC_MSG_CHECKING([for $compiler option to produce PIC]) -AC_CACHE_VAL(lt_cv_prog_cc_pic, -[ lt_cv_prog_cc_pic= - lt_cv_prog_cc_shlib= - lt_cv_prog_cc_wl= - lt_cv_prog_cc_static= - lt_cv_prog_cc_no_builtin= - lt_cv_prog_cc_can_build_shared=$can_build_shared - - if test "$GCC" = yes; then - lt_cv_prog_cc_wl='-Wl,' - lt_cv_prog_cc_static='-static' - - case $host_os in - aix*) - # Below there is a dirty hack to force normal static linking with -ldl - # The problem is because libdl dynamically linked with both libc and - # libC (AIX C++ library), which obviously doesn't included in libraries - # list by gcc. This cause undefined symbols with -static flags. - # This hack allows C programs to be linked with "-static -ldl", but - # we not sure about C++ programs. - lt_cv_prog_cc_static="$lt_cv_prog_cc_static ${lt_cv_prog_cc_wl}-lC" - ;; - amigaos*) - # FIXME: we need at least 68020 code to build shared libraries, but - # adding the `-m68020' flag to GCC prevents building anything better, - # like `-m68040'. - lt_cv_prog_cc_pic='-m68020 -resident32 -malways-restore-a4' - ;; - beos* | irix5* | irix6* | osf3* | osf4* | osf5*) - # PIC is the default for these OSes. - ;; - darwin* | rhapsody*) - # PIC is the default on this platform - # Common symbols not allowed in MH_DYLIB files - lt_cv_prog_cc_pic='-fno-common' - ;; - cygwin* | mingw* | pw32* | os2*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - lt_cv_prog_cc_pic='-DDLL_EXPORT' - ;; - sysv4*MP*) - if test -d /usr/nec; then - lt_cv_prog_cc_pic=-Kconform_pic - fi - ;; - *) - lt_cv_prog_cc_pic='-fPIC' - ;; - esac - else - # PORTME Check for PIC flags for the system compiler. - case $host_os in - aix3* | aix4* | aix5*) - # All AIX code is PIC. - if test "$host_cpu" = ia64; then - # AIX 5 now supports IA64 processor - lt_cv_prog_cc_static='-Bstatic' - lt_cv_prog_cc_wl='-Wl,' - else - lt_cv_prog_cc_static='-bnso -bI:/lib/syscalls.exp' - fi - ;; - - hpux9* | hpux10* | hpux11*) - # Is there a better lt_cv_prog_cc_static that works with the bundled CC? - lt_cv_prog_cc_wl='-Wl,' - lt_cv_prog_cc_static="${lt_cv_prog_cc_wl}-a ${lt_cv_prog_cc_wl}archive" - lt_cv_prog_cc_pic='+Z' - ;; - - irix5* | irix6*) - lt_cv_prog_cc_wl='-Wl,' - lt_cv_prog_cc_static='-non_shared' - # PIC (with -KPIC) is the default. - ;; - - cygwin* | mingw* | pw32* | os2*) - # This hack is so that the source file can tell whether it is being - # built for inclusion in a dll (and should export symbols for example). - lt_cv_prog_cc_pic='-DDLL_EXPORT' - ;; - - newsos6) - lt_cv_prog_cc_pic='-KPIC' - lt_cv_prog_cc_static='-Bstatic' - ;; - - osf3* | osf4* | osf5*) - # All OSF/1 code is PIC. - lt_cv_prog_cc_wl='-Wl,' - lt_cv_prog_cc_static='-non_shared' - ;; - - sco3.2v5*) - lt_cv_prog_cc_pic='-Kpic' - lt_cv_prog_cc_static='-dn' - lt_cv_prog_cc_shlib='-belf' - ;; - - solaris*) - lt_cv_prog_cc_pic='-KPIC' - lt_cv_prog_cc_static='-Bstatic' - lt_cv_prog_cc_wl='-Wl,' - ;; - - sunos4*) - lt_cv_prog_cc_pic='-PIC' - lt_cv_prog_cc_static='-Bstatic' - lt_cv_prog_cc_wl='-Qoption ld ' - ;; - - sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - lt_cv_prog_cc_pic='-KPIC' - lt_cv_prog_cc_static='-Bstatic' - if test "x$host_vendor" = xsni; then - lt_cv_prog_cc_wl='-LD' - else - lt_cv_prog_cc_wl='-Wl,' - fi - ;; - - uts4*) - lt_cv_prog_cc_pic='-pic' - lt_cv_prog_cc_static='-Bstatic' - ;; - - sysv4*MP*) - if test -d /usr/nec ;then - lt_cv_prog_cc_pic='-Kconform_pic' - lt_cv_prog_cc_static='-Bstatic' - fi - ;; - - *) - lt_cv_prog_cc_can_build_shared=no - ;; - esac - fi -]) -if test -z "$lt_cv_prog_cc_pic"; then - AC_MSG_RESULT([none]) -else - AC_MSG_RESULT([$lt_cv_prog_cc_pic]) - - # Check to make sure the pic_flag actually works. - AC_MSG_CHECKING([if $compiler PIC flag $lt_cv_prog_cc_pic works]) - AC_CACHE_VAL(lt_cv_prog_cc_pic_works, [dnl - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS $lt_cv_prog_cc_pic -DPIC" - AC_TRY_COMPILE([], [], [dnl - case $host_os in - hpux9* | hpux10* | hpux11*) - # On HP-UX, both CC and GCC only warn that PIC is supported... then - # they create non-PIC objects. So, if there were any warnings, we - # assume that PIC is not supported. - if test -s conftest.err; then - lt_cv_prog_cc_pic_works=no - else - lt_cv_prog_cc_pic_works=yes - fi - ;; - *) - lt_cv_prog_cc_pic_works=yes - ;; - esac - ], [dnl - lt_cv_prog_cc_pic_works=no - ]) - CFLAGS="$save_CFLAGS" - ]) - - if test "X$lt_cv_prog_cc_pic_works" = Xno; then - lt_cv_prog_cc_pic= - lt_cv_prog_cc_can_build_shared=no - else - lt_cv_prog_cc_pic=" $lt_cv_prog_cc_pic" - fi - - AC_MSG_RESULT([$lt_cv_prog_cc_pic_works]) -fi - -# Check for any special shared library compilation flags. -if test -n "$lt_cv_prog_cc_shlib"; then - AC_MSG_WARN([\`$CC' requires \`$lt_cv_prog_cc_shlib' to build shared libraries]) - if echo "$old_CC $old_CFLAGS " | [egrep -e "[ ]$lt_cv_prog_cc_shlib[ ]"] >/dev/null; then : - else - AC_MSG_WARN([add \`$lt_cv_prog_cc_shlib' to the CC or CFLAGS env variable and reconfigure]) - lt_cv_prog_cc_can_build_shared=no - fi -fi - -AC_MSG_CHECKING([if $compiler static flag $lt_cv_prog_cc_static works]) -AC_CACHE_VAL([lt_cv_prog_cc_static_works], [dnl - lt_cv_prog_cc_static_works=no - save_LDFLAGS="$LDFLAGS" - LDFLAGS="$LDFLAGS $lt_cv_prog_cc_static" - AC_TRY_LINK([], [], [lt_cv_prog_cc_static_works=yes]) - LDFLAGS="$save_LDFLAGS" -]) - -# Belt *and* braces to stop my trousers falling down: -test "X$lt_cv_prog_cc_static_works" = Xno && lt_cv_prog_cc_static= -AC_MSG_RESULT([$lt_cv_prog_cc_static_works]) - -pic_flag="$lt_cv_prog_cc_pic" -special_shlib_compile_flags="$lt_cv_prog_cc_shlib" -wl="$lt_cv_prog_cc_wl" -link_static_flag="$lt_cv_prog_cc_static" -no_builtin_flag="$lt_cv_prog_cc_no_builtin" -can_build_shared="$lt_cv_prog_cc_can_build_shared" - - -# Check to see if options -o and -c are simultaneously supported by compiler -AC_MSG_CHECKING([if $compiler supports -c -o file.$ac_objext]) -AC_CACHE_VAL([lt_cv_compiler_c_o], [ -$rm -r conftest 2>/dev/null -mkdir conftest -cd conftest -echo "int some_variable = 0;" > conftest.$ac_ext -mkdir out -# According to Tom Tromey, Ian Lance Taylor reported there are C compilers -# that will create temporary files in the current directory regardless of -# the output directory. Thus, making CWD read-only will cause this test -# to fail, enabling locking or at least warning the user not to do parallel -# builds. -chmod -w . -save_CFLAGS="$CFLAGS" -CFLAGS="$CFLAGS -o out/conftest2.$ac_objext" -compiler_c_o=no -if { (eval echo configure:__oline__: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>out/conftest.err; } && test -s out/conftest2.$ac_objext; then - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s out/conftest.err; then - lt_cv_compiler_c_o=no - else - lt_cv_compiler_c_o=yes - fi -else - # Append any errors to the config.log. - cat out/conftest.err 1>&AC_FD_CC - lt_cv_compiler_c_o=no -fi -CFLAGS="$save_CFLAGS" -chmod u+w . -$rm conftest* out/* -rmdir out -cd .. -rmdir conftest -$rm -r conftest 2>/dev/null -]) -compiler_c_o=$lt_cv_compiler_c_o -AC_MSG_RESULT([$compiler_c_o]) - -if test x"$compiler_c_o" = x"yes"; then - # Check to see if we can write to a .lo - AC_MSG_CHECKING([if $compiler supports -c -o file.lo]) - AC_CACHE_VAL([lt_cv_compiler_o_lo], [ - lt_cv_compiler_o_lo=no - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -c -o conftest.lo" - AC_TRY_COMPILE([], [int some_variable = 0;], [dnl - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s conftest.err; then - lt_cv_compiler_o_lo=no - else - lt_cv_compiler_o_lo=yes - fi - ]) - CFLAGS="$save_CFLAGS" - ]) - compiler_o_lo=$lt_cv_compiler_o_lo - AC_MSG_RESULT([$compiler_o_lo]) -else - compiler_o_lo=no -fi - -# Check to see if we can do hard links to lock some files if needed -hard_links="nottested" -if test "$compiler_c_o" = no && test "$need_locks" != no; then - # do not overwrite the value of need_locks provided by the user - AC_MSG_CHECKING([if we can lock with hard links]) - hard_links=yes - $rm conftest* - ln conftest.a conftest.b 2>/dev/null && hard_links=no - touch conftest.a - ln conftest.a conftest.b 2>&5 || hard_links=no - ln conftest.a conftest.b 2>/dev/null && hard_links=no - AC_MSG_RESULT([$hard_links]) - if test "$hard_links" = no; then - AC_MSG_WARN([\`$CC' does not support \`-c -o', so \`make -j' may be unsafe]) - need_locks=warn - fi -else - need_locks=no -fi - -if test "$GCC" = yes; then - # Check to see if options -fno-rtti -fno-exceptions are supported by compiler - AC_MSG_CHECKING([if $compiler supports -fno-rtti -fno-exceptions]) - echo "int some_variable = 0;" > conftest.$ac_ext - save_CFLAGS="$CFLAGS" - CFLAGS="$CFLAGS -fno-rtti -fno-exceptions -c conftest.$ac_ext" - compiler_rtti_exceptions=no - AC_TRY_COMPILE([], [int some_variable = 0;], [dnl - # The compiler can only warn and ignore the option if not recognized - # So say no if there are warnings - if test -s conftest.err; then - compiler_rtti_exceptions=no - else - compiler_rtti_exceptions=yes - fi - ]) - CFLAGS="$save_CFLAGS" - AC_MSG_RESULT([$compiler_rtti_exceptions]) - - if test "$compiler_rtti_exceptions" = "yes"; then - no_builtin_flag=' -fno-builtin -fno-rtti -fno-exceptions' - else - no_builtin_flag=' -fno-builtin' - fi -fi - -# See if the linker supports building shared libraries. -AC_MSG_CHECKING([whether the linker ($LD) supports shared libraries]) - -allow_undefined_flag= -no_undefined_flag= -need_lib_prefix=unknown -need_version=unknown -# when you set need_version to no, make sure it does not cause -set_version -# flags to be left without arguments -archive_cmds= -archive_expsym_cmds= -old_archive_from_new_cmds= -old_archive_from_expsyms_cmds= -export_dynamic_flag_spec= -whole_archive_flag_spec= -thread_safe_flag_spec= -hardcode_into_libs=no -hardcode_libdir_flag_spec= -hardcode_libdir_separator= -hardcode_direct=no -hardcode_minus_L=no -hardcode_shlibpath_var=unsupported -runpath_var= -link_all_deplibs=unknown -always_export_symbols=no -export_symbols_cmds='$NM $libobjs $convenience | $global_symbol_pipe | sed '\''s/.* //'\'' | sort | uniq > $export_symbols' -# include_expsyms should be a list of space-separated symbols to be *always* -# included in the symbol list -include_expsyms= -# exclude_expsyms can be an egrep regular expression of symbols to exclude -# it will be wrapped by ` (' and `)$', so one must not match beginning or -# end of line. Example: `a|bc|.*d.*' will exclude the symbols `a' and `bc', -# as well as any symbol that contains `d'. -exclude_expsyms="_GLOBAL_OFFSET_TABLE_" -# Although _GLOBAL_OFFSET_TABLE_ is a valid symbol C name, most a.out -# platforms (ab)use it in PIC code, but their linkers get confused if -# the symbol is explicitly referenced. Since portable code cannot -# rely on this symbol name, it's probably fine to never include it in -# preloaded symbol tables. -extract_expsyms_cmds= - -case $host_os in -cygwin* | mingw* | pw32* ) - # FIXME: the MSVC++ port hasn't been tested in a loooong time - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - if test "$GCC" != yes; then - with_gnu_ld=no - fi - ;; - -esac - -ld_shlibs=yes -if test "$with_gnu_ld" = yes; then - # If archive_cmds runs LD, not CC, wlarc should be empty - wlarc='${wl}' - - # See if GNU ld supports shared libraries. - case $host_os in - aix3* | aix4* | aix5*) - # On AIX, the GNU linker is very broken - # Note:Check GNU linker on AIX 5-IA64 when/if it becomes available. - ld_shlibs=no - cat <&2 - -*** Warning: the GNU linker, at least up to release 2.9.1, is reported -*** to be unable to reliably create shared libraries on AIX. -*** Therefore, libtool is disabling shared libraries support. If you -*** really care for shared libraries, you may want to modify your PATH -*** so that a non-GNU linker is found, and then restart. - -EOF - ;; - - amigaos*) - archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - - # Samuel A. Falvo II reports - # that the semantics of dynamic libraries on AmigaOS, at least up - # to version 4, is to share data among multiple programs linked - # with the same dynamic library. Since this doesn't match the - # behavior of shared libraries on other platforms, we can use - # them. - ld_shlibs=no - ;; - - beos*) - if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - allow_undefined_flag=unsupported - # Joseph Beckenbach says some releases of gcc - # support --undefined. This deserves some investigation. FIXME - archive_cmds='$CC -nostart $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - else - ld_shlibs=no - fi - ;; - - cygwin* | mingw* | pw32*) - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - hardcode_libdir_flag_spec='-L$libdir' - allow_undefined_flag=unsupported - always_export_symbols=yes - - extract_expsyms_cmds='test -f $output_objdir/impgen.c || \ - sed -e "/^# \/\* impgen\.c starts here \*\//,/^# \/\* impgen.c ends here \*\// { s/^# //;s/^# *$//; p; }" -e d < $''0 > $output_objdir/impgen.c~ - test -f $output_objdir/impgen.exe || (cd $output_objdir && \ - if test "x$HOST_CC" != "x" ; then $HOST_CC -o impgen impgen.c ; \ - else $CC -o impgen impgen.c ; fi)~ - $output_objdir/impgen $dir/$soroot > $output_objdir/$soname-def' - - old_archive_from_expsyms_cmds='$DLLTOOL --as=$AS --dllname $soname --def $output_objdir/$soname-def --output-lib $output_objdir/$newlib' - - # cygwin and mingw dlls have different entry points and sets of symbols - # to exclude. - # FIXME: what about values for MSVC? - dll_entry=__cygwin_dll_entry@12 - dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12~ - case $host_os in - mingw*) - # mingw values - dll_entry=_DllMainCRTStartup@12 - dll_exclude_symbols=DllMain@12,DllMainCRTStartup@12,DllEntryPoint@12~ - ;; - esac - - # mingw and cygwin differ, and it's simplest to just exclude the union - # of the two symbol sets. - dll_exclude_symbols=DllMain@12,_cygwin_dll_entry@12,_cygwin_noncygwin_dll_entry@12,DllMainCRTStartup@12,DllEntryPoint@12 - - # recent cygwin and mingw systems supply a stub DllMain which the user - # can override, but on older systems we have to supply one (in ltdll.c) - if test "x$lt_cv_need_dllmain" = "xyes"; then - ltdll_obj='$output_objdir/$soname-ltdll.'"$ac_objext " - ltdll_cmds='test -f $output_objdir/$soname-ltdll.c || sed -e "/^# \/\* ltdll\.c starts here \*\//,/^# \/\* ltdll.c ends here \*\// { s/^# //; p; }" -e d < [$]0 > $output_objdir/$soname-ltdll.c~ - test -f $output_objdir/$soname-ltdll.$ac_objext || (cd $output_objdir && $CC -c $soname-ltdll.c)~' - else - ltdll_obj= - ltdll_cmds= - fi - - # Extract the symbol export list from an `--export-all' def file, - # then regenerate the def file from the symbol export list, so that - # the compiled dll only exports the symbol export list. - # Be careful not to strip the DATA tag left be newer dlltools. - export_symbols_cmds="$ltdll_cmds"' - $DLLTOOL --export-all --exclude-symbols '$dll_exclude_symbols' --output-def $output_objdir/$soname-def '$ltdll_obj'$libobjs $convenience~ - [sed -e "1,/EXPORTS/d" -e "s/ @ [0-9]*//" -e "s/ *;.*$//"] < $output_objdir/$soname-def > $export_symbols' - - # If the export-symbols file already is a .def file (1st line - # is EXPORTS), use it as is. - # If DATA tags from a recent dlltool are present, honour them! - archive_expsym_cmds='if test "x`head -1 $export_symbols`" = xEXPORTS; then - cp $export_symbols $output_objdir/$soname-def; - else - echo EXPORTS > $output_objdir/$soname-def; - _lt_hint=1; - cat $export_symbols | while read symbol; do - set dummy \$symbol; - case \[$]# in - 2) echo " \[$]2 @ \$_lt_hint ; " >> $output_objdir/$soname-def;; - *) echo " \[$]2 @ \$_lt_hint \[$]3 ; " >> $output_objdir/$soname-def;; - esac; - _lt_hint=`expr 1 + \$_lt_hint`; - done; - fi~ - '"$ltdll_cmds"' - $CC -Wl,--base-file,$output_objdir/$soname-base '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ - $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp~ - $CC -Wl,--base-file,$output_objdir/$soname-base $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags~ - $DLLTOOL --as=$AS --dllname $soname --exclude-symbols '$dll_exclude_symbols' --def $output_objdir/$soname-def --base-file $output_objdir/$soname-base --output-exp $output_objdir/$soname-exp --output-lib $output_objdir/$libname.dll.a~ - $CC $output_objdir/$soname-exp '$lt_cv_cc_dll_switch' -Wl,-e,'$dll_entry' -o $output_objdir/$soname '$ltdll_obj'$libobjs $deplibs $compiler_flags' - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then - archive_cmds='$LD -Bshareable $libobjs $deplibs $linker_flags -o $lib' - wlarc= - else - archive_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - archive_expsym_cmds='$CC -shared -nodefaultlibs $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - fi - ;; - - solaris* | sysv5*) - if $LD -v 2>&1 | egrep 'BFD 2\.8' > /dev/null; then - ld_shlibs=no - cat <&2 - -*** Warning: The releases 2.8.* of the GNU linker cannot reliably -*** create shared libraries on Solaris systems. Therefore, libtool -*** is disabling shared libraries support. We urge you to upgrade GNU -*** binutils to release 2.9.1 or newer. Another option is to modify -*** your PATH or compiler configuration so that the native linker is -*** used, and then restart. - -EOF - elif $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - ld_shlibs=no - fi - ;; - - sunos4*) - archive_cmds='$LD -assert pure-text -Bshareable -o $lib $libobjs $deplibs $linker_flags' - wlarc= - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - *) - if $LD --help 2>&1 | egrep ': supported targets:.* elf' > /dev/null; then - archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname -o $lib' - archive_expsym_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname $wl$soname ${wl}-retain-symbols-file $wl$export_symbols -o $lib' - else - ld_shlibs=no - fi - ;; - esac - - if test "$ld_shlibs" = yes; then - runpath_var=LD_RUN_PATH - hardcode_libdir_flag_spec='${wl}--rpath ${wl}$libdir' - export_dynamic_flag_spec='${wl}--export-dynamic' - case $host_os in - cygwin* | mingw* | pw32*) - # dlltool doesn't understand --whole-archive et. al. - whole_archive_flag_spec= - ;; - *) - # ancient GNU ld didn't support --whole-archive et. al. - if $LD --help 2>&1 | egrep 'no-whole-archive' > /dev/null; then - whole_archive_flag_spec="$wlarc"'--whole-archive$convenience '"$wlarc"'--no-whole-archive' - else - whole_archive_flag_spec= - fi - ;; - esac - fi -else - # PORTME fill in a description of your system's linker (not GNU ld) - case $host_os in - aix3*) - allow_undefined_flag=unsupported - always_export_symbols=yes - archive_expsym_cmds='$LD -o $output_objdir/$soname $libobjs $deplibs $linker_flags -bE:$export_symbols -T512 -H512 -bM:SRE~$AR $AR_FLAGS $lib $output_objdir/$soname' - # Note: this linker hardcodes the directories in LIBPATH if there - # are no directories specified by -L. - hardcode_minus_L=yes - if test "$GCC" = yes && test -z "$link_static_flag"; then - # Neither direct hardcoding nor static linking is supported with a - # broken collect2. - hardcode_direct=unsupported - fi - ;; - - aix4* | aix5*) - # When large executables or shared objects are built, AIX ld can - # have problems creating the table of contents. If linking a library - # or program results in "error TOC overflow" add -mminimal-toc to - # CXXFLAGS/CFLAGS for g++/gcc. In the cases where that is not - # enough to fix the problem, add -Wl,-bbigtoc to LDFLAGS. - - archive_cmds='' - hardcode_libdir_separator=':' - if test "$GCC" = yes; then - collect2name=`${CC} -print-prog-name=collect2` - if test -f "$collect2name" && \ - strings "$collect2name" | grep resolve_lib_name >/dev/null - then - # We have reworked collect2 - hardcode_direct=yes - else - # We have old collect2 - hardcode_direct=unsupported - # It fails to find uninstalled libraries when the uninstalled - # path is not listed in the libpath. Setting hardcode_minus_L - # to unsupported forces relinking - hardcode_minus_L=yes - hardcode_libdir_flag_spec='-L$libdir' - hardcode_libdir_separator= - fi - shared_flag='-shared' - else - if test "$host_cpu" = ia64; then - shared_flag='-G' - else - shared_flag='${wl}-bM:SRE' - fi - hardcode_direct=yes - fi - - if test "$host_cpu" = ia64; then - # On IA64, the linker does run time linking by default, so we don't - # have to do anything special. - aix_use_runtimelinking=no - exp_sym_flag='-Bexport' - no_entry_flag="" - else - # Test if we are trying to use run time linking, or normal AIX style linking. - # If -brtl is somewhere in LDFLAGS, we need to do run time linking. - aix_use_runtimelinking=no - for ld_flag in $LDFLAGS; do - if (test $ld_flag = "-brtl" || test $ld_flag = "-Wl,-brtl" ); then - aix_use_runtimelinking=yes - break - fi - done - exp_sym_flag='-bexport' - no_entry_flag='-bnoentry' - fi - # It seems that -bexpall can do strange things, so it is better to - # generate a list of symbols to export. - always_export_symbols=yes - if test "$aix_use_runtimelinking" = yes; then - hardcode_libdir_flag_spec='${wl}-blibpath:$libdir:/usr/lib:/lib' - allow_undefined_flag=' -Wl,-G' - archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" - else - if test "$host_cpu" = ia64; then - hardcode_libdir_flag_spec='${wl}-R $libdir:/usr/lib:/lib' - allow_undefined_flag="-znodefs" - archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname ${wl}-h$soname $libobjs $deplibs $compiler_flags ${wl}${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols" - else - hardcode_libdir_flag_spec='${wl}-bnolibpath ${wl}-blibpath:$libdir:/usr/lib:/lib' - # Warning - without using the other run time loading flags, -berok will - # link without error, but may produce a broken library. - allow_undefined_flag='${wl}-berok' - # This is a bit strange, but is similar to how AIX traditionally builds - # it's shared libraries. - archive_expsym_cmds="\$CC $shared_flag"' -o $output_objdir/$soname $libobjs $deplibs $compiler_flags ${allow_undefined_flag} '"\${wl}$no_entry_flag \${wl}$exp_sym_flag:\$export_symbols"' ~$AR -crlo $objdir/$libname$release.a $objdir/$soname' - fi - fi - ;; - - amigaos*) - archive_cmds='$rm $output_objdir/a2ixlibrary.data~$echo "#define NAME $libname" > $output_objdir/a2ixlibrary.data~$echo "#define LIBRARY_ID 1" >> $output_objdir/a2ixlibrary.data~$echo "#define VERSION $major" >> $output_objdir/a2ixlibrary.data~$echo "#define REVISION $revision" >> $output_objdir/a2ixlibrary.data~$AR $AR_FLAGS $lib $libobjs~$RANLIB $lib~(cd $output_objdir && a2ixlibrary -32)' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - # see comment about different semantics on the GNU ld section - ld_shlibs=no - ;; - - cygwin* | mingw* | pw32*) - # When not using gcc, we currently assume that we are using - # Microsoft Visual C++. - # hardcode_libdir_flag_spec is actually meaningless, as there is - # no search path for DLLs. - hardcode_libdir_flag_spec=' ' - allow_undefined_flag=unsupported - # Tell ltmain to make .lib files, not .a files. - libext=lib - # FIXME: Setting linknames here is a bad hack. - archive_cmds='$CC -o $lib $libobjs $compiler_flags `echo "$deplibs" | sed -e '\''s/ -lc$//'\''` -link -dll~linknames=' - # The linker will automatically build a .lib file if we build a DLL. - old_archive_from_new_cmds='true' - # FIXME: Should let the user specify the lib program. - old_archive_cmds='lib /OUT:$oldlib$oldobjs$old_deplibs' - fix_srcfile_path='`cygpath -w "$srcfile"`' - ;; - - darwin* | rhapsody*) - allow_undefined_flag='-undefined suppress' - # FIXME: Relying on posixy $() will cause problems for - # cross-compilation, but unfortunately the echo tests do not - # yet detect zsh echo's removal of \ escapes. - archive_cmds='$CC $(test .$module = .yes && echo -bundle || echo -dynamiclib) $allow_undefined_flag -o $lib $libobjs $deplibs$linkopts -install_name $rpath/$soname $(test -n "$verstring" -a x$verstring != x0.0 && echo $verstring)' - # We need to add '_' to the symbols in $export_symbols first - #archive_expsym_cmds="$archive_cmds"' && strip -s $export_symbols' - hardcode_direct=yes - hardcode_shlibpath_var=no - whole_archive_flag_spec='-all_load $convenience' - ;; - - freebsd1*) - ld_shlibs=no - ;; - - # FreeBSD 2.2.[012] allows us to include c++rt0.o to get C++ constructor - # support. Future versions do this automatically, but an explicit c++rt0.o - # does not break anything, and helps significantly (at the cost of a little - # extra space). - freebsd2.2*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags /usr/lib/c++rt0.o' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - # Unfortunately, older versions of FreeBSD 2 do not have this feature. - freebsd2*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - # FreeBSD 3 and greater uses gcc -shared to do shared libraries. - freebsd*) - archive_cmds='$CC -shared -o $lib $libobjs $deplibs $compiler_flags' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - hpux9* | hpux10* | hpux11*) - case $host_os in - hpux9*) archive_cmds='$rm $output_objdir/$soname~$LD -b +b $install_libdir -o $output_objdir/$soname $libobjs $deplibs $linker_flags~test $output_objdir/$soname = $lib || mv $output_objdir/$soname $lib' ;; - *) archive_cmds='$LD -b +h $soname +b $install_libdir -o $lib $libobjs $deplibs $linker_flags' ;; - esac - hardcode_libdir_flag_spec='${wl}+b ${wl}$libdir' - hardcode_libdir_separator=: - hardcode_direct=yes - hardcode_minus_L=yes # Not in the search PATH, but as the default - # location of the library. - export_dynamic_flag_spec='${wl}-E' - ;; - - irix5* | irix6*) - if test "$GCC" = yes; then - archive_cmds='$CC -shared $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - else - archive_cmds='$LD -shared $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' - fi - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - link_all_deplibs=yes - ;; - - netbsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' # a.out - else - archive_cmds='$LD -shared -o $lib $libobjs $deplibs $linker_flags' # ELF - fi - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - newsos6) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_direct=yes - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - hardcode_shlibpath_var=no - ;; - - openbsd*) - archive_cmds='$LD -Bshareable -o $lib $libobjs $deplibs $linker_flags' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_direct=yes - hardcode_shlibpath_var=no - ;; - - os2*) - hardcode_libdir_flag_spec='-L$libdir' - hardcode_minus_L=yes - allow_undefined_flag=unsupported - archive_cmds='$echo "LIBRARY $libname INITINSTANCE" > $output_objdir/$libname.def~$echo "DESCRIPTION \"$libname\"" >> $output_objdir/$libname.def~$echo DATA >> $output_objdir/$libname.def~$echo " SINGLE NONSHARED" >> $output_objdir/$libname.def~$echo EXPORTS >> $output_objdir/$libname.def~emxexp $libobjs >> $output_objdir/$libname.def~$CC -Zdll -Zcrtdll -o $lib $libobjs $deplibs $compiler_flags $output_objdir/$libname.def' - old_archive_from_new_cmds='emximp -o $output_objdir/$libname.a $output_objdir/$libname.def' - ;; - - osf3*) - if test "$GCC" = yes; then - allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' - archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - else - allow_undefined_flag=' -expect_unresolved \*' - archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' - fi - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - hardcode_libdir_separator=: - ;; - - osf4* | osf5*) # as osf3* with the addition of -msym flag - if test "$GCC" = yes; then - allow_undefined_flag=' ${wl}-expect_unresolved ${wl}\*' - archive_cmds='$CC -shared${allow_undefined_flag} $libobjs $deplibs $compiler_flags ${wl}-msym ${wl}-soname ${wl}$soname `test -n "$verstring" && echo ${wl}-set_version ${wl}$verstring` ${wl}-update_registry ${wl}${output_objdir}/so_locations -o $lib' - hardcode_libdir_flag_spec='${wl}-rpath ${wl}$libdir' - else - allow_undefined_flag=' -expect_unresolved \*' - archive_cmds='$LD -shared${allow_undefined_flag} $libobjs $deplibs $linker_flags -msym -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${output_objdir}/so_locations -o $lib' - archive_expsym_cmds='for i in `cat $export_symbols`; do printf "-exported_symbol " >> $lib.exp; echo "\$i" >> $lib.exp; done; echo "-hidden">> $lib.exp~ - $LD -shared${allow_undefined_flag} -input $lib.exp $linker_flags $libobjs $deplibs -soname $soname `test -n "$verstring" && echo -set_version $verstring` -update_registry ${objdir}/so_locations -o $lib~$rm $lib.exp' - - #Both c and cxx compiler support -rpath directly - hardcode_libdir_flag_spec='-rpath $libdir' - fi - hardcode_libdir_separator=: - ;; - - sco3.2v5*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - hardcode_shlibpath_var=no - runpath_var=LD_RUN_PATH - hardcode_runpath_var=yes - ;; - - solaris*) - no_undefined_flag=' -z defs' - # $CC -shared without GNU ld will not create a library from C++ - # object files and a static libstdc++, better avoid it by now - archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' - archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ - $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' - hardcode_libdir_flag_spec='-R$libdir' - hardcode_shlibpath_var=no - case $host_os in - [solaris2.[0-5] | solaris2.[0-5].*]) ;; - *) # Supported since Solaris 2.6 (maybe 2.5.1?) - whole_archive_flag_spec='-z allextract$convenience -z defaultextract' ;; - esac - link_all_deplibs=yes - ;; - - sunos4*) - if test "x$host_vendor" = xsequent; then - # Use $CC to link under sequent, because it throws in some extra .o - # files that make .init and .fini sections work. - archive_cmds='$CC -G ${wl}-h $soname -o $lib $libobjs $deplibs $compiler_flags' - else - archive_cmds='$LD -assert pure-text -Bstatic -o $lib $libobjs $deplibs $linker_flags' - fi - hardcode_libdir_flag_spec='-L$libdir' - hardcode_direct=yes - hardcode_minus_L=yes - hardcode_shlibpath_var=no - ;; - - sysv4) - if test "x$host_vendor" = xsno; then - archive_cmds='$LD -G -Bsymbolic -h $soname -o $lib $libobjs $deplibs $linkopts' - hardcode_direct=yes # is this really true??? - else - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=no #Motorola manual says yes, but my tests say they lie - fi - runpath_var='LD_RUN_PATH' - hardcode_shlibpath_var=no - ;; - - sysv4.3*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - hardcode_shlibpath_var=no - export_dynamic_flag_spec='-Bexport' - ;; - - sysv5*) - no_undefined_flag=' -z text' - # $CC -shared without GNU ld will not create a library from C++ - # object files and a static libstdc++, better avoid it by now - archive_cmds='$LD -G${allow_undefined_flag} -h $soname -o $lib $libobjs $deplibs $linker_flags' - archive_expsym_cmds='$echo "{ global:" > $lib.exp~cat $export_symbols | sed -e "s/\(.*\)/\1;/" >> $lib.exp~$echo "local: *; };" >> $lib.exp~ - $LD -G${allow_undefined_flag} -M $lib.exp -h $soname -o $lib $libobjs $deplibs $linker_flags~$rm $lib.exp' - hardcode_libdir_flag_spec= - hardcode_shlibpath_var=no - runpath_var='LD_RUN_PATH' - ;; - - uts4*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_shlibpath_var=no - ;; - - dgux*) - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - hardcode_libdir_flag_spec='-L$libdir' - hardcode_shlibpath_var=no - ;; - - sysv4*MP*) - if test -d /usr/nec; then - archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linker_flags' - hardcode_shlibpath_var=no - runpath_var=LD_RUN_PATH - hardcode_runpath_var=yes - ld_shlibs=yes - fi - ;; - - sysv4.2uw2*) - archive_cmds='$LD -G -o $lib $libobjs $deplibs $linker_flags' - hardcode_direct=yes - hardcode_minus_L=no - hardcode_shlibpath_var=no - hardcode_runpath_var=yes - runpath_var=LD_RUN_PATH - ;; - - sysv5uw7* | unixware7*) - no_undefined_flag='${wl}-z ${wl}text' - if test "$GCC" = yes; then - archive_cmds='$CC -shared ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' - else - archive_cmds='$CC -G ${wl}-h ${wl}$soname -o $lib $libobjs $deplibs $compiler_flags' - fi - runpath_var='LD_RUN_PATH' - hardcode_shlibpath_var=no - ;; - - *) - ld_shlibs=no - ;; - esac -fi -AC_MSG_RESULT([$ld_shlibs]) -test "$ld_shlibs" = no && can_build_shared=no - -# Check hardcoding attributes. -AC_MSG_CHECKING([how to hardcode library paths into programs]) -hardcode_action= -if test -n "$hardcode_libdir_flag_spec" || \ - test -n "$runpath_var"; then - - # We can hardcode non-existant directories. - if test "$hardcode_direct" != no && - # If the only mechanism to avoid hardcoding is shlibpath_var, we - # have to relink, otherwise we might link with an installed library - # when we should be linking with a yet-to-be-installed one - ## test "$hardcode_shlibpath_var" != no && - test "$hardcode_minus_L" != no; then - # Linking always hardcodes the temporary library directory. - hardcode_action=relink - else - # We can link without hardcoding, and we can hardcode nonexisting dirs. - hardcode_action=immediate - fi -else - # We cannot hardcode anything, or else we can only hardcode existing - # directories. - hardcode_action=unsupported -fi -AC_MSG_RESULT([$hardcode_action]) - -striplib= -old_striplib= -AC_MSG_CHECKING([whether stripping libraries is possible]) -if test -n "$STRIP" && $STRIP -V 2>&1 | grep "GNU strip" >/dev/null; then - test -z "$old_striplib" && old_striplib="$STRIP --strip-debug" - test -z "$striplib" && striplib="$STRIP --strip-unneeded" - AC_MSG_RESULT([yes]) -else - AC_MSG_RESULT([no]) -fi - -reload_cmds='$LD$reload_flag -o $output$reload_objs' -test -z "$deplibs_check_method" && deplibs_check_method=unknown - -# PORTME Fill in your ld.so characteristics -AC_MSG_CHECKING([dynamic linker characteristics]) -library_names_spec= -libname_spec='lib$name' -soname_spec= -postinstall_cmds= -postuninstall_cmds= -finish_cmds= -finish_eval= -shlibpath_var= -shlibpath_overrides_runpath=unknown -version_type=none -dynamic_linker="$host_os ld.so" -sys_lib_dlsearch_path_spec="/lib /usr/lib" -sys_lib_search_path_spec="/lib /usr/lib /usr/local/lib" - -case $host_os in -aix3*) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix $libname.a' - shlibpath_var=LIBPATH - - # AIX has no versioning support, so we append a major version to the name. - soname_spec='${libname}${release}.so$major' - ;; - -aix4* | aix5*) - version_type=linux - if test "$host_cpu" = ia64; then - # AIX 5 supports IA64 - library_names_spec='${libname}${release}.so$major ${libname}${release}.so$versuffix $libname.so' - shlibpath_var=LD_LIBRARY_PATH - else - # With GCC up to 2.95.x, collect2 would create an import file - # for dependence libraries. The import file would start with - # the line `#! .'. This would cause the generated library to - # depend on `.', always an invalid library. This was fixed in - # development snapshots of GCC prior to 3.0. - case $host_os in - [ aix4 | aix4.[01] | aix4.[01].*)] - if { echo '#if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 97)' - echo ' yes ' - echo '#endif'; } | ${CC} -E - | grep yes > /dev/null; then - : - else - can_build_shared=no - fi - ;; - esac - # AIX (on Power*) has no versioning support, so currently we can not hardcode correct - # soname into executable. Probably we can add versioning support to - # collect2, so additional links can be useful in future. - if test "$aix_use_runtimelinking" = yes; then - # If using run time linking (on AIX 4.2 or later) use lib.so instead of - # lib.a to let people know that these are not typical AIX shared libraries. - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - else - # We preserve .a as extension for shared libraries through AIX4.2 - # and later when we are not doing run time linking. - library_names_spec='${libname}${release}.a $libname.a' - soname_spec='${libname}${release}.so$major' - fi - shlibpath_var=LIBPATH - deplibs_check_method=pass_all - fi - ;; - -amigaos*) - library_names_spec='$libname.ixlibrary $libname.a' - # Create ${libname}_ixlibrary.a entries in /sys/libs. - finish_eval='for lib in `ls $libdir/*.ixlibrary 2>/dev/null`; do libname=`$echo "X$lib" | [$Xsed -e '\''s%^.*/\([^/]*\)\.ixlibrary$%\1%'\'']`; test $rm /sys/libs/${libname}_ixlibrary.a; $show "(cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a)"; (cd /sys/libs && $LN_S $lib ${libname}_ixlibrary.a) || exit 1; done' - ;; - -beos*) - library_names_spec='${libname}.so' - dynamic_linker="$host_os ld.so" - shlibpath_var=LIBRARY_PATH - ;; - -bsdi4*) - version_type=linux - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - sys_lib_search_path_spec="/shlib /usr/lib /usr/X11/lib /usr/contrib/lib /lib /usr/local/lib" - sys_lib_dlsearch_path_spec="/shlib /usr/lib /usr/local/lib" - export_dynamic_flag_spec=-rdynamic - # the default ld.so.conf also contains /usr/contrib/lib and - # /usr/X11R6/lib (/usr/X11 is a link to /usr/X11R6), but let us allow - # libtool to hard-code these into programs - ;; - -cygwin* | mingw* | pw32*) - version_type=windows - need_version=no - need_lib_prefix=no - case $GCC,$host_os in - yes,cygwin*) - library_names_spec='$libname.dll.a' - soname_spec='`echo ${libname} | sed -e 's/^lib/cyg/'``echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll' - postinstall_cmds='dlpath=`bash 2>&1 -c '\''. $dir/${file}i;echo \$dlname'\''`~ - dldir=$destdir/`dirname \$dlpath`~ - test -d \$dldir || mkdir -p \$dldir~ - $install_prog .libs/$dlname \$dldir/$dlname' - postuninstall_cmds='dldll=`bash 2>&1 -c '\''. $file; echo \$dlname'\''`~ - dlpath=$dir/\$dldll~ - $rm \$dlpath' - ;; - yes,mingw*) - library_names_spec='${libname}`echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll' - sys_lib_search_path_spec=`$CC -print-search-dirs | grep "^libraries:" | sed -e "s/^libraries://" -e "s/;/ /g"` - ;; - yes,pw32*) - library_names_spec='`echo ${libname} | sed -e 's/^lib/pw/'``echo ${release} | sed -e 's/[.]/-/g'`${versuffix}.dll' - ;; - *) - library_names_spec='${libname}`echo ${release} | [sed -e 's/[.]/-/g']`${versuffix}.dll $libname.lib' - ;; - esac - dynamic_linker='Win32 ld.exe' - # FIXME: first we should search . and the directory the executable is in - shlibpath_var=PATH - ;; - -darwin* | rhapsody*) - dynamic_linker="$host_os dyld" - version_type=darwin - need_lib_prefix=no - need_version=no - # FIXME: Relying on posixy $() will cause problems for - # cross-compilation, but unfortunately the echo tests do not - # yet detect zsh echo's removal of \ escapes. - library_names_spec='${libname}${release}${versuffix}.$(test .$module = .yes && echo so || echo dylib) ${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib) ${libname}.$(test .$module = .yes && echo so || echo dylib)' - soname_spec='${libname}${release}${major}.$(test .$module = .yes && echo so || echo dylib)' - shlibpath_overrides_runpath=yes - shlibpath_var=DYLD_LIBRARY_PATH - ;; - -freebsd1*) - dynamic_linker=no - ;; - -freebsd*) - objformat=`test -x /usr/bin/objformat && /usr/bin/objformat || echo aout` - version_type=freebsd-$objformat - case $version_type in - freebsd-elf*) - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' - need_version=no - need_lib_prefix=no - ;; - freebsd-*) - library_names_spec='${libname}${release}.so$versuffix $libname.so$versuffix' - need_version=yes - ;; - esac - shlibpath_var=LD_LIBRARY_PATH - case $host_os in - freebsd2*) - shlibpath_overrides_runpath=yes - ;; - *) - shlibpath_overrides_runpath=no - hardcode_into_libs=yes - ;; - esac - ;; - -gnu*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so${major} ${libname}.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - hardcode_into_libs=yes - ;; - -hpux9* | hpux10* | hpux11*) - # Give a soname corresponding to the major version so that dld.sl refuses to - # link against other versions. - dynamic_linker="$host_os dld.sl" - version_type=sunos - need_lib_prefix=no - need_version=no - shlibpath_var=SHLIB_PATH - shlibpath_overrides_runpath=no # +s is required to enable SHLIB_PATH - library_names_spec='${libname}${release}.sl$versuffix ${libname}${release}.sl$major $libname.sl' - soname_spec='${libname}${release}.sl$major' - # HP-UX runs *really* slowly unless shared libraries are mode 555. - postinstall_cmds='chmod 555 $lib' - ;; - -irix5* | irix6*) - version_type=irix - need_lib_prefix=no - need_version=no - soname_spec='${libname}${release}.so$major' - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so $libname.so' - case $host_os in - irix5*) - libsuff= shlibsuff= - ;; - *) - case $LD in # libtool.m4 will add one of these switches to LD - *-32|*"-32 ") libsuff= shlibsuff= libmagic=32-bit;; - *-n32|*"-n32 ") libsuff=32 shlibsuff=N32 libmagic=N32;; - *-64|*"-64 ") libsuff=64 shlibsuff=64 libmagic=64-bit;; - *) libsuff= shlibsuff= libmagic=never-match;; - esac - ;; - esac - shlibpath_var=LD_LIBRARY${shlibsuff}_PATH - shlibpath_overrides_runpath=no - sys_lib_search_path_spec="/usr/lib${libsuff} /lib${libsuff} /usr/local/lib${libsuff}" - sys_lib_dlsearch_path_spec="/usr/lib${libsuff} /lib${libsuff}" - ;; - -# No shared lib support for Linux oldld, aout, or coff. -linux-gnuoldld* | linux-gnuaout* | linux-gnucoff*) - dynamic_linker=no - ;; - -# This must be Linux ELF. -linux-gnu*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -n $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=no - # This implies no fast_install, which is unacceptable. - # Some rework will be needed to allow for fast_install - # before this can be enabled. - hardcode_into_libs=yes - - # We used to test for /lib/ld.so.1 and disable shared libraries on - # powerpc, because MkLinux only supported shared libraries with the - # GNU dynamic linker. Since this was broken with cross compilers, - # most powerpc-linux boxes support dynamic linking these days and - # people can always --disable-shared, the test was removed, and we - # assume the GNU/Linux dynamic linker is in use. - dynamic_linker='GNU/Linux ld.so' - ;; - -netbsd*) - version_type=sunos - need_lib_prefix=no - need_version=no - if echo __ELF__ | $CC -E - | grep __ELF__ >/dev/null; then - library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - dynamic_linker='NetBSD (a.out) ld.so' - else - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major ${libname}${release}.so ${libname}.so' - soname_spec='${libname}${release}.so$major' - dynamic_linker='NetBSD ld.elf_so' - fi - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - ;; - -newsos6) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - ;; - -openbsd*) - version_type=sunos - if test "$with_gnu_ld" = yes; then - need_lib_prefix=no - need_version=no - fi - library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' - finish_cmds='PATH="\$PATH:/sbin" ldconfig -m $libdir' - shlibpath_var=LD_LIBRARY_PATH - ;; - -os2*) - libname_spec='$name' - need_lib_prefix=no - library_names_spec='$libname.dll $libname.a' - dynamic_linker='OS/2 ld.exe' - shlibpath_var=LIBPATH - ;; - -osf3* | osf4* | osf5*) - version_type=osf - need_version=no - soname_spec='${libname}${release}.so' - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so $libname.so' - shlibpath_var=LD_LIBRARY_PATH - sys_lib_search_path_spec="/usr/shlib /usr/ccs/lib /usr/lib/cmplrs/cc /usr/lib /usr/local/lib /var/shlib" - sys_lib_dlsearch_path_spec="$sys_lib_search_path_spec" - ;; - -sco3.2v5*) - version_type=osf - soname_spec='${libname}${release}.so$major' - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - shlibpath_var=LD_LIBRARY_PATH - ;; - -solaris*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - hardcode_into_libs=yes - # ldd complains unless libraries are executable - postinstall_cmds='chmod +x $lib' - ;; - -sunos4*) - version_type=sunos - library_names_spec='${libname}${release}.so$versuffix ${libname}.so$versuffix' - finish_cmds='PATH="\$PATH:/usr/etc" ldconfig $libdir' - shlibpath_var=LD_LIBRARY_PATH - shlibpath_overrides_runpath=yes - if test "$with_gnu_ld" = yes; then - need_lib_prefix=no - fi - need_version=yes - ;; - -sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - case $host_vendor in - sni) - shlibpath_overrides_runpath=no - ;; - motorola) - need_lib_prefix=no - need_version=no - shlibpath_overrides_runpath=no - sys_lib_search_path_spec='/lib /usr/lib /usr/ccs/lib' - ;; - esac - ;; - -uts4*) - version_type=linux - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -dgux*) - version_type=linux - need_lib_prefix=no - need_version=no - library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so' - soname_spec='${libname}${release}.so$major' - shlibpath_var=LD_LIBRARY_PATH - ;; - -sysv4*MP*) - if test -d /usr/nec ;then - version_type=linux - library_names_spec='$libname.so.$versuffix $libname.so.$major $libname.so' - soname_spec='$libname.so.$major' - shlibpath_var=LD_LIBRARY_PATH - fi - ;; - -*) - dynamic_linker=no - ;; -esac -AC_MSG_RESULT([$dynamic_linker]) -test "$dynamic_linker" = no && can_build_shared=no - -# Report the final consequences. -AC_MSG_CHECKING([if libtool supports shared libraries]) -AC_MSG_RESULT([$can_build_shared]) - -if test "$hardcode_action" = relink; then - # Fast installation is not supported - enable_fast_install=no -elif test "$shlibpath_overrides_runpath" = yes || - test "$enable_shared" = no; then - # Fast installation is not necessary - enable_fast_install=needless -fi - -variables_saved_for_relink="PATH $shlibpath_var $runpath_var" -if test "$GCC" = yes; then - variables_saved_for_relink="$variables_saved_for_relink GCC_EXEC_PREFIX COMPILER_PATH LIBRARY_PATH" -fi - -AC_LIBTOOL_DLOPEN_SELF - -if test "$enable_shared" = yes && test "$GCC" = yes; then - case $archive_cmds in - *'~'*) - # FIXME: we may have to deal with multi-command sequences. - ;; - '$CC '*) - # Test whether the compiler implicitly links with -lc since on some - # systems, -lgcc has to come before -lc. If gcc already passes -lc - # to ld, don't add -lc before -lgcc. - AC_MSG_CHECKING([whether -lc should be explicitly linked in]) - AC_CACHE_VAL([lt_cv_archive_cmds_need_lc], - [$rm conftest* - echo 'static int dummy;' > conftest.$ac_ext - - if AC_TRY_EVAL(ac_compile); then - soname=conftest - lib=conftest - libobjs=conftest.$ac_objext - deplibs= - wl=$lt_cv_prog_cc_wl - compiler_flags=-v - linker_flags=-v - verstring= - output_objdir=. - libname=conftest - save_allow_undefined_flag=$allow_undefined_flag - allow_undefined_flag= - if AC_TRY_EVAL(archive_cmds 2\>\&1 \| grep \" -lc \" \>/dev/null 2\>\&1) - then - lt_cv_archive_cmds_need_lc=no - else - lt_cv_archive_cmds_need_lc=yes - fi - allow_undefined_flag=$save_allow_undefined_flag - else - cat conftest.err 1>&5 - fi]) - AC_MSG_RESULT([$lt_cv_archive_cmds_need_lc]) - ;; - esac -fi -need_lc=${lt_cv_archive_cmds_need_lc-yes} - -# The second clause should only fire when bootstrapping the -# libtool distribution, otherwise you forgot to ship ltmain.sh -# with your package, and you will get complaints that there are -# no rules to generate ltmain.sh. -if test -f "$ltmain"; then - : -else - # If there is no Makefile yet, we rely on a make rule to execute - # `config.status --recheck' to rerun these tests and create the - # libtool script then. - test -f Makefile && make "$ltmain" -fi - -if test -f "$ltmain"; then - trap "$rm \"${ofile}T\"; exit 1" 1 2 15 - $rm -f "${ofile}T" - - echo creating $ofile - - # Now quote all the things that may contain metacharacters while being - # careful not to overquote the AC_SUBSTed values. We take copies of the - # variables and quote the copies for generation of the libtool script. - for var in echo old_CC old_CFLAGS \ - AR AR_FLAGS CC LD LN_S NM SHELL \ - reload_flag reload_cmds wl \ - pic_flag link_static_flag no_builtin_flag export_dynamic_flag_spec \ - thread_safe_flag_spec whole_archive_flag_spec libname_spec \ - library_names_spec soname_spec \ - RANLIB old_archive_cmds old_archive_from_new_cmds old_postinstall_cmds \ - old_postuninstall_cmds archive_cmds archive_expsym_cmds postinstall_cmds \ - postuninstall_cmds extract_expsyms_cmds old_archive_from_expsyms_cmds \ - old_striplib striplib file_magic_cmd export_symbols_cmds \ - deplibs_check_method allow_undefined_flag no_undefined_flag \ - finish_cmds finish_eval global_symbol_pipe global_symbol_to_cdecl \ - hardcode_libdir_flag_spec hardcode_libdir_separator \ - sys_lib_search_path_spec sys_lib_dlsearch_path_spec \ - compiler_c_o compiler_o_lo need_locks exclude_expsyms include_expsyms; do - - case $var in - reload_cmds | old_archive_cmds | old_archive_from_new_cmds | \ - old_postinstall_cmds | old_postuninstall_cmds | \ - export_symbols_cmds | archive_cmds | archive_expsym_cmds | \ - extract_expsyms_cmds | old_archive_from_expsyms_cmds | \ - postinstall_cmds | postuninstall_cmds | \ - finish_cmds | sys_lib_search_path_spec | sys_lib_dlsearch_path_spec) - # Double-quote double-evaled strings. - eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$double_quote_subst\" -e \"\$sed_quote_subst\" -e \"\$delay_variable_subst\"\`\\\"" - ;; - *) - eval "lt_$var=\\\"\`\$echo \"X\$$var\" | \$Xsed -e \"\$sed_quote_subst\"\`\\\"" - ;; - esac - done - - cat <<__EOF__ > "${ofile}T" -#! $SHELL - -# `$echo "$ofile" | sed 's%^.*/%%'` - Provide generalized library-building support services. -# Generated automatically by $PROGRAM (GNU $PACKAGE $VERSION$TIMESTAMP) -# NOTE: Changes made to this file will be lost: look at ltmain.sh. -# -# Copyright (C) 1996-2000 Free Software Foundation, Inc. -# Originally by Gordon Matzigkeit , 1996 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Sed that helps us avoid accidentally triggering echo(1) options like -n. -Xsed="sed -e s/^X//" - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test "X\${CDPATH+set}" = Xset; then CDPATH=:; export CDPATH; fi - -# ### BEGIN LIBTOOL CONFIG - -# Libtool was configured on host `(hostname || uname -n) 2>/dev/null | sed 1q`: - -# Shell to use when invoking shell scripts. -SHELL=$lt_SHELL - -# Whether or not to build shared libraries. -build_libtool_libs=$enable_shared - -# Whether or not to add -lc for building shared libraries. -build_libtool_need_lc=$need_lc - -# Whether or not to build static libraries. -build_old_libs=$enable_static - -# Whether or not to optimize for fast installation. -fast_install=$enable_fast_install - -# The host system. -host_alias=$host_alias -host=$host - -# An echo program that does not interpret backslashes. -echo=$lt_echo - -# The archiver. -AR=$lt_AR -AR_FLAGS=$lt_AR_FLAGS - -# The default C compiler. -CC=$lt_CC - -# Is the compiler the GNU C compiler? -with_gcc=$GCC - -# The linker used to build libraries. -LD=$lt_LD - -# Whether we need hard or soft links. -LN_S=$lt_LN_S - -# A BSD-compatible nm program. -NM=$lt_NM - -# A symbol stripping program -STRIP=$STRIP - -# Used to examine libraries when file_magic_cmd begins "file" -MAGIC_CMD=$MAGIC_CMD - -# Used on cygwin: DLL creation program. -DLLTOOL="$DLLTOOL" - -# Used on cygwin: object dumper. -OBJDUMP="$OBJDUMP" - -# Used on cygwin: assembler. -AS="$AS" - -# The name of the directory that contains temporary libtool files. -objdir=$objdir - -# How to create reloadable object files. -reload_flag=$lt_reload_flag -reload_cmds=$lt_reload_cmds - -# How to pass a linker flag through the compiler. -wl=$lt_wl - -# Object file suffix (normally "o"). -objext="$ac_objext" - -# Old archive suffix (normally "a"). -libext="$libext" - -# Executable file suffix (normally ""). -exeext="$exeext" - -# Additional compiler flags for building library objects. -pic_flag=$lt_pic_flag -pic_mode=$pic_mode - -# Does compiler simultaneously support -c and -o options? -compiler_c_o=$lt_compiler_c_o - -# Can we write directly to a .lo ? -compiler_o_lo=$lt_compiler_o_lo - -# Must we lock files when doing compilation ? -need_locks=$lt_need_locks - -# Do we need the lib prefix for modules? -need_lib_prefix=$need_lib_prefix - -# Do we need a version for libraries? -need_version=$need_version - -# Whether dlopen is supported. -dlopen_support=$enable_dlopen - -# Whether dlopen of programs is supported. -dlopen_self=$enable_dlopen_self - -# Whether dlopen of statically linked programs is supported. -dlopen_self_static=$enable_dlopen_self_static - -# Compiler flag to prevent dynamic linking. -link_static_flag=$lt_link_static_flag - -# Compiler flag to turn off builtin functions. -no_builtin_flag=$lt_no_builtin_flag - -# Compiler flag to allow reflexive dlopens. -export_dynamic_flag_spec=$lt_export_dynamic_flag_spec - -# Compiler flag to generate shared objects directly from archives. -whole_archive_flag_spec=$lt_whole_archive_flag_spec - -# Compiler flag to generate thread-safe objects. -thread_safe_flag_spec=$lt_thread_safe_flag_spec - -# Library versioning type. -version_type=$version_type - -# Format of library name prefix. -libname_spec=$lt_libname_spec - -# List of archive names. First name is the real one, the rest are links. -# The last name is the one that the linker finds with -lNAME. -library_names_spec=$lt_library_names_spec - -# The coded name of the library, if different from the real name. -soname_spec=$lt_soname_spec - -# Commands used to build and install an old-style archive. -RANLIB=$lt_RANLIB -old_archive_cmds=$lt_old_archive_cmds -old_postinstall_cmds=$lt_old_postinstall_cmds -old_postuninstall_cmds=$lt_old_postuninstall_cmds - -# Create an old-style archive from a shared archive. -old_archive_from_new_cmds=$lt_old_archive_from_new_cmds - -# Create a temporary old-style archive to link instead of a shared archive. -old_archive_from_expsyms_cmds=$lt_old_archive_from_expsyms_cmds - -# Commands used to build and install a shared archive. -archive_cmds=$lt_archive_cmds -archive_expsym_cmds=$lt_archive_expsym_cmds -postinstall_cmds=$lt_postinstall_cmds -postuninstall_cmds=$lt_postuninstall_cmds - -# Commands to strip libraries. -old_striplib=$lt_old_striplib -striplib=$lt_striplib - -# Method to check whether dependent libraries are shared objects. -deplibs_check_method=$lt_deplibs_check_method - -# Command to use when deplibs_check_method == file_magic. -file_magic_cmd=$lt_file_magic_cmd - -# Flag that allows shared libraries with undefined symbols to be built. -allow_undefined_flag=$lt_allow_undefined_flag - -# Flag that forces no undefined symbols. -no_undefined_flag=$lt_no_undefined_flag - -# Commands used to finish a libtool library installation in a directory. -finish_cmds=$lt_finish_cmds - -# Same as above, but a single script fragment to be evaled but not shown. -finish_eval=$lt_finish_eval - -# Take the output of nm and produce a listing of raw symbols and C names. -global_symbol_pipe=$lt_global_symbol_pipe - -# Transform the output of nm in a proper C declaration -global_symbol_to_cdecl=$lt_global_symbol_to_cdecl - -# This is the shared library runtime path variable. -runpath_var=$runpath_var - -# This is the shared library path variable. -shlibpath_var=$shlibpath_var - -# Is shlibpath searched before the hard-coded library search path? -shlibpath_overrides_runpath=$shlibpath_overrides_runpath - -# How to hardcode a shared library path into an executable. -hardcode_action=$hardcode_action - -# Whether we should hardcode library paths into libraries. -hardcode_into_libs=$hardcode_into_libs - -# Flag to hardcode \$libdir into a binary during linking. -# This must work even if \$libdir does not exist. -hardcode_libdir_flag_spec=$lt_hardcode_libdir_flag_spec - -# Whether we need a single -rpath flag with a separated argument. -hardcode_libdir_separator=$lt_hardcode_libdir_separator - -# Set to yes if using DIR/libNAME.so during linking hardcodes DIR into the -# resulting binary. -hardcode_direct=$hardcode_direct - -# Set to yes if using the -LDIR flag during linking hardcodes DIR into the -# resulting binary. -hardcode_minus_L=$hardcode_minus_L - -# Set to yes if using SHLIBPATH_VAR=DIR during linking hardcodes DIR into -# the resulting binary. -hardcode_shlibpath_var=$hardcode_shlibpath_var - -# Variables whose values should be saved in libtool wrapper scripts and -# restored at relink time. -variables_saved_for_relink="$variables_saved_for_relink" - -# Whether libtool must link a program against all its dependency libraries. -link_all_deplibs=$link_all_deplibs - -# Compile-time system search path for libraries -sys_lib_search_path_spec=$lt_sys_lib_search_path_spec - -# Run-time system search path for libraries -sys_lib_dlsearch_path_spec=$lt_sys_lib_dlsearch_path_spec - -# Fix the shell variable \$srcfile for the compiler. -fix_srcfile_path="$fix_srcfile_path" - -# Set to yes if exported symbols are required. -always_export_symbols=$always_export_symbols - -# The commands to list exported symbols. -export_symbols_cmds=$lt_export_symbols_cmds - -# The commands to extract the exported symbol list from a shared archive. -extract_expsyms_cmds=$lt_extract_expsyms_cmds - -# Symbols that should not be listed in the preloaded symbols. -exclude_expsyms=$lt_exclude_expsyms - -# Symbols that must always be exported. -include_expsyms=$lt_include_expsyms - -# ### END LIBTOOL CONFIG - -__EOF__ - - case $host_os in - aix3*) - cat <<\EOF >> "${ofile}T" - -# AIX sometimes has problems with the GCC collect2 program. For some -# reason, if we set the COLLECT_NAMES environment variable, the problems -# vanish in a puff of smoke. -if test "X${COLLECT_NAMES+set}" != Xset; then - COLLECT_NAMES= - export COLLECT_NAMES -fi -EOF - ;; - esac - - case $host_os in - cygwin* | mingw* | pw32* | os2*) - cat <<'EOF' >> "${ofile}T" - # This is a source program that is used to create dlls on Windows - # Don't remove nor modify the starting and closing comments -# /* ltdll.c starts here */ -# #define WIN32_LEAN_AND_MEAN -# #include -# #undef WIN32_LEAN_AND_MEAN -# #include -# -# #ifndef __CYGWIN__ -# # ifdef __CYGWIN32__ -# # define __CYGWIN__ __CYGWIN32__ -# # endif -# #endif -# -# #ifdef __cplusplus -# extern "C" { -# #endif -# BOOL APIENTRY DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved); -# #ifdef __cplusplus -# } -# #endif -# -# #ifdef __CYGWIN__ -# #include -# DECLARE_CYGWIN_DLL( DllMain ); -# #endif -# HINSTANCE __hDllInstance_base; -# -# BOOL APIENTRY -# DllMain (HINSTANCE hInst, DWORD reason, LPVOID reserved) -# { -# __hDllInstance_base = hInst; -# return TRUE; -# } -# /* ltdll.c ends here */ - # This is a source program that is used to create import libraries - # on Windows for dlls which lack them. Don't remove nor modify the - # starting and closing comments -# /* impgen.c starts here */ -# /* Copyright (C) 1999-2000 Free Software Foundation, Inc. -# -# This file is part of GNU libtool. -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# */ -# -# #include /* for printf() */ -# #include /* for open(), lseek(), read() */ -# #include /* for O_RDONLY, O_BINARY */ -# #include /* for strdup() */ -# -# /* O_BINARY isn't required (or even defined sometimes) under Unix */ -# #ifndef O_BINARY -# #define O_BINARY 0 -# #endif -# -# static unsigned int -# pe_get16 (fd, offset) -# int fd; -# int offset; -# { -# unsigned char b[2]; -# lseek (fd, offset, SEEK_SET); -# read (fd, b, 2); -# return b[0] + (b[1]<<8); -# } -# -# static unsigned int -# pe_get32 (fd, offset) -# int fd; -# int offset; -# { -# unsigned char b[4]; -# lseek (fd, offset, SEEK_SET); -# read (fd, b, 4); -# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); -# } -# -# static unsigned int -# pe_as32 (ptr) -# void *ptr; -# { -# unsigned char *b = ptr; -# return b[0] + (b[1]<<8) + (b[2]<<16) + (b[3]<<24); -# } -# -# int -# main (argc, argv) -# int argc; -# char *argv[]; -# { -# int dll; -# unsigned long pe_header_offset, opthdr_ofs, num_entries, i; -# unsigned long export_rva, export_size, nsections, secptr, expptr; -# unsigned long name_rvas, nexp; -# unsigned char *expdata, *erva; -# char *filename, *dll_name; -# -# filename = argv[1]; -# -# dll = open(filename, O_RDONLY|O_BINARY); -# if (dll < 1) -# return 1; -# -# dll_name = filename; -# -# for (i=0; filename[i]; i++) -# if (filename[i] == '/' || filename[i] == '\\' || filename[i] == ':') -# dll_name = filename + i +1; -# -# pe_header_offset = pe_get32 (dll, 0x3c); -# opthdr_ofs = pe_header_offset + 4 + 20; -# num_entries = pe_get32 (dll, opthdr_ofs + 92); -# -# if (num_entries < 1) /* no exports */ -# return 1; -# -# export_rva = pe_get32 (dll, opthdr_ofs + 96); -# export_size = pe_get32 (dll, opthdr_ofs + 100); -# nsections = pe_get16 (dll, pe_header_offset + 4 +2); -# secptr = (pe_header_offset + 4 + 20 + -# pe_get16 (dll, pe_header_offset + 4 + 16)); -# -# expptr = 0; -# for (i = 0; i < nsections; i++) -# { -# char sname[8]; -# unsigned long secptr1 = secptr + 40 * i; -# unsigned long vaddr = pe_get32 (dll, secptr1 + 12); -# unsigned long vsize = pe_get32 (dll, secptr1 + 16); -# unsigned long fptr = pe_get32 (dll, secptr1 + 20); -# lseek(dll, secptr1, SEEK_SET); -# read(dll, sname, 8); -# if (vaddr <= export_rva && vaddr+vsize > export_rva) -# { -# expptr = fptr + (export_rva - vaddr); -# if (export_rva + export_size > vaddr + vsize) -# export_size = vsize - (export_rva - vaddr); -# break; -# } -# } -# -# expdata = (unsigned char*)malloc(export_size); -# lseek (dll, expptr, SEEK_SET); -# read (dll, expdata, export_size); -# erva = expdata - export_rva; -# -# nexp = pe_as32 (expdata+24); -# name_rvas = pe_as32 (expdata+32); -# -# printf ("EXPORTS\n"); -# for (i = 0; i> "${ofile}T" || (rm -f "${ofile}T"; exit 1) - - mv -f "${ofile}T" "$ofile" || \ - (rm -f "$ofile" && cp "${ofile}T" "$ofile" && rm -f "${ofile}T") - chmod +x "$ofile" -fi - -])# _LT_AC_LTCONFIG_HACK - -# AC_LIBTOOL_DLOPEN - enable checks for dlopen support -AC_DEFUN([AC_LIBTOOL_DLOPEN], [AC_BEFORE([$0],[AC_LIBTOOL_SETUP])]) - -# AC_LIBTOOL_WIN32_DLL - declare package support for building win32 dll's -AC_DEFUN([AC_LIBTOOL_WIN32_DLL], [AC_BEFORE([$0], [AC_LIBTOOL_SETUP])]) - -# AC_ENABLE_SHARED - implement the --enable-shared flag -# Usage: AC_ENABLE_SHARED[(DEFAULT)] -# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to -# `yes'. -AC_DEFUN([AC_ENABLE_SHARED], -[define([AC_ENABLE_SHARED_DEFAULT], ifelse($1, no, no, yes))dnl -AC_ARG_ENABLE(shared, -changequote(<<, >>)dnl -<< --enable-shared[=PKGS] build shared libraries [default=>>AC_ENABLE_SHARED_DEFAULT], -changequote([, ])dnl -[p=${PACKAGE-default} -case $enableval in -yes) enable_shared=yes ;; -no) enable_shared=no ;; -*) - enable_shared=no - # Look at the argument we got. We use all the common list separators. - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," - for pkg in $enableval; do - if test "X$pkg" = "X$p"; then - enable_shared=yes - fi - done - IFS="$ac_save_ifs" - ;; -esac], -enable_shared=AC_ENABLE_SHARED_DEFAULT)dnl -]) - -# AC_DISABLE_SHARED - set the default shared flag to --disable-shared -AC_DEFUN([AC_DISABLE_SHARED], -[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl -AC_ENABLE_SHARED(no)]) - -# AC_ENABLE_STATIC - implement the --enable-static flag -# Usage: AC_ENABLE_STATIC[(DEFAULT)] -# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to -# `yes'. -AC_DEFUN([AC_ENABLE_STATIC], -[define([AC_ENABLE_STATIC_DEFAULT], ifelse($1, no, no, yes))dnl -AC_ARG_ENABLE(static, -changequote(<<, >>)dnl -<< --enable-static[=PKGS] build static libraries [default=>>AC_ENABLE_STATIC_DEFAULT], -changequote([, ])dnl -[p=${PACKAGE-default} -case $enableval in -yes) enable_static=yes ;; -no) enable_static=no ;; -*) - enable_static=no - # Look at the argument we got. We use all the common list separators. - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," - for pkg in $enableval; do - if test "X$pkg" = "X$p"; then - enable_static=yes - fi - done - IFS="$ac_save_ifs" - ;; -esac], -enable_static=AC_ENABLE_STATIC_DEFAULT)dnl -]) - -# AC_DISABLE_STATIC - set the default static flag to --disable-static -AC_DEFUN([AC_DISABLE_STATIC], -[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl -AC_ENABLE_STATIC(no)]) - - -# AC_ENABLE_FAST_INSTALL - implement the --enable-fast-install flag -# Usage: AC_ENABLE_FAST_INSTALL[(DEFAULT)] -# Where DEFAULT is either `yes' or `no'. If omitted, it defaults to -# `yes'. -AC_DEFUN([AC_ENABLE_FAST_INSTALL], -[define([AC_ENABLE_FAST_INSTALL_DEFAULT], ifelse($1, no, no, yes))dnl -AC_ARG_ENABLE(fast-install, -changequote(<<, >>)dnl -<< --enable-fast-install[=PKGS] optimize for fast installation [default=>>AC_ENABLE_FAST_INSTALL_DEFAULT], -changequote([, ])dnl -[p=${PACKAGE-default} -case $enableval in -yes) enable_fast_install=yes ;; -no) enable_fast_install=no ;; -*) - enable_fast_install=no - # Look at the argument we got. We use all the common list separators. - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:," - for pkg in $enableval; do - if test "X$pkg" = "X$p"; then - enable_fast_install=yes - fi - done - IFS="$ac_save_ifs" - ;; -esac], -enable_fast_install=AC_ENABLE_FAST_INSTALL_DEFAULT)dnl -]) - -# AC_DISABLE_FAST_INSTALL - set the default to --disable-fast-install -AC_DEFUN([AC_DISABLE_FAST_INSTALL], -[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl -AC_ENABLE_FAST_INSTALL(no)]) - -# AC_LIBTOOL_PICMODE - implement the --with-pic flag -# Usage: AC_LIBTOOL_PICMODE[(MODE)] -# Where MODE is either `yes' or `no'. If omitted, it defaults to -# `both'. -AC_DEFUN([AC_LIBTOOL_PICMODE], -[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl -pic_mode=ifelse($#,1,$1,default)]) - - -# AC_PATH_TOOL_PREFIX - find a file program which can recognise shared library -AC_DEFUN([AC_PATH_TOOL_PREFIX], -[AC_MSG_CHECKING([for $1]) -AC_CACHE_VAL(lt_cv_path_MAGIC_CMD, -[case $MAGIC_CMD in - /*) - lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a path. - ;; - ?:/*) - lt_cv_path_MAGIC_CMD="$MAGIC_CMD" # Let the user override the test with a dos path. - ;; - *) - ac_save_MAGIC_CMD="$MAGIC_CMD" - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS=":" -dnl $ac_dummy forces splitting on constant user-supplied paths. -dnl POSIX.2 word splitting is done only on the output of word expansions, -dnl not every word. This closes a longstanding sh security hole. - ac_dummy="ifelse([$2], , $PATH, [$2])" - for ac_dir in $ac_dummy; do - test -z "$ac_dir" && ac_dir=. - if test -f $ac_dir/$1; then - lt_cv_path_MAGIC_CMD="$ac_dir/$1" - if test -n "$file_magic_test_file"; then - case $deplibs_check_method in - "file_magic "*) - file_magic_regex="`expr \"$deplibs_check_method\" : \"file_magic \(.*\)\"`" - MAGIC_CMD="$lt_cv_path_MAGIC_CMD" - if eval $file_magic_cmd \$file_magic_test_file 2> /dev/null | - egrep "$file_magic_regex" > /dev/null; then - : - else - cat <&2 - -*** Warning: the command libtool uses to detect shared libraries, -*** $file_magic_cmd, produces output that libtool cannot recognize. -*** The result is that libtool may fail to recognize shared libraries -*** as such. This will affect the creation of libtool libraries that -*** depend on shared libraries, but programs linked with such libtool -*** libraries will work regardless of this problem. Nevertheless, you -*** may want to report the problem to your system manager and/or to -*** bug-libtool@gnu.org - -EOF - fi ;; - esac - fi - break - fi - done - IFS="$ac_save_ifs" - MAGIC_CMD="$ac_save_MAGIC_CMD" - ;; -esac]) -MAGIC_CMD="$lt_cv_path_MAGIC_CMD" -if test -n "$MAGIC_CMD"; then - AC_MSG_RESULT($MAGIC_CMD) -else - AC_MSG_RESULT(no) -fi -]) - - -# AC_PATH_MAGIC - find a file program which can recognise a shared library -AC_DEFUN([AC_PATH_MAGIC], -[AC_REQUIRE([AC_CHECK_TOOL_PREFIX])dnl -AC_PATH_TOOL_PREFIX(${ac_tool_prefix}file, /usr/bin:$PATH) -if test -z "$lt_cv_path_MAGIC_CMD"; then - if test -n "$ac_tool_prefix"; then - AC_PATH_TOOL_PREFIX(file, /usr/bin:$PATH) - else - MAGIC_CMD=: - fi -fi -]) - - -# AC_PROG_LD - find the path to the GNU or non-GNU linker -AC_DEFUN([AC_PROG_LD], -[AC_ARG_WITH(gnu-ld, -[ --with-gnu-ld assume the C compiler uses GNU ld [default=no]], -test "$withval" = no || with_gnu_ld=yes, with_gnu_ld=no) -AC_REQUIRE([AC_PROG_CC])dnl -AC_REQUIRE([AC_CANONICAL_HOST])dnl -AC_REQUIRE([AC_CANONICAL_BUILD])dnl -ac_prog=ld -if test "$GCC" = yes; then - # Check if gcc -print-prog-name=ld gives a path. - AC_MSG_CHECKING([for ld used by GCC]) - case $host in - *-*-mingw*) - # gcc leaves a trailing carriage return which upsets mingw - ac_prog=`($CC -print-prog-name=ld) 2>&5 | tr -d '\015'` ;; - *) - ac_prog=`($CC -print-prog-name=ld) 2>&5` ;; - esac - case $ac_prog in - # Accept absolute paths. - [[\\/]* | [A-Za-z]:[\\/]*)] - [re_direlt='/[^/][^/]*/\.\./'] - # Canonicalize the path of ld - ac_prog=`echo $ac_prog| sed 's%\\\\%/%g'` - while echo $ac_prog | grep "$re_direlt" > /dev/null 2>&1; do - ac_prog=`echo $ac_prog| sed "s%$re_direlt%/%"` - done - test -z "$LD" && LD="$ac_prog" - ;; - "") - # If it fails, then pretend we aren't using GCC. - ac_prog=ld - ;; - *) - # If it is relative, then search for the first ld in PATH. - with_gnu_ld=unknown - ;; - esac -elif test "$with_gnu_ld" = yes; then - AC_MSG_CHECKING([for GNU ld]) -else - AC_MSG_CHECKING([for non-GNU ld]) -fi -AC_CACHE_VAL(lt_cv_path_LD, -[if test -z "$LD"; then - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" - for ac_dir in $PATH; do - test -z "$ac_dir" && ac_dir=. - if test -f "$ac_dir/$ac_prog" || test -f "$ac_dir/$ac_prog$ac_exeext"; then - lt_cv_path_LD="$ac_dir/$ac_prog" - # Check to see if the program is GNU ld. I'd rather use --version, - # but apparently some GNU ld's only accept -v. - # Break only if it was the GNU/non-GNU ld that we prefer. - if "$lt_cv_path_LD" -v 2>&1 < /dev/null | egrep '(GNU|with BFD)' > /dev/null; then - test "$with_gnu_ld" != no && break - else - test "$with_gnu_ld" != yes && break - fi - fi - done - IFS="$ac_save_ifs" -else - lt_cv_path_LD="$LD" # Let the user override the test with a path. -fi]) -LD="$lt_cv_path_LD" -if test -n "$LD"; then - AC_MSG_RESULT($LD) -else - AC_MSG_RESULT(no) -fi -test -z "$LD" && AC_MSG_ERROR([no acceptable ld found in \$PATH]) -AC_PROG_LD_GNU -]) - -# AC_PROG_LD_GNU - -AC_DEFUN([AC_PROG_LD_GNU], -[AC_CACHE_CHECK([if the linker ($LD) is GNU ld], lt_cv_prog_gnu_ld, -[# I'd rather use --version here, but apparently some GNU ld's only accept -v. -if $LD -v 2>&1 &5; then - lt_cv_prog_gnu_ld=yes -else - lt_cv_prog_gnu_ld=no -fi]) -with_gnu_ld=$lt_cv_prog_gnu_ld -]) - -# AC_PROG_LD_RELOAD_FLAG - find reload flag for linker -# -- PORTME Some linkers may need a different reload flag. -AC_DEFUN([AC_PROG_LD_RELOAD_FLAG], -[AC_CACHE_CHECK([for $LD option to reload object files], lt_cv_ld_reload_flag, -[lt_cv_ld_reload_flag='-r']) -reload_flag=$lt_cv_ld_reload_flag -test -n "$reload_flag" && reload_flag=" $reload_flag" -]) - -# AC_DEPLIBS_CHECK_METHOD - how to check for library dependencies -# -- PORTME fill in with the dynamic library characteristics -AC_DEFUN([AC_DEPLIBS_CHECK_METHOD], -[AC_CACHE_CHECK([how to recognise dependant libraries], -lt_cv_deplibs_check_method, -[lt_cv_file_magic_cmd='$MAGIC_CMD' -lt_cv_file_magic_test_file= -lt_cv_deplibs_check_method='unknown' -# Need to set the preceding variable on all platforms that support -# interlibrary dependencies. -# 'none' -- dependencies not supported. -# `unknown' -- same as none, but documents that we really don't know. -# 'pass_all' -- all dependencies passed with no checks. -# 'test_compile' -- check by making test program. -# ['file_magic [regex]'] -- check by looking for files in library path -# which responds to the $file_magic_cmd with a given egrep regex. -# If you have `file' or equivalent on your system and you're not sure -# whether `pass_all' will *always* work, you probably want this one. - -case $host_os in -aix4* | aix5*) - lt_cv_deplibs_check_method=pass_all - ;; - -beos*) - lt_cv_deplibs_check_method=pass_all - ;; - -bsdi4*) - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib)'] - lt_cv_file_magic_cmd='/usr/bin/file -L' - lt_cv_file_magic_test_file=/shlib/libc.so - ;; - -cygwin* | mingw* | pw32*) - lt_cv_deplibs_check_method='file_magic file format pei*-i386(.*architecture: i386)?' - lt_cv_file_magic_cmd='$OBJDUMP -f' - ;; - -darwin* | rhapsody*) - lt_cv_deplibs_check_method='file_magic Mach-O dynamically linked shared library' - lt_cv_file_magic_cmd='/usr/bin/file -L' - case "$host_os" in - rhapsody* | darwin1.[012]) - lt_cv_file_magic_test_file=`echo /System/Library/Frameworks/System.framework/Versions/*/System | head -1` - ;; - *) # Darwin 1.3 on - lt_cv_file_magic_test_file='/usr/lib/libSystem.dylib' - ;; - esac - ;; - -freebsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then - case $host_cpu in - i*86 ) - # Not sure whether the presence of OpenBSD here was a mistake. - # Let's accept both of them until this is cleared up. - [lt_cv_deplibs_check_method='file_magic (FreeBSD|OpenBSD)/i[3-9]86 (compact )?demand paged shared library'] - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=`echo /usr/lib/libc.so.*` - ;; - esac - else - lt_cv_deplibs_check_method=pass_all - fi - ;; - -gnu*) - lt_cv_deplibs_check_method=pass_all - ;; - -hpux10.20*|hpux11*) - [lt_cv_deplibs_check_method='file_magic (s[0-9][0-9][0-9]|PA-RISC[0-9].[0-9]) shared library'] - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=/usr/lib/libc.sl - ;; - -irix5* | irix6*) - case $host_os in - irix5*) - # this will be overridden with pass_all, but let us keep it just in case - lt_cv_deplibs_check_method="file_magic ELF 32-bit MSB dynamic lib MIPS - version 1" - ;; - *) - case $LD in - *-32|*"-32 ") libmagic=32-bit;; - *-n32|*"-n32 ") libmagic=N32;; - *-64|*"-64 ") libmagic=64-bit;; - *) libmagic=never-match;; - esac - # this will be overridden with pass_all, but let us keep it just in case - [lt_cv_deplibs_check_method="file_magic ELF ${libmagic} MSB mips-[1234] dynamic lib MIPS - version 1"] - ;; - esac - lt_cv_file_magic_test_file=`echo /lib${libsuff}/libc.so*` - lt_cv_deplibs_check_method=pass_all - ;; - -# This must be Linux ELF. -linux-gnu*) - case $host_cpu in - alpha* | hppa* | i*86 | powerpc* | sparc* | ia64* | s390* ) - lt_cv_deplibs_check_method=pass_all ;; - *) - # glibc up to 2.1.1 does not perform some relocations on ARM - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )' ;;] - esac - lt_cv_file_magic_test_file=`echo /lib/libc.so* /lib/libc-*.so` - ;; - -netbsd*) - if echo __ELF__ | $CC -E - | grep __ELF__ > /dev/null; then - [lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so\.[0-9]+\.[0-9]+$'] - else - [lt_cv_deplibs_check_method='match_pattern /lib[^/\.]+\.so$'] - fi - ;; - -newos6*) - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (executable|dynamic lib)'] - lt_cv_file_magic_cmd=/usr/bin/file - lt_cv_file_magic_test_file=/usr/lib/libnls.so - ;; - -osf3* | osf4* | osf5*) - # this will be overridden with pass_all, but let us keep it just in case - lt_cv_deplibs_check_method='file_magic COFF format alpha shared library' - lt_cv_file_magic_test_file=/shlib/libc.so - lt_cv_deplibs_check_method=pass_all - ;; - -sco3.2v5*) - lt_cv_deplibs_check_method=pass_all - ;; - -solaris*) - lt_cv_deplibs_check_method=pass_all - lt_cv_file_magic_test_file=/lib/libc.so - ;; - -[sysv5uw[78]* | sysv4*uw2*)] - lt_cv_deplibs_check_method=pass_all - ;; - -sysv4 | sysv4.2uw2* | sysv4.3* | sysv5*) - case $host_vendor in - motorola) - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [ML]SB (shared object|dynamic lib) M[0-9][0-9]* Version [0-9]'] - lt_cv_file_magic_test_file=`echo /usr/lib/libc.so*` - ;; - ncr) - lt_cv_deplibs_check_method=pass_all - ;; - sequent) - lt_cv_file_magic_cmd='/bin/file' - [lt_cv_deplibs_check_method='file_magic ELF [0-9][0-9]*-bit [LM]SB (shared object|dynamic lib )'] - ;; - sni) - lt_cv_file_magic_cmd='/bin/file' - [lt_cv_deplibs_check_method="file_magic ELF [0-9][0-9]*-bit [LM]SB dynamic lib"] - lt_cv_file_magic_test_file=/lib/libc.so - ;; - esac - ;; -esac -]) -file_magic_cmd=$lt_cv_file_magic_cmd -deplibs_check_method=$lt_cv_deplibs_check_method -]) - - -# AC_PROG_NM - find the path to a BSD-compatible name lister -AC_DEFUN([AC_PROG_NM], -[AC_MSG_CHECKING([for BSD-compatible nm]) -AC_CACHE_VAL(lt_cv_path_NM, -[if test -n "$NM"; then - # Let the user override the test. - lt_cv_path_NM="$NM" -else - IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}${PATH_SEPARATOR-:}" - for ac_dir in $PATH /usr/ccs/bin /usr/ucb /bin; do - test -z "$ac_dir" && ac_dir=. - tmp_nm=$ac_dir/${ac_tool_prefix}nm - if test -f $tmp_nm || test -f $tmp_nm$ac_exeext ; then - # Check to see if the nm accepts a BSD-compat flag. - # Adding the `sed 1q' prevents false positives on HP-UX, which says: - # nm: unknown option "B" ignored - # Tru64's nm complains that /dev/null is an invalid object file - if ($tmp_nm -B /dev/null 2>&1 | sed '1q'; exit 0) | egrep '(/dev/null|Invalid file or object type)' >/dev/null; then - lt_cv_path_NM="$tmp_nm -B" - break - elif ($tmp_nm -p /dev/null 2>&1 | sed '1q'; exit 0) | egrep /dev/null >/dev/null; then - lt_cv_path_NM="$tmp_nm -p" - break - else - lt_cv_path_NM=${lt_cv_path_NM="$tmp_nm"} # keep the first match, but - continue # so that we can try to find one that supports BSD flags - fi - fi - done - IFS="$ac_save_ifs" - test -z "$lt_cv_path_NM" && lt_cv_path_NM=nm -fi]) -NM="$lt_cv_path_NM" -AC_MSG_RESULT([$NM]) -]) - -# AC_CHECK_LIBM - check for math library -AC_DEFUN([AC_CHECK_LIBM], -[AC_REQUIRE([AC_CANONICAL_HOST])dnl -LIBM= -case $host in -*-*-beos* | *-*-cygwin* | *-*-pw32*) - # These system don't have libm - ;; -*-ncr-sysv4.3*) - AC_CHECK_LIB(mw, _mwvalidcheckl, LIBM="-lmw") - AC_CHECK_LIB(m, main, LIBM="$LIBM -lm") - ;; -*) - AC_CHECK_LIB(m, main, LIBM="-lm") - ;; -esac -]) - -# AC_LIBLTDL_CONVENIENCE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl convenience library and INCLTDL to the include flags for -# the libltdl header and adds --enable-ltdl-convenience to the -# configure arguments. Note that LIBLTDL and INCLTDL are not -# AC_SUBSTed, nor is AC_CONFIG_SUBDIRS called. If DIR is not -# provided, it is assumed to be `libltdl'. LIBLTDL will be prefixed -# with '${top_builddir}/' and INCLTDL will be prefixed with -# '${top_srcdir}/' (note the single quotes!). If your package is not -# flat and you're not using automake, define top_builddir and -# top_srcdir appropriately in the Makefiles. -AC_DEFUN([AC_LIBLTDL_CONVENIENCE], -[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl - case $enable_ltdl_convenience in - no) AC_MSG_ERROR([this package needs a convenience libltdl]) ;; - "") enable_ltdl_convenience=yes - ac_configure_args="$ac_configure_args --enable-ltdl-convenience" ;; - esac - LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdlc.la - INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) -]) - -# AC_LIBLTDL_INSTALLABLE[(dir)] - sets LIBLTDL to the link flags for -# the libltdl installable library and INCLTDL to the include flags for -# the libltdl header and adds --enable-ltdl-install to the configure -# arguments. Note that LIBLTDL and INCLTDL are not AC_SUBSTed, nor is -# AC_CONFIG_SUBDIRS called. If DIR is not provided and an installed -# libltdl is not found, it is assumed to be `libltdl'. LIBLTDL will -# be prefixed with '${top_builddir}/' and INCLTDL will be prefixed -# with '${top_srcdir}/' (note the single quotes!). If your package is -# not flat and you're not using automake, define top_builddir and -# top_srcdir appropriately in the Makefiles. -# In the future, this macro may have to be called after AC_PROG_LIBTOOL. -AC_DEFUN([AC_LIBLTDL_INSTALLABLE], -[AC_BEFORE([$0],[AC_LIBTOOL_SETUP])dnl - AC_CHECK_LIB(ltdl, main, - [test x"$enable_ltdl_install" != xyes && enable_ltdl_install=no], - [if test x"$enable_ltdl_install" = xno; then - AC_MSG_WARN([libltdl not installed, but installation disabled]) - else - enable_ltdl_install=yes - fi - ]) - if test x"$enable_ltdl_install" = x"yes"; then - ac_configure_args="$ac_configure_args --enable-ltdl-install" - LIBLTDL='${top_builddir}/'ifelse($#,1,[$1],['libltdl'])/libltdl.la - INCLTDL='-I${top_srcdir}/'ifelse($#,1,[$1],['libltdl']) - else - ac_configure_args="$ac_configure_args --enable-ltdl-install=no" - LIBLTDL="-lltdl" - INCLTDL= - fi -]) - -# old names -AC_DEFUN([AM_PROG_LIBTOOL], [AC_PROG_LIBTOOL]) -AC_DEFUN([AM_ENABLE_SHARED], [AC_ENABLE_SHARED($@)]) -AC_DEFUN([AM_ENABLE_STATIC], [AC_ENABLE_STATIC($@)]) -AC_DEFUN([AM_DISABLE_SHARED], [AC_DISABLE_SHARED($@)]) -AC_DEFUN([AM_DISABLE_STATIC], [AC_DISABLE_STATIC($@)]) -AC_DEFUN([AM_PROG_LD], [AC_PROG_LD]) -AC_DEFUN([AM_PROG_NM], [AC_PROG_NM]) - -# This is just to silence aclocal about the macro not being used -ifelse([AC_DISABLE_FAST_INSTALL]) - diff --git a/libpcre/config.guess b/libpcre/config.guess index 500ee74b04..82a4ba20b0 100755 --- a/libpcre/config.guess +++ b/libpcre/config.guess @@ -1,9 +1,9 @@ #! /bin/sh # Attempt to guess a canonical system name. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. -timestamp='2003-10-03' +timestamp='2005-03-24' # This file is free software; you can redistribute it and/or modify it # under the terms of the GNU General Public License as published by @@ -53,7 +53,7 @@ version="\ GNU config.guess ($timestamp) Originally written by Per Bothner. -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO @@ -136,6 +136,23 @@ UNAME_RELEASE=`(uname -r) 2>/dev/null` || UNAME_RELEASE=unknown UNAME_SYSTEM=`(uname -s) 2>/dev/null` || UNAME_SYSTEM=unknown UNAME_VERSION=`(uname -v) 2>/dev/null` || UNAME_VERSION=unknown +if [ "${UNAME_SYSTEM}" = "Linux" ] ; then + eval $set_cc_for_build + cat << EOF > $dummy.c + #include + #ifdef __UCLIBC__ + # ifdef __UCLIBC_CONFIG_VERSION__ + LIBC=uclibc __UCLIBC_CONFIG_VERSION__ + # else + LIBC=uclibc + # endif + #else + LIBC=gnu + #endif +EOF + eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep LIBC= | sed -e 's: ::g'` +fi + # Note: order is significant - the case branches are not exclusive. case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in @@ -197,15 +214,21 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in # CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used. echo "${machine}-${os}${release}" exit 0 ;; + amd64:OpenBSD:*:*) + echo x86_64-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; amiga:OpenBSD:*:*) echo m68k-unknown-openbsd${UNAME_RELEASE} exit 0 ;; - arc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} + cats:OpenBSD:*:*) + echo arm-unknown-openbsd${UNAME_RELEASE} exit 0 ;; hp300:OpenBSD:*:*) echo m68k-unknown-openbsd${UNAME_RELEASE} exit 0 ;; + luna88k:OpenBSD:*:*) + echo m88k-unknown-openbsd${UNAME_RELEASE} + exit 0 ;; mac68k:OpenBSD:*:*) echo m68k-unknown-openbsd${UNAME_RELEASE} exit 0 ;; @@ -221,25 +244,33 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in mvmeppc:OpenBSD:*:*) echo powerpc-unknown-openbsd${UNAME_RELEASE} exit 0 ;; - pmax:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; sgi:OpenBSD:*:*) - echo mipseb-unknown-openbsd${UNAME_RELEASE} + echo mips64-unknown-openbsd${UNAME_RELEASE} exit 0 ;; sun3:OpenBSD:*:*) echo m68k-unknown-openbsd${UNAME_RELEASE} exit 0 ;; - wgrisc:OpenBSD:*:*) - echo mipsel-unknown-openbsd${UNAME_RELEASE} - exit 0 ;; *:OpenBSD:*:*) echo ${UNAME_MACHINE}-unknown-openbsd${UNAME_RELEASE} exit 0 ;; + *:ekkoBSD:*:*) + echo ${UNAME_MACHINE}-unknown-ekkobsd${UNAME_RELEASE} + exit 0 ;; + macppc:MirBSD:*:*) + echo powerppc-unknown-mirbsd${UNAME_RELEASE} + exit 0 ;; + *:MirBSD:*:*) + echo ${UNAME_MACHINE}-unknown-mirbsd${UNAME_RELEASE} + exit 0 ;; alpha:OSF1:*:*) - if test $UNAME_RELEASE = "V4.0"; then + case $UNAME_RELEASE in + *4.0) UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $3}'` - fi + ;; + *5.*) + UNAME_RELEASE=`/usr/sbin/sizer -v | awk '{print $4}'` + ;; + esac # According to Compaq, /usr/sbin/psrinfo has been available on # OSF/1 and Tru64 systems produced since 1995. I hope that # covers most systems running today. This code pipes the CPU @@ -277,14 +308,12 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in "EV7.9 (21364A)") UNAME_MACHINE="alphaev79" ;; esac + # A Pn.n version is a patched version. # A Vn.n version is a released version. # A Tn.n version is a released field test version. # A Xn.n version is an unreleased experimental baselevel. # 1.2 uses "1.2" for uname -r. - echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[VTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` - exit 0 ;; - Alpha*:OpenVMS:*:*) - echo alpha-hp-vms + echo ${UNAME_MACHINE}-dec-osf`echo ${UNAME_RELEASE} | sed -e 's/^[PVTX]//' | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` exit 0 ;; Alpha\ *:Windows_NT*:*) # How do we know it's Interix rather than the generic POSIX subsystem? @@ -307,6 +336,12 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:OS/390:*:*) echo i370-ibm-openedition exit 0 ;; + *:z/VM:*:*) + echo s390-ibm-zvmoe + exit 0 ;; + *:OS400:*:*) + echo powerpc-ibm-os400 + exit 0 ;; arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*) echo arm-acorn-riscix${UNAME_RELEASE} exit 0;; @@ -327,7 +362,7 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in DRS?6000:unix:4.0:6*) echo sparc-icl-nx6 exit 0 ;; - DRS?6000:UNIX_SV:4.2*:7*) + DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*) case `/usr/bin/uname -p` in sparc) echo sparc-icl-nx7 && exit 0 ;; esac ;; @@ -399,6 +434,9 @@ case "${UNAME_MACHINE}:${UNAME_SYSTEM}:${UNAME_RELEASE}:${UNAME_VERSION}" in *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*) echo m68k-unknown-mint${UNAME_RELEASE} exit 0 ;; + m68k:machten:*:*) + echo m68k-apple-machten${UNAME_RELEASE} + exit 0 ;; powerpc:machten:*:*) echo powerpc-apple-machten${UNAME_RELEASE} exit 0 ;; @@ -734,7 +772,7 @@ EOF echo sv1-cray-unicos${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit 0 ;; *:UNICOS/mp:*:*) - echo nv1-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' + echo craynv-cray-unicosmp${UNAME_RELEASE} | sed -e 's/\.[^.]*$/.X/' exit 0 ;; F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*) FUJITSU_PROC=`uname -m | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'` @@ -742,6 +780,11 @@ EOF FUJITSU_REL=`echo ${UNAME_RELEASE} | sed -e 's/ /_/'` echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" exit 0 ;; + 5000:UNIX_System_V:4.*:*) + FUJITSU_SYS=`uname -p | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/\///'` + FUJITSU_REL=`echo ${UNAME_RELEASE} | tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz' | sed -e 's/ /_/'` + echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}" + exit 0 ;; i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*) echo ${UNAME_MACHINE}-pc-bsdi${UNAME_RELEASE} exit 0 ;; @@ -751,22 +794,8 @@ EOF *:BSD/OS:*:*) echo ${UNAME_MACHINE}-unknown-bsdi${UNAME_RELEASE} exit 0 ;; - *:FreeBSD:*:*|*:GNU/FreeBSD:*:*) - # Determine whether the default compiler uses glibc. - eval $set_cc_for_build - sed 's/^ //' << EOF >$dummy.c - #include - #if __GLIBC__ >= 2 - LIBC=gnu - #else - LIBC= - #endif -EOF - eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^LIBC=` - # GNU/FreeBSD systems have a "k" prefix to indicate we are using - # FreeBSD's kernel, but not the complete OS. - case ${LIBC} in gnu) kernel_only='k' ;; esac - echo ${UNAME_MACHINE}-unknown-${kernel_only}freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`${LIBC:+-$LIBC} + *:FreeBSD:*:*) + echo ${UNAME_MACHINE}-unknown-freebsd`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` exit 0 ;; i*:CYGWIN*:*) echo ${UNAME_MACHINE}-pc-cygwin @@ -792,6 +821,9 @@ EOF i*:UWIN*:*) echo ${UNAME_MACHINE}-pc-uwin exit 0 ;; + amd64:CYGWIN*:*:*) + echo x86_64-unknown-cygwin + exit 0 ;; p*:CYGWIN*:*) echo powerpcle-unknown-cygwin exit 0 ;; @@ -799,22 +831,36 @@ EOF echo powerpcle-unknown-solaris2`echo ${UNAME_RELEASE}|sed -e 's/[^.]*//'` exit 0 ;; *:GNU:*:*) + # the GNU system echo `echo ${UNAME_MACHINE}|sed -e 's,[-/].*$,,'`-unknown-gnu`echo ${UNAME_RELEASE}|sed -e 's,/.*$,,'` exit 0 ;; + *:GNU/*:*:*) + # other systems with GNU libc and userland + echo ${UNAME_MACHINE}-unknown-`echo ${UNAME_SYSTEM} | sed 's,^[^/]*/,,' | tr '[A-Z]' '[a-z]'``echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'`-gnu + exit 0 ;; i*86:Minix:*:*) echo ${UNAME_MACHINE}-pc-minix exit 0 ;; arm*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; cris:Linux:*:*) - echo cris-axis-linux-gnu + echo cris-axis-linux-${LIBC} + exit 0 ;; + crisv32:Linux:*:*) + echo crisv32-axis-linux-${LIBC} + exit 0 ;; + frv:Linux:*:*) + echo frv-unknown-linux-${LIBC} exit 0 ;; ia64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit 0 ;; + m32r*:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; m68*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; mips:Linux:*:*) eval $set_cc_for_build @@ -833,7 +879,7 @@ EOF #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 + test x"${CPU}" != x && echo "${CPU}-unknown-linux-${LIBC}" && exit 0 ;; mips64:Linux:*:*) eval $set_cc_for_build @@ -852,13 +898,13 @@ EOF #endif EOF eval `$CC_FOR_BUILD -E $dummy.c 2>/dev/null | grep ^CPU=` - test x"${CPU}" != x && echo "${CPU}-unknown-linux-gnu" && exit 0 + test x"${CPU}" != x && echo "${CPU}-unknown-linux-${LIBC}" && exit 0 ;; ppc:Linux:*:*) - echo powerpc-unknown-linux-gnu + echo powerpc-unknown-linux-${LIBC} exit 0 ;; ppc64:Linux:*:*) - echo powerpc64-unknown-linux-gnu + echo powerpc64-unknown-linux-${LIBC} exit 0 ;; alpha:Linux:*:*) case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in @@ -871,34 +917,34 @@ EOF EV68*) UNAME_MACHINE=alphaev68 ;; esac objdump --private-headers /bin/sh | grep ld.so.1 >/dev/null - if test "$?" = 0 ; then LIBC="libc1" ; else LIBC="" ; fi - echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + if test "$?" = 0 ; then LIBC="gnulibc1" ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; parisc:Linux:*:* | hppa:Linux:*:*) # Look for CPU level case `grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2` in - PA7*) echo hppa1.1-unknown-linux-gnu ;; - PA8*) echo hppa2.0-unknown-linux-gnu ;; - *) echo hppa-unknown-linux-gnu ;; + PA7*) echo hppa1.1-unknown-linux-${LIBC} ;; + PA8*) echo hppa2.0-unknown-linux-${LIBC} ;; + *) echo hppa-unknown-linux-${LIBC} ;; esac exit 0 ;; parisc64:Linux:*:* | hppa64:Linux:*:*) - echo hppa64-unknown-linux-gnu + echo hppa64-unknown-linux-${LIBC} exit 0 ;; s390:Linux:*:* | s390x:Linux:*:*) echo ${UNAME_MACHINE}-ibm-linux exit 0 ;; sh64*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; sh*:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; sparc:Linux:*:* | sparc64:Linux:*:*) - echo ${UNAME_MACHINE}-unknown-linux-gnu + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} exit 0 ;; x86_64:Linux:*:*) - echo x86_64-unknown-linux-gnu + echo x86_64-unknown-linux-${LIBC} exit 0 ;; i*86:Linux:*:*) # The BFD linker knows what the default object file format is, so @@ -913,20 +959,21 @@ EOF p'` case "$ld_supported_targets" in elf32-i386) - TENTATIVE="${UNAME_MACHINE}-pc-linux-gnu" + TENTATIVE="${UNAME_MACHINE}-pc-linux-${LIBC}" ;; a.out-i386-linux) - echo "${UNAME_MACHINE}-pc-linux-gnuaout" + echo "${UNAME_MACHINE}-pc-linux-${LIBC}aout" exit 0 ;; coff-i386) - echo "${UNAME_MACHINE}-pc-linux-gnucoff" + echo "${UNAME_MACHINE}-pc-linux-${LIBC}coff" exit 0 ;; "") # Either a pre-BFD a.out linker (linux-gnuoldld) or # one that does not give us useful --help. - echo "${UNAME_MACHINE}-pc-linux-gnuoldld" + echo "${UNAME_MACHINE}-pc-linux-${LIBC}oldld" exit 0 ;; esac + if [ "`echo $LIBC | sed -e 's:uclibc::'`" != "$LIBC" ] ; then echo "$TENTATIVE" && exit 0 ; fi # Determine whether the default compiler is a.out or elf eval $set_cc_for_build sed 's/^ //' << EOF >$dummy.c @@ -981,6 +1028,9 @@ EOF i*86:atheos:*:*) echo ${UNAME_MACHINE}-unknown-atheos exit 0 ;; + i*86:syllable:*:*) + echo ${UNAME_MACHINE}-pc-syllable + exit 0 ;; i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.0*:*) echo i386-unknown-lynxos${UNAME_RELEASE} exit 0 ;; @@ -1050,9 +1100,9 @@ EOF M680?0:D-NIX:5.3:*) echo m68k-diab-dnix exit 0 ;; - M68*:*:R3V[567]*:*) + M68*:*:R3V[5678]*:*) test -r /sysV68 && echo 'm68k-motorola-sysv' && exit 0 ;; - 3[34]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0) + 3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0) OS_REL='' test -r /etc/.relid \ && OS_REL=.`sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid` @@ -1150,9 +1200,10 @@ EOF echo ${UNAME_MACHINE}-apple-rhapsody${UNAME_RELEASE} exit 0 ;; *:Darwin:*:*) - case `uname -p` in + UNAME_PROCESSOR=`uname -p` || UNAME_PROCESSOR=unknown + case $UNAME_PROCESSOR in *86) UNAME_PROCESSOR=i686 ;; - powerpc) UNAME_PROCESSOR=powerpc ;; + unknown) UNAME_PROCESSOR=powerpc ;; esac echo ${UNAME_PROCESSOR}-apple-darwin${UNAME_RELEASE} exit 0 ;; @@ -1167,7 +1218,10 @@ EOF *:QNX:*:4*) echo i386-pc-qnx exit 0 ;; - NSR-[DGKLNPTVWY]:NONSTOP_KERNEL:*:*) + NSE-?:NONSTOP_KERNEL:*:*) + echo nse-tandem-nsk${UNAME_RELEASE} + exit 0 ;; + NSR-?:NONSTOP_KERNEL:*:*) echo nsr-tandem-nsk${UNAME_RELEASE} exit 0 ;; *:NonStop-UX:*:*) @@ -1211,6 +1265,19 @@ EOF SEI:*:*:SEIUX) echo mips-sei-seiux${UNAME_RELEASE} exit 0 ;; + *:DragonFly:*:*) + echo ${UNAME_MACHINE}-unknown-dragonfly`echo ${UNAME_RELEASE}|sed -e 's/[-(].*//'` + exit 0 ;; + *:*VMS:*:*) + UNAME_MACHINE=`(uname -p) 2>/dev/null` + case "${UNAME_MACHINE}" in + A*) echo alpha-dec-vms && exit 0 ;; + I*) echo ia64-dec-vms && exit 0 ;; + V*) echo vax-dec-vms && exit 0 ;; + esac ;; + *:XENIX:*:SysV) + echo i386-pc-xenix + exit 0 ;; esac #echo '(No uname command or uname output not recognized.)' 1>&2 @@ -1370,7 +1437,9 @@ This script, last modified $timestamp, has failed to recognize the operating system you are using. It is advised that you download the most up to date version of the config scripts from - ftp://ftp.gnu.org/pub/gnu/config/ + http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.guess +and + http://savannah.gnu.org/cgi-bin/viewcvs/*checkout*/config/config/config.sub If the version you run ($0) is already up to date, please send the following data and any information you think might be diff --git a/libpcre/config.in b/libpcre/config.in index 62a594e5f3..fc17ddd13c 100644 --- a/libpcre/config.in +++ b/libpcre/config.in @@ -11,11 +11,25 @@ memmove(), change the definition of HAVE_BCOPY instead of HAVE_MEMMOVE. If your system has neither bcopy() nor memmove(), leave them both as 0; an emulation function will be used. */ -/* Define to empty if the keyword does not work. */ +/* If you are compiling for a system that uses EBCDIC instead of ASCII +character codes, define this macro as 1. On systems that can use "configure", +this can be done via --enable-ebcdic. */ + +#ifndef EBCDIC +#define EBCDIC 0 +#endif + +/* If you are compiling for a system that needs some magic to be inserted +before the definition of an exported function, define this macro to contain the +relevant magic. It apears at the start of every exported function. */ + +#define EXPORT + +/* Define to empty if the "const" keyword does not work. */ #undef const -/* Define to `unsigned' if doesn't define size_t. */ +/* Define to "unsigned" if doesn't define size_t. */ #undef size_t @@ -67,4 +81,27 @@ default default. */ #define MATCH_LIMIT 10000000 #endif +/* When calling PCRE via the POSIX interface, additional working storage is +required for holding the pointers to capturing substrings because PCRE requires +three integers per substring, whereas the POSIX interface provides only two. If +the number of expected substrings is small, the wrapper function uses space on +the stack, because this is faster than using malloc() for each call. The +threshold above which the stack is no longer use is defined by POSIX_MALLOC_ +THRESHOLD. On Unix systems, "configure" can be used to override this default. +*/ + +#ifndef POSIX_MALLOC_THRESHOLD +#define POSIX_MALLOC_THRESHOLD 10 +#endif + +/* PCRE uses recursive function calls to handle backtracking while matching. +This can sometimes be a problem on systems that have stacks of limited size. +Define NO_RECURSE to get a version that doesn't use recursion in the match() +function; instead it creates its own stack by steam using pcre_recurse_malloc +to get memory. For more detail, see comments and other stuff just above the +match() function. On Unix systems, "configure" can be used to set this in the +Makefile (use --disable-stack-for-recursion). */ + +/* #define NO_RECURSE */ + /* End */ diff --git a/libpcre/config.sub b/libpcre/config.sub index 1f31816b97..b93d317194 100755 --- a/libpcre/config.sub +++ b/libpcre/config.sub @@ -1,9 +1,9 @@ #! /bin/sh # Configuration validation subroutine script. # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, -# 2000, 2001, 2002, 2003 Free Software Foundation, Inc. +# 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. -timestamp='2003-08-18' +timestamp='2005-02-10' # This file is (in principle) common to ALL GNU software. # The presence of a machine in this file suggests that SOME GNU software @@ -70,7 +70,7 @@ Report bugs and patches to ." version="\ GNU config.sub ($timestamp) -Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001 +Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc. This is free software; see the source for copying conditions. There is NO @@ -118,7 +118,8 @@ esac # Here we must recognize all the valid KERNEL-OS combinations. maybe_os=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\2/'` case $maybe_os in - nto-qnx* | linux-gnu* | linux-dietlibc | kfreebsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) + nto-qnx* | linux-gnu* | linux-dietlibc | linux-uclibc* | uclinux-uclibc* | uclinux-gnu* | \ + kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* | storm-chaos* | os2-emx* | rtmk-nova*) os=-$maybe_os basic_machine=`echo $1 | sed 's/^\(.*\)-\([^-]*-[^-]*\)$/\1/'` ;; @@ -144,7 +145,7 @@ case $os in -convergent* | -ncr* | -news | -32* | -3600* | -3100* | -hitachi* |\ -c[123]* | -convex* | -sun | -crds | -omron* | -dg | -ultra | -tti* | \ -harris | -dolphin | -highlevel | -gould | -cbm | -ns | -masscomp | \ - -apple | -axis) + -apple | -axis | -knuth | -cray) os= basic_machine=$1 ;; @@ -236,7 +237,7 @@ case $basic_machine in | h8300 | h8500 | hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \ | i370 | i860 | i960 | ia64 \ | ip2k | iq2000 \ - | m32r | m68000 | m68k | m88k | mcore \ + | m32r | m32rle | m68000 | m68k | m88k | maxq | mcore \ | mips | mipsbe | mipseb | mipsel | mipsle \ | mips16 \ | mips64 | mips64el \ @@ -252,6 +253,7 @@ case $basic_machine in | mipsisa64sb1 | mipsisa64sb1el \ | mipsisa64sr71k | mipsisa64sr71kel \ | mipstx39 | mipstx39el \ + | mips64r5900 | mips64r5900el \ | mn10200 | mn10300 \ | msp430 \ | ns16k | ns32k \ @@ -261,12 +263,12 @@ case $basic_machine in | pyramid \ | sh | sh[1234] | sh[23]e | sh[34]eb | shbe | shle | sh[1234]le | sh3ele \ | sh64 | sh64le \ - | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv9 | sparcv9b \ + | sparc | sparc64 | sparc86x | sparclet | sparclite | sparcv8 | sparcv9 | sparcv9b \ | strongarm \ - | tahoe | thumb | tic4x | tic80 | tron \ + | tahoe | dvp | thumb | tic4x | tic80 | tron \ | v850 | v850e \ | we32k \ - | x86 | xscale | xstormy16 | xtensa \ + | x86 | xscale | xscalee[bl] | xstormy16 | xtensa \ | z8k) basic_machine=$basic_machine-unknown ;; @@ -299,7 +301,7 @@ case $basic_machine in | avr-* \ | bs2000-* \ | c[123]* | c30-* | [cjt]90-* | c4x-* | c54x-* | c55x-* | c6x-* \ - | clipper-* | cydra-* \ + | clipper-* | craynv-* | cydra-* \ | d10v-* | d30v-* | dlx-* \ | elxsi-* \ | f30[01]-* | f700-* | fr30-* | frv-* | fx80-* \ @@ -307,9 +309,9 @@ case $basic_machine in | hppa-* | hppa1.[01]-* | hppa2.0-* | hppa2.0[nw]-* | hppa64-* \ | i*86-* | i860-* | i960-* | ia64-* \ | ip2k-* | iq2000-* \ - | m32r-* \ + | m32r-* | m32rle-* \ | m68000-* | m680[012346]0-* | m68360-* | m683?2-* | m68k-* \ - | m88110-* | m88k-* | mcore-* \ + | m88110-* | m88k-* | maxq-* | mcore-* \ | mips-* | mipsbe-* | mipseb-* | mipsel-* | mipsle-* \ | mips16-* \ | mips64-* | mips64el-* \ @@ -325,8 +327,10 @@ case $basic_machine in | mipsisa64sb1-* | mipsisa64sb1el-* \ | mipsisa64sr71k-* | mipsisa64sr71kel-* \ | mipstx39-* | mipstx39el-* \ + | mips64r5900-* | mips64r5900el-* \ + | mmix-* \ | msp430-* \ - | none-* | np1-* | nv1-* | ns16k-* | ns32k-* \ + | none-* | np1-* | ns16k-* | ns32k-* \ | orion-* \ | pdp10-* | pdp11-* | pj-* | pjl-* | pn-* | power-* \ | powerpc-* | powerpc64-* | powerpc64le-* | powerpcle-* | ppcbe-* \ @@ -335,14 +339,14 @@ case $basic_machine in | sh-* | sh[1234]-* | sh[23]e-* | sh[34]eb-* | shbe-* \ | shle-* | sh[1234]le-* | sh3ele-* | sh64-* | sh64le-* \ | sparc-* | sparc64-* | sparc86x-* | sparclet-* | sparclite-* \ - | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ + | sparcv8-* | sparcv9-* | sparcv9b-* | strongarm-* | sv1-* | sx?-* \ | tahoe-* | thumb-* \ | tic30-* | tic4x-* | tic54x-* | tic55x-* | tic6x-* | tic80-* \ | tron-* \ | v850-* | v850e-* | vax-* \ | we32k-* \ - | x86-* | x86_64-* | xps100-* | xscale-* | xstormy16-* \ - | xtensa-* \ + | x86-* | x86_64-* | xps100-* | xscale-* | xscalee[bl]-* \ + | xstormy16-* | xtensa-* \ | ymp-* \ | z8k-*) ;; @@ -362,6 +366,9 @@ case $basic_machine in basic_machine=a29k-amd os=-udi ;; + abacus) + basic_machine=abacus-unknown + ;; adobe68k) basic_machine=m68010-adobe os=-scout @@ -379,6 +386,9 @@ case $basic_machine in amd64) basic_machine=x86_64-pc ;; + amd64-*) + basic_machine=x86_64-`echo $basic_machine | sed 's/^[^-]*-//'` + ;; amdahl) basic_machine=580-amdahl os=-sysv @@ -438,12 +448,27 @@ case $basic_machine in basic_machine=j90-cray os=-unicos ;; + craynv) + basic_machine=craynv-cray + os=-unicosmp + ;; + cr16c) + basic_machine=cr16c-unknown + os=-elf + ;; crds | unos) basic_machine=m68k-crds ;; + crisv32 | crisv32-* | etraxfs*) + basic_machine=crisv32-axis + ;; cris | cris-* | etrax*) basic_machine=cris-axis ;; + crx) + basic_machine=crx-unknown + os=-elf + ;; da30 | da30-*) basic_machine=m68k-da30 ;; @@ -466,6 +491,10 @@ case $basic_machine in basic_machine=m88k-motorola os=-sysv3 ;; + djgpp) + basic_machine=i586-pc + os=-msdosdjgpp + ;; dpx20 | dpx20-*) basic_machine=rs6000-bull os=-bosx @@ -638,16 +667,30 @@ case $basic_machine in basic_machine=m68k-atari os=-mint ;; + mipsEE* | ee | ps2) + basic_machine=mips64r5900el-scei + case $os in + -linux*) + ;; + *) + os=-elf + ;; + esac + ;; + iop) + basic_machine=mipsel-scei + os=-irx + ;; + dvp) + basic_machine=dvp-scei + os=-elf + ;; mips3*-*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'` ;; mips3*) basic_machine=`echo $basic_machine | sed -e 's/mips3/mips64/'`-unknown ;; - mmix*) - basic_machine=mmix-knuth - os=-mmixware - ;; monitor) basic_machine=m68k-rom68k os=-coff @@ -728,10 +771,6 @@ case $basic_machine in np1) basic_machine=np1-gould ;; - nv1) - basic_machine=nv1-cray - os=-unicosmp - ;; nsr-tandem) basic_machine=nsr-tandem ;; @@ -743,6 +782,10 @@ case $basic_machine in basic_machine=or32-unknown os=-coff ;; + os400) + basic_machine=powerpc-ibm + os=-os400 + ;; OSE68000 | ose68000) basic_machine=m68000-ericsson os=-ose @@ -963,6 +1006,10 @@ case $basic_machine in tower | tower-32) basic_machine=m68k-ncr ;; + tpf) + basic_machine=s390x-ibm + os=-tpf + ;; udi29k) basic_machine=a29k-amd os=-udi @@ -1006,6 +1053,10 @@ case $basic_machine in basic_machine=hppa1.1-winbond os=-proelf ;; + xbox) + basic_machine=i686-pc + os=-mingw32 + ;; xps | xps100) basic_machine=xps100-honeywell ;; @@ -1036,6 +1087,9 @@ case $basic_machine in romp) basic_machine=romp-ibm ;; + mmix) + basic_machine=mmix-knuth + ;; rs6000) basic_machine=rs6000-ibm ;; @@ -1058,7 +1112,7 @@ case $basic_machine in sh64) basic_machine=sh64-unknown ;; - sparc | sparcv9 | sparcv9b) + sparc | sparcv8 | sparcv9 | sparcv9b) basic_machine=sparc-sun ;; cydra) @@ -1131,19 +1185,20 @@ case $os in | -aos* \ | -nindy* | -vxsim* | -vxworks* | -ebmon* | -hms* | -mvs* \ | -clix* | -riscos* | -uniplus* | -iris* | -rtu* | -xenix* \ - | -hiux* | -386bsd* | -netbsd* | -openbsd* | -kfreebsd* | -freebsd* | -riscix* \ - | -lynxos* | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ + | -hiux* | -386bsd* | -knetbsd* | -mirbsd* | -netbsd* | -openbsd* \ + | -ekkobsd* | -kfreebsd* | -freebsd* | -riscix* | -lynxos* \ + | -bosx* | -nextstep* | -cxux* | -aout* | -elf* | -oabi* \ | -ptx* | -coff* | -ecoff* | -winnt* | -domain* | -vsta* \ | -udi* | -eabi* | -lites* | -ieee* | -go32* | -aux* \ | -chorusos* | -chorusrdb* \ | -cygwin* | -pe* | -psos* | -moss* | -proelf* | -rtems* \ - | -mingw32* | -linux-gnu* | -uxpv* | -beos* | -mpeix* | -udk* \ + | -mingw32* | -linux-gnu* | -linux-uclibc* | -uxpv* | -beos* | -mpeix* | -udk* \ | -interix* | -uwin* | -mks* | -rhapsody* | -darwin* | -opened* \ | -openstep* | -oskit* | -conix* | -pw32* | -nonstopux* \ | -storm-chaos* | -tops10* | -tenex* | -tops20* | -its* \ | -os2* | -vos* | -palmos* | -uclinux* | -nucleus* \ | -morphos* | -superux* | -rtmk* | -rtmk-nova* | -windiss* \ - | -powermax* | -dnix* | -nx6 | -nx7 | -sei*) + | -powermax* | -dnix* | -nx6 | -nx7 | -sei* | -dragonfly* | -irx*) # Remember, each alternative MUST END IN *, to match a version number. ;; -qnx*) @@ -1182,6 +1237,9 @@ case $os in -opened*) os=-openedition ;; + -os400*) + os=-os400 + ;; -wince*) os=-wince ;; @@ -1203,6 +1261,9 @@ case $os in -atheos*) os=-atheos ;; + -syllable*) + os=-syllable + ;; -386bsd) os=-bsd ;; @@ -1225,6 +1286,9 @@ case $os in -sinix*) os=-sysv4 ;; + -tpf*) + os=-tpf + ;; -triton*) os=-sysv3 ;; @@ -1261,6 +1325,9 @@ case $os in -kaos*) os=-kaos ;; + -zvmoe) + os=-zvmoe + ;; -none) ;; *) @@ -1341,6 +1408,9 @@ case $basic_machine in *-ibm) os=-aix ;; + *-knuth) + os=-mmixware + ;; *-wec) os=-proelf ;; @@ -1473,9 +1543,15 @@ case $basic_machine in -mvs* | -opened*) vendor=ibm ;; + -os400*) + vendor=ibm + ;; -ptx*) vendor=sequent ;; + -tpf*) + vendor=ibm + ;; -vxsim* | -vxworks* | -windiss*) vendor=wrs ;; diff --git a/libpcre/configure b/libpcre/configure index 7868034613..fc5fa4bde4 100755 --- a/libpcre/configure +++ b/libpcre/configure @@ -309,7 +309,7 @@ ac_includes_default="\ # include #endif" -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct_RANLIB CPP EGREP BUILD_EXEEXT BUILD_OBJEXT CC_FOR_BUILD CFLAGS_FOR_BUILD HAVE_MEMMOVE HAVE_STRERROR LINK_SIZE MATCH_LIMIT NEWLINE PCRE_MAJOR PCRE_MINOR PCRE_DATE PCRE_VERSION PCRE_LIB_VERSION PCRE_POSIXLIB_VERSION POSIX_MALLOC_THRESHOLD UTF8 POSIX_OBJ POSIX_LOBJ POSIX_LIB LIBOBJS LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT RANLIB ac_ct_RANLIB CPP EGREP pcre_have_long_long pcre_have_ulong_long BUILD_EXEEXT BUILD_OBJEXT CC_FOR_BUILD CFLAGS_FOR_BUILD EBCDIC HAVE_MEMMOVE HAVE_STRERROR LINK_SIZE MATCH_LIMIT NEWLINE NO_RECURSE PCRE_MAJOR PCRE_MINOR PCRE_DATE PCRE_VERSION PCRE_LIB_VERSION PCRE_POSIXLIB_VERSION POSIX_MALLOC_THRESHOLD UCP UTF8 POSIX_OBJ POSIX_LOBJ POSIX_LIB LIBOBJS LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. @@ -841,8 +841,11 @@ Optional Features: --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) --enable-FEATURE[=ARG] include FEATURE [ARG=yes] --enable-utf8 enable UTF8 support + --enable-unicode-properties enable Unicode properties support --enable-newline-is-cr use CR as the newline character --enable-newline-is-lf use LF as the newline character + --enable-ebcdic assume EBCDIC coding rather than ASCII + --disable-stack-for-recursion disable use of stack recursion when matching Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1306,9 +1309,9 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -PCRE_MAJOR=4 +PCRE_MAJOR=6 PCRE_MINOR=3 -PCRE_DATE=21-May-2003 +PCRE_DATE=15-Aug-2005 PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR} @@ -2249,6 +2252,7 @@ ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' ac_compiler_gnu=$ac_cv_c_compiler_gnu + if test -n "$ac_tool_prefix"; then # Extract the first word of "${ac_tool_prefix}ranlib", so it can be a program name with args. set dummy ${ac_tool_prefix}ranlib; ac_word=$2 @@ -2333,6 +2337,7 @@ fi CC_FOR_BUILD=${CC_FOR_BUILD:-'$(CC)'} CFLAGS_FOR_BUILD=${CFLAGS_FOR_BUILD:-'$(CFLAGS)'} +CPPFLAGS_FOR_BUILD=${CFLAGS_FOR_BUILD:-'$(CPPFLAGS)'} BUILD_EXEEXT=${BUILD_EXEEXT:-'$(EXEEXT)'} BUILD_OBJEXT=${BUILD_OBJEXT:-'$(OBJEXT)'} @@ -3152,11 +3157,150 @@ _ACEOF fi +echo "$as_me:$LINENO: checking for long long" >&5 +echo $ECHO_N "checking for long long... $ECHO_C" >&6 +if test "${ac_cv_type_long_long+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((long long *) 0) + return 0; +if (sizeof (long long)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_long_long=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_long_long=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_long_long" >&5 +echo "${ECHO_T}$ac_cv_type_long_long" >&6 +if test $ac_cv_type_long_long = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_LONG_LONG 1 +_ACEOF + +pcre_have_long_long="1" +else + pcre_have_long_long="0" +fi + +echo "$as_me:$LINENO: checking for unsigned long long" >&5 +echo $ECHO_N "checking for unsigned long long... $ECHO_C" >&6 +if test "${ac_cv_type_unsigned_long_long+set}" = set; then + echo $ECHO_N "(cached) $ECHO_C" >&6 +else + cat >conftest.$ac_ext <<_ACEOF +/* confdefs.h. */ +_ACEOF +cat confdefs.h >>conftest.$ac_ext +cat >>conftest.$ac_ext <<_ACEOF +/* end confdefs.h. */ +$ac_includes_default +int +main () +{ +if ((unsigned long long *) 0) + return 0; +if (sizeof (unsigned long long)) + return 0; + ; + return 0; +} +_ACEOF +rm -f conftest.$ac_objext +if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 + (eval $ac_compile) 2>conftest.er1 + ac_status=$? + grep -v '^ *+' conftest.er1 >conftest.err + rm -f conftest.er1 + cat conftest.err >&5 + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); } && + { ac_try='test -z "$ac_c_werror_flag" + || test ! -s conftest.err' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; } && + { ac_try='test -s conftest.$ac_objext' + { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 + (eval $ac_try) 2>&5 + ac_status=$? + echo "$as_me:$LINENO: \$? = $ac_status" >&5 + (exit $ac_status); }; }; then + ac_cv_type_unsigned_long_long=yes +else + echo "$as_me: failed program was:" >&5 +sed 's/^/| /' conftest.$ac_ext >&5 + +ac_cv_type_unsigned_long_long=no +fi +rm -f conftest.err conftest.$ac_objext conftest.$ac_ext +fi +echo "$as_me:$LINENO: result: $ac_cv_type_unsigned_long_long" >&5 +echo "${ECHO_T}$ac_cv_type_unsigned_long_long" >&6 +if test $ac_cv_type_unsigned_long_long = yes; then + +cat >>confdefs.h <<_ACEOF +#define HAVE_UNSIGNED_LONG_LONG 1 +_ACEOF + +pcre_have_ulong_long="1" +else + pcre_have_ulong_long="0" +fi + + + -for ac_func in bcopy memmove strerror + + + +for ac_func in bcopy memmove strerror strtoq strtoll do as_ac_var=`echo "ac_cv_func_$ac_func" | $as_tr_sh` echo "$as_me:$LINENO: checking for $ac_func" >&5 @@ -3269,6 +3413,16 @@ fi fi; +# Check whether --enable-unicode-properties or --disable-unicode-properties was given. +if test "${enable_unicode_properties+set}" = set; then + enableval="$enable_unicode_properties" + if test "$enableval" = "yes"; then + UCP=-DSUPPORT_UCP +fi + +fi; + + # Check whether --enable-newline-is-cr or --disable-newline-is-cr was given. if test "${enable_newline_is_cr+set}" = set; then enableval="$enable_newline_is_cr" @@ -3289,6 +3443,26 @@ fi fi; +# Check whether --enable-ebcdic or --disable-ebcdic was given. +if test "${enable_ebcdic+set}" = set; then + enableval="$enable_ebcdic" + if test "$enableval" == "yes"; then + EBCDIC=-DEBCDIC=1 +fi + +fi; + + +# Check whether --enable-stack-for-recursion or --disable-stack-for-recursion was given. +if test "${enable_stack_for_recursion+set}" = set; then + enableval="$enable_stack_for_recursion" + if test "$enableval" = "no"; then + NO_RECURSE=-DNO_RECURSE +fi + +fi; + + # Check whether --with-posix-malloc-threshold or --without-posix-malloc-threshold was given. @@ -3317,6 +3491,13 @@ if test "${with_match_limit+set}" = set; then fi; +if test "$UCP" != "" ; then + UTF8=-DSUPPORT_UTF8 +fi + + + + @@ -3341,14 +3522,14 @@ fi; if test "x$enable_shared" = "xno" ; then - cat >>confdefs.h <<\_ACEOF + +cat >>confdefs.h <<\_ACEOF #define PCRE_STATIC 1 _ACEOF fi - ac_config_files="$ac_config_files Makefile pcre.h:pcre.in pcre-config:pcre-config.in" - ac_config_commands="$ac_config_commands default" + ac_config_files="$ac_config_files Makefile pcre.h:pcre.in" cat >confcache <<\_ACEOF # This file is a shell script that caches the results of configure # tests run on this system so they can be shared between configure @@ -3766,9 +3947,6 @@ $config_files Configuration headers: $config_headers -Configuration commands: -$config_commands - Report bugs to ." _ACEOF @@ -3878,8 +4056,6 @@ do # Handling of arguments. "Makefile" ) CONFIG_FILES="$CONFIG_FILES Makefile" ;; "pcre.h" ) CONFIG_FILES="$CONFIG_FILES pcre.h:pcre.in" ;; - "pcre-config" ) CONFIG_FILES="$CONFIG_FILES pcre-config:pcre-config.in" ;; - "default" ) CONFIG_COMMANDS="$CONFIG_COMMANDS default" ;; "config.h" ) CONFIG_HEADERS="$CONFIG_HEADERS config.h:config.in" ;; *) { { echo "$as_me:$LINENO: error: invalid argument: $ac_config_target" >&5 echo "$as_me: error: invalid argument: $ac_config_target" >&2;} @@ -3894,7 +4070,6 @@ done if $ac_need_defaults; then test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files test "${CONFIG_HEADERS+set}" = set || CONFIG_HEADERS=$config_headers - test "${CONFIG_COMMANDS+set}" = set || CONFIG_COMMANDS=$config_commands fi # Have a temporary directory for convenience. Make it in the build tree @@ -3977,15 +4152,19 @@ s,@RANLIB@,$RANLIB,;t t s,@ac_ct_RANLIB@,$ac_ct_RANLIB,;t t s,@CPP@,$CPP,;t t s,@EGREP@,$EGREP,;t t +s,@pcre_have_long_long@,$pcre_have_long_long,;t t +s,@pcre_have_ulong_long@,$pcre_have_ulong_long,;t t s,@BUILD_EXEEXT@,$BUILD_EXEEXT,;t t s,@BUILD_OBJEXT@,$BUILD_OBJEXT,;t t s,@CC_FOR_BUILD@,$CC_FOR_BUILD,;t t s,@CFLAGS_FOR_BUILD@,$CFLAGS_FOR_BUILD,;t t +s,@EBCDIC@,$EBCDIC,;t t s,@HAVE_MEMMOVE@,$HAVE_MEMMOVE,;t t s,@HAVE_STRERROR@,$HAVE_STRERROR,;t t s,@LINK_SIZE@,$LINK_SIZE,;t t s,@MATCH_LIMIT@,$MATCH_LIMIT,;t t s,@NEWLINE@,$NEWLINE,;t t +s,@NO_RECURSE@,$NO_RECURSE,;t t s,@PCRE_MAJOR@,$PCRE_MAJOR,;t t s,@PCRE_MINOR@,$PCRE_MINOR,;t t s,@PCRE_DATE@,$PCRE_DATE,;t t @@ -3993,6 +4172,7 @@ s,@PCRE_VERSION@,$PCRE_VERSION,;t t s,@PCRE_LIB_VERSION@,$PCRE_LIB_VERSION,;t t s,@PCRE_POSIXLIB_VERSION@,$PCRE_POSIXLIB_VERSION,;t t s,@POSIX_MALLOC_THRESHOLD@,$POSIX_MALLOC_THRESHOLD,;t t +s,@UCP@,$UCP,;t t s,@UTF8@,$UTF8,;t t s,@POSIX_OBJ@,$POSIX_OBJ,;t t s,@POSIX_LOBJ@,$POSIX_LOBJ,;t t @@ -4455,124 +4635,6 @@ echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} fi done _ACEOF -cat >>$CONFIG_STATUS <<\_ACEOF - -# -# CONFIG_COMMANDS section. -# -for ac_file in : $CONFIG_COMMANDS; do test "x$ac_file" = x: && continue - ac_dest=`echo "$ac_file" | sed 's,:.*,,'` - ac_source=`echo "$ac_file" | sed 's,[^:]*:,,'` - ac_dir=`(dirname "$ac_dest") 2>/dev/null || -$as_expr X"$ac_dest" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$ac_dest" : 'X\(//\)[^/]' \| \ - X"$ac_dest" : 'X\(//\)$' \| \ - X"$ac_dest" : 'X\(/\)' \| \ - . : '\(.\)' 2>/dev/null || -echo X"$ac_dest" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } - /^X\(\/\/\)[^/].*/{ s//\1/; q; } - /^X\(\/\/\)$/{ s//\1/; q; } - /^X\(\/\).*/{ s//\1/; q; } - s/.*/./; q'` - { if $as_mkdir_p; then - mkdir -p "$ac_dir" - else - as_dir="$ac_dir" - as_dirs= - while test ! -d "$as_dir"; do - as_dirs="$as_dir $as_dirs" - as_dir=`(dirname "$as_dir") 2>/dev/null || -$as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ - X"$as_dir" : 'X\(//\)[^/]' \| \ - X"$as_dir" : 'X\(//\)$' \| \ - X"$as_dir" : 'X\(/\)' \| \ - . : '\(.\)' 2>/dev/null || -echo X"$as_dir" | - sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/; q; } - /^X\(\/\/\)[^/].*/{ s//\1/; q; } - /^X\(\/\/\)$/{ s//\1/; q; } - /^X\(\/\).*/{ s//\1/; q; } - s/.*/./; q'` - done - test ! -n "$as_dirs" || mkdir $as_dirs - fi || { { echo "$as_me:$LINENO: error: cannot create directory \"$ac_dir\"" >&5 -echo "$as_me: error: cannot create directory \"$ac_dir\"" >&2;} - { (exit 1); exit 1; }; }; } - - ac_builddir=. - -if test "$ac_dir" != .; then - ac_dir_suffix=/`echo "$ac_dir" | sed 's,^\.[\\/],,'` - # A "../" for each directory in $ac_dir_suffix. - ac_top_builddir=`echo "$ac_dir_suffix" | sed 's,/[^\\/]*,../,g'` -else - ac_dir_suffix= ac_top_builddir= -fi - -case $srcdir in - .) # No --srcdir option. We are building in place. - ac_srcdir=. - if test -z "$ac_top_builddir"; then - ac_top_srcdir=. - else - ac_top_srcdir=`echo $ac_top_builddir | sed 's,/$,,'` - fi ;; - [\\/]* | ?:[\\/]* ) # Absolute path. - ac_srcdir=$srcdir$ac_dir_suffix; - ac_top_srcdir=$srcdir ;; - *) # Relative path. - ac_srcdir=$ac_top_builddir$srcdir$ac_dir_suffix - ac_top_srcdir=$ac_top_builddir$srcdir ;; -esac - -# Do not use `cd foo && pwd` to compute absolute paths, because -# the directories may not exist. -case `pwd` in -.) ac_abs_builddir="$ac_dir";; -*) - case "$ac_dir" in - .) ac_abs_builddir=`pwd`;; - [\\/]* | ?:[\\/]* ) ac_abs_builddir="$ac_dir";; - *) ac_abs_builddir=`pwd`/"$ac_dir";; - esac;; -esac -case $ac_abs_builddir in -.) ac_abs_top_builddir=${ac_top_builddir}.;; -*) - case ${ac_top_builddir}. in - .) ac_abs_top_builddir=$ac_abs_builddir;; - [\\/]* | ?:[\\/]* ) ac_abs_top_builddir=${ac_top_builddir}.;; - *) ac_abs_top_builddir=$ac_abs_builddir/${ac_top_builddir}.;; - esac;; -esac -case $ac_abs_builddir in -.) ac_abs_srcdir=$ac_srcdir;; -*) - case $ac_srcdir in - .) ac_abs_srcdir=$ac_abs_builddir;; - [\\/]* | ?:[\\/]* ) ac_abs_srcdir=$ac_srcdir;; - *) ac_abs_srcdir=$ac_abs_builddir/$ac_srcdir;; - esac;; -esac -case $ac_abs_builddir in -.) ac_abs_top_srcdir=$ac_top_srcdir;; -*) - case $ac_top_srcdir in - .) ac_abs_top_srcdir=$ac_abs_builddir;; - [\\/]* | ?:[\\/]* ) ac_abs_top_srcdir=$ac_top_srcdir;; - *) ac_abs_top_srcdir=$ac_abs_builddir/$ac_top_srcdir;; - esac;; -esac - - - { echo "$as_me:$LINENO: executing $ac_dest commands" >&5 -echo "$as_me: executing $ac_dest commands" >&6;} - case $ac_dest in - default ) chmod a+x pcre-config ;; - esac -done -_ACEOF cat >>$CONFIG_STATUS <<\_ACEOF diff --git a/libpcre/configure.ac b/libpcre/configure.ac index 33728d6de7..78035c8d70 100644 --- a/libpcre/configure.ac +++ b/libpcre/configure.ac @@ -1,7 +1,13 @@ +dnl Process this file with autoconf to produce a configure script. + dnl This has been stripped down a bit by fyodor@insecure.org for dnl inclusion in Nmap (removed libtool and dynamic library stuff) -dnl Process this file with autoconf to produce a configure script. +dnl This configure.in file has been hacked around quite a lot as a result of +dnl patches that various people have sent to me (PH). Sometimes the information +dnl I get is contradictory. I've tried to put in comments that explain things, +dnl but in some cases the information is second-hand and I have no way of +dnl verifying it. I am not an autoconf or libtool expert! dnl This is required at the start; the name is the name of a file dnl it should be seeing, to verify it is in the same directory. @@ -23,9 +29,9 @@ dnl macro, and may be treated as octal constants. Stick to single dnl digits for minor numbers less than 10. There are unlikely to be dnl that many releases anyway. -PCRE_MAJOR=4 +PCRE_MAJOR=6 PCRE_MINOR=3 -PCRE_DATE=21-May-2003 +PCRE_DATE=15-Aug-2005 PCRE_VERSION=${PCRE_MAJOR}.${PCRE_MINOR} dnl Default values for miscellaneous macros @@ -41,6 +47,7 @@ PCRE_POSIXLIB_VERSION=0:0:0 dnl Checks for programs. AC_PROG_CC + AC_PROG_RANLIB dnl We need to find a compiler for compiling a program to run on the local host @@ -48,12 +55,13 @@ dnl while building. It needs to be different from CC when cross-compiling. dnl There is a macro called AC_PROG_CC_FOR_BUILD in the GNU archive for dnl figuring this out automatically. Unfortunately, it does not work with the dnl latest versions of autoconf. So for the moment, we just default to the -dnl same values as the "main" compiler. People who are corss-compiling will +dnl same values as the "main" compiler. People who are cross-compiling will dnl just have to adjust the Makefile by hand or set these values when they dnl run "configure". CC_FOR_BUILD=${CC_FOR_BUILD:-'$(CC)'} CFLAGS_FOR_BUILD=${CFLAGS_FOR_BUILD:-'$(CFLAGS)'} +CPPFLAGS_FOR_BUILD=${CFLAGS_FOR_BUILD:-'$(CPPFLAGS)'} BUILD_EXEEXT=${BUILD_EXEEXT:-'$(EXEEXT)'} BUILD_OBJEXT=${BUILD_OBJEXT:-'$(OBJEXT)'} @@ -67,9 +75,14 @@ dnl Checks for typedefs, structures, and compiler characteristics. AC_C_CONST AC_TYPE_SIZE_T +AC_CHECK_TYPES([long long], [pcre_have_long_long="1"], [pcre_have_long_long="0"]) +AC_CHECK_TYPES([unsigned long long], [pcre_have_ulong_long="1"], [pcre_have_ulong_long="0"]) +AC_SUBST(pcre_have_long_long) +AC_SUBST(pcre_have_ulong_long) + dnl Checks for library functions. -AC_CHECK_FUNCS(bcopy memmove strerror) +AC_CHECK_FUNCS(bcopy memmove strerror strtoq strtoll) dnl Handle --enable-utf8 @@ -80,6 +93,15 @@ if test "$enableval" = "yes"; then fi ) +dnl Handle --enable-unicode-properties + +AC_ARG_ENABLE(unicode-properties, +[ --enable-unicode-properties enable Unicode properties support], +if test "$enableval" = "yes"; then + UCP=-DSUPPORT_UCP +fi +) + dnl Handle --enable-newline-is-cr AC_ARG_ENABLE(newline-is-cr, @@ -98,6 +120,24 @@ if test "$enableval" = "yes"; then fi ) +dnl Handle --enable-ebcdic + +AC_ARG_ENABLE(ebcdic, +[ --enable-ebcdic assume EBCDIC coding rather than ASCII], +if test "$enableval" == "yes"; then + EBCDIC=-DEBCDIC=1 +fi +) + +dnl Handle --disable-stack-for-recursion + +AC_ARG_ENABLE(stack-for-recursion, +[ --disable-stack-for-recursion disable use of stack recursion when matching], +if test "$enableval" = "no"; then + NO_RECURSE=-DNO_RECURSE +fi +) + dnl There doesn't seem to be a straightforward way of having parameters dnl that set values, other than fudging the --with thing. So that's what dnl I've done. @@ -123,17 +163,25 @@ AC_ARG_WITH(match-limit, MATCH_LIMIT=-DMATCH_LIMIT=$withval ) +dnl Unicode character property support implies UTF-8 support + +if test "$UCP" != "" ; then + UTF8=-DSUPPORT_UTF8 +fi + dnl "Export" these variables AC_SUBST(BUILD_EXEEXT) AC_SUBST(BUILD_OBJEXT) AC_SUBST(CC_FOR_BUILD) AC_SUBST(CFLAGS_FOR_BUILD) +AC_SUBST(EBCDIC) AC_SUBST(HAVE_MEMMOVE) AC_SUBST(HAVE_STRERROR) AC_SUBST(LINK_SIZE) AC_SUBST(MATCH_LIMIT) AC_SUBST(NEWLINE) +AC_SUBST(NO_RECURSE) AC_SUBST(PCRE_MAJOR) AC_SUBST(PCRE_MINOR) AC_SUBST(PCRE_DATE) @@ -141,16 +189,16 @@ AC_SUBST(PCRE_VERSION) AC_SUBST(PCRE_LIB_VERSION) AC_SUBST(PCRE_POSIXLIB_VERSION) AC_SUBST(POSIX_MALLOC_THRESHOLD) +AC_SUBST(UCP) AC_SUBST(UTF8) - AC_SUBST(POSIX_OBJ) AC_SUBST(POSIX_LOBJ) AC_SUBST(POSIX_LIB) if test "x$enable_shared" = "xno" ; then - AC_DEFINE(PCRE_STATIC,1) + AC_DEFINE([PCRE_STATIC],[1],[to link statically]) fi dnl This must be last; it determines what files are written as well as config.h -AC_OUTPUT(Makefile pcre.h:pcre.in pcre-config:pcre-config.in,[chmod a+x pcre-config]) +AC_OUTPUT(Makefile pcre.h:pcre.in) diff --git a/libpcre/dftables.c b/libpcre/dftables.c index 9aa7b77e27..480753887e 100644 --- a/libpcre/dftables.c +++ b/libpcre/dftables.c @@ -2,117 +2,135 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* -PCRE is a library of functions to support regular expressions whose syntax +/* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel - - Copyright (c) 1997-2001 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- - -See the file Tech.Notes for some information on the internals. */ -/* This is a support program to generate the file chartables.c, containing -character tables of various kinds. They are built according to the default C -locale and used as the default tables by PCRE. Now that pcre_maketables is -a function visible to the outside world, we make use of its code from here in -order to be consistent. */ +/* This is a freestanding support program to generate a file containing default +character tables for PCRE. The tables are built according to the default C +locale. Now that pcre_maketables is a function visible to the outside world, we +make use of its code from here in order to be consistent. */ #include #include #include -#include "internal.h" +#include "pcre_internal.h" -#define DFTABLES /* maketables.c notices this */ -#include "maketables.c" +#define DFTABLES /* pcre_maketables.c notices this */ +#include "pcre_maketables.c" -int main(void) +int main(int argc, char **argv) { int i; +FILE *f; const unsigned char *tables = pcre_maketables(); +const unsigned char *base_of_tables = tables; + +if (argc != 2) + { + fprintf(stderr, "dftables: one filename argument is required\n"); + return 1; + } + +f = fopen(argv[1], "wb"); +if (f == NULL) + { + fprintf(stderr, "dftables: failed to open %s for writing\n", argv[1]); + return 1; + } -/* There are two printf() calls here, because gcc in pedantic mode complains +/* There are two fprintf() calls here, because gcc in pedantic mode complains about the very long string otherwise. */ -printf( +fprintf(f, "/*************************************************\n" "* Perl-Compatible Regular Expressions *\n" "*************************************************/\n\n" "/* This file is automatically written by the dftables auxiliary \n" "program. If you edit it by hand, you might like to edit the Makefile to \n" "prevent its ever being regenerated.\n\n"); -printf( - "This file is #included in the compilation of pcre.c to build the default\n" - "character tables which are used when no tables are passed to the compile\n" - "function. */\n\n" - "static unsigned char pcre_default_tables[] = {\n\n" +fprintf(f, + "This file contains the default tables for characters with codes less than\n" + "128 (ASCII characters). These tables are used when no external tables are\n" + "passed to PCRE. */\n\n" + "const unsigned char _pcre_default_tables[] = {\n\n" "/* This table is a lower casing table. */\n\n"); -printf(" "); +fprintf(f, " "); for (i = 0; i < 256; i++) { - if ((i & 7) == 0 && i != 0) printf("\n "); - printf("%3d", *tables++); - if (i != 255) printf(","); + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); } -printf(",\n\n"); +fprintf(f, ",\n\n"); -printf("/* This table is a case flipping table. */\n\n"); +fprintf(f, "/* This table is a case flipping table. */\n\n"); -printf(" "); +fprintf(f, " "); for (i = 0; i < 256; i++) { - if ((i & 7) == 0 && i != 0) printf("\n "); - printf("%3d", *tables++); - if (i != 255) printf(","); + if ((i & 7) == 0 && i != 0) fprintf(f, "\n "); + fprintf(f, "%3d", *tables++); + if (i != 255) fprintf(f, ","); } -printf(",\n\n"); +fprintf(f, ",\n\n"); -printf( +fprintf(f, "/* This table contains bit maps for various character classes.\n" "Each map is 32 bytes long and the bits run from the least\n" "significant end of each byte. The classes that have their own\n" "maps are: space, xdigit, digit, upper, lower, word, graph\n" "print, punct, and cntrl. Other classes are built from combinations. */\n\n"); -printf(" "); +fprintf(f, " "); for (i = 0; i < cbit_length; i++) { if ((i & 7) == 0 && i != 0) { - if ((i & 31) == 0) printf("\n"); - printf("\n "); + if ((i & 31) == 0) fprintf(f, "\n"); + fprintf(f, "\n "); } - printf("0x%02x", *tables++); - if (i != cbit_length - 1) printf(","); + fprintf(f, "0x%02x", *tables++); + if (i != cbit_length - 1) fprintf(f, ","); } -printf(",\n\n"); +fprintf(f, ",\n\n"); -printf( +fprintf(f, "/* This table identifies various classes of character by individual bits:\n" " 0x%02x white space character\n" " 0x%02x letter\n" @@ -123,29 +141,31 @@ printf( ctype_space, ctype_letter, ctype_digit, ctype_xdigit, ctype_word, ctype_meta); -printf(" "); +fprintf(f, " "); for (i = 0; i < 256; i++) { if ((i & 7) == 0 && i != 0) { - printf(" /* "); - if (isprint(i-8)) printf(" %c -", i-8); - else printf("%3d-", i-8); - if (isprint(i-1)) printf(" %c ", i-1); - else printf("%3d", i-1); - printf(" */\n "); + fprintf(f, " /* "); + if (isprint(i-8)) fprintf(f, " %c -", i-8); + else fprintf(f, "%3d-", i-8); + if (isprint(i-1)) fprintf(f, " %c ", i-1); + else fprintf(f, "%3d", i-1); + fprintf(f, " */\n "); } - printf("0x%02x", *tables++); - if (i != 255) printf(","); + fprintf(f, "0x%02x", *tables++); + if (i != 255) fprintf(f, ","); } -printf("};/* "); -if (isprint(i-8)) printf(" %c -", i-8); - else printf("%3d-", i-8); -if (isprint(i-1)) printf(" %c ", i-1); - else printf("%3d", i-1); -printf(" */\n\n/* End of chartables.c */\n"); +fprintf(f, "};/* "); +if (isprint(i-8)) fprintf(f, " %c -", i-8); + else fprintf(f, "%3d-", i-8); +if (isprint(i-1)) fprintf(f, " %c ", i-1); + else fprintf(f, "%3d", i-1); +fprintf(f, " */\n\n/* End of chartables.c */\n"); +fclose(f); +free((void *)base_of_tables); return 0; } diff --git a/libpcre/ltmain.sh b/libpcre/ltmain.sh deleted file mode 100644 index de848c62e4..0000000000 --- a/libpcre/ltmain.sh +++ /dev/null @@ -1,5069 +0,0 @@ -# ltmain.sh - Provide generalized library-building support services. -# NOTE: Changing this file will not affect anything until you rerun configure. -# -# Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 -# Free Software Foundation, Inc. -# Originally by Gordon Matzigkeit , 1996 -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; either version 2 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -# -# As a special exception to the GNU General Public License, if you -# distribute this file as part of a program that contains a -# configuration script generated by Autoconf, you may include it under -# the same distribution terms that you use for the rest of that program. - -# Check that we have a working $echo. -if test "X$1" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. - shift -elif test "X$1" = X--fallback-echo; then - # Avoid inline document here, it may be left over - : -elif test "X`($echo '\t') 2>/dev/null`" = 'X\t'; then - # Yippee, $echo works! - : -else - # Restart under the correct shell, and then maybe $echo will work. - exec $SHELL "$0" --no-reexec ${1+"$@"} -fi - -if test "X$1" = X--fallback-echo; then - # used as fallback echo - shift - cat <&2 - echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 - exit 1 -fi - -# Global variables. -mode=$default_mode -nonopt= -prev= -prevopt= -run= -show="$echo" -show_help= -execute_dlfiles= -lo2o="s/\\.lo\$/.${objext}/" -o2lo="s/\\.${objext}\$/.lo/" - -# Parse our command line options once, thoroughly. -while test $# -gt 0 -do - arg="$1" - shift - - case $arg in - -*=*) optarg=`$echo "X$arg" | $Xsed -e 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - # If the previous option needs an argument, assign it. - if test -n "$prev"; then - case $prev in - execute_dlfiles) - execute_dlfiles="$execute_dlfiles $arg" - ;; - *) - eval "$prev=\$arg" - ;; - esac - - prev= - prevopt= - continue - fi - - # Have we seen a non-optional argument yet? - case $arg in - --help) - show_help=yes - ;; - - --version) - echo "$PROGRAM (GNU $PACKAGE) $VERSION$TIMESTAMP" - exit 0 - ;; - - --config) - ${SED} -e '1,/^# ### BEGIN LIBTOOL CONFIG/d' -e '/^# ### END LIBTOOL CONFIG/,$d' $0 - exit 0 - ;; - - --debug) - echo "$progname: enabling shell trace mode" - set -x - ;; - - --dry-run | -n) - run=: - ;; - - --features) - echo "host: $host" - if test "$build_libtool_libs" = yes; then - echo "enable shared libraries" - else - echo "disable shared libraries" - fi - if test "$build_old_libs" = yes; then - echo "enable static libraries" - else - echo "disable static libraries" - fi - exit 0 - ;; - - --finish) mode="finish" ;; - - --mode) prevopt="--mode" prev=mode ;; - --mode=*) mode="$optarg" ;; - - --preserve-dup-deps) duplicate_deps="yes" ;; - - --quiet | --silent) - show=: - ;; - - -dlopen) - prevopt="-dlopen" - prev=execute_dlfiles - ;; - - -*) - $echo "$modename: unrecognized option \`$arg'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - - *) - nonopt="$arg" - break - ;; - esac -done - -if test -n "$prevopt"; then - $echo "$modename: option \`$prevopt' requires an argument" 1>&2 - $echo "$help" 1>&2 - exit 1 -fi - -# If this variable is set in any of the actions, the command in it -# will be execed at the end. This prevents here-documents from being -# left over by shells. -exec_cmd= - -if test -z "$show_help"; then - - # Infer the operation mode. - if test -z "$mode"; then - case $nonopt in - *cc | *++ | gcc* | *-gcc* | xlc*) - mode=link - for arg - do - case $arg in - -c) - mode=compile - break - ;; - esac - done - ;; - *db | *dbx | *strace | *truss) - mode=execute - ;; - *install*|cp|mv) - mode=install - ;; - *rm) - mode=uninstall - ;; - *) - # If we have no mode, but dlfiles were specified, then do execute mode. - test -n "$execute_dlfiles" && mode=execute - - # Just use the default operation mode. - if test -z "$mode"; then - if test -n "$nonopt"; then - $echo "$modename: warning: cannot infer operation mode from \`$nonopt'" 1>&2 - else - $echo "$modename: warning: cannot infer operation mode without MODE-ARGS" 1>&2 - fi - fi - ;; - esac - fi - - # Only execute mode is allowed to have -dlopen flags. - if test -n "$execute_dlfiles" && test "$mode" != execute; then - $echo "$modename: unrecognized option \`-dlopen'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - # Change the help message to a mode-specific one. - generic_help="$help" - help="Try \`$modename --help --mode=$mode' for more information." - - # These modes are in order of execution frequency so that they run quickly. - case $mode in - # libtool compile mode - compile) - modename="$modename: compile" - # Get the compilation command and the source file. - base_compile= - prev= - lastarg= - srcfile="$nonopt" - suppress_output= - - user_target=no - for arg - do - case $prev in - "") ;; - xcompiler) - # Aesthetically quote the previous argument. - prev= - lastarg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - - case $arg in - # Double-quote args containing other shell metacharacters. - # Many Bourne shells cannot handle close brackets correctly - # in scan sets, so we specify it separately. - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - arg="\"$arg\"" - ;; - esac - - # Add the previous argument to base_compile. - if test -z "$base_compile"; then - base_compile="$lastarg" - else - base_compile="$base_compile $lastarg" - fi - continue - ;; - esac - - # Accept any command-line options. - case $arg in - -o) - if test "$user_target" != "no"; then - $echo "$modename: you cannot specify \`-o' more than once" 1>&2 - exit 1 - fi - user_target=next - ;; - - -static) - build_old_libs=yes - continue - ;; - - -prefer-pic) - pic_mode=yes - continue - ;; - - -prefer-non-pic) - pic_mode=no - continue - ;; - - -Xcompiler) - prev=xcompiler - continue - ;; - - -Wc,*) - args=`$echo "X$arg" | $Xsed -e "s/^-Wc,//"` - lastarg= - save_ifs="$IFS"; IFS=',' - for arg in $args; do - IFS="$save_ifs" - - # Double-quote args containing other shell metacharacters. - # Many Bourne shells cannot handle close brackets correctly - # in scan sets, so we specify it separately. - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - arg="\"$arg\"" - ;; - esac - lastarg="$lastarg $arg" - done - IFS="$save_ifs" - lastarg=`$echo "X$lastarg" | $Xsed -e "s/^ //"` - - # Add the arguments to base_compile. - if test -z "$base_compile"; then - base_compile="$lastarg" - else - base_compile="$base_compile $lastarg" - fi - continue - ;; - esac - - case $user_target in - next) - # The next one is the -o target name - user_target=yes - continue - ;; - yes) - # We got the output file - user_target=set - libobj="$arg" - continue - ;; - esac - - # Accept the current argument as the source file. - lastarg="$srcfile" - srcfile="$arg" - - # Aesthetically quote the previous argument. - - # Backslashify any backslashes, double quotes, and dollar signs. - # These are the only characters that are still specially - # interpreted inside of double-quoted scrings. - lastarg=`$echo "X$lastarg" | $Xsed -e "$sed_quote_subst"` - - # Double-quote args containing other shell metacharacters. - # Many Bourne shells cannot handle close brackets correctly - # in scan sets, so we specify it separately. - case $lastarg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - lastarg="\"$lastarg\"" - ;; - esac - - # Add the previous argument to base_compile. - if test -z "$base_compile"; then - base_compile="$lastarg" - else - base_compile="$base_compile $lastarg" - fi - done - - case $user_target in - set) - ;; - no) - # Get the name of the library object. - libobj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%'` - ;; - *) - $echo "$modename: you must specify a target with \`-o'" 1>&2 - exit 1 - ;; - esac - - # Recognize several different file suffixes. - # If the user specifies -o file.o, it is replaced with file.lo - xform='[cCFSfmso]' - case $libobj in - *.ada) xform=ada ;; - *.adb) xform=adb ;; - *.ads) xform=ads ;; - *.asm) xform=asm ;; - *.c++) xform=c++ ;; - *.cc) xform=cc ;; - *.cpp) xform=cpp ;; - *.cxx) xform=cxx ;; - *.f90) xform=f90 ;; - *.for) xform=for ;; - esac - - libobj=`$echo "X$libobj" | $Xsed -e "s/\.$xform$/.lo/"` - - case $libobj in - *.lo) obj=`$echo "X$libobj" | $Xsed -e "$lo2o"` ;; - *) - $echo "$modename: cannot determine name of library object from \`$libobj'" 1>&2 - exit 1 - ;; - esac - - if test -z "$base_compile"; then - $echo "$modename: you must specify a compilation command" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - # Delete any leftover library objects. - if test "$build_old_libs" = yes; then - removelist="$obj $libobj" - else - removelist="$libobj" - fi - - $run $rm $removelist - trap "$run $rm $removelist; exit 1" 1 2 15 - - # On Cygwin there's no "real" PIC flag so we must build both object types - case $host_os in - cygwin* | mingw* | pw32* | os2*) - pic_mode=default - ;; - esac - if test "$pic_mode" = no && test "$deplibs_check_method" != pass_all; then - # non-PIC code in shared libraries is not supported - pic_mode=default - fi - - # Calculate the filename of the output object if compiler does - # not support -o with -c - if test "$compiler_c_o" = no; then - output_obj=`$echo "X$srcfile" | $Xsed -e 's%^.*/%%' -e 's%\.[^.]*$%%'`.${objext} - lockfile="$output_obj.lock" - removelist="$removelist $output_obj $lockfile" - trap "$run $rm $removelist; exit 1" 1 2 15 - else - need_locks=no - lockfile= - fi - - # Lock this critical section if it is needed - # We use this script file to make the link, it avoids creating a new file - if test "$need_locks" = yes; then - until $run ln "$0" "$lockfile" 2>/dev/null; do - $show "Waiting for $lockfile to be removed" - sleep 2 - done - elif test "$need_locks" = warn; then - if test -f "$lockfile"; then - echo "\ -*** ERROR, $lockfile exists and contains: -`cat $lockfile 2>/dev/null` - -This indicates that another process is trying to use the same -temporary object file, and libtool could not work around it because -your compiler does not support \`-c' and \`-o' together. If you -repeat this compilation, it may succeed, by chance, but you had better -avoid parallel builds (make -j) in this platform, or get a better -compiler." - - $run $rm $removelist - exit 1 - fi - echo $srcfile > "$lockfile" - fi - - if test -n "$fix_srcfile_path"; then - eval srcfile=\"$fix_srcfile_path\" - fi - - # Only build a PIC object if we are building libtool libraries. - if test "$build_libtool_libs" = yes; then - # Without this assignment, base_compile gets emptied. - fbsd_hideous_sh_bug=$base_compile - - if test "$pic_mode" != no; then - # All platforms use -DPIC, to notify preprocessed assembler code. - command="$base_compile $srcfile $pic_flag -DPIC" - else - # Don't build PIC code - command="$base_compile $srcfile" - fi - if test "$build_old_libs" = yes; then - lo_libobj="$libobj" - dir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$dir" = "X$libobj"; then - dir="$objdir" - else - dir="$dir/$objdir" - fi - libobj="$dir/"`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` - - if test -d "$dir"; then - $show "$rm $libobj" - $run $rm $libobj - else - $show "$mkdir $dir" - $run $mkdir $dir - status=$? - if test $status -ne 0 && test ! -d $dir; then - exit $status - fi - fi - fi - if test "$compiler_o_lo" = yes; then - output_obj="$libobj" - command="$command -o $output_obj" - elif test "$compiler_c_o" = yes; then - output_obj="$obj" - command="$command -o $output_obj" - fi - - $run $rm "$output_obj" - $show "$command" - if $run eval "$command"; then : - else - test -n "$output_obj" && $run $rm $removelist - exit 1 - fi - - if test "$need_locks" = warn && - test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then - echo "\ -*** ERROR, $lockfile contains: -`cat $lockfile 2>/dev/null` - -but it should contain: -$srcfile - -This indicates that another process is trying to use the same -temporary object file, and libtool could not work around it because -your compiler does not support \`-c' and \`-o' together. If you -repeat this compilation, it may succeed, by chance, but you had better -avoid parallel builds (make -j) in this platform, or get a better -compiler." - - $run $rm $removelist - exit 1 - fi - - # Just move the object if needed, then go on to compile the next one - if test x"$output_obj" != x"$libobj"; then - $show "$mv $output_obj $libobj" - if $run $mv $output_obj $libobj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - fi - - # If we have no pic_flag, then copy the object into place and finish. - if (test -z "$pic_flag" || test "$pic_mode" != default) && - test "$build_old_libs" = yes; then - # Rename the .lo from within objdir to obj - if test -f $obj; then - $show $rm $obj - $run $rm $obj - fi - - $show "$mv $libobj $obj" - if $run $mv $libobj $obj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - - xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$obj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$obj" | $Xsed -e "s%.*/%%"` - libobj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` - # Now arrange that obj and lo_libobj become the same file - $show "(cd $xdir && $LN_S $baseobj $libobj)" - if $run eval '(cd $xdir && $LN_S $baseobj $libobj)'; then - # Unlock the critical section if it was locked - if test "$need_locks" != no; then - $run $rm "$lockfile" - fi - exit 0 - else - error=$? - $run $rm $removelist - exit $error - fi - fi - - # Allow error messages only from the first compilation. - suppress_output=' >/dev/null 2>&1' - fi - - # Only build a position-dependent object if we build old libraries. - if test "$build_old_libs" = yes; then - if test "$pic_mode" != yes; then - # Don't build PIC code - command="$base_compile $srcfile" - else - # All platforms use -DPIC, to notify preprocessed assembler code. - command="$base_compile $srcfile $pic_flag -DPIC" - fi - if test "$compiler_c_o" = yes; then - command="$command -o $obj" - output_obj="$obj" - fi - - # Suppress compiler output if we already did a PIC compilation. - command="$command$suppress_output" - $run $rm "$output_obj" - $show "$command" - if $run eval "$command"; then : - else - $run $rm $removelist - exit 1 - fi - - if test "$need_locks" = warn && - test x"`cat $lockfile 2>/dev/null`" != x"$srcfile"; then - echo "\ -*** ERROR, $lockfile contains: -`cat $lockfile 2>/dev/null` - -but it should contain: -$srcfile - -This indicates that another process is trying to use the same -temporary object file, and libtool could not work around it because -your compiler does not support \`-c' and \`-o' together. If you -repeat this compilation, it may succeed, by chance, but you had better -avoid parallel builds (make -j) in this platform, or get a better -compiler." - - $run $rm $removelist - exit 1 - fi - - # Just move the object if needed - if test x"$output_obj" != x"$obj"; then - $show "$mv $output_obj $obj" - if $run $mv $output_obj $obj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - fi - - # Create an invalid libtool object if no PIC, so that we do not - # accidentally link it into a program. - if test "$build_libtool_libs" != yes; then - $show "echo timestamp > $libobj" - $run eval "echo timestamp > \$libobj" || exit $? - else - # Move the .lo from within objdir - $show "$mv $libobj $lo_libobj" - if $run $mv $libobj $lo_libobj; then : - else - error=$? - $run $rm $removelist - exit $error - fi - fi - fi - - # Unlock the critical section if it was locked - if test "$need_locks" != no; then - $run $rm "$lockfile" - fi - - exit 0 - ;; - - # libtool link mode - link | relink) - modename="$modename: link" - case $host in - *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) - # It is impossible to link a dll without this setting, and - # we shouldn't force the makefile maintainer to figure out - # which system we are compiling for in order to pass an extra - # flag for every libtool invokation. - # allow_undefined=no - - # FIXME: Unfortunately, there are problems with the above when trying - # to make a dll which has undefined symbols, in which case not - # even a static library is built. For now, we need to specify - # -no-undefined on the libtool link line when we can be certain - # that all symbols are satisfied, otherwise we get a static library. - allow_undefined=yes - ;; - *) - allow_undefined=yes - ;; - esac - libtool_args="$nonopt" - compile_command="$nonopt" - finalize_command="$nonopt" - - compile_rpath= - finalize_rpath= - compile_shlibpath= - finalize_shlibpath= - convenience= - old_convenience= - deplibs= - old_deplibs= - compiler_flags= - linker_flags= - dllsearchpath= - lib_search_path=`pwd` - - avoid_version=no - dlfiles= - dlprefiles= - dlself=no - export_dynamic=no - export_symbols= - export_symbols_regex= - generated= - libobjs= - ltlibs= - module=no - no_install=no - objs= - prefer_static_libs=no - preload=no - prev= - prevarg= - release= - rpath= - xrpath= - perm_rpath= - temp_rpath= - thread_safe=no - vinfo= - - # We need to know -static, to get the right output filenames. - for arg - do - case $arg in - -all-static | -static) - if test "X$arg" = "X-all-static"; then - if test "$build_libtool_libs" = yes && test -z "$link_static_flag"; then - $echo "$modename: warning: complete static linking is impossible in this configuration" 1>&2 - fi - if test -n "$link_static_flag"; then - dlopen_self=$dlopen_self_static - fi - else - if test -z "$pic_flag" && test -n "$link_static_flag"; then - dlopen_self=$dlopen_self_static - fi - fi - build_libtool_libs=no - build_old_libs=yes - prefer_static_libs=yes - break - ;; - esac - done - - # See if our shared archives depend on static archives. - test -n "$old_archive_from_new_cmds" && build_old_libs=yes - - # Go through the arguments, transforming them on the way. - while test $# -gt 0; do - arg="$1" - shift - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - qarg=\"`$echo "X$arg" | $Xsed -e "$sed_quote_subst"`\" ### testsuite: skip nested quoting test - ;; - *) qarg=$arg ;; - esac - libtool_args="$libtool_args $qarg" - - # If the previous option needs an argument, assign it. - if test -n "$prev"; then - case $prev in - output) - compile_command="$compile_command @OUTPUT@" - finalize_command="$finalize_command @OUTPUT@" - ;; - esac - - case $prev in - dlfiles|dlprefiles) - if test "$preload" = no; then - # Add the symbol object into the linking commands. - compile_command="$compile_command @SYMFILE@" - finalize_command="$finalize_command @SYMFILE@" - preload=yes - fi - case $arg in - *.la | *.lo) ;; # We handle these cases below. - force) - if test "$dlself" = no; then - dlself=needless - export_dynamic=yes - fi - prev= - continue - ;; - self) - if test "$prev" = dlprefiles; then - dlself=yes - elif test "$prev" = dlfiles && test "$dlopen_self" != yes; then - dlself=yes - else - dlself=needless - export_dynamic=yes - fi - prev= - continue - ;; - *) - if test "$prev" = dlfiles; then - dlfiles="$dlfiles $arg" - else - dlprefiles="$dlprefiles $arg" - fi - prev= - continue - ;; - esac - ;; - expsyms) - export_symbols="$arg" - if test ! -f "$arg"; then - $echo "$modename: symbol file \`$arg' does not exist" - exit 1 - fi - prev= - continue - ;; - expsyms_regex) - export_symbols_regex="$arg" - prev= - continue - ;; - release) - release="-$arg" - prev= - continue - ;; - rpath | xrpath) - # We need an absolute path. - case $arg in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - $echo "$modename: only absolute run-paths are allowed" 1>&2 - exit 1 - ;; - esac - if test "$prev" = rpath; then - case "$rpath " in - *" $arg "*) ;; - *) rpath="$rpath $arg" ;; - esac - else - case "$xrpath " in - *" $arg "*) ;; - *) xrpath="$xrpath $arg" ;; - esac - fi - prev= - continue - ;; - xcompiler) - compiler_flags="$compiler_flags $qarg" - prev= - compile_command="$compile_command $qarg" - finalize_command="$finalize_command $qarg" - continue - ;; - xlinker) - linker_flags="$linker_flags $qarg" - compiler_flags="$compiler_flags $wl$qarg" - prev= - compile_command="$compile_command $wl$qarg" - finalize_command="$finalize_command $wl$qarg" - continue - ;; - *) - eval "$prev=\"\$arg\"" - prev= - continue - ;; - esac - fi # test -n $prev - - prevarg="$arg" - - case $arg in - -all-static) - if test -n "$link_static_flag"; then - compile_command="$compile_command $link_static_flag" - finalize_command="$finalize_command $link_static_flag" - fi - continue - ;; - - -allow-undefined) - # FIXME: remove this flag sometime in the future. - $echo "$modename: \`-allow-undefined' is deprecated because it is the default" 1>&2 - continue - ;; - - -avoid-version) - avoid_version=yes - continue - ;; - - -dlopen) - prev=dlfiles - continue - ;; - - -dlpreopen) - prev=dlprefiles - continue - ;; - - -export-dynamic) - export_dynamic=yes - continue - ;; - - -export-symbols | -export-symbols-regex) - if test -n "$export_symbols" || test -n "$export_symbols_regex"; then - $echo "$modename: more than one -exported-symbols argument is not allowed" - exit 1 - fi - if test "X$arg" = "X-export-symbols"; then - prev=expsyms - else - prev=expsyms_regex - fi - continue - ;; - - # The native IRIX linker understands -LANG:*, -LIST:* and -LNO:* - # so, if we see these flags be careful not to treat them like -L - -L[A-Z][A-Z]*:*) - case $with_gcc/$host in - no/*-*-irix* | no/*-*-nonstopux*) - compile_command="$compile_command $arg" - finalize_command="$finalize_command $arg" - ;; - esac - continue - ;; - - -L*) - dir=`$echo "X$arg" | $Xsed -e 's/^-L//'` - # We need an absolute path. - case $dir in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - absdir=`cd "$dir" && pwd` - if test -z "$absdir"; then - $echo "$modename: cannot determine absolute directory name of \`$dir'" 1>&2 - exit 1 - fi - dir="$absdir" - ;; - esac - case "$deplibs " in - *" -L$dir "*) ;; - *) - deplibs="$deplibs -L$dir" - lib_search_path="$lib_search_path $dir" - ;; - esac - case $host in - *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) - case :$dllsearchpath: in - *":$dir:"*) ;; - *) dllsearchpath="$dllsearchpath:$dir";; - esac - ;; - esac - continue - ;; - - -l*) - if test "X$arg" = "X-lc" || test "X$arg" = "X-lm"; then - case $host in - *-*-cygwin* | *-*-pw32* | *-*-beos*) - # These systems don't actually have a C or math library (as such) - continue - ;; - *-*-mingw* | *-*-os2*) - # These systems don't actually have a C library (as such) - test "X$arg" = "X-lc" && continue - ;; - *-*-openbsd* | *-*-freebsd*) - # Do not include libc due to us having libc/libc_r. - test "X$arg" = "X-lc" && continue - ;; - esac - elif test "X$arg" = "X-lc_r"; then - case $host in - *-*-openbsd* | *-*-freebsd*) - # Do not include libc_r directly, use -pthread flag. - continue - ;; - esac - fi - deplibs="$deplibs $arg" - continue - ;; - - -module) - module=yes - continue - ;; - - -no-fast-install) - fast_install=no - continue - ;; - - -no-install) - case $host in - *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) - # The PATH hackery in wrapper scripts is required on Windows - # in order for the loader to find any dlls it needs. - $echo "$modename: warning: \`-no-install' is ignored for $host" 1>&2 - $echo "$modename: warning: assuming \`-no-fast-install' instead" 1>&2 - fast_install=no - ;; - *) no_install=yes ;; - esac - continue - ;; - - -no-undefined) - allow_undefined=no - continue - ;; - - -o) prev=output ;; - - -release) - prev=release - continue - ;; - - -rpath) - prev=rpath - continue - ;; - - -R) - prev=xrpath - continue - ;; - - -R*) - dir=`$echo "X$arg" | $Xsed -e 's/^-R//'` - # We need an absolute path. - case $dir in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - $echo "$modename: only absolute run-paths are allowed" 1>&2 - exit 1 - ;; - esac - case "$xrpath " in - *" $dir "*) ;; - *) xrpath="$xrpath $dir" ;; - esac - continue - ;; - - -static) - # The effects of -static are defined in a previous loop. - # We used to do the same as -all-static on platforms that - # didn't have a PIC flag, but the assumption that the effects - # would be equivalent was wrong. It would break on at least - # Digital Unix and AIX. - continue - ;; - - -thread-safe) - thread_safe=yes - continue - ;; - - -version-info) - prev=vinfo - continue - ;; - - -Wc,*) - args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wc,//'` - arg= - save_ifs="$IFS"; IFS=',' - for flag in $args; do - IFS="$save_ifs" - case $flag in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - flag="\"$flag\"" - ;; - esac - arg="$arg $wl$flag" - compiler_flags="$compiler_flags $flag" - done - IFS="$save_ifs" - arg=`$echo "X$arg" | $Xsed -e "s/^ //"` - ;; - - -Wl,*) - args=`$echo "X$arg" | $Xsed -e "$sed_quote_subst" -e 's/^-Wl,//'` - arg= - save_ifs="$IFS"; IFS=',' - for flag in $args; do - IFS="$save_ifs" - case $flag in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - flag="\"$flag\"" - ;; - esac - arg="$arg $wl$flag" - compiler_flags="$compiler_flags $wl$flag" - linker_flags="$linker_flags $flag" - done - IFS="$save_ifs" - arg=`$echo "X$arg" | $Xsed -e "s/^ //"` - ;; - - -Xcompiler) - prev=xcompiler - continue - ;; - - -Xlinker) - prev=xlinker - continue - ;; - - # Some other compiler flag. - -* | +*) - # Unknown arguments in both finalize_command and compile_command need - # to be aesthetically quoted because they are evaled later. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - arg="\"$arg\"" - ;; - esac - ;; - - *.lo | *.$objext) - # A library or standard object. - if test "$prev" = dlfiles; then - # This file was specified with -dlopen. - if test "$build_libtool_libs" = yes && test "$dlopen_support" = yes; then - dlfiles="$dlfiles $arg" - prev= - continue - else - # If libtool objects are unsupported, then we need to preload. - prev=dlprefiles - fi - fi - - if test "$prev" = dlprefiles; then - # Preload the old-style object. - dlprefiles="$dlprefiles "`$echo "X$arg" | $Xsed -e "$lo2o"` - prev= - else - case $arg in - *.lo) libobjs="$libobjs $arg" ;; - *) objs="$objs $arg" ;; - esac - fi - ;; - - *.$libext) - # An archive. - deplibs="$deplibs $arg" - old_deplibs="$old_deplibs $arg" - continue - ;; - - *.la) - # A libtool-controlled library. - - if test "$prev" = dlfiles; then - # This library was specified with -dlopen. - dlfiles="$dlfiles $arg" - prev= - elif test "$prev" = dlprefiles; then - # The library was specified with -dlpreopen. - dlprefiles="$dlprefiles $arg" - prev= - else - deplibs="$deplibs $arg" - fi - continue - ;; - - # Some other compiler argument. - *) - # Unknown arguments in both finalize_command and compile_command need - # to be aesthetically quoted because they are evaled later. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*|"") - arg="\"$arg\"" - ;; - esac - ;; - esac # arg - - # Now actually substitute the argument into the commands. - if test -n "$arg"; then - compile_command="$compile_command $arg" - finalize_command="$finalize_command $arg" - fi - done # argument parsing loop - - if test -n "$prev"; then - $echo "$modename: the \`$prevarg' option requires an argument" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - if test "$export_dynamic" = yes && test -n "$export_dynamic_flag_spec"; then - eval arg=\"$export_dynamic_flag_spec\" - compile_command="$compile_command $arg" - finalize_command="$finalize_command $arg" - fi - - # calculate the name of the file, without its directory - outputname=`$echo "X$output" | $Xsed -e 's%^.*/%%'` - libobjs_save="$libobjs" - - if test -n "$shlibpath_var"; then - # get the directories listed in $shlibpath_var - eval shlib_search_path=\`\$echo \"X\${$shlibpath_var}\" \| \$Xsed -e \'s/:/ /g\'\` - else - shlib_search_path= - fi - eval sys_lib_search_path=\"$sys_lib_search_path_spec\" - eval sys_lib_dlsearch_path=\"$sys_lib_dlsearch_path_spec\" - - output_objdir=`$echo "X$output" | $Xsed -e 's%/[^/]*$%%'` - if test "X$output_objdir" = "X$output"; then - output_objdir="$objdir" - else - output_objdir="$output_objdir/$objdir" - fi - # Create the object directory. - if test ! -d $output_objdir; then - $show "$mkdir $output_objdir" - $run $mkdir $output_objdir - status=$? - if test $status -ne 0 && test ! -d $output_objdir; then - exit $status - fi - fi - - # Determine the type of output - case $output in - "") - $echo "$modename: you must specify an output file" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - *.$libext) linkmode=oldlib ;; - *.lo | *.$objext) linkmode=obj ;; - *.la) linkmode=lib ;; - *) linkmode=prog ;; # Anything else should be a program. - esac - - specialdeplibs= - libs= - # Find all interdependent deplibs by searching for libraries - # that are linked more than once (e.g. -la -lb -la) - for deplib in $deplibs; do - if test "X$duplicate_deps" = "Xyes" ; then - case "$libs " in - *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; - esac - fi - libs="$libs $deplib" - done - deplibs= - newdependency_libs= - newlib_search_path= - need_relink=no # whether we're linking any uninstalled libtool libraries - notinst_deplibs= # not-installed libtool libraries - notinst_path= # paths that contain not-installed libtool libraries - case $linkmode in - lib) - passes="conv link" - for file in $dlfiles $dlprefiles; do - case $file in - *.la) ;; - *) - $echo "$modename: libraries can \`-dlopen' only libtool libraries: $file" 1>&2 - exit 1 - ;; - esac - done - ;; - prog) - compile_deplibs= - finalize_deplibs= - alldeplibs=no - newdlfiles= - newdlprefiles= - passes="conv scan dlopen dlpreopen link" - ;; - *) passes="conv" - ;; - esac - for pass in $passes; do - if test $linkmode = prog; then - # Determine which files to process - case $pass in - dlopen) - libs="$dlfiles" - save_deplibs="$deplibs" # Collect dlpreopened libraries - deplibs= - ;; - dlpreopen) libs="$dlprefiles" ;; - link) libs="$deplibs %DEPLIBS% $dependency_libs" ;; - esac - fi - for deplib in $libs; do - lib= - found=no - case $deplib in - -l*) - if test $linkmode = oldlib && test $linkmode = obj; then - $echo "$modename: warning: \`-l' is ignored for archives/objects: $deplib" 1>&2 - continue - fi - if test $pass = conv; then - deplibs="$deplib $deplibs" - continue - fi - name=`$echo "X$deplib" | $Xsed -e 's/^-l//'` - for searchdir in $newlib_search_path $lib_search_path $sys_lib_search_path $shlib_search_path; do - # Search the libtool library - lib="$searchdir/lib${name}.la" - if test -f "$lib"; then - found=yes - break - fi - done - if test "$found" != yes; then - # deplib doesn't seem to be a libtool library - if test "$linkmode,$pass" = "prog,link"; then - compile_deplibs="$deplib $compile_deplibs" - finalize_deplibs="$deplib $finalize_deplibs" - else - deplibs="$deplib $deplibs" - test $linkmode = lib && newdependency_libs="$deplib $newdependency_libs" - fi - continue - fi - ;; # -l - -L*) - case $linkmode in - lib) - deplibs="$deplib $deplibs" - test $pass = conv && continue - newdependency_libs="$deplib $newdependency_libs" - newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` - ;; - prog) - if test $pass = conv; then - deplibs="$deplib $deplibs" - continue - fi - if test $pass = scan; then - deplibs="$deplib $deplibs" - newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'` - else - compile_deplibs="$deplib $compile_deplibs" - finalize_deplibs="$deplib $finalize_deplibs" - fi - ;; - *) - $echo "$modename: warning: \`-L' is ignored for archives/objects: $deplib" 1>&2 - ;; - esac # linkmode - continue - ;; # -L - -R*) - if test $pass = link; then - dir=`$echo "X$deplib" | $Xsed -e 's/^-R//'` - # Make sure the xrpath contains only unique directories. - case "$xrpath " in - *" $dir "*) ;; - *) xrpath="$xrpath $dir" ;; - esac - fi - deplibs="$deplib $deplibs" - continue - ;; - *.la) lib="$deplib" ;; - *.$libext) - if test $pass = conv; then - deplibs="$deplib $deplibs" - continue - fi - case $linkmode in - lib) - if test "$deplibs_check_method" != pass_all; then - echo - echo "*** Warning: Trying to link with static lib archive $deplib." - echo "*** I have the capability to make that library automatically link in when" - echo "*** you link to this library. But I can only do this if you have a" - echo "*** shared version of the library, which you do not appear to have" - echo "*** because the file extensions .$libext of this argument makes me believe" - echo "*** that it is just a static archive that I should not used here." - else - echo - echo "*** Warning: Linking the shared library $output against the" - echo "*** static library $deplib is not portable!" - deplibs="$deplib $deplibs" - fi - continue - ;; - prog) - if test $pass != link; then - deplibs="$deplib $deplibs" - else - compile_deplibs="$deplib $compile_deplibs" - finalize_deplibs="$deplib $finalize_deplibs" - fi - continue - ;; - esac # linkmode - ;; # *.$libext - *.lo | *.$objext) - if test $pass = dlpreopen || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then - # If there is no dlopen support or we're linking statically, - # we need to preload. - newdlprefiles="$newdlprefiles $deplib" - compile_deplibs="$deplib $compile_deplibs" - finalize_deplibs="$deplib $finalize_deplibs" - else - newdlfiles="$newdlfiles $deplib" - fi - continue - ;; - %DEPLIBS%) - alldeplibs=yes - continue - ;; - esac # case $deplib - if test $found = yes || test -f "$lib"; then : - else - $echo "$modename: cannot find the library \`$lib'" 1>&2 - exit 1 - fi - - # Check to see that this really is a libtool archive. - if (${SED} -e '2q' $lib | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : - else - $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 - exit 1 - fi - - ladir=`$echo "X$lib" | $Xsed -e 's%/[^/]*$%%'` - test "X$ladir" = "X$lib" && ladir="." - - dlname= - dlopen= - dlpreopen= - libdir= - library_names= - old_library= - # If the library was installed with an old release of libtool, - # it will not redefine variable installed. - installed=yes - - # Read the .la file - case $lib in - */* | *\\*) . $lib ;; - *) . ./$lib ;; - esac - - if test "$linkmode,$pass" = "lib,link" || - test "$linkmode,$pass" = "prog,scan" || - { test $linkmode = oldlib && test $linkmode = obj; }; then - # Add dl[pre]opened files of deplib - test -n "$dlopen" && dlfiles="$dlfiles $dlopen" - test -n "$dlpreopen" && dlprefiles="$dlprefiles $dlpreopen" - fi - - if test $pass = conv; then - # Only check for convenience libraries - deplibs="$lib $deplibs" - if test -z "$libdir"; then - if test -z "$old_library"; then - $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 - exit 1 - fi - # It is a libtool convenience library, so add in its objects. - convenience="$convenience $ladir/$objdir/$old_library" - old_convenience="$old_convenience $ladir/$objdir/$old_library" - tmp_libs= - for deplib in $dependency_libs; do - deplibs="$deplib $deplibs" - if test "X$duplicate_deps" = "Xyes" ; then - case "$tmp_libs " in - *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; - esac - fi - tmp_libs="$tmp_libs $deplib" - done - elif test $linkmode != prog && test $linkmode != lib; then - $echo "$modename: \`$lib' is not a convenience library" 1>&2 - exit 1 - fi - continue - fi # $pass = conv - - # Get the name of the library we link against. - linklib= - for l in $old_library $library_names; do - linklib="$l" - done - if test -z "$linklib"; then - $echo "$modename: cannot find name of link library for \`$lib'" 1>&2 - exit 1 - fi - - # This library was specified with -dlopen. - if test $pass = dlopen; then - if test -z "$libdir"; then - $echo "$modename: cannot -dlopen a convenience library: \`$lib'" 1>&2 - exit 1 - fi - if test -z "$dlname" || test "$dlopen_support" != yes || test "$build_libtool_libs" = no; then - # If there is no dlname, no dlopen support or we're linking - # statically, we need to preload. - dlprefiles="$dlprefiles $lib" - else - newdlfiles="$newdlfiles $lib" - fi - continue - fi # $pass = dlopen - - # We need an absolute path. - case $ladir in - [\\/]* | [A-Za-z]:[\\/]*) abs_ladir="$ladir" ;; - *) - abs_ladir=`cd "$ladir" && pwd` - if test -z "$abs_ladir"; then - $echo "$modename: warning: cannot determine absolute directory name of \`$ladir'" 1>&2 - $echo "$modename: passing it literally to the linker, although it might fail" 1>&2 - abs_ladir="$ladir" - fi - ;; - esac - laname=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` - - # Find the relevant object directory and library name. - if test "X$installed" = Xyes; then - if test ! -f "$libdir/$linklib" && test -f "$abs_ladir/$linklib"; then - $echo "$modename: warning: library \`$lib' was moved." 1>&2 - dir="$ladir" - absdir="$abs_ladir" - libdir="$abs_ladir" - else - dir="$libdir" - absdir="$libdir" - fi - else - dir="$ladir/$objdir" - absdir="$abs_ladir/$objdir" - # Remove this search path later - notinst_path="$notinst_path $abs_ladir" - fi # $installed = yes - name=`$echo "X$laname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` - - # This library was specified with -dlpreopen. - if test $pass = dlpreopen; then - if test -z "$libdir"; then - $echo "$modename: cannot -dlpreopen a convenience library: \`$lib'" 1>&2 - exit 1 - fi - # Prefer using a static library (so that no silly _DYNAMIC symbols - # are required to link). - if test -n "$old_library"; then - newdlprefiles="$newdlprefiles $dir/$old_library" - # Otherwise, use the dlname, so that lt_dlopen finds it. - elif test -n "$dlname"; then - newdlprefiles="$newdlprefiles $dir/$dlname" - else - newdlprefiles="$newdlprefiles $dir/$linklib" - fi - fi # $pass = dlpreopen - - if test -z "$libdir"; then - # Link the convenience library - if test $linkmode = lib; then - deplibs="$dir/$old_library $deplibs" - elif test "$linkmode,$pass" = "prog,link"; then - compile_deplibs="$dir/$old_library $compile_deplibs" - finalize_deplibs="$dir/$old_library $finalize_deplibs" - else - deplibs="$lib $deplibs" - fi - continue - fi - - if test $linkmode = prog && test $pass != link; then - newlib_search_path="$newlib_search_path $ladir" - deplibs="$lib $deplibs" - - linkalldeplibs=no - if test "$link_all_deplibs" != no || test -z "$library_names" || - test "$build_libtool_libs" = no; then - linkalldeplibs=yes - fi - - tmp_libs= - for deplib in $dependency_libs; do - case $deplib in - -L*) newlib_search_path="$newlib_search_path "`$echo "X$deplib" | $Xsed -e 's/^-L//'`;; ### testsuite: skip nested quoting test - esac - # Need to link against all dependency_libs? - if test $linkalldeplibs = yes; then - deplibs="$deplib $deplibs" - else - # Need to hardcode shared library paths - # or/and link against static libraries - newdependency_libs="$deplib $newdependency_libs" - fi - if test "X$duplicate_deps" = "Xyes" ; then - case "$tmp_libs " in - *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; - esac - fi - tmp_libs="$tmp_libs $deplib" - done # for deplib - continue - fi # $linkmode = prog... - - link_static=no # Whether the deplib will be linked statically - if test -n "$library_names" && - { test "$prefer_static_libs" = no || test -z "$old_library"; }; then - # Link against this shared library - - if test "$linkmode,$pass" = "prog,link" || - { test $linkmode = lib && test $hardcode_into_libs = yes; }; then - # Hardcode the library path. - # Skip directories that are in the system default run-time - # search path. - case " $sys_lib_dlsearch_path " in - *" $absdir "*) ;; - *) - case "$compile_rpath " in - *" $absdir "*) ;; - *) compile_rpath="$compile_rpath $absdir" - esac - ;; - esac - case " $sys_lib_dlsearch_path " in - *" $libdir "*) ;; - *) - case "$finalize_rpath " in - *" $libdir "*) ;; - *) finalize_rpath="$finalize_rpath $libdir" - esac - ;; - esac - if test $linkmode = prog; then - # We need to hardcode the library path - if test -n "$shlibpath_var"; then - # Make sure the rpath contains only unique directories. - case "$temp_rpath " in - *" $dir "*) ;; - *" $absdir "*) ;; - *) temp_rpath="$temp_rpath $dir" ;; - esac - fi - fi - fi # $linkmode,$pass = prog,link... - - if test "$alldeplibs" = yes && - { test "$deplibs_check_method" = pass_all || - { test "$build_libtool_libs" = yes && - test -n "$library_names"; }; }; then - # We only need to search for static libraries - continue - fi - - if test "$installed" = no; then - notinst_deplibs="$notinst_deplibs $lib" - need_relink=yes - fi - - if test -n "$old_archive_from_expsyms_cmds"; then - # figure out the soname - set dummy $library_names - realname="$2" - shift; shift - libname=`eval \\$echo \"$libname_spec\"` - # use dlname if we got it. it's perfectly good, no? - if test -n "$dlname"; then - soname="$dlname" - elif test -n "$soname_spec"; then - # bleh windows - case $host in - *cygwin*) - major=`expr $current - $age` - versuffix="-$major" - ;; - esac - eval soname=\"$soname_spec\" - else - soname="$realname" - fi - - # Make a new name for the extract_expsyms_cmds to use - soroot="$soname" - soname=`echo $soroot | ${SED} -e 's/^.*\///'` - newlib="libimp-`echo $soname | ${SED} 's/^lib//;s/\.dll$//'`.a" - - # If the library has no export list, then create one now - if test -f "$output_objdir/$soname-def"; then : - else - $show "extracting exported symbol list from \`$soname'" - save_ifs="$IFS"; IFS='~' - eval cmds=\"$extract_expsyms_cmds\" - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - fi - - # Create $newlib - if test -f "$output_objdir/$newlib"; then :; else - $show "generating import library for \`$soname'" - save_ifs="$IFS"; IFS='~' - eval cmds=\"$old_archive_from_expsyms_cmds\" - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - fi - # make sure the library variables are pointing to the new library - dir=$output_objdir - linklib=$newlib - fi # test -n $old_archive_from_expsyms_cmds - - if test $linkmode = prog || test "$mode" != relink; then - add_shlibpath= - add_dir= - add= - lib_linked=yes - case $hardcode_action in - immediate | unsupported) - if test "$hardcode_direct" = no; then - add="$dir/$linklib" - elif test "$hardcode_minus_L" = no; then - case $host in - *-*-sunos*) add_shlibpath="$dir" ;; - esac - add_dir="-L$dir" - add="-l$name" - elif test "$hardcode_shlibpath_var" = no; then - add_shlibpath="$dir" - add="-l$name" - else - lib_linked=no - fi - ;; - relink) - if test "$hardcode_direct" = yes; then - add="$dir/$linklib" - elif test "$hardcode_minus_L" = yes; then - add_dir="-L$dir" - add="-l$name" - elif test "$hardcode_shlibpath_var" = yes; then - add_shlibpath="$dir" - add="-l$name" - else - lib_linked=no - fi - ;; - *) lib_linked=no ;; - esac - - if test "$lib_linked" != yes; then - $echo "$modename: configuration error: unsupported hardcode properties" - exit 1 - fi - - if test -n "$add_shlibpath"; then - case :$compile_shlibpath: in - *":$add_shlibpath:"*) ;; - *) compile_shlibpath="$compile_shlibpath$add_shlibpath:" ;; - esac - fi - if test $linkmode = prog; then - test -n "$add_dir" && compile_deplibs="$add_dir $compile_deplibs" - test -n "$add" && compile_deplibs="$add $compile_deplibs" - else - test -n "$add_dir" && deplibs="$add_dir $deplibs" - test -n "$add" && deplibs="$add $deplibs" - if test "$hardcode_direct" != yes && \ - test "$hardcode_minus_L" != yes && \ - test "$hardcode_shlibpath_var" = yes; then - case :$finalize_shlibpath: in - *":$libdir:"*) ;; - *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; - esac - fi - fi - fi - - if test $linkmode = prog || test "$mode" = relink; then - add_shlibpath= - add_dir= - add= - # Finalize command for both is simple: just hardcode it. - if test "$hardcode_direct" = yes; then - add="$libdir/$linklib" - elif test "$hardcode_minus_L" = yes; then - add_dir="-L$libdir" - add="-l$name" - elif test "$hardcode_shlibpath_var" = yes; then - case :$finalize_shlibpath: in - *":$libdir:"*) ;; - *) finalize_shlibpath="$finalize_shlibpath$libdir:" ;; - esac - add="-l$name" - else - # We cannot seem to hardcode it, guess we'll fake it. - add_dir="-L$libdir" - add="-l$name" - fi - - if test $linkmode = prog; then - test -n "$add_dir" && finalize_deplibs="$add_dir $finalize_deplibs" - test -n "$add" && finalize_deplibs="$add $finalize_deplibs" - else - test -n "$add_dir" && deplibs="$add_dir $deplibs" - test -n "$add" && deplibs="$add $deplibs" - fi - fi - elif test $linkmode = prog; then - if test "$alldeplibs" = yes && - { test "$deplibs_check_method" = pass_all || - { test "$build_libtool_libs" = yes && - test -n "$library_names"; }; }; then - # We only need to search for static libraries - continue - fi - - # Try to link the static library - # Here we assume that one of hardcode_direct or hardcode_minus_L - # is not unsupported. This is valid on all known static and - # shared platforms. - if test "$hardcode_direct" != unsupported; then - test -n "$old_library" && linklib="$old_library" - compile_deplibs="$dir/$linklib $compile_deplibs" - finalize_deplibs="$dir/$linklib $finalize_deplibs" - else - compile_deplibs="-l$name -L$dir $compile_deplibs" - finalize_deplibs="-l$name -L$dir $finalize_deplibs" - fi - elif test "$build_libtool_libs" = yes; then - # Not a shared library - if test "$deplibs_check_method" != pass_all; then - # We're trying link a shared library against a static one - # but the system doesn't support it. - - # Just print a warning and add the library to dependency_libs so - # that the program can be linked against the static library. - echo - echo "*** Warning: This system can not link to static lib archive $lib." - echo "*** I have the capability to make that library automatically link in when" - echo "*** you link to this library. But I can only do this if you have a" - echo "*** shared version of the library, which you do not appear to have." - if test "$module" = yes; then - echo "*** But as you try to build a module library, libtool will still create " - echo "*** a static module, that should work as long as the dlopening application" - echo "*** is linked with the -dlopen flag to resolve symbols at runtime." - if test -z "$global_symbol_pipe"; then - echo - echo "*** However, this would only work if libtool was able to extract symbol" - echo "*** lists from a program, using \`nm' or equivalent, but libtool could" - echo "*** not find such a program. So, this module is probably useless." - echo "*** \`nm' from GNU binutils and a full rebuild may help." - fi - if test "$build_old_libs" = no; then - build_libtool_libs=module - build_old_libs=yes - else - build_libtool_libs=no - fi - fi - else - convenience="$convenience $dir/$old_library" - old_convenience="$old_convenience $dir/$old_library" - deplibs="$dir/$old_library $deplibs" - link_static=yes - fi - fi # link shared/static library? - - if test $linkmode = lib; then - if test -n "$dependency_libs" && - { test $hardcode_into_libs != yes || test $build_old_libs = yes || - test $link_static = yes; }; then - # Extract -R from dependency_libs - temp_deplibs= - for libdir in $dependency_libs; do - case $libdir in - -R*) temp_xrpath=`$echo "X$libdir" | $Xsed -e 's/^-R//'` - case " $xrpath " in - *" $temp_xrpath "*) ;; - *) xrpath="$xrpath $temp_xrpath";; - esac;; - *) temp_deplibs="$temp_deplibs $libdir";; - esac - done - dependency_libs="$temp_deplibs" - fi - - newlib_search_path="$newlib_search_path $absdir" - # Link against this library - test "$link_static" = no && newdependency_libs="$abs_ladir/$laname $newdependency_libs" - # ... and its dependency_libs - tmp_libs= - for deplib in $dependency_libs; do - newdependency_libs="$deplib $newdependency_libs" - if test "X$duplicate_deps" = "Xyes" ; then - case "$tmp_libs " in - *" $deplib "*) specialdeplibs="$specialdeplibs $deplib" ;; - esac - fi - tmp_libs="$tmp_libs $deplib" - done - - if test $link_all_deplibs != no; then - # Add the search paths of all dependency libraries - for deplib in $dependency_libs; do - case $deplib in - -L*) path="$deplib" ;; - *.la) - dir=`$echo "X$deplib" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$deplib" && dir="." - # We need an absolute path. - case $dir in - [\\/]* | [A-Za-z]:[\\/]*) absdir="$dir" ;; - *) - absdir=`cd "$dir" && pwd` - if test -z "$absdir"; then - $echo "$modename: warning: cannot determine absolute directory name of \`$dir'" 1>&2 - absdir="$dir" - fi - ;; - esac - if grep "^installed=no" $deplib > /dev/null; then - path="-L$absdir/$objdir" - else - eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` - if test -z "$libdir"; then - $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 - exit 1 - fi - if test "$absdir" != "$libdir"; then - $echo "$modename: warning: \`$deplib' seems to be moved" 1>&2 - fi - path="-L$absdir" - fi - ;; - *) continue ;; - esac - case " $deplibs " in - *" $path "*) ;; - *) deplibs="$deplibs $path" ;; - esac - done - fi # link_all_deplibs != no - fi # linkmode = lib - done # for deplib in $libs - if test $pass = dlpreopen; then - # Link the dlpreopened libraries before other libraries - for deplib in $save_deplibs; do - deplibs="$deplib $deplibs" - done - fi - if test $pass != dlopen; then - test $pass != scan && dependency_libs="$newdependency_libs" - if test $pass != conv; then - # Make sure lib_search_path contains only unique directories. - lib_search_path= - for dir in $newlib_search_path; do - case "$lib_search_path " in - *" $dir "*) ;; - *) lib_search_path="$lib_search_path $dir" ;; - esac - done - newlib_search_path= - fi - - if test "$linkmode,$pass" != "prog,link"; then - vars="deplibs" - else - vars="compile_deplibs finalize_deplibs" - fi - for var in $vars dependency_libs; do - # Add libraries to $var in reverse order - eval tmp_libs=\"\$$var\" - new_libs= - for deplib in $tmp_libs; do - case $deplib in - -L*) new_libs="$deplib $new_libs" ;; - *) - case " $specialdeplibs " in - *" $deplib "*) new_libs="$deplib $new_libs" ;; - *) - case " $new_libs " in - *" $deplib "*) ;; - *) new_libs="$deplib $new_libs" ;; - esac - ;; - esac - ;; - esac - done - tmp_libs= - for deplib in $new_libs; do - case $deplib in - -L*) - case " $tmp_libs " in - *" $deplib "*) ;; - *) tmp_libs="$tmp_libs $deplib" ;; - esac - ;; - *) tmp_libs="$tmp_libs $deplib" ;; - esac - done - eval $var=\"$tmp_libs\" - done # for var - fi - if test "$pass" = "conv" && - { test "$linkmode" = "lib" || test "$linkmode" = "prog"; }; then - libs="$deplibs" # reset libs - deplibs= - fi - done # for pass - if test $linkmode = prog; then - dlfiles="$newdlfiles" - dlprefiles="$newdlprefiles" - fi - - case $linkmode in - oldlib) - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - $echo "$modename: warning: \`-dlopen' is ignored for archives" 1>&2 - fi - - if test -n "$rpath"; then - $echo "$modename: warning: \`-rpath' is ignored for archives" 1>&2 - fi - - if test -n "$xrpath"; then - $echo "$modename: warning: \`-R' is ignored for archives" 1>&2 - fi - - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for archives" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for archives" 1>&2 - fi - - if test -n "$export_symbols" || test -n "$export_symbols_regex"; then - $echo "$modename: warning: \`-export-symbols' is ignored for archives" 1>&2 - fi - - # Now set the variables for building old libraries. - build_libtool_libs=no - oldlibs="$output" - objs="$objs$old_deplibs" - ;; - - lib) - # Make sure we only generate libraries of the form `libNAME.la'. - case $outputname in - lib*) - name=`$echo "X$outputname" | $Xsed -e 's/\.la$//' -e 's/^lib//'` - eval libname=\"$libname_spec\" - ;; - *) - if test "$module" = no; then - $echo "$modename: libtool library \`$output' must begin with \`lib'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - if test "$need_lib_prefix" != no; then - # Add the "lib" prefix for modules if required - name=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` - eval libname=\"$libname_spec\" - else - libname=`$echo "X$outputname" | $Xsed -e 's/\.la$//'` - fi - ;; - esac - - if test -n "$objs"; then - if test "$deplibs_check_method" != pass_all; then - $echo "$modename: cannot build libtool library \`$output' from non-libtool objects on this host:$objs" 2>&1 - exit 1 - else - echo - echo "*** Warning: Linking the shared library $output against the non-libtool" - echo "*** objects $objs is not portable!" - libobjs="$libobjs $objs" - fi - fi - - if test "$dlself" != no; then - $echo "$modename: warning: \`-dlopen self' is ignored for libtool libraries" 1>&2 - fi - - set dummy $rpath - if test $# -gt 2; then - $echo "$modename: warning: ignoring multiple \`-rpath's for a libtool library" 1>&2 - fi - install_libdir="$2" - - oldlibs= - if test -z "$rpath"; then - if test "$build_libtool_libs" = yes; then - # Building a libtool convenience library. - libext=al - oldlibs="$output_objdir/$libname.$libext $oldlibs" - build_libtool_libs=convenience - build_old_libs=yes - fi - - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for convenience libraries" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for convenience libraries" 1>&2 - fi - else - - # Parse the version information argument. - save_ifs="$IFS"; IFS=':' - set dummy $vinfo 0 0 0 - IFS="$save_ifs" - - if test -n "$8"; then - $echo "$modename: too many parameters to \`-version-info'" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - current="$2" - revision="$3" - age="$4" - - # Check that each of the things are valid numbers. - case $current in - 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; - *) - $echo "$modename: CURRENT \`$current' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac - - case $revision in - 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; - *) - $echo "$modename: REVISION \`$revision' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac - - case $age in - 0 | [1-9] | [1-9][0-9] | [1-9][0-9][0-9]) ;; - *) - $echo "$modename: AGE \`$age' is not a nonnegative integer" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - ;; - esac - - if test $age -gt $current; then - $echo "$modename: AGE \`$age' is greater than the current interface number \`$current'" 1>&2 - $echo "$modename: \`$vinfo' is not valid version information" 1>&2 - exit 1 - fi - - # Calculate the version variables. - major= - versuffix= - verstring= - case $version_type in - none) ;; - - darwin) - # Like Linux, but with the current version available in - # verstring for coding it into the library header - major=.`expr $current - $age` - versuffix="$major.$age.$revision" - # Darwin ld doesn't like 0 for these options... - minor_current=`expr $current + 1` - verstring="-compatibility_version $minor_current -current_version $minor_current.$revision" - ;; - - freebsd-aout) - major=".$current" - versuffix=".$current.$revision"; - ;; - - freebsd-elf) - major=".$current" - versuffix=".$current"; - ;; - - irix | nonstopux) - major=`expr $current - $age + 1` - - case $version_type in - nonstopux) verstring_prefix=nonstopux ;; - *) verstring_prefix=sgi ;; - esac - verstring="$verstring_prefix$major.$revision" - - # Add in all the interfaces that we are compatible with. - loop=$revision - while test $loop != 0; do - iface=`expr $revision - $loop` - loop=`expr $loop - 1` - verstring="$verstring_prefix$major.$iface:$verstring" - done - - # Before this point, $major must not contain `.'. - major=.$major - versuffix="$major.$revision" - ;; - - linux) - major=.`expr $current - $age` - versuffix="$major.$age.$revision" - ;; - - osf) - major=.`expr $current - $age` - versuffix=".$current.$age.$revision" - verstring="$current.$age.$revision" - - # Add in all the interfaces that we are compatible with. - loop=$age - while test $loop != 0; do - iface=`expr $current - $loop` - loop=`expr $loop - 1` - verstring="$verstring:${iface}.0" - done - - # Make executables depend on our current version. - verstring="$verstring:${current}.0" - ;; - - sunos) - major=".$current" - versuffix=".$current.$revision" - ;; - - windows) - # Use '-' rather than '.', since we only want one - # extension on DOS 8.3 filesystems. - major=`expr $current - $age` - versuffix="-$major" - ;; - - *) - $echo "$modename: unknown library version type \`$version_type'" 1>&2 - echo "Fatal configuration error. See the $PACKAGE docs for more information." 1>&2 - exit 1 - ;; - esac - - # Clear the version info if we defaulted, and they specified a release. - if test -z "$vinfo" && test -n "$release"; then - major= - verstring="0.0" - case $version_type in - darwin) - # we can't check for "0.0" in archive_cmds due to quoting - # problems, so we reset it completely - verstring="" - ;; - *) - verstring="0.0" - ;; - esac - if test "$need_version" = no; then - versuffix= - else - versuffix=".0.0" - fi - fi - - # Remove version info from name if versioning should be avoided - if test "$avoid_version" = yes && test "$need_version" = no; then - major= - versuffix= - verstring="" - fi - - # Check to see if the archive will have undefined symbols. - if test "$allow_undefined" = yes; then - if test "$allow_undefined_flag" = unsupported; then - $echo "$modename: warning: undefined symbols not allowed in $host shared libraries" 1>&2 - build_libtool_libs=no - build_old_libs=yes - fi - else - # Don't allow undefined symbols. - allow_undefined_flag="$no_undefined_flag" - fi - fi - - if test "$mode" != relink; then - # Remove our outputs. - $show "${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.*" - $run ${rm}r $output_objdir/$outputname $output_objdir/$libname.* $output_objdir/${libname}${release}.* - fi - - # Now set the variables for building old libraries. - if test "$build_old_libs" = yes && test "$build_libtool_libs" != convenience ; then - oldlibs="$oldlibs $output_objdir/$libname.$libext" - - # Transform .lo files to .o files. - oldobjs="$objs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e "$lo2o" | $NL2SP` - fi - - # Eliminate all temporary directories. - for path in $notinst_path; do - lib_search_path=`echo "$lib_search_path " | ${SED} -e 's% $path % %g'` - deplibs=`echo "$deplibs " | ${SED} -e 's% -L$path % %g'` - dependency_libs=`echo "$dependency_libs " | ${SED} -e 's% -L$path % %g'` - done - - if test -n "$xrpath"; then - # If the user specified any rpath flags, then add them. - temp_xrpath= - for libdir in $xrpath; do - temp_xrpath="$temp_xrpath -R$libdir" - case "$finalize_rpath " in - *" $libdir "*) ;; - *) finalize_rpath="$finalize_rpath $libdir" ;; - esac - done - if test $hardcode_into_libs != yes || test $build_old_libs = yes; then - dependency_libs="$temp_xrpath $dependency_libs" - fi - fi - - # Make sure dlfiles contains only unique files that won't be dlpreopened - old_dlfiles="$dlfiles" - dlfiles= - for lib in $old_dlfiles; do - case " $dlprefiles $dlfiles " in - *" $lib "*) ;; - *) dlfiles="$dlfiles $lib" ;; - esac - done - - # Make sure dlprefiles contains only unique files - old_dlprefiles="$dlprefiles" - dlprefiles= - for lib in $old_dlprefiles; do - case "$dlprefiles " in - *" $lib "*) ;; - *) dlprefiles="$dlprefiles $lib" ;; - esac - done - - if test "$build_libtool_libs" = yes; then - if test -n "$rpath"; then - case $host in - *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2* | *-*-beos*) - # these systems don't actually have a c library (as such)! - ;; - *-*-rhapsody* | *-*-darwin1.[012]) - # Rhapsody C library is in the System framework - deplibs="$deplibs -framework System" - ;; - *-*-netbsd*) - # Don't link with libc until the a.out ld.so is fixed. - ;; - *-*-openbsd* | *-*-freebsd*) - # Do not include libc due to us having libc/libc_r. - ;; - *) - # Add libc to deplibs on all other systems if necessary. - if test $build_libtool_need_lc = "yes"; then - deplibs="$deplibs -lc" - fi - ;; - esac - fi - - # Transform deplibs into only deplibs that can be linked in shared. - name_save=$name - libname_save=$libname - release_save=$release - versuffix_save=$versuffix - major_save=$major - # I'm not sure if I'm treating the release correctly. I think - # release should show up in the -l (ie -lgmp5) so we don't want to - # add it in twice. Is that correct? - release="" - versuffix="" - major="" - newdeplibs= - droppeddeps=no - case $deplibs_check_method in - pass_all) - # Don't check for shared/static. Everything works. - # This might be a little naive. We might want to check - # whether the library exists or not. But this is on - # osf3 & osf4 and I'm not really sure... Just - # implementing what was already the behaviour. - newdeplibs=$deplibs - ;; - test_compile) - # This code stresses the "libraries are programs" paradigm to its - # limits. Maybe even breaks it. We compile a program, linking it - # against the deplibs as a proxy for the library. Then we can check - # whether they linked in statically or dynamically with ldd. - $rm conftest.c - cat > conftest.c </dev/null` - for potent_lib in $potential_libs; do - # Follow soft links. - if ls -lLd "$potent_lib" 2>/dev/null \ - | grep " -> " >/dev/null; then - continue - fi - # The statement above tries to avoid entering an - # endless loop below, in case of cyclic links. - # We might still enter an endless loop, since a link - # loop can be closed while we follow links, - # but so what? - potlib="$potent_lib" - while test -h "$potlib" 2>/dev/null; do - potliblink=`ls -ld $potlib | ${SED} 's/.* -> //'` - case $potliblink in - [\\/]* | [A-Za-z]:[\\/]*) potlib="$potliblink";; - *) potlib=`$echo "X$potlib" | $Xsed -e 's,[^/]*$,,'`"$potliblink";; - esac - done - if eval $file_magic_cmd \"\$potlib\" 2>/dev/null \ - | ${SED} 10q \ - | egrep "$file_magic_regex" > /dev/null; then - newdeplibs="$newdeplibs $a_deplib" - a_deplib="" - break 2 - fi - done - done - if test -n "$a_deplib" ; then - droppeddeps=yes - echo - echo "*** Warning: linker path does not have real file for library $a_deplib." - echo "*** I have the capability to make that library automatically link in when" - echo "*** you link to this library. But I can only do this if you have a" - echo "*** shared version of the library, which you do not appear to have" - echo "*** because I did check the linker path looking for a file starting" - if test -z "$potlib" ; then - echo "*** with $libname but no candidates were found. (...for file magic test)" - else - echo "*** with $libname and none of the candidates passed a file format test" - echo "*** using a file magic. Last file checked: $potlib" - fi - fi - else - # Add a -L argument. - newdeplibs="$newdeplibs $a_deplib" - fi - done # Gone through all deplibs. - ;; - match_pattern*) - set dummy $deplibs_check_method - match_pattern_regex=`expr "$deplibs_check_method" : "$2 \(.*\)"` - for a_deplib in $deplibs; do - name="`expr $a_deplib : '-l\(.*\)'`" - # If $name is empty we are operating on a -L argument. - if test -n "$name" && test "$name" != "0"; then - libname=`eval \\$echo \"$libname_spec\"` - for i in $lib_search_path $sys_lib_search_path $shlib_search_path; do - potential_libs=`ls $i/$libname[.-]* 2>/dev/null` - for potent_lib in $potential_libs; do - potlib="$potent_lib" # see symlink-check below in file_magic test - if eval echo \"$potent_lib\" 2>/dev/null \ - | ${SED} 10q \ - | egrep "$match_pattern_regex" > /dev/null; then - newdeplibs="$newdeplibs $a_deplib" - a_deplib="" - break 2 - fi - done - done - if test -n "$a_deplib" ; then - droppeddeps=yes - echo - echo "*** Warning: linker path does not have real file for library $a_deplib." - echo "*** I have the capability to make that library automatically link in when" - echo "*** you link to this library. But I can only do this if you have a" - echo "*** shared version of the library, which you do not appear to have" - echo "*** because I did check the linker path looking for a file starting" - if test -z "$potlib" ; then - echo "*** with $libname but no candidates were found. (...for regex pattern test)" - else - echo "*** with $libname and none of the candidates passed a file format test" - echo "*** using a regex pattern. Last file checked: $potlib" - fi - fi - else - # Add a -L argument. - newdeplibs="$newdeplibs $a_deplib" - fi - done # Gone through all deplibs. - ;; - none | unknown | *) - newdeplibs="" - if $echo "X $deplibs" | $Xsed -e 's/ -lc$//' \ - -e 's/ -[LR][^ ]*//g' -e 's/[ ]//g' | - grep . >/dev/null; then - echo - if test "X$deplibs_check_method" = "Xnone"; then - echo "*** Warning: inter-library dependencies are not supported in this platform." - else - echo "*** Warning: inter-library dependencies are not known to be supported." - fi - echo "*** All declared inter-library dependencies are being dropped." - droppeddeps=yes - fi - ;; - esac - versuffix=$versuffix_save - major=$major_save - release=$release_save - libname=$libname_save - name=$name_save - - case $host in - *-*-rhapsody* | *-*-darwin1.[012]) - # On Rhapsody replace the C library is the System framework - newdeplibs=`$echo "X $newdeplibs" | $Xsed -e 's/ -lc / -framework System /'` - ;; - esac - - if test "$droppeddeps" = yes; then - if test "$module" = yes; then - echo - echo "*** Warning: libtool could not satisfy all declared inter-library" - echo "*** dependencies of module $libname. Therefore, libtool will create" - echo "*** a static module, that should work as long as the dlopening" - echo "*** application is linked with the -dlopen flag." - if test -z "$global_symbol_pipe"; then - echo - echo "*** However, this would only work if libtool was able to extract symbol" - echo "*** lists from a program, using \`nm' or equivalent, but libtool could" - echo "*** not find such a program. So, this module is probably useless." - echo "*** \`nm' from GNU binutils and a full rebuild may help." - fi - if test "$build_old_libs" = no; then - oldlibs="$output_objdir/$libname.$libext" - build_libtool_libs=module - build_old_libs=yes - else - build_libtool_libs=no - fi - else - echo "*** The inter-library dependencies that have been dropped here will be" - echo "*** automatically added whenever a program is linked with this library" - echo "*** or is declared to -dlopen it." - - if test $allow_undefined = no; then - echo - echo "*** Since this library must not contain undefined symbols," - echo "*** because either the platform does not support them or" - echo "*** it was explicitly requested with -no-undefined," - echo "*** libtool will only create a static version of it." - if test "$build_old_libs" = no; then - oldlibs="$output_objdir/$libname.$libext" - build_libtool_libs=module - build_old_libs=yes - else - build_libtool_libs=no - fi - fi - fi - fi - # Done checking deplibs! - deplibs=$newdeplibs - fi - - # All the library-specific variables (install_libdir is set above). - library_names= - old_library= - dlname= - - # Test again, we may have decided not to build it any more - if test "$build_libtool_libs" = yes; then - if test $hardcode_into_libs = yes; then - # Hardcode the library paths - hardcode_libdirs= - dep_rpath= - rpath="$finalize_rpath" - test "$mode" != relink && rpath="$compile_rpath$rpath" - for libdir in $rpath; do - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - hardcode_libdirs="$libdir" - else - # Just accumulate the unique libdirs. - case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - fi - else - eval flag=\"$hardcode_libdir_flag_spec\" - dep_rpath="$dep_rpath $flag" - fi - elif test -n "$runpath_var"; then - case "$perm_rpath " in - *" $libdir "*) ;; - *) perm_rpath="$perm_rpath $libdir" ;; - esac - fi - done - # Substitute the hardcoded libdirs into the rpath. - if test -n "$hardcode_libdir_separator" && - test -n "$hardcode_libdirs"; then - libdir="$hardcode_libdirs" - eval dep_rpath=\"$hardcode_libdir_flag_spec\" - fi - if test -n "$runpath_var" && test -n "$perm_rpath"; then - # We should set the runpath_var. - rpath= - for dir in $perm_rpath; do - rpath="$rpath$dir:" - done - eval "$runpath_var='$rpath\$$runpath_var'; export $runpath_var" - fi - test -n "$dep_rpath" && deplibs="$dep_rpath $deplibs" - fi - - shlibpath="$finalize_shlibpath" - test "$mode" != relink && shlibpath="$compile_shlibpath$shlibpath" - if test -n "$shlibpath"; then - eval "$shlibpath_var='$shlibpath\$$shlibpath_var'; export $shlibpath_var" - fi - - # Get the real and link names of the library. - eval library_names=\"$library_names_spec\" - set dummy $library_names - realname="$2" - shift; shift - - if test -n "$soname_spec"; then - eval soname=\"$soname_spec\" - else - soname="$realname" - fi - test -z "$dlname" && dlname=$soname - - lib="$output_objdir/$realname" - for link - do - linknames="$linknames $link" - done - - # Ensure that we have .o objects for linkers which dislike .lo - # (e.g. aix) in case we are running --disable-static - for obj in $libobjs; do - xdir=`$echo "X$obj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$obj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$obj" | $Xsed -e 's%^.*/%%'` - oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` - if test ! -f $xdir/$oldobj; then - $show "(cd $xdir && ${LN_S} $baseobj $oldobj)" - $run eval '(cd $xdir && ${LN_S} $baseobj $oldobj)' || exit $? - fi - done - - # Use standard objects if they are pic - test -z "$pic_flag" && libobjs=`$echo "X$libobjs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - - # Prepare the list of exported symbols - if test -z "$export_symbols"; then - if test "$always_export_symbols" = yes || test -n "$export_symbols_regex"; then - $show "generating symbol list for \`$libname.la'" - export_symbols="$output_objdir/$libname.exp" - $run $rm $export_symbols - eval cmds=\"$export_symbols_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - if test -n "$export_symbols_regex"; then - $show "egrep -e \"$export_symbols_regex\" \"$export_symbols\" > \"${export_symbols}T\"" - $run eval 'egrep -e "$export_symbols_regex" "$export_symbols" > "${export_symbols}T"' - $show "$mv \"${export_symbols}T\" \"$export_symbols\"" - $run eval '$mv "${export_symbols}T" "$export_symbols"' - fi - fi - fi - - if test -n "$export_symbols" && test -n "$include_expsyms"; then - $run eval '$echo "X$include_expsyms" | $SP2NL >> "$export_symbols"' - fi - - if test -n "$convenience"; then - if test -n "$whole_archive_flag_spec"; then - eval libobjs=\"\$libobjs $whole_archive_flag_spec\" - else - gentop="$output_objdir/${outputname}x" - $show "${rm}r $gentop" - $run ${rm}r "$gentop" - $show "mkdir $gentop" - $run mkdir "$gentop" - status=$? - if test $status -ne 0 && test ! -d "$gentop"; then - exit $status - fi - generated="$generated $gentop" - - for xlib in $convenience; do - # Extract the objects. - case $xlib in - [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; - *) xabs=`pwd`"/$xlib" ;; - esac - xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` - xdir="$gentop/$xlib" - - $show "${rm}r $xdir" - $run ${rm}r "$xdir" - $show "mkdir $xdir" - $run mkdir "$xdir" - status=$? - if test $status -ne 0 && test ! -d "$xdir"; then - exit $status - fi - $show "(cd $xdir && $AR x $xabs)" - $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? - - libobjs="$libobjs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` - done - fi - fi - - if test "$thread_safe" = yes && test -n "$thread_safe_flag_spec"; then - eval flag=\"$thread_safe_flag_spec\" - linker_flags="$linker_flags $flag" - fi - - # Make a backup of the uninstalled library when relinking - if test "$mode" = relink; then - $run eval '(cd $output_objdir && $rm ${realname}U && $mv $realname ${realname}U)' || exit $? - fi - - # Do each of the archive commands. - if test -n "$export_symbols" && test -n "$archive_expsym_cmds"; then - eval cmds=\"$archive_expsym_cmds\" - else - save_deplibs="$deplibs" - for conv in $convenience; do - tmp_deplibs= - for test_deplib in $deplibs; do - if test "$test_deplib" != "$conv"; then - tmp_deplibs="$tmp_deplibs $test_deplib" - fi - done - deplibs="$tmp_deplibs" - done - eval cmds=\"$archive_cmds\" - deplibs="$save_deplibs" - fi - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - - # Restore the uninstalled library and exit - if test "$mode" = relink; then - $run eval '(cd $output_objdir && $rm ${realname}T && $mv $realname ${realname}T && $mv "$realname"U $realname)' || exit $? - exit 0 - fi - - # Create links to the real library. - for linkname in $linknames; do - if test "$realname" != "$linkname"; then - $show "(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)" - $run eval '(cd $output_objdir && $rm $linkname && $LN_S $realname $linkname)' || exit $? - fi - done - - # If -module or -export-dynamic was specified, set the dlname. - if test "$module" = yes || test "$export_dynamic" = yes; then - # On all known operating systems, these are identical. - dlname="$soname" - fi - fi - ;; - - obj) - if test -n "$deplibs"; then - $echo "$modename: warning: \`-l' and \`-L' are ignored for objects" 1>&2 - fi - - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - $echo "$modename: warning: \`-dlopen' is ignored for objects" 1>&2 - fi - - if test -n "$rpath"; then - $echo "$modename: warning: \`-rpath' is ignored for objects" 1>&2 - fi - - if test -n "$xrpath"; then - $echo "$modename: warning: \`-R' is ignored for objects" 1>&2 - fi - - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for objects" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for objects" 1>&2 - fi - - case $output in - *.lo) - if test -n "$objs$old_deplibs"; then - $echo "$modename: cannot build library object \`$output' from non-libtool objects" 1>&2 - exit 1 - fi - libobj="$output" - obj=`$echo "X$output" | $Xsed -e "$lo2o"` - ;; - *) - libobj= - obj="$output" - ;; - esac - - # Delete the old objects. - $run $rm $obj $libobj - - # Objects from convenience libraries. This assumes - # single-version convenience libraries. Whenever we create - # different ones for PIC/non-PIC, this we'll have to duplicate - # the extraction. - reload_conv_objs= - gentop= - # reload_cmds runs $LD directly, so let us get rid of - # -Wl from whole_archive_flag_spec - wl= - - if test -n "$convenience"; then - if test -n "$whole_archive_flag_spec"; then - eval reload_conv_objs=\"\$reload_objs $whole_archive_flag_spec\" - else - gentop="$output_objdir/${obj}x" - $show "${rm}r $gentop" - $run ${rm}r "$gentop" - $show "mkdir $gentop" - $run mkdir "$gentop" - status=$? - if test $status -ne 0 && test ! -d "$gentop"; then - exit $status - fi - generated="$generated $gentop" - - for xlib in $convenience; do - # Extract the objects. - case $xlib in - [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; - *) xabs=`pwd`"/$xlib" ;; - esac - xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` - xdir="$gentop/$xlib" - - $show "${rm}r $xdir" - $run ${rm}r "$xdir" - $show "mkdir $xdir" - $run mkdir "$xdir" - status=$? - if test $status -ne 0 && test ! -d "$xdir"; then - exit $status - fi - $show "(cd $xdir && $AR x $xabs)" - $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? - - reload_conv_objs="$reload_objs "`find $xdir -name \*.o -print -o -name \*.lo -print | $NL2SP` - done - fi - fi - - # Create the old-style object. - reload_objs="$objs$old_deplibs "`$echo "X$libobjs" | $SP2NL | $Xsed -e '/\.'${libext}$'/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP`" $reload_conv_objs" ### testsuite: skip nested quoting test - - output="$obj" - eval cmds=\"$reload_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - - # Exit if we aren't doing a library object file. - if test -z "$libobj"; then - if test -n "$gentop"; then - $show "${rm}r $gentop" - $run ${rm}r $gentop - fi - - exit 0 - fi - - if test "$build_libtool_libs" != yes; then - if test -n "$gentop"; then - $show "${rm}r $gentop" - $run ${rm}r $gentop - fi - - # Create an invalid libtool object if no PIC, so that we don't - # accidentally link it into a program. - $show "echo timestamp > $libobj" - $run eval "echo timestamp > $libobj" || exit $? - exit 0 - fi - - if test -n "$pic_flag" || test "$pic_mode" != default; then - # Only do commands if we really have different PIC objects. - reload_objs="$libobjs $reload_conv_objs" - output="$libobj" - eval cmds=\"$reload_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - else - # Just create a symlink. - $show $rm $libobj - $run $rm $libobj - xdir=`$echo "X$libobj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$libobj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$libobj" | $Xsed -e 's%^.*/%%'` - oldobj=`$echo "X$baseobj" | $Xsed -e "$lo2o"` - $show "(cd $xdir && $LN_S $oldobj $baseobj)" - $run eval '(cd $xdir && $LN_S $oldobj $baseobj)' || exit $? - fi - - if test -n "$gentop"; then - $show "${rm}r $gentop" - $run ${rm}r $gentop - fi - - exit 0 - ;; - - prog) - case $host in - *cygwin*) output=`echo $output | ${SED} -e 's,.exe$,,;s,$,.exe,'` ;; - esac - if test -n "$vinfo"; then - $echo "$modename: warning: \`-version-info' is ignored for programs" 1>&2 - fi - - if test -n "$release"; then - $echo "$modename: warning: \`-release' is ignored for programs" 1>&2 - fi - - if test "$preload" = yes; then - if test "$dlopen_support" = unknown && test "$dlopen_self" = unknown && - test "$dlopen_self_static" = unknown; then - $echo "$modename: warning: \`AC_LIBTOOL_DLOPEN' not used. Assuming no dlopen support." - fi - fi - - case $host in - *-*-rhapsody* | *-*-darwin1.[012]) - # On Rhapsody replace the C library is the System framework - compile_deplibs=`$echo "X $compile_deplibs" | $Xsed -e 's/ -lc / -framework System /'` - finalize_deplibs=`$echo "X $finalize_deplibs" | $Xsed -e 's/ -lc / -framework System /'` - case $host in - *darwin*) - # Don't allow lazy linking, it breaks C++ global constructors - compile_command="$compile_command ${wl}-bind_at_load" - finalize_command="$finalize_command ${wl}-bind_at_load" - ;; - esac - ;; - esac - - compile_command="$compile_command $compile_deplibs" - finalize_command="$finalize_command $finalize_deplibs" - - if test -n "$rpath$xrpath"; then - # If the user specified any rpath flags, then add them. - for libdir in $rpath $xrpath; do - # This is the magic to use -rpath. - case "$finalize_rpath " in - *" $libdir "*) ;; - *) finalize_rpath="$finalize_rpath $libdir" ;; - esac - done - fi - - # Now hardcode the library paths - rpath= - hardcode_libdirs= - for libdir in $compile_rpath $finalize_rpath; do - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - hardcode_libdirs="$libdir" - else - # Just accumulate the unique libdirs. - case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - fi - else - eval flag=\"$hardcode_libdir_flag_spec\" - rpath="$rpath $flag" - fi - elif test -n "$runpath_var"; then - case "$perm_rpath " in - *" $libdir "*) ;; - *) perm_rpath="$perm_rpath $libdir" ;; - esac - fi - case $host in - *-*-cygwin* | *-*-mingw* | *-*-pw32* | *-*-os2*) - case :$dllsearchpath: in - *":$libdir:"*) ;; - *) dllsearchpath="$dllsearchpath:$libdir";; - esac - ;; - esac - done - # Substitute the hardcoded libdirs into the rpath. - if test -n "$hardcode_libdir_separator" && - test -n "$hardcode_libdirs"; then - libdir="$hardcode_libdirs" - eval rpath=\" $hardcode_libdir_flag_spec\" - fi - compile_rpath="$rpath" - - rpath= - hardcode_libdirs= - for libdir in $finalize_rpath; do - if test -n "$hardcode_libdir_flag_spec"; then - if test -n "$hardcode_libdir_separator"; then - if test -z "$hardcode_libdirs"; then - hardcode_libdirs="$libdir" - else - # Just accumulate the unique libdirs. - case $hardcode_libdir_separator$hardcode_libdirs$hardcode_libdir_separator in - *"$hardcode_libdir_separator$libdir$hardcode_libdir_separator"*) - ;; - *) - hardcode_libdirs="$hardcode_libdirs$hardcode_libdir_separator$libdir" - ;; - esac - fi - else - eval flag=\"$hardcode_libdir_flag_spec\" - rpath="$rpath $flag" - fi - elif test -n "$runpath_var"; then - case "$finalize_perm_rpath " in - *" $libdir "*) ;; - *) finalize_perm_rpath="$finalize_perm_rpath $libdir" ;; - esac - fi - done - # Substitute the hardcoded libdirs into the rpath. - if test -n "$hardcode_libdir_separator" && - test -n "$hardcode_libdirs"; then - libdir="$hardcode_libdirs" - eval rpath=\" $hardcode_libdir_flag_spec\" - fi - finalize_rpath="$rpath" - - if test -n "$libobjs" && test "$build_old_libs" = yes; then - # Transform all the library objects into standard objects. - compile_command=`$echo "X$compile_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - finalize_command=`$echo "X$finalize_command" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - fi - - dlsyms= - if test -n "$dlfiles$dlprefiles" || test "$dlself" != no; then - if test -n "$NM" && test -n "$global_symbol_pipe"; then - dlsyms="${outputname}S.c" - else - $echo "$modename: not configured to extract global symbols from dlpreopened files" 1>&2 - fi - fi - - if test -n "$dlsyms"; then - case $dlsyms in - "") ;; - *.c) - # Discover the nlist of each of the dlfiles. - nlist="$output_objdir/${outputname}.nm" - - $show "$rm $nlist ${nlist}S ${nlist}T" - $run $rm "$nlist" "${nlist}S" "${nlist}T" - - # Parse the name list into a source file. - $show "creating $output_objdir/$dlsyms" - - test -z "$run" && $echo > "$output_objdir/$dlsyms" "\ -/* $dlsyms - symbol resolution table for \`$outputname' dlsym emulation. */ -/* Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP */ - -#ifdef __cplusplus -extern \"C\" { -#endif - -/* Prevent the only kind of declaration conflicts we can make. */ -#define lt_preloaded_symbols some_other_symbol - -/* External symbol declarations for the compiler. */\ -" - - if test "$dlself" = yes; then - $show "generating symbol list for \`$output'" - - test -z "$run" && $echo ': @PROGRAM@ ' > "$nlist" - - # Add our own program objects to the symbol list. - progfiles=`$echo "X$objs$old_deplibs" | $SP2NL | $Xsed -e "$lo2o" | $NL2SP` - for arg in $progfiles; do - $show "extracting global C symbols from \`$arg'" - $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" - done - - if test -n "$exclude_expsyms"; then - $run eval 'egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T' - $run eval '$mv "$nlist"T "$nlist"' - fi - - if test -n "$export_symbols_regex"; then - $run eval 'egrep -e "$export_symbols_regex" "$nlist" > "$nlist"T' - $run eval '$mv "$nlist"T "$nlist"' - fi - - # Prepare the list of exported symbols - if test -z "$export_symbols"; then - export_symbols="$output_objdir/$output.exp" - $run $rm $export_symbols - $run eval "${SED} -n -e '/^: @PROGRAM@$/d' -e 's/^.* \(.*\)$/\1/p' "'< "$nlist" > "$export_symbols"' - else - $run eval "${SED} -e 's/\([][.*^$]\)/\\\1/g' -e 's/^/ /' -e 's/$/$/'"' < "$export_symbols" > "$output_objdir/$output.exp"' - $run eval 'grep -f "$output_objdir/$output.exp" < "$nlist" > "$nlist"T' - $run eval 'mv "$nlist"T "$nlist"' - fi - fi - - for arg in $dlprefiles; do - $show "extracting global C symbols from \`$arg'" - name=`echo "$arg" | ${SED} -e 's%^.*/%%'` - $run eval 'echo ": $name " >> "$nlist"' - $run eval "$NM $arg | $global_symbol_pipe >> '$nlist'" - done - - if test -z "$run"; then - # Make sure we have at least an empty file. - test -f "$nlist" || : > "$nlist" - - if test -n "$exclude_expsyms"; then - egrep -v " ($exclude_expsyms)$" "$nlist" > "$nlist"T - $mv "$nlist"T "$nlist" - fi - - # Try sorting and uniquifying the output. - if grep -v "^: " < "$nlist" | - if sort -k 3 /dev/null 2>&1; then - sort -k 3 - else - sort +2 - fi | - uniq > "$nlist"S; then - : - else - grep -v "^: " < "$nlist" > "$nlist"S - fi - - if test -f "$nlist"S; then - eval "$global_symbol_to_cdecl"' < "$nlist"S >> "$output_objdir/$dlsyms"' - else - echo '/* NONE */' >> "$output_objdir/$dlsyms" - fi - - $echo >> "$output_objdir/$dlsyms" "\ - -#undef lt_preloaded_symbols - -#if defined (__STDC__) && __STDC__ -# define lt_ptr void * -#else -# define lt_ptr char * -# define const -#endif - -/* The mapping between symbol names and symbols. */ -const struct { - const char *name; - lt_ptr address; -} -lt_preloaded_symbols[] = -{\ -" - - eval "$global_symbol_to_c_name_address" < "$nlist" >> "$output_objdir/$dlsyms" - - $echo >> "$output_objdir/$dlsyms" "\ - {0, (lt_ptr) 0} -}; - -/* This works around a problem in FreeBSD linker */ -#ifdef FREEBSD_WORKAROUND -static const void *lt_preloaded_setup() { - return lt_preloaded_symbols; -} -#endif - -#ifdef __cplusplus -} -#endif\ -" - fi - - pic_flag_for_symtable= - case $host in - # compiling the symbol table file with pic_flag works around - # a FreeBSD bug that causes programs to crash when -lm is - # linked before any other PIC object. But we must not use - # pic_flag when linking with -static. The problem exists in - # FreeBSD 2.2.6 and is fixed in FreeBSD 3.1. - *-*-freebsd2*|*-*-freebsd3.0*|*-*-freebsdelf3.0*) - case "$compile_command " in - *" -static "*) ;; - *) pic_flag_for_symtable=" $pic_flag -DPIC -DFREEBSD_WORKAROUND";; - esac;; - *-*-hpux*) - case "$compile_command " in - *" -static "*) ;; - *) pic_flag_for_symtable=" $pic_flag -DPIC";; - esac - esac - - # Now compile the dynamic symbol file. - $show "(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable \"$dlsyms\")" - $run eval '(cd $output_objdir && $CC -c$no_builtin_flag$pic_flag_for_symtable "$dlsyms")' || exit $? - - # Clean up the generated files. - $show "$rm $output_objdir/$dlsyms $nlist ${nlist}S ${nlist}T" - $run $rm "$output_objdir/$dlsyms" "$nlist" "${nlist}S" "${nlist}T" - - # Transform the symbol file into the correct name. - compile_command=`$echo "X$compile_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s%@SYMFILE@%$output_objdir/${outputname}S.${objext}%"` - ;; - *) - $echo "$modename: unknown suffix for \`$dlsyms'" 1>&2 - exit 1 - ;; - esac - else - # We keep going just in case the user didn't refer to - # lt_preloaded_symbols. The linker will fail if global_symbol_pipe - # really was required. - - # Nullify the symbol file. - compile_command=`$echo "X$compile_command" | $Xsed -e "s% @SYMFILE@%%"` - finalize_command=`$echo "X$finalize_command" | $Xsed -e "s% @SYMFILE@%%"` - fi - - if test $need_relink = no || test "$build_libtool_libs" != yes; then - # Replace the output file specification. - compile_command=`$echo "X$compile_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` - link_command="$compile_command$compile_rpath" - - # We have no uninstalled library dependencies, so finalize right now. - $show "$link_command" - $run eval "$link_command" - status=$? - - # Delete the generated files. - if test -n "$dlsyms"; then - $show "$rm $output_objdir/${outputname}S.${objext}" - $run $rm "$output_objdir/${outputname}S.${objext}" - fi - - exit $status - fi - - if test -n "$shlibpath_var"; then - # We should set the shlibpath_var - rpath= - for dir in $temp_rpath; do - case $dir in - [\\/]* | [A-Za-z]:[\\/]*) - # Absolute path. - rpath="$rpath$dir:" - ;; - *) - # Relative path: add a thisdir entry. - rpath="$rpath\$thisdir/$dir:" - ;; - esac - done - temp_rpath="$rpath" - fi - - if test -n "$compile_shlibpath$finalize_shlibpath"; then - compile_command="$shlibpath_var=\"$compile_shlibpath$finalize_shlibpath\$$shlibpath_var\" $compile_command" - fi - if test -n "$finalize_shlibpath"; then - finalize_command="$shlibpath_var=\"$finalize_shlibpath\$$shlibpath_var\" $finalize_command" - fi - - compile_var= - finalize_var= - if test -n "$runpath_var"; then - if test -n "$perm_rpath"; then - # We should set the runpath_var. - rpath= - for dir in $perm_rpath; do - rpath="$rpath$dir:" - done - compile_var="$runpath_var=\"$rpath\$$runpath_var\" " - fi - if test -n "$finalize_perm_rpath"; then - # We should set the runpath_var. - rpath= - for dir in $finalize_perm_rpath; do - rpath="$rpath$dir:" - done - finalize_var="$runpath_var=\"$rpath\$$runpath_var\" " - fi - fi - - if test "$no_install" = yes; then - # We don't need to create a wrapper script. - link_command="$compile_var$compile_command$compile_rpath" - # Replace the output file specification. - link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output"'%g'` - # Delete the old output file. - $run $rm $output - # Link the executable and exit - $show "$link_command" - $run eval "$link_command" || exit $? - exit 0 - fi - - if test "$hardcode_action" = relink; then - # Fast installation is not supported - link_command="$compile_var$compile_command$compile_rpath" - relink_command="$finalize_var$finalize_command$finalize_rpath" - - $echo "$modename: warning: this platform does not like uninstalled shared libraries" 1>&2 - $echo "$modename: \`$output' will be relinked during installation" 1>&2 - else - if test "$fast_install" != no; then - link_command="$finalize_var$compile_command$finalize_rpath" - if test "$fast_install" = yes; then - relink_command=`$echo "X$compile_var$compile_command$compile_rpath" | $Xsed -e 's%@OUTPUT@%\$progdir/\$file%g'` - else - # fast_install is set to needless - relink_command= - fi - else - link_command="$compile_var$compile_command$compile_rpath" - relink_command="$finalize_var$finalize_command$finalize_rpath" - fi - fi - - # Replace the output file specification. - link_command=`$echo "X$link_command" | $Xsed -e 's%@OUTPUT@%'"$output_objdir/$outputname"'%g'` - - # Delete the old output files. - $run $rm $output $output_objdir/$outputname $output_objdir/lt-$outputname - - $show "$link_command" - $run eval "$link_command" || exit $? - - # Now create the wrapper script. - $show "creating $output" - - # Quote the relink command for shipping. - if test -n "$relink_command"; then - # Preserve any variables that may affect compiler behavior - for var in $variables_saved_for_relink; do - if eval test -z \"\${$var+set}\"; then - relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" - elif eval var_value=\$$var; test -z "$var_value"; then - relink_command="$var=; export $var; $relink_command" - else - var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` - relink_command="$var=\"$var_value\"; export $var; $relink_command" - fi - done - relink_command="(cd `pwd`; $relink_command)" - relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` - fi - - # Quote $echo for shipping. - if test "X$echo" = "X$SHELL $0 --fallback-echo"; then - case $0 in - [\\/]* | [A-Za-z]:[\\/]*) qecho="$SHELL $0 --fallback-echo";; - *) qecho="$SHELL `pwd`/$0 --fallback-echo";; - esac - qecho=`$echo "X$qecho" | $Xsed -e "$sed_quote_subst"` - else - qecho=`$echo "X$echo" | $Xsed -e "$sed_quote_subst"` - fi - - # Only actually do things if our run command is non-null. - if test -z "$run"; then - # win32 will think the script is a binary if it has - # a .exe suffix, so we strip it off here. - case $output in - *.exe) output=`echo $output|${SED} 's,.exe$,,'` ;; - esac - # test for cygwin because mv fails w/o .exe extensions - case $host in - *cygwin*) exeext=.exe ;; - *) exeext= ;; - esac - $rm $output - trap "$rm $output; exit 1" 1 2 15 - - $echo > $output "\ -#! $SHELL - -# $output - temporary wrapper script for $objdir/$outputname -# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP -# -# The $output program cannot be directly executed until all the libtool -# libraries that it depends on are installed. -# -# This wrapper script should never be moved out of the build directory. -# If it is, it will not operate correctly. - -# Sed substitution that helps us do robust quoting. It backslashifies -# metacharacters that are still active within double-quoted strings. -Xsed="${SED}"' -e 1s/^X//' -sed_quote_subst='$sed_quote_subst' - -# The HP-UX ksh and POSIX shell print the target directory to stdout -# if CDPATH is set. -if test \"\${CDPATH+set}\" = set; then CDPATH=:; export CDPATH; fi - -relink_command=\"$relink_command\" - -# This environment variable determines our operation mode. -if test \"\$libtool_install_magic\" = \"$magic\"; then - # install mode needs the following variable: - notinst_deplibs='$notinst_deplibs' -else - # When we are sourced in execute mode, \$file and \$echo are already set. - if test \"\$libtool_execute_magic\" != \"$magic\"; then - echo=\"$qecho\" - file=\"\$0\" - # Make sure echo works. - if test \"X\$1\" = X--no-reexec; then - # Discard the --no-reexec flag, and continue. - shift - elif test \"X\`(\$echo '\t') 2>/dev/null\`\" = 'X\t'; then - # Yippee, \$echo works! - : - else - # Restart under the correct shell, and then maybe \$echo will work. - exec $SHELL \"\$0\" --no-reexec \${1+\"\$@\"} - fi - fi\ -" - $echo >> $output "\ - - # Find the directory that this script lives in. - thisdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*$%%'\` - test \"x\$thisdir\" = \"x\$file\" && thisdir=. - - # Follow symbolic links until we get to the real thisdir. - file=\`ls -ld \"\$file\" | ${SED} -n 's/.*-> //p'\` - while test -n \"\$file\"; do - destdir=\`\$echo \"X\$file\" | \$Xsed -e 's%/[^/]*\$%%'\` - - # If there was a directory component, then change thisdir. - if test \"x\$destdir\" != \"x\$file\"; then - case \"\$destdir\" in - [\\\\/]* | [A-Za-z]:[\\\\/]*) thisdir=\"\$destdir\" ;; - *) thisdir=\"\$thisdir/\$destdir\" ;; - esac - fi - - file=\`\$echo \"X\$file\" | \$Xsed -e 's%^.*/%%'\` - file=\`ls -ld \"\$thisdir/\$file\" | ${SED} -n 's/.*-> //p'\` - done - - # Try to get the absolute directory name. - absdir=\`cd \"\$thisdir\" && pwd\` - test -n \"\$absdir\" && thisdir=\"\$absdir\" -" - - if test "$fast_install" = yes; then - echo >> $output "\ - program=lt-'$outputname'$exeext - progdir=\"\$thisdir/$objdir\" - - if test ! -f \"\$progdir/\$program\" || \\ - { file=\`ls -1dt \"\$progdir/\$program\" \"\$progdir/../\$program\" 2>/dev/null | ${SED} 1q\`; \\ - test \"X\$file\" != \"X\$progdir/\$program\"; }; then - - file=\"\$\$-\$program\" - - if test ! -d \"\$progdir\"; then - $mkdir \"\$progdir\" - else - $rm \"\$progdir/\$file\" - fi" - - echo >> $output "\ - - # relink executable if necessary - if test -n \"\$relink_command\"; then - if relink_command_output=\`eval \$relink_command 2>&1\`; then : - else - $echo \"\$relink_command_output\" >&2 - $rm \"\$progdir/\$file\" - exit 1 - fi - fi - - $mv \"\$progdir/\$file\" \"\$progdir/\$program\" 2>/dev/null || - { $rm \"\$progdir/\$program\"; - $mv \"\$progdir/\$file\" \"\$progdir/\$program\"; } - $rm \"\$progdir/\$file\" - fi" - else - echo >> $output "\ - program='$outputname' - progdir=\"\$thisdir/$objdir\" -" - fi - - echo >> $output "\ - - if test -f \"\$progdir/\$program\"; then" - - # Export our shlibpath_var if we have one. - if test "$shlibpath_overrides_runpath" = yes && test -n "$shlibpath_var" && test -n "$temp_rpath"; then - $echo >> $output "\ - # Add our own library path to $shlibpath_var - $shlibpath_var=\"$temp_rpath\$$shlibpath_var\" - - # Some systems cannot cope with colon-terminated $shlibpath_var - # The second colon is a workaround for a bug in BeOS R4 ${SED} - $shlibpath_var=\`\$echo \"X\$$shlibpath_var\" | \$Xsed -e 's/::*\$//'\` - - export $shlibpath_var -" - fi - - # fixup the dll searchpath if we need to. - if test -n "$dllsearchpath"; then - $echo >> $output "\ - # Add the dll search path components to the executable PATH - PATH=$dllsearchpath:\$PATH -" - fi - - $echo >> $output "\ - if test \"\$libtool_execute_magic\" != \"$magic\"; then - # Run the actual program with our arguments. -" - case $host in - # win32 systems need to use the prog path for dll - # lookup to work - *-*-cygwin* | *-*-pw32*) - $echo >> $output "\ - exec \$progdir/\$program \${1+\"\$@\"} -" - ;; - - # Backslashes separate directories on plain windows - *-*-mingw | *-*-os2*) - $echo >> $output "\ - exec \$progdir\\\\\$program \${1+\"\$@\"} -" - ;; - - *) - $echo >> $output "\ - # Export the path to the program. - PATH=\"\$progdir:\$PATH\" - export PATH - - exec \$program \${1+\"\$@\"} -" - ;; - esac - $echo >> $output "\ - \$echo \"\$0: cannot exec \$program \${1+\"\$@\"}\" - exit 1 - fi - else - # The program doesn't exist. - \$echo \"\$0: error: \$progdir/\$program does not exist\" 1>&2 - \$echo \"This script is just a wrapper for \$program.\" 1>&2 - echo \"See the $PACKAGE documentation for more information.\" 1>&2 - exit 1 - fi -fi\ -" - chmod +x $output - fi - exit 0 - ;; - esac - - # See if we need to build an old-fashioned archive. - for oldlib in $oldlibs; do - - if test "$build_libtool_libs" = convenience; then - oldobjs="$libobjs_save" - addlibs="$convenience" - build_libtool_libs=no - else - if test "$build_libtool_libs" = module; then - oldobjs="$libobjs_save" - build_libtool_libs=no - else - oldobjs="$objs$old_deplibs "`$echo "X$libobjs_save" | $SP2NL | $Xsed -e '/\.'${libext}'$/d' -e '/\.lib$/d' -e "$lo2o" | $NL2SP` - fi - addlibs="$old_convenience" - fi - - if test -n "$addlibs"; then - gentop="$output_objdir/${outputname}x" - $show "${rm}r $gentop" - $run ${rm}r "$gentop" - $show "mkdir $gentop" - $run mkdir "$gentop" - status=$? - if test $status -ne 0 && test ! -d "$gentop"; then - exit $status - fi - generated="$generated $gentop" - - # Add in members from convenience archives. - for xlib in $addlibs; do - # Extract the objects. - case $xlib in - [\\/]* | [A-Za-z]:[\\/]*) xabs="$xlib" ;; - *) xabs=`pwd`"/$xlib" ;; - esac - xlib=`$echo "X$xlib" | $Xsed -e 's%^.*/%%'` - xdir="$gentop/$xlib" - - $show "${rm}r $xdir" - $run ${rm}r "$xdir" - $show "mkdir $xdir" - $run mkdir "$xdir" - status=$? - if test $status -ne 0 && test ! -d "$xdir"; then - exit $status - fi - $show "(cd $xdir && $AR x $xabs)" - $run eval "(cd \$xdir && $AR x \$xabs)" || exit $? - - oldobjs="$oldobjs "`find $xdir -name \*.${objext} -print -o -name \*.lo -print | $NL2SP` - done - fi - - # Do each command in the archive commands. - if test -n "$old_archive_from_new_cmds" && test "$build_libtool_libs" = yes; then - eval cmds=\"$old_archive_from_new_cmds\" - else - # Ensure that we have .o objects in place in case we decided - # not to build a shared library, and have fallen back to building - # static libs even though --disable-static was passed! - for oldobj in $oldobjs; do - if test ! -f $oldobj; then - xdir=`$echo "X$oldobj" | $Xsed -e 's%/[^/]*$%%'` - if test "X$xdir" = "X$oldobj"; then - xdir="." - else - xdir="$xdir" - fi - baseobj=`$echo "X$oldobj" | $Xsed -e 's%^.*/%%'` - obj=`$echo "X$baseobj" | $Xsed -e "$o2lo"` - $show "(cd $xdir && ${LN_S} $obj $baseobj)" - $run eval '(cd $xdir && ${LN_S} $obj $baseobj)' || exit $? - fi - done - - eval cmds=\"$old_archive_cmds\" - fi - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - done - - if test -n "$generated"; then - $show "${rm}r$generated" - $run ${rm}r$generated - fi - - # Now create the libtool archive. - case $output in - *.la) - old_library= - test "$build_old_libs" = yes && old_library="$libname.$libext" - $show "creating $output" - - # Preserve any variables that may affect compiler behavior - for var in $variables_saved_for_relink; do - if eval test -z \"\${$var+set}\"; then - relink_command="{ test -z \"\${$var+set}\" || unset $var || { $var=; export $var; }; }; $relink_command" - elif eval var_value=\$$var; test -z "$var_value"; then - relink_command="$var=; export $var; $relink_command" - else - var_value=`$echo "X$var_value" | $Xsed -e "$sed_quote_subst"` - relink_command="$var=\"$var_value\"; export $var; $relink_command" - fi - done - # Quote the link command for shipping. - relink_command="(cd `pwd`; $SHELL $0 --mode=relink $libtool_args)" - relink_command=`$echo "X$relink_command" | $Xsed -e "$sed_quote_subst"` - - # Only create the output if not a dry run. - if test -z "$run"; then - for installed in no yes; do - if test "$installed" = yes; then - if test -z "$install_libdir"; then - break - fi - output="$output_objdir/$outputname"i - # Replace all uninstalled libtool libraries with the installed ones - newdependency_libs= - for deplib in $dependency_libs; do - case $deplib in - *.la) - name=`$echo "X$deplib" | $Xsed -e 's%^.*/%%'` - eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $deplib` - if test -z "$libdir"; then - $echo "$modename: \`$deplib' is not a valid libtool archive" 1>&2 - exit 1 - fi - newdependency_libs="$newdependency_libs $libdir/$name" - ;; - *) newdependency_libs="$newdependency_libs $deplib" ;; - esac - done - dependency_libs="$newdependency_libs" - newdlfiles= - for lib in $dlfiles; do - name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` - eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` - if test -z "$libdir"; then - $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 - exit 1 - fi - newdlfiles="$newdlfiles $libdir/$name" - done - dlfiles="$newdlfiles" - newdlprefiles= - for lib in $dlprefiles; do - name=`$echo "X$lib" | $Xsed -e 's%^.*/%%'` - eval libdir=`${SED} -n -e 's/^libdir=\(.*\)$/\1/p' $lib` - if test -z "$libdir"; then - $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 - exit 1 - fi - newdlprefiles="$newdlprefiles $libdir/$name" - done - dlprefiles="$newdlprefiles" - fi - $rm $output - # place dlname in correct position for cygwin - tdlname=$dlname - case $host,$output,$installed,$module,$dlname in - *cygwin*,*lai,yes,no,*.dll) tdlname=../bin/$dlname ;; - esac - $echo > $output "\ -# $outputname - a libtool library file -# Generated by $PROGRAM - GNU $PACKAGE $VERSION$TIMESTAMP -# -# Please DO NOT delete this file! -# It is necessary for linking the library. - -# The name that we can dlopen(3). -dlname='$tdlname' - -# Names of this library. -library_names='$library_names' - -# The name of the static archive. -old_library='$old_library' - -# Libraries that this one depends upon. -dependency_libs='$dependency_libs' - -# Version information for $libname. -current=$current -age=$age -revision=$revision - -# Is this an already installed library? -installed=$installed - -# Files to dlopen/dlpreopen -dlopen='$dlfiles' -dlpreopen='$dlprefiles' - -# Directory that this library needs to be installed in: -libdir='$install_libdir'" - if test "$installed" = no && test $need_relink = yes; then - $echo >> $output "\ -relink_command=\"$relink_command\"" - fi - done - fi - - # Do a symbolic link so that the libtool archive can be found in - # LD_LIBRARY_PATH before the program is installed. - $show "(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)" - $run eval '(cd $output_objdir && $rm $outputname && $LN_S ../$outputname $outputname)' || exit $? - ;; - esac - exit 0 - ;; - - # libtool install mode - install) - modename="$modename: install" - - # There may be an optional sh(1) argument at the beginning of - # install_prog (especially on Windows NT). - if test "$nonopt" = "$SHELL" || test "$nonopt" = /bin/sh || - # Allow the use of GNU shtool's install command. - $echo "X$nonopt" | $Xsed | grep shtool > /dev/null; then - # Aesthetically quote it. - arg=`$echo "X$nonopt" | $Xsed -e "$sed_quote_subst"` - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - install_prog="$arg " - arg="$1" - shift - else - install_prog= - arg="$nonopt" - fi - - # The real first argument should be the name of the installation program. - # Aesthetically quote it. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - install_prog="$install_prog$arg" - - # We need to accept at least all the BSD install flags. - dest= - files= - opts= - prev= - install_type= - isdir=no - stripme= - for arg - do - if test -n "$dest"; then - files="$files $dest" - dest="$arg" - continue - fi - - case $arg in - -d) isdir=yes ;; - -f) prev="-f" ;; - -g) prev="-g" ;; - -m) prev="-m" ;; - -o) prev="-o" ;; - -s) - stripme=" -s" - continue - ;; - -*) ;; - - *) - # If the previous option needed an argument, then skip it. - if test -n "$prev"; then - prev= - else - dest="$arg" - continue - fi - ;; - esac - - # Aesthetically quote the argument. - arg=`$echo "X$arg" | $Xsed -e "$sed_quote_subst"` - case $arg in - *[\[\~\#\^\&\*\(\)\{\}\|\;\<\>\?\'\ \ ]*|*]*) - arg="\"$arg\"" - ;; - esac - install_prog="$install_prog $arg" - done - - if test -z "$install_prog"; then - $echo "$modename: you must specify an install program" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - if test -n "$prev"; then - $echo "$modename: the \`$prev' option requires an argument" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - if test -z "$files"; then - if test -z "$dest"; then - $echo "$modename: no file or destination specified" 1>&2 - else - $echo "$modename: you must specify a destination" 1>&2 - fi - $echo "$help" 1>&2 - exit 1 - fi - - # Strip any trailing slash from the destination. - dest=`$echo "X$dest" | $Xsed -e 's%/$%%'` - - # Check to see that the destination is a directory. - test -d "$dest" && isdir=yes - if test "$isdir" = yes; then - destdir="$dest" - destname= - else - destdir=`$echo "X$dest" | $Xsed -e 's%/[^/]*$%%'` - test "X$destdir" = "X$dest" && destdir=. - destname=`$echo "X$dest" | $Xsed -e 's%^.*/%%'` - - # Not a directory, so check to see that there is only one file specified. - set dummy $files - if test $# -gt 2; then - $echo "$modename: \`$dest' is not a directory" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - fi - case $destdir in - [\\/]* | [A-Za-z]:[\\/]*) ;; - *) - for file in $files; do - case $file in - *.lo) ;; - *) - $echo "$modename: \`$destdir' must be an absolute directory name" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - esac - done - ;; - esac - - # This variable tells wrapper scripts just to set variables rather - # than running their programs. - libtool_install_magic="$magic" - - staticlibs= - future_libdirs= - current_libdirs= - for file in $files; do - - # Do each installation. - case $file in - *.$libext) - # Do the static libraries later. - staticlibs="$staticlibs $file" - ;; - - *.la) - # Check to see that this really is a libtool archive. - if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : - else - $echo "$modename: \`$file' is not a valid libtool archive" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - library_names= - old_library= - relink_command= - # If there is no directory component, then add one. - case $file in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Add the libdir to current_libdirs if it is the destination. - if test "X$destdir" = "X$libdir"; then - case "$current_libdirs " in - *" $libdir "*) ;; - *) current_libdirs="$current_libdirs $libdir" ;; - esac - else - # Note the libdir as a future libdir. - case "$future_libdirs " in - *" $libdir "*) ;; - *) future_libdirs="$future_libdirs $libdir" ;; - esac - fi - - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'`/ - test "X$dir" = "X$file/" && dir= - dir="$dir$objdir" - - if test -n "$relink_command"; then - $echo "$modename: warning: relinking \`$file'" 1>&2 - $show "$relink_command" - if $run eval "$relink_command"; then : - else - $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 - continue - fi - fi - - # See the names of the shared library. - set dummy $library_names - if test -n "$2"; then - realname="$2" - shift - shift - - srcname="$realname" - test -n "$relink_command" && srcname="$realname"T - - # Install the shared library and build the symlinks. - $show "$install_prog $dir/$srcname $destdir/$realname" - $run eval "$install_prog $dir/$srcname $destdir/$realname" || exit $? - if test -n "$stripme" && test -n "$striplib"; then - $show "$striplib $destdir/$realname" - $run eval "$striplib $destdir/$realname" || exit $? - fi - - if test $# -gt 0; then - # Delete the old symlinks, and create new ones. - for linkname - do - if test "$linkname" != "$realname"; then - $show "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" - $run eval "(cd $destdir && $rm $linkname && $LN_S $realname $linkname)" - fi - done - fi - - # Do each command in the postinstall commands. - lib="$destdir/$realname" - eval cmds=\"$postinstall_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - fi - - # Install the pseudo-library for information purposes. - name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - instname="$dir/$name"i - $show "$install_prog $instname $destdir/$name" - $run eval "$install_prog $instname $destdir/$name" || exit $? - - # Maybe install the static library, too. - test -n "$old_library" && staticlibs="$staticlibs $dir/$old_library" - ;; - - *.lo) - # Install (i.e. copy) a libtool object. - - # Figure out destination file name, if it wasn't already specified. - if test -n "$destname"; then - destfile="$destdir/$destname" - else - destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - destfile="$destdir/$destfile" - fi - - # Deduce the name of the destination old-style object file. - case $destfile in - *.lo) - staticdest=`$echo "X$destfile" | $Xsed -e "$lo2o"` - ;; - *.$objext) - staticdest="$destfile" - destfile= - ;; - *) - $echo "$modename: cannot copy a libtool object to \`$destfile'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; - esac - - # Install the libtool object if requested. - if test -n "$destfile"; then - $show "$install_prog $file $destfile" - $run eval "$install_prog $file $destfile" || exit $? - fi - - # Install the old object if enabled. - if test "$build_old_libs" = yes; then - # Deduce the name of the old-style object file. - staticobj=`$echo "X$file" | $Xsed -e "$lo2o"` - - $show "$install_prog $staticobj $staticdest" - $run eval "$install_prog \$staticobj \$staticdest" || exit $? - fi - exit 0 - ;; - - *) - # Figure out destination file name, if it wasn't already specified. - if test -n "$destname"; then - destfile="$destdir/$destname" - else - destfile=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - destfile="$destdir/$destfile" - fi - - # Do a test to see if this is really a libtool program. - case $host in - *cygwin*|*mingw*) - wrapper=`echo $file | ${SED} -e 's,.exe$,,'` - ;; - *) - wrapper=$file - ;; - esac - if (${SED} -e '4q' $wrapper | egrep "^# Generated by .*$PACKAGE")>/dev/null 2>&1; then - notinst_deplibs= - relink_command= - - # If there is no directory component, then add one. - case $file in - */* | *\\*) . $wrapper ;; - *) . ./$wrapper ;; - esac - - # Check the variables that should have been set. - if test -z "$notinst_deplibs"; then - $echo "$modename: invalid libtool wrapper script \`$wrapper'" 1>&2 - exit 1 - fi - - finalize=yes - for lib in $notinst_deplibs; do - # Check to see that each library is installed. - libdir= - if test -f "$lib"; then - # If there is no directory component, then add one. - case $lib in - */* | *\\*) . $lib ;; - *) . ./$lib ;; - esac - fi - libfile="$libdir/"`$echo "X$lib" | $Xsed -e 's%^.*/%%g'` ### testsuite: skip nested quoting test - if test -n "$libdir" && test ! -f "$libfile"; then - $echo "$modename: warning: \`$lib' has not been installed in \`$libdir'" 1>&2 - finalize=no - fi - done - - relink_command= - # If there is no directory component, then add one. - case $file in - */* | *\\*) . $wrapper ;; - *) . ./$wrapper ;; - esac - - outputname= - if test "$fast_install" = no && test -n "$relink_command"; then - if test "$finalize" = yes && test -z "$run"; then - tmpdir="/tmp" - test -n "$TMPDIR" && tmpdir="$TMPDIR" - tmpdir="$tmpdir/libtool-$$" - if $mkdir -p "$tmpdir" && chmod 700 "$tmpdir"; then : - else - $echo "$modename: error: cannot create temporary directory \`$tmpdir'" 1>&2 - continue - fi - file=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - outputname="$tmpdir/$file" - # Replace the output file specification. - relink_command=`$echo "X$relink_command" | $Xsed -e 's%@OUTPUT@%'"$outputname"'%g'` - - $show "$relink_command" - if $run eval "$relink_command"; then : - else - $echo "$modename: error: relink \`$file' with the above command before installing it" 1>&2 - ${rm}r "$tmpdir" - continue - fi - file="$outputname" - else - $echo "$modename: warning: cannot relink \`$file'" 1>&2 - fi - else - # Install the binary that we compiled earlier. - file=`$echo "X$file" | $Xsed -e "s%\([^/]*\)$%$objdir/\1%"` - fi - fi - - # remove .exe since cygwin /usr/bin/install will append another - # one anyways - case $install_prog,$host in - /usr/bin/install*,*cygwin*) - case $file:$destfile in - *.exe:*.exe) - # this is ok - ;; - *.exe:*) - destfile=$destfile.exe - ;; - *:*.exe) - destfile=`echo $destfile | ${SED} -e 's,.exe$,,'` - ;; - esac - ;; - esac - $show "$install_prog$stripme $file $destfile" - $run eval "$install_prog\$stripme \$file \$destfile" || exit $? - test -n "$outputname" && ${rm}r "$tmpdir" - ;; - esac - done - - for file in $staticlibs; do - name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - - # Set up the ranlib parameters. - oldlib="$destdir/$name" - - $show "$install_prog $file $oldlib" - $run eval "$install_prog \$file \$oldlib" || exit $? - - if test -n "$stripme" && test -n "$striplib"; then - $show "$old_striplib $oldlib" - $run eval "$old_striplib $oldlib" || exit $? - fi - - # Do each command in the postinstall commands. - eval cmds=\"$old_postinstall_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || exit $? - done - IFS="$save_ifs" - done - - if test -n "$future_libdirs"; then - $echo "$modename: warning: remember to run \`$progname --finish$future_libdirs'" 1>&2 - fi - - if test -n "$current_libdirs"; then - # Maybe just do a dry run. - test -n "$run" && current_libdirs=" -n$current_libdirs" - exec_cmd='$SHELL $0 --finish$current_libdirs' - else - exit 0 - fi - ;; - - # libtool finish mode - finish) - modename="$modename: finish" - libdirs="$nonopt" - admincmds= - - if test -n "$finish_cmds$finish_eval" && test -n "$libdirs"; then - for dir - do - libdirs="$libdirs $dir" - done - - for libdir in $libdirs; do - if test -n "$finish_cmds"; then - # Do each command in the finish commands. - eval cmds=\"$finish_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" || admincmds="$admincmds - $cmd" - done - IFS="$save_ifs" - fi - if test -n "$finish_eval"; then - # Do the single finish_eval. - eval cmds=\"$finish_eval\" - $run eval "$cmds" || admincmds="$admincmds - $cmds" - fi - done - fi - - # Exit here if they wanted silent mode. - test "$show" = ":" && exit 0 - - echo "----------------------------------------------------------------------" - echo "Libraries have been installed in:" - for libdir in $libdirs; do - echo " $libdir" - done - echo - echo "If you ever happen to want to link against installed libraries" - echo "in a given directory, LIBDIR, you must either use libtool, and" - echo "specify the full pathname of the library, or use the \`-LLIBDIR'" - echo "flag during linking and do at least one of the following:" - if test -n "$shlibpath_var"; then - echo " - add LIBDIR to the \`$shlibpath_var' environment variable" - echo " during execution" - fi - if test -n "$runpath_var"; then - echo " - add LIBDIR to the \`$runpath_var' environment variable" - echo " during linking" - fi - if test -n "$hardcode_libdir_flag_spec"; then - libdir=LIBDIR - eval flag=\"$hardcode_libdir_flag_spec\" - - echo " - use the \`$flag' linker flag" - fi - if test -n "$admincmds"; then - echo " - have your system administrator run these commands:$admincmds" - fi - if test -f /etc/ld.so.conf; then - echo " - have your system administrator add LIBDIR to \`/etc/ld.so.conf'" - fi - echo - echo "See any operating system documentation about shared libraries for" - echo "more information, such as the ld(1) and ld.so(8) manual pages." - echo "----------------------------------------------------------------------" - exit 0 - ;; - - # libtool execute mode - execute) - modename="$modename: execute" - - # The first argument is the command name. - cmd="$nonopt" - if test -z "$cmd"; then - $echo "$modename: you must specify a COMMAND" 1>&2 - $echo "$help" - exit 1 - fi - - # Handle -dlopen flags immediately. - for file in $execute_dlfiles; do - if test ! -f "$file"; then - $echo "$modename: \`$file' is not a file" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - dir= - case $file in - *.la) - # Check to see that this really is a libtool archive. - if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then : - else - $echo "$modename: \`$lib' is not a valid libtool archive" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - # Read the libtool library. - dlname= - library_names= - - # If there is no directory component, then add one. - case $file in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Skip this library if it cannot be dlopened. - if test -z "$dlname"; then - # Warn if it was a shared library. - test -n "$library_names" && $echo "$modename: warning: \`$file' was not linked with \`-export-dynamic'" - continue - fi - - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$file" && dir=. - - if test -f "$dir/$objdir/$dlname"; then - dir="$dir/$objdir" - else - $echo "$modename: cannot find \`$dlname' in \`$dir' or \`$dir/$objdir'" 1>&2 - exit 1 - fi - ;; - - *.lo) - # Just add the directory containing the .lo file. - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - test "X$dir" = "X$file" && dir=. - ;; - - *) - $echo "$modename: warning \`-dlopen' is ignored for non-libtool libraries and objects" 1>&2 - continue - ;; - esac - - # Get the absolute pathname. - absdir=`cd "$dir" && pwd` - test -n "$absdir" && dir="$absdir" - - # Now add the directory to shlibpath_var. - if eval "test -z \"\$$shlibpath_var\""; then - eval "$shlibpath_var=\"\$dir\"" - else - eval "$shlibpath_var=\"\$dir:\$$shlibpath_var\"" - fi - done - - # This variable tells wrapper scripts just to set shlibpath_var - # rather than running their programs. - libtool_execute_magic="$magic" - - # Check if any of the arguments is a wrapper script. - args= - for file - do - case $file in - -*) ;; - *) - # Do a test to see if this is really a libtool program. - if (${SED} -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then - # If there is no directory component, then add one. - case $file in - */* | *\\*) . $file ;; - *) . ./$file ;; - esac - - # Transform arg to wrapped name. - file="$progdir/$program" - fi - ;; - esac - # Quote arguments (to preserve shell metacharacters). - file=`$echo "X$file" | $Xsed -e "$sed_quote_subst"` - args="$args \"$file\"" - done - - if test -z "$run"; then - if test -n "$shlibpath_var"; then - # Export the shlibpath_var. - eval "export $shlibpath_var" - fi - - # Restore saved enviroment variables - if test "${save_LC_ALL+set}" = set; then - LC_ALL="$save_LC_ALL"; export LC_ALL - fi - if test "${save_LANG+set}" = set; then - LANG="$save_LANG"; export LANG - fi - - # Now prepare to actually exec the command. - exec_cmd="\$cmd$args" - else - # Display what would be done. - if test -n "$shlibpath_var"; then - eval "\$echo \"\$shlibpath_var=\$$shlibpath_var\"" - $echo "export $shlibpath_var" - fi - $echo "$cmd$args" - exit 0 - fi - ;; - - # libtool clean and uninstall mode - clean | uninstall) - modename="$modename: $mode" - rm="$nonopt" - files= - rmforce= - exit_status=0 - - # This variable tells wrapper scripts just to set variables rather - # than running their programs. - libtool_install_magic="$magic" - - for arg - do - case $arg in - -f) rm="$rm $arg"; rmforce=yes ;; - -*) rm="$rm $arg" ;; - *) files="$files $arg" ;; - esac - done - - if test -z "$rm"; then - $echo "$modename: you must specify an RM program" 1>&2 - $echo "$help" 1>&2 - exit 1 - fi - - rmdirs= - - for file in $files; do - dir=`$echo "X$file" | $Xsed -e 's%/[^/]*$%%'` - if test "X$dir" = "X$file"; then - dir=. - objdir="$objdir" - else - objdir="$dir/$objdir" - fi - name=`$echo "X$file" | $Xsed -e 's%^.*/%%'` - test $mode = uninstall && objdir="$dir" - - # Remember objdir for removal later, being careful to avoid duplicates - if test $mode = clean; then - case " $rmdirs " in - *" $objdir "*) ;; - *) rmdirs="$rmdirs $objdir" ;; - esac - fi - - # Don't error if the file doesn't exist and rm -f was used. - if (test -L "$file") >/dev/null 2>&1 \ - || (test -h "$file") >/dev/null 2>&1 \ - || test -f "$file"; then - : - elif test -d "$file"; then - exit_status=1 - continue - elif test "$rmforce" = yes; then - continue - fi - - rmfiles="$file" - - case $name in - *.la) - # Possibly a libtool archive, so verify it. - if (${SED} -e '2q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then - . $dir/$name - - # Delete the libtool libraries and symlinks. - for n in $library_names; do - rmfiles="$rmfiles $objdir/$n" - done - test -n "$old_library" && rmfiles="$rmfiles $objdir/$old_library" - test $mode = clean && rmfiles="$rmfiles $objdir/$name $objdir/${name}i" - - if test $mode = uninstall; then - if test -n "$library_names"; then - # Do each command in the postuninstall commands. - eval cmds=\"$postuninstall_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - if test $? != 0 && test "$rmforce" != yes; then - exit_status=1 - fi - done - IFS="$save_ifs" - fi - - if test -n "$old_library"; then - # Do each command in the old_postuninstall commands. - eval cmds=\"$old_postuninstall_cmds\" - save_ifs="$IFS"; IFS='~' - for cmd in $cmds; do - IFS="$save_ifs" - $show "$cmd" - $run eval "$cmd" - if test $? != 0 && test "$rmforce" != yes; then - exit_status=1 - fi - done - IFS="$save_ifs" - fi - # FIXME: should reinstall the best remaining shared library. - fi - fi - ;; - - *.lo) - if test "$build_old_libs" = yes; then - oldobj=`$echo "X$name" | $Xsed -e "$lo2o"` - rmfiles="$rmfiles $dir/$oldobj" - fi - ;; - - *) - # Do a test to see if this is a libtool program. - if test $mode = clean && - (${SED} -e '4q' $file | egrep "^# Generated by .*$PACKAGE") >/dev/null 2>&1; then - relink_command= - . $dir/$file - - rmfiles="$rmfiles $objdir/$name $objdir/${name}S.${objext}" - if test "$fast_install" = yes && test -n "$relink_command"; then - rmfiles="$rmfiles $objdir/lt-$name" - fi - fi - ;; - esac - $show "$rm $rmfiles" - $run $rm $rmfiles || exit_status=1 - done - - # Try to remove the ${objdir}s in the directories where we deleted files - for dir in $rmdirs; do - if test -d "$dir"; then - $show "rmdir $dir" - $run rmdir $dir >/dev/null 2>&1 - fi - done - - exit $exit_status - ;; - - "") - $echo "$modename: you must specify a MODE" 1>&2 - $echo "$generic_help" 1>&2 - exit 1 - ;; - esac - - if test -z "$exec_cmd"; then - $echo "$modename: invalid operation mode \`$mode'" 1>&2 - $echo "$generic_help" 1>&2 - exit 1 - fi -fi # test -z "$show_help" - -if test -n "$exec_cmd"; then - eval exec $exec_cmd - exit 1 -fi - -# We need to display help for each of the modes. -case $mode in -"") $echo \ -"Usage: $modename [OPTION]... [MODE-ARG]... - -Provide generalized library-building support services. - - --config show all configuration variables - --debug enable verbose shell tracing --n, --dry-run display commands without modifying any files - --features display basic configuration information and exit - --finish same as \`--mode=finish' - --help display this help message and exit - --mode=MODE use operation mode MODE [default=inferred from MODE-ARGS] - --quiet same as \`--silent' - --silent don't print informational messages - --version print version information - -MODE must be one of the following: - - clean remove files from the build directory - compile compile a source file into a libtool object - execute automatically set library path, then run a program - finish complete the installation of libtool libraries - install install libraries or executables - link create a library or an executable - uninstall remove libraries from an installed directory - -MODE-ARGS vary depending on the MODE. Try \`$modename --help --mode=MODE' for -a more detailed description of MODE." - exit 0 - ;; - -clean) - $echo \ -"Usage: $modename [OPTION]... --mode=clean RM [RM-OPTION]... FILE... - -Remove files from the build directory. - -RM is the name of the program to use to delete files associated with each FILE -(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed -to RM. - -If FILE is a libtool library, object or program, all the files associated -with it are deleted. Otherwise, only FILE itself is deleted using RM." - ;; - -compile) - $echo \ -"Usage: $modename [OPTION]... --mode=compile COMPILE-COMMAND... SOURCEFILE - -Compile a source file into a libtool library object. - -This mode accepts the following additional options: - - -o OUTPUT-FILE set the output file name to OUTPUT-FILE - -prefer-pic try to building PIC objects only - -prefer-non-pic try to building non-PIC objects only - -static always build a \`.o' file suitable for static linking - -COMPILE-COMMAND is a command to be used in creating a \`standard' object file -from the given SOURCEFILE. - -The output file name is determined by removing the directory component from -SOURCEFILE, then substituting the C source code suffix \`.c' with the -library object suffix, \`.lo'." - ;; - -execute) - $echo \ -"Usage: $modename [OPTION]... --mode=execute COMMAND [ARGS]... - -Automatically set library path, then run a program. - -This mode accepts the following additional options: - - -dlopen FILE add the directory containing FILE to the library path - -This mode sets the library path environment variable according to \`-dlopen' -flags. - -If any of the ARGS are libtool executable wrappers, then they are translated -into their corresponding uninstalled binary, and any of their required library -directories are added to the library path. - -Then, COMMAND is executed, with ARGS as arguments." - ;; - -finish) - $echo \ -"Usage: $modename [OPTION]... --mode=finish [LIBDIR]... - -Complete the installation of libtool libraries. - -Each LIBDIR is a directory that contains libtool libraries. - -The commands that this mode executes may require superuser privileges. Use -the \`--dry-run' option if you just want to see what would be executed." - ;; - -install) - $echo \ -"Usage: $modename [OPTION]... --mode=install INSTALL-COMMAND... - -Install executables or libraries. - -INSTALL-COMMAND is the installation command. The first component should be -either the \`install' or \`cp' program. - -The rest of the components are interpreted as arguments to that command (only -BSD-compatible install options are recognized)." - ;; - -link) - $echo \ -"Usage: $modename [OPTION]... --mode=link LINK-COMMAND... - -Link object files or libraries together to form another library, or to -create an executable program. - -LINK-COMMAND is a command using the C compiler that you would use to create -a program from several object files. - -The following components of LINK-COMMAND are treated specially: - - -all-static do not do any dynamic linking at all - -avoid-version do not add a version suffix if possible - -dlopen FILE \`-dlpreopen' FILE if it cannot be dlopened at runtime - -dlpreopen FILE link in FILE and add its symbols to lt_preloaded_symbols - -export-dynamic allow symbols from OUTPUT-FILE to be resolved with dlsym(3) - -export-symbols SYMFILE - try to export only the symbols listed in SYMFILE - -export-symbols-regex REGEX - try to export only the symbols matching REGEX - -LLIBDIR search LIBDIR for required installed libraries - -lNAME OUTPUT-FILE requires the installed library libNAME - -module build a library that can dlopened - -no-fast-install disable the fast-install mode - -no-install link a not-installable executable - -no-undefined declare that a library does not refer to external symbols - -o OUTPUT-FILE create OUTPUT-FILE from the specified objects - -release RELEASE specify package release information - -rpath LIBDIR the created library will eventually be installed in LIBDIR - -R[ ]LIBDIR add LIBDIR to the runtime path of programs and libraries - -static do not do any dynamic linking of libtool libraries - -version-info CURRENT[:REVISION[:AGE]] - specify library version info [each variable defaults to 0] - -All other options (arguments beginning with \`-') are ignored. - -Every other argument is treated as a filename. Files ending in \`.la' are -treated as uninstalled libtool libraries, other files are standard or library -object files. - -If the OUTPUT-FILE ends in \`.la', then a libtool library is created, -only library objects (\`.lo' files) may be specified, and \`-rpath' is -required, except when creating a convenience library. - -If OUTPUT-FILE ends in \`.a' or \`.lib', then a standard library is created -using \`ar' and \`ranlib', or on Windows using \`lib'. - -If OUTPUT-FILE ends in \`.lo' or \`.${objext}', then a reloadable object file -is created, otherwise an executable program is created." - ;; - -uninstall) - $echo \ -"Usage: $modename [OPTION]... --mode=uninstall RM [RM-OPTION]... FILE... - -Remove libraries from an installation directory. - -RM is the name of the program to use to delete files associated with each FILE -(typically \`/bin/rm'). RM-OPTIONS are options (such as \`-f') to be passed -to RM. - -If FILE is a libtool library, all the files associated with it are deleted. -Otherwise, only FILE itself is deleted using RM." - ;; - -*) - $echo "$modename: invalid operation mode \`$mode'" 1>&2 - $echo "$help" 1>&2 - exit 1 - ;; -esac - -echo -$echo "Try \`$modename --help' for more information about other modes." - -exit 0 - -# Local Variables: -# mode:shell-script -# sh-indentation:2 -# End: diff --git a/libpcre/pcre-config.in b/libpcre/pcre-config.in deleted file mode 100644 index 8daded9fe1..0000000000 --- a/libpcre/pcre-config.in +++ /dev/null @@ -1,59 +0,0 @@ -#!/bin/sh - -prefix=@prefix@ -exec_prefix=@exec_prefix@ -exec_prefix_set=no - -usage="\ -Usage: pcre-config [--prefix] [--exec-prefix] [--version] [--libs] [--libs-posix] [--cflags] [--cflags-posix]" - -if test $# -eq 0; then - echo "${usage}" 1>&2 - exit 1 -fi - -while test $# -gt 0; do - case "$1" in - -*=*) optarg=`echo "$1" | sed 's/[-_a-zA-Z0-9]*=//'` ;; - *) optarg= ;; - esac - - case $1 in - --prefix=*) - prefix=$optarg - if test $exec_prefix_set = no ; then - exec_prefix=$optarg - fi - ;; - --prefix) - echo $prefix - ;; - --exec-prefix=*) - exec_prefix=$optarg - exec_prefix_set=yes - ;; - --exec-prefix) - echo $exec_prefix - ;; - --version) - echo @PCRE_VERSION@ - ;; - --cflags | --cflags-posix) - if test @includedir@ != /usr/include ; then - includes=-I@includedir@ - fi - echo $includes - ;; - --libs-posix) - echo -L@libdir@ -lpcreposix -lpcre - ;; - --libs) - echo -L@libdir@ -lpcre - ;; - *) - echo "${usage}" 1>&2 - exit 1 - ;; - esac - shift -done diff --git a/libpcre/pcre.c b/libpcre/pcre.c deleted file mode 100644 index 5da0f76102..0000000000 --- a/libpcre/pcre.c +++ /dev/null @@ -1,7596 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. - -Written by: Philip Hazel - - Copyright (c) 1997-2003 University of Cambridge - ------------------------------------------------------------------------------ -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ - -/* Define DEBUG to get debugging output on stdout. */ - -/* #define DEBUG */ - -/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef -inline, and there are *still* stupid compilers about that don't like indented -pre-processor statements. I suppose it's only been 10 years... */ - -#ifdef DEBUG -#define DPRINTF(p) printf p -#else -#define DPRINTF(p) /*nothing*/ -#endif - -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ - -#include "internal.h" - - -/* Allow compilation as C++ source code, should anybody want to do that. */ - -#ifdef __cplusplus -#define class pcre_class -#endif - - -/* Maximum number of items on the nested bracket stacks at compile time. This -applies to the nesting of all kinds of parentheses. It does not limit -un-nested, non-capturing parentheses. This number can be made bigger if -necessary - it is used to dimension one int and one unsigned char vector at -compile time. */ - -#define BRASTACK_SIZE 200 - - -/* Maximum number of ints of offset to save on the stack for recursive calls. -If the offset vector is bigger, malloc is used. This should be a multiple of 3, -because the offset vector is always a multiple of 3 long. */ - -#define REC_STACK_SAVE_MAX 30 - - -/* The number of bytes in a literal character string above which we can't add -any more is set at 250 in order to allow for UTF-8 characters. (In theory it -could be 255 when UTF-8 support is excluded, but that means that some of the -test output would be different, which just complicates things.) */ - -#define MAXLIT 250 - - -/* The maximum remaining length of subject we are prepared to search for a -req_byte match. */ - -#define REQ_BYTE_MAX 1000 - - -/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that -the definition is next to the definition of the opcodes in internal.h. */ - -static uschar OP_lengths[] = { OP_LENGTHS }; - -/* Min and max values for the common repeats; for the maxima, 0 => infinity */ - -static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; -static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; - -/* Table for handling escaped characters in the range '0'-'z'. Positive returns -are simple data values; negative values are for special things like \d and so -on. Zero means further processing is needed (for things like \x), or the escape -is invalid. */ - -static const short int escapes[] = { - 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ - 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ - '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */ - 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ - 0, -ESC_Q, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ - 0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ - '`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */ - 0, 0, 0, 0, 0, 0, ESC_n, 0, /* h - o */ - 0, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */ - 0, 0, -ESC_z /* x - z */ -}; - -/* Tables of names of POSIX character classes and their lengths. The list is -terminated by a zero length entry. The first three must be alpha, upper, lower, -as this is assumed for handling case independence. */ - -static const char *posix_names[] = { - "alpha", "lower", "upper", - "alnum", "ascii", "blank", "cntrl", "digit", "graph", - "print", "punct", "space", "word", "xdigit" }; - -static const uschar posix_name_lengths[] = { - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; - -/* Table of class bit maps for each POSIX class; up to three may be combined -to form the class. The table for [:blank:] is dynamically modified to remove -the vertical space characters. */ - -static const int posix_class_maps[] = { - cbit_lower, cbit_upper, -1, /* alpha */ - cbit_lower, -1, -1, /* lower */ - cbit_upper, -1, -1, /* upper */ - cbit_digit, cbit_lower, cbit_upper, /* alnum */ - cbit_print, cbit_cntrl, -1, /* ascii */ - cbit_space, -1, -1, /* blank - a GNU extension */ - cbit_cntrl, -1, -1, /* cntrl */ - cbit_digit, -1, -1, /* digit */ - cbit_graph, -1, -1, /* graph */ - cbit_print, -1, -1, /* print */ - cbit_punct, -1, -1, /* punct */ - cbit_space, -1, -1, /* space */ - cbit_word, -1, -1, /* word - a Perl extension */ - cbit_xdigit,-1, -1 /* xdigit */ -}; - -/* Table to identify ASCII digits and hex digits. This is used when compiling -patterns. Note that the tables in chartables are dependent on the locale, and -may mark arbitrary characters as digits - but the PCRE compiling code expects -to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have -a private table here. It costs 256 bytes, but it is a lot faster than doing -character value tests (at least in some simple cases I timed), and in some -applications one wants PCRE to compile efficiently as well as match -efficiently. - -For convenience, we use the same bit definitions as in chartables: - - 0x04 decimal digit - 0x08 hexadecimal digit - -Then we can use ctype_digit and ctype_xdigit in the code. */ - -static const unsigned char digitab[] = - { - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ - 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ - 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ - 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ - 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ - -/* Definition to allow mutual recursion */ - -static BOOL - compile_regex(int, int, int *, uschar **, const uschar **, const char **, - BOOL, int, int *, int *, branch_chain *, compile_data *); - -/* Structure for building a chain of data that actually lives on the -stack, for holding the values of the subject pointer at the start of each -subpattern, so as to detect when an empty string has been matched by a -subpattern - to break infinite loops. */ - -typedef struct eptrblock { - struct eptrblock *prev; - const uschar *saved_eptr; -} eptrblock; - -/* Flag bits for the match() function */ - -#define match_condassert 0x01 /* Called to check a condition assertion */ -#define match_isgroup 0x02 /* Set if start of bracketed group */ - -/* Non-error returns from the match() function. Error returns are externally -defined PCRE_ERROR_xxx codes, which are all negative. */ - -#define MATCH_MATCH 1 -#define MATCH_NOMATCH 0 - - - -/************************************************* -* Global variables * -*************************************************/ - -/* PCRE is thread-clean and doesn't use any global variables in the normal -sense. However, it calls memory allocation and free functions via the two -indirections below, and it can optionally do callouts. These values can be -changed by the caller, but are shared between all threads. However, when -compiling for Virtual Pascal, things are done differently (see pcre.in). */ - -#ifndef VPCOMPAT -void *(*pcre_malloc)(size_t) = malloc; -void (*pcre_free)(void *) = free; -int (*pcre_callout)(pcre_callout_block *) = NULL; -#endif - - -/************************************************* -* Macros and tables for character handling * -*************************************************/ - -/* When UTF-8 encoding is being used, a character is no longer just a single -byte. The macros for character handling generate simple sequences when used in -byte-mode, and more complicated ones for UTF-8 characters. */ - -#ifndef SUPPORT_UTF8 -#define GETCHAR(c, eptr) c = *eptr; -#define GETCHARINC(c, eptr) c = *eptr++; -#define GETCHARINCTEST(c, eptr) c = *eptr++; -#define GETCHARLEN(c, eptr, len) c = *eptr; -#define BACKCHAR(eptr) - -#else /* SUPPORT_UTF8 */ - -/* Get the next UTF-8 character, not advancing the pointer. This is called when -we know we are in UTF-8 mode. */ - -#define GETCHAR(c, eptr) \ - c = *eptr; \ - if ((c & 0xc0) == 0xc0) \ - { \ - int gcii; \ - int gcaa = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int gcss = 6*gcaa; \ - c = (c & utf8_table3[gcaa]) << gcss; \ - for (gcii = 1; gcii <= gcaa; gcii++) \ - { \ - gcss -= 6; \ - c |= (eptr[gcii] & 0x3f) << gcss; \ - } \ - } - -/* Get the next UTF-8 character, advancing the pointer. This is called when we -know we are in UTF-8 mode. */ - -#define GETCHARINC(c, eptr) \ - c = *eptr++; \ - if ((c & 0xc0) == 0xc0) \ - { \ - int gcaa = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int gcss = 6*gcaa; \ - c = (c & utf8_table3[gcaa]) << gcss; \ - while (gcaa-- > 0) \ - { \ - gcss -= 6; \ - c |= (*eptr++ & 0x3f) << gcss; \ - } \ - } - -/* Get the next character, testing for UTF-8 mode, and advancing the pointer */ - -#define GETCHARINCTEST(c, eptr) \ - c = *eptr++; \ - if (md->utf8 && (c & 0xc0) == 0xc0) \ - { \ - int gcaa = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int gcss = 6*gcaa; \ - c = (c & utf8_table3[gcaa]) << gcss; \ - while (gcaa-- > 0) \ - { \ - gcss -= 6; \ - c |= (*eptr++ & 0x3f) << gcss; \ - } \ - } - -/* Get the next UTF-8 character, not advancing the pointer, incrementing length -if there are extra bytes. This is called when we know we are in UTF-8 mode. */ - -#define GETCHARLEN(c, eptr, len) \ - c = *eptr; \ - if ((c & 0xc0) == 0xc0) \ - { \ - int gcii; \ - int gcaa = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ - int gcss = 6*gcaa; \ - c = (c & utf8_table3[gcaa]) << gcss; \ - for (gcii = 1; gcii <= gcaa; gcii++) \ - { \ - gcss -= 6; \ - c |= (eptr[gcii] & 0x3f) << gcss; \ - } \ - len += gcaa; \ - } - -/* If the pointer is not at the start of a character, move it back until -it is. Called only in UTF-8 mode. */ - -#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--; - -#endif - - - -/************************************************* -* Default character tables * -*************************************************/ - -/* A default set of character tables is included in the PCRE binary. Its source -is built by the maketables auxiliary program, which uses the default C ctypes -functions, and put in the file chartables.c. These tables are used by PCRE -whenever the caller of pcre_compile() does not provide an alternate set of -tables. */ - -#include "chartables.c" - - - -#ifdef SUPPORT_UTF8 -/************************************************* -* Tables for UTF-8 support * -*************************************************/ - -/* These are the breakpoints for different numbers of bytes in a UTF-8 -character. */ - -static const int utf8_table1[] = - { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; - -/* These are the indicator bits and the mask for the data bits to set in the -first byte of a character, indexed by the number of additional bytes. */ - -static const int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; -static const int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -/* Table of the number of extra characters, indexed by the first character -masked with 0x3f. The highest number for a valid UTF-8 character is in fact -0x3d. */ - -static const uschar utf8_table4[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - - -/************************************************* -* Convert character value to UTF-8 * -*************************************************/ - -/* This function takes an integer value in the range 0 - 0x7fffffff -and encodes it as a UTF-8 character in 0 to 6 bytes. - -Arguments: - cvalue the character value - buffer pointer to buffer for result - at least 6 bytes long - -Returns: number of characters placed in the buffer -*/ - -static int -ord2utf8(int cvalue, uschar *buffer) -{ -register int i, j; -for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) - if (cvalue <= utf8_table1[i]) break; -buffer += i; -for (j = i; j > 0; j--) - { - *buffer-- = 0x80 | (cvalue & 0x3f); - cvalue >>= 6; - } -*buffer = utf8_table2[i] | cvalue; -return i + 1; -} -#endif - - - -/************************************************* -* Print compiled regex * -*************************************************/ - -/* The code for doing this is held in a separate file that is also included in -pcretest.c. It defines a function called print_internals(). */ - -#ifdef DEBUG -#include "printint.c" -#endif - - - -/************************************************* -* Return version string * -*************************************************/ - -#define STRING(a) # a -#define XSTRING(s) STRING(s) - -const char * -pcre_version(void) -{ -return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); -} - - - - -/************************************************* -* (Obsolete) Return info about compiled pattern * -*************************************************/ - -/* This is the original "info" function. It picks potentially useful data out -of the private structure, but its interface was too rigid. It remains for -backwards compatibility. The public options are passed back in an int - though -the re->options field has been expanded to a long int, all the public options -at the low end of it, and so even on 16-bit systems this will still be OK. -Therefore, I haven't changed the API for pcre_info(). - -Arguments: - external_re points to compiled code - optptr where to pass back the options - first_byte where to pass back the first character, - or -1 if multiline and all branches start ^, - or -2 otherwise - -Returns: number of capturing subpatterns - or negative values on error -*/ - -int -pcre_info(const pcre *external_re, int *optptr, int *first_byte) -{ -const real_pcre *re = (const real_pcre *)external_re; -if (re == NULL) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; -if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS); -if (first_byte != NULL) - *first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte : - ((re->options & PCRE_STARTLINE) != 0)? -1 : -2; -return re->top_bracket; -} - - - -/************************************************* -* Return info about compiled pattern * -*************************************************/ - -/* This is a newer "info" function which has an extensible interface so -that additional items can be added compatibly. - -Arguments: - external_re points to compiled code - extra_data points extra data, or NULL - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - -int -pcre_fullinfo(const pcre *external_re, const pcre_extra *extra_data, int what, - void *where) -{ -const real_pcre *re = (const real_pcre *)external_re; -const pcre_study_data *study = NULL; - -if (re == NULL || where == NULL) return PCRE_ERROR_NULL; -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; - -if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = extra_data->study_data; - -switch (what) - { - case PCRE_INFO_OPTIONS: - *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS; - break; - - case PCRE_INFO_SIZE: - *((size_t *)where) = re->size; - break; - - case PCRE_INFO_STUDYSIZE: - *((size_t *)where) = (study == NULL)? 0 : study->size; - break; - - case PCRE_INFO_CAPTURECOUNT: - *((int *)where) = re->top_bracket; - break; - - case PCRE_INFO_BACKREFMAX: - *((int *)where) = re->top_backref; - break; - - case PCRE_INFO_FIRSTBYTE: - *((int *)where) = - ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte : - ((re->options & PCRE_STARTLINE) != 0)? -1 : -2; - break; - - case PCRE_INFO_FIRSTTABLE: - *((const uschar **)where) = - (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)? - study->start_bits : NULL; - break; - - case PCRE_INFO_LASTLITERAL: - *((int *)where) = - ((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1; - break; - - case PCRE_INFO_NAMEENTRYSIZE: - *((int *)where) = re->name_entry_size; - break; - - case PCRE_INFO_NAMECOUNT: - *((int *)where) = re->name_count; - break; - - case PCRE_INFO_NAMETABLE: - *((const uschar **)where) = (const uschar *)re + sizeof(real_pcre); - break; - - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - - - -/************************************************* -* Return info about what features are configured * -*************************************************/ - -/* This is function which has an extensible interface so that additional items -can be added compatibly. - -Arguments: - what what information is required - where where to put the information - -Returns: 0 if data returned, negative on error -*/ - -int -pcre_config(int what, void *where) -{ -switch (what) - { - case PCRE_CONFIG_UTF8: - #ifdef SUPPORT_UTF8 - *((int *)where) = 1; - #else - *((int *)where) = 0; - #endif - break; - - case PCRE_CONFIG_NEWLINE: - *((int *)where) = NEWLINE; - break; - - case PCRE_CONFIG_LINK_SIZE: - *((int *)where) = LINK_SIZE; - break; - - case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD: - *((int *)where) = POSIX_MALLOC_THRESHOLD; - break; - - case PCRE_CONFIG_MATCH_LIMIT: - *((unsigned int *)where) = MATCH_LIMIT; - break; - - default: return PCRE_ERROR_BADOPTION; - } - -return 0; -} - - - -#ifdef DEBUG -/************************************************* -* Debugging function to print chars * -*************************************************/ - -/* Print a sequence of chars in printable format, stopping at the end of the -subject if the requested. - -Arguments: - p points to characters - length number to print - is_subject TRUE if printing from within md->start_subject - md pointer to matching data block, if is_subject is TRUE - -Returns: nothing -*/ - -static void -pchars(const uschar *p, int length, BOOL is_subject, match_data *md) -{ -int c; -if (is_subject && length > md->end_subject - p) length = md->end_subject - p; -while (length-- > 0) - if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); -} -#endif - - - - -/************************************************* -* Handle escapes * -*************************************************/ - -/* This function is called when a \ has been encountered. It either returns a -positive value for a simple escape such as \n, or a negative value which -encodes one of the more complicated things such as \d. When UTF-8 is enabled, -a positive value greater than 255 may be returned. On entry, ptr is pointing at -the \. On exit, it is on the final character of the escape sequence. - -Arguments: - ptrptr points to the pattern position pointer - errorptr points to the pointer to the error message - bracount number of previous extracting brackets - options the options bits - isclass TRUE if inside a character class - cd pointer to char tables block - -Returns: zero or positive => a data character - negative => a special escape sequence - on error, errorptr is set -*/ - -static int -check_escape(const uschar **ptrptr, const char **errorptr, int bracount, - int options, BOOL isclass, compile_data *cd) -{ -const uschar *ptr = *ptrptr; -int c, i; - -/* If backslash is at the end of the pattern, it's an error. */ - -c = *(++ptr); -if (c == 0) *errorptr = ERR1; - -/* Digits or letters may have special meaning; all others are literals. */ - -else if (c < '0' || c > 'z') {} - -/* Do an initial lookup in a table. A non-zero result is something that can be -returned immediately. Otherwise further processing may be required. */ - -else if ((i = escapes[c - '0']) != 0) c = i; - -/* Escapes that need further processing, or are illegal. */ - -else - { - const uschar *oldptr; - switch (c) - { - /* A number of Perl escapes are not handled by PCRE. We give an explicit - error. */ - - case 'l': - case 'L': - case 'N': - case 'p': - case 'P': - case 'u': - case 'U': - case 'X': - *errorptr = ERR37; - break; - - /* The handling of escape sequences consisting of a string of digits - starting with one that is not zero is not straightforward. By experiment, - the way Perl works seems to be as follows: - - Outside a character class, the digits are read as a decimal number. If the - number is less than 10, or if there are that many previous extracting - left brackets, then it is a back reference. Otherwise, up to three octal - digits are read to form an escaped byte. Thus \123 is likely to be octal - 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal - value is greater than 377, the least significant 8 bits are taken. Inside a - character class, \ followed by a digit is always an octal number. */ - - case '1': case '2': case '3': case '4': case '5': - case '6': case '7': case '8': case '9': - - if (!isclass) - { - oldptr = ptr; - c -= '0'; - while ((digitab[ptr[1]] & ctype_digit) != 0) - c = c * 10 + *(++ptr) - '0'; - if (c < 10 || c <= bracount) - { - c = -(ESC_REF + c); - break; - } - ptr = oldptr; /* Put the pointer back and fall through */ - } - - /* Handle an octal number following \. If the first digit is 8 or 9, Perl - generates a binary zero byte and treats the digit as a following literal. - Thus we have to pull back the pointer by one. */ - - if ((c = *ptr) >= '8') - { - ptr--; - c = 0; - break; - } - - /* \0 always starts an octal number, but we may drop through to here with a - larger first octal digit. */ - - case '0': - c -= '0'; - while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7') - c = c * 8 + *(++ptr) - '0'; - c &= 255; /* Take least significant 8 bits */ - break; - - /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number - which can be greater than 0xff, but only if the ddd are hex digits. */ - - case 'x': -#ifdef SUPPORT_UTF8 - if (ptr[1] == '{' && (options & PCRE_UTF8) != 0) - { - const uschar *pt = ptr + 2; - register int count = 0; - c = 0; - while ((digitab[*pt] & ctype_xdigit) != 0) - { - int cc = *pt++; - if (cc >= 'a') cc -= 32; /* Convert to upper case */ - count++; - c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); - } - if (*pt == '}') - { - if (c < 0 || count > 8) *errorptr = ERR34; - ptr = pt; - break; - } - /* If the sequence of hex digits does not end with '}', then we don't - recognize this construct; fall through to the normal \x handling. */ - } -#endif - - /* Read just a single hex char */ - - c = 0; - while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) - { - int cc = *(++ptr); - if (cc >= 'a') cc -= 32; /* Convert to upper case */ - c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); - } - break; - - /* Other special escapes not starting with a digit are straightforward */ - - case 'c': - c = *(++ptr); - if (c == 0) - { - *errorptr = ERR2; - return 0; - } - - /* A letter is upper-cased; then the 0x40 bit is flipped. This coding - is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ - - if (c >= 'a' && c <= 'z') c -= 32; - c ^= 0x40; - break; - - /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any - other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, - for Perl compatibility, it is a literal. This code looks a bit odd, but - there used to be some cases other than the default, and there may be again - in future, so I haven't "optimized" it. */ - - default: - if ((options & PCRE_EXTRA) != 0) switch(c) - { - default: - *errorptr = ERR3; - break; - } - break; - } - } - -*ptrptr = ptr; -return c; -} - - - -/************************************************* -* Check for counted repeat * -*************************************************/ - -/* This function is called when a '{' is encountered in a place where it might -start a quantifier. It looks ahead to see if it really is a quantifier or not. -It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} -where the ddds are digits. - -Arguments: - p pointer to the first char after '{' - cd pointer to char tables block - -Returns: TRUE or FALSE -*/ - -static BOOL -is_counted_repeat(const uschar *p, compile_data *cd) -{ -if ((digitab[*p++] && ctype_digit) == 0) return FALSE; -while ((digitab[*p] & ctype_digit) != 0) p++; -if (*p == '}') return TRUE; - -if (*p++ != ',') return FALSE; -if (*p == '}') return TRUE; - -if ((digitab[*p++] && ctype_digit) == 0) return FALSE; -while ((digitab[*p] & ctype_digit) != 0) p++; - -return (*p == '}'); -} - - - -/************************************************* -* Read repeat counts * -*************************************************/ - -/* Read an item of the form {n,m} and return the values. This is called only -after is_counted_repeat() has confirmed that a repeat-count quantifier exists, -so the syntax is guaranteed to be correct, but we need to check the values. - -Arguments: - p pointer to first char after '{' - minp pointer to int for min - maxp pointer to int for max - returned as -1 if no max - errorptr points to pointer to error message - cd pointer to character tables clock - -Returns: pointer to '}' on success; - current ptr on error, with errorptr set -*/ - -static const uschar * -read_repeat_counts(const uschar *p, int *minp, int *maxp, - const char **errorptr, compile_data *cd) -{ -int min = 0; -int max = -1; - -while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; - -if (*p == '}') max = min; else - { - if (*(++p) != '}') - { - max = 0; - while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; - if (max < min) - { - *errorptr = ERR4; - return p; - } - } - } - -/* Do paranoid checks, then fill in the required variables, and pass back the -pointer to the terminating '}'. */ - -if (min > 65535 || max > 65535) - *errorptr = ERR5; -else - { - *minp = min; - *maxp = max; - } -return p; -} - - - -/************************************************* -* Find first significant op code * -*************************************************/ - -/* This is called by several functions that scan a compiled expression looking -for a fixed first character, or an anchoring op code etc. It skips over things -that do not influence this. For some calls, a change of option is important. - -Arguments: - code pointer to the start of the group - options pointer to external options - optbit the option bit whose changing is significant, or - zero if none are - -Returns: pointer to the first significant opcode -*/ - -static const uschar* -first_significant_code(const uschar *code, int *options, int optbit) -{ -for (;;) - { - switch ((int)*code) - { - case OP_OPT: - if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) - *options = (int)code[1]; - code += 2; - break; - - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do code += GET(code, 1); while (*code == OP_ALT); - /* Fall through */ - - case OP_CALLOUT: - case OP_CREF: - case OP_BRANUMBER: - case OP_WORD_BOUNDARY: - case OP_NOT_WORD_BOUNDARY: - code += OP_lengths[*code]; - break; - - default: - return code; - } - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Find the fixed length of a pattern * -*************************************************/ - -/* Scan a pattern and compute the fixed length of subject that will match it, -if the length is fixed. This is needed for dealing with backward assertions. -In UTF8 mode, the result is in characters rather than bytes. - -Arguments: - code points to the start of the pattern (the bracket) - options the compiling options - -Returns: the fixed length, or -1 if there is no fixed length, - or -2 if \C was encountered -*/ - -static int -find_fixedlength(uschar *code, int options) -{ -int length = -1; - -register int branchlength = 0; -register uschar *cc = code + 1 + LINK_SIZE; - -/* Scan along the opcodes for this branch. If we get to the end of the -branch, check the length against that of the other branches. */ - -for (;;) - { - int d; - register int op = *cc; - if (op >= OP_BRA) op = OP_BRA; - - switch (op) - { - case OP_BRA: - case OP_ONCE: - case OP_COND: - d = find_fixedlength(cc, options); - if (d < 0) return d; - branchlength += d; - do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += 1 + LINK_SIZE; - break; - - /* Reached end of a branch; if it's a ket it is the end of a nested - call. If it's ALT it is an alternation in a nested call. If it is - END it's the end of the outer call. All can be handled by the same code. */ - - case OP_ALT: - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_END: - if (length < 0) length = branchlength; - else if (length != branchlength) return -1; - if (*cc != OP_ALT) return length; - cc += 1 + LINK_SIZE; - branchlength = 0; - break; - - /* Skip over assertive subpatterns */ - - case OP_ASSERT: - case OP_ASSERT_NOT: - case OP_ASSERTBACK: - case OP_ASSERTBACK_NOT: - do cc += GET(cc, 1); while (*cc == OP_ALT); - /* Fall through */ - - /* Skip over things that don't match chars */ - - case OP_REVERSE: - case OP_BRANUMBER: - case OP_CREF: - case OP_OPT: - case OP_CALLOUT: - case OP_SOD: - case OP_SOM: - case OP_EOD: - case OP_EODN: - case OP_CIRC: - case OP_DOLL: - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - cc += OP_lengths[*cc]; - break; - - /* Handle char strings. In UTF-8 mode we must count characters, not bytes. - This requires a scan of the string, unfortunately. We assume valid UTF-8 - strings, so all we do is reduce the length by one for every byte whose bits - are 10xxxxxx. */ - - case OP_CHARS: - branchlength += *(++cc); -#ifdef SUPPORT_UTF8 - if ((options & PCRE_UTF8) != 0) - for (d = 1; d <= *cc; d++) - if ((cc[d] & 0xc0) == 0x80) branchlength--; -#endif - cc += *cc + 1; - break; - - /* Handle exact repetitions. The count is already in characters, but we - need to skip over a multibyte character in UTF8 mode. */ - - case OP_EXACT: - branchlength += GET2(cc,1); - cc += 4; -#ifdef SUPPORT_UTF8 - if ((options & PCRE_UTF8) != 0) - { - while((*cc & 0x80) == 0x80) cc++; - } -#endif - break; - - case OP_TYPEEXACT: - branchlength += GET2(cc,1); - cc += 4; - break; - - /* Handle single-char matchers */ - - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - branchlength++; - cc++; - break; - - /* The single-byte matcher isn't allowed */ - - case OP_ANYBYTE: - return -2; - - /* Check a class for variable quantification */ - -#ifdef SUPPORT_UTF8 - case OP_XCLASS: - cc += GET(cc, 1) - 33; - /* Fall through */ -#endif - - case OP_CLASS: - case OP_NCLASS: - cc += 33; - - switch (*cc) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - return -1; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (GET2(cc,1) != GET2(cc,3)) return -1; - branchlength += GET2(cc,1); - cc += 5; - break; - - default: - branchlength++; - } - break; - - /* Anything else is variable length */ - - default: - return -1; - } - } -/* Control never gets here */ -} - - - - -/************************************************* -* Scan compiled regex for numbered bracket * -*************************************************/ - -/* This little function scans through a compiled pattern until it finds a -capturing bracket with the given number. - -Arguments: - code points to start of expression - utf8 TRUE in UTF-8 mode - number the required bracket number - -Returns: pointer to the opcode for the bracket, or NULL if not found -*/ - -static const uschar * -find_bracket(const uschar *code, BOOL utf8, int number) -{ -#ifndef SUPPORT_UTF8 -utf8 = utf8; /* Stop pedantic compilers complaining */ -#endif - -for (;;) - { - register int c = *code; - if (c == OP_END) return NULL; - else if (c == OP_CHARS) code += code[1] + OP_lengths[c]; - else if (c > OP_BRA) - { - int n = c - OP_BRA; - if (n > EXTRACT_BASIC_MAX) n = GET2(code, 2+LINK_SIZE); - if (n == number) return (uschar *)code; - code += OP_lengths[OP_BRA]; - } - else - { - code += OP_lengths[c]; - - /* In UTF-8 mode, opcodes that are followed by a character may be followed - by a multi-byte character. The length in the table is a minimum, so we have - to scan along to skip the extra characters. All opcodes are less than 128, - so we can use relatively efficient code. */ - -#ifdef SUPPORT_UTF8 - if (utf8) switch(c) - { - case OP_EXACT: - case OP_UPTO: - case OP_MINUPTO: - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - while ((*code & 0xc0) == 0x80) code++; - break; - } -#endif - } - } -} - - - -/************************************************* -* Scan compiled branch for non-emptiness * -*************************************************/ - -/* This function scans through a branch of a compiled pattern to see whether it -can match the empty string or not. It is called only from could_be_empty() -below. Note that first_significant_code() skips over assertions. If we hit an -unclosed bracket, we return "empty" - this means we've struck an inner bracket -whose current branch will already have been scanned. - -Arguments: - code points to start of search - endcode points to where to stop - utf8 TRUE if in UTF8 mode - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL -could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8) -{ -register int c; -for (code = first_significant_code(code + 1 + LINK_SIZE, NULL, 0); - code < endcode; - code = first_significant_code(code + OP_lengths[c], NULL, 0)) - { - const uschar *ccode; - - c = *code; - - if (c >= OP_BRA) - { - BOOL empty_branch; - if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ - - /* Scan a closed bracket */ - - empty_branch = FALSE; - do - { - if (!empty_branch && could_be_empty_branch(code, endcode, utf8)) - empty_branch = TRUE; - code += GET(code, 1); - } - while (*code == OP_ALT); - if (!empty_branch) return FALSE; /* All branches are non-empty */ - code += 1 + LINK_SIZE; - c = *code; - } - - else switch (c) - { - /* Check for quantifiers after a class */ - -#ifdef SUPPORT_UTF8 - case OP_XCLASS: - ccode = code + GET(code, 1); - goto CHECK_CLASS_REPEAT; -#endif - - case OP_CLASS: - case OP_NCLASS: - ccode = code + 33; - -#ifdef SUPPORT_UTF8 - CHECK_CLASS_REPEAT: -#endif - - switch (*ccode) - { - case OP_CRSTAR: /* These could be empty; continue */ - case OP_CRMINSTAR: - case OP_CRQUERY: - case OP_CRMINQUERY: - break; - - default: /* Non-repeat => class must match */ - case OP_CRPLUS: /* These repeats aren't empty */ - case OP_CRMINPLUS: - return FALSE; - - case OP_CRRANGE: - case OP_CRMINRANGE: - if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ - break; - } - break; - - /* Opcodes that must match a character */ - - case OP_NOT_DIGIT: - case OP_DIGIT: - case OP_NOT_WHITESPACE: - case OP_WHITESPACE: - case OP_NOT_WORDCHAR: - case OP_WORDCHAR: - case OP_ANY: - case OP_ANYBYTE: - case OP_CHARS: - case OP_NOT: - case OP_PLUS: - case OP_MINPLUS: - case OP_EXACT: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTEXACT: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEEXACT: - return FALSE; - - /* End of branch */ - - case OP_KET: - case OP_KETRMAX: - case OP_KETRMIN: - case OP_ALT: - return TRUE; - - /* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO may be - followed by a multibyte character */ - -#ifdef SUPPORT_UTF8 - case OP_STAR: - case OP_MINSTAR: - case OP_QUERY: - case OP_MINQUERY: - case OP_UPTO: - case OP_MINUPTO: - if (utf8) while ((code[2] & 0xc0) == 0x80) code++; - break; -#endif - } - } - -return TRUE; -} - - - -/************************************************* -* Scan compiled regex for non-emptiness * -*************************************************/ - -/* This function is called to check for left recursive calls. We want to check -the current branch of the current pattern to see if it could match the empty -string. If it could, we must look outwards for branches at other levels, -stopping when we pass beyond the bracket which is the subject of the recursion. - -Arguments: - code points to start of the recursion - endcode points to where to stop (current RECURSE item) - bcptr points to the chain of current (unclosed) branch starts - utf8 TRUE if in UTF-8 mode - -Returns: TRUE if what is matched could be empty -*/ - -static BOOL -could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, - BOOL utf8) -{ -while (bcptr != NULL && bcptr->current >= code) - { - if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE; - bcptr = bcptr->outer; - } -return TRUE; -} - - - -/************************************************* -* Check for POSIX class syntax * -*************************************************/ - -/* This function is called when the sequence "[:" or "[." or "[=" is -encountered in a character class. It checks whether this is followed by an -optional ^ and then a sequence of letters, terminated by a matching ":]" or -".]" or "=]". - -Argument: - ptr pointer to the initial [ - endptr where to return the end pointer - cd pointer to compile data - -Returns: TRUE or FALSE -*/ - -static BOOL -check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd) -{ -int terminator; /* Don't combine these lines; the Solaris cc */ -terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ -if (*(++ptr) == '^') ptr++; -while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; -if (*ptr == terminator && ptr[1] == ']') - { - *endptr = ptr; - return TRUE; - } -return FALSE; -} - - - - -/************************************************* -* Check POSIX class name * -*************************************************/ - -/* This function is called to check the name given in a POSIX-style class entry -such as [:alnum:]. - -Arguments: - ptr points to the first letter - len the length of the name - -Returns: a value representing the name, or -1 if unknown -*/ - -static int -check_posix_name(const uschar *ptr, int len) -{ -register int yield = 0; -while (posix_name_lengths[yield] != 0) - { - if (len == posix_name_lengths[yield] && - strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield; - yield++; - } -return -1; -} - - - - -/************************************************* -* Compile one branch * -*************************************************/ - -/* Scan the pattern, compiling it into the code vector. If the options are -changed during the branch, the pointer is used to change the external options -bits. - -Arguments: - optionsptr pointer to the option bits - brackets points to number of extracting brackets used - code points to the pointer to the current code point - ptrptr points to the current pattern pointer - errorptr points to pointer to error message - firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) - reqbyteptr set to the last literal character required, else < 0 - bcptr points to current branch chain - cd contains pointers to tables etc. - -Returns: TRUE on success - FALSE, with *errorptr set on error -*/ - -static BOOL -compile_branch(int *optionsptr, int *brackets, uschar **codeptr, - const uschar **ptrptr, const char **errorptr, int *firstbyteptr, - int *reqbyteptr, branch_chain *bcptr, compile_data *cd) -{ -int repeat_type, op_type; -int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ -int bravalue = 0; -int length; -int greedy_default, greedy_non_default; -int firstbyte, reqbyte; -int zeroreqbyte, zerofirstbyte; -int req_caseopt, reqvary, tempreqvary; -int condcount = 0; -int options = *optionsptr; -register int c; -register uschar *code = *codeptr; -uschar *tempcode; -BOOL inescq = FALSE; -BOOL groupsetfirstbyte = FALSE; -const uschar *ptr = *ptrptr; -const uschar *tempptr; -uschar *previous = NULL; -uschar class[32]; - -#ifdef SUPPORT_UTF8 -BOOL class_utf8; -BOOL utf8 = (options & PCRE_UTF8) != 0; -uschar *class_utf8data; -uschar utf8_char[6]; -#else -BOOL utf8 = FALSE; -#endif - -/* Set up the default and non-default settings for greediness */ - -greedy_default = ((options & PCRE_UNGREEDY) != 0); -greedy_non_default = greedy_default ^ 1; - -/* Initialize no first char, no required char. REQ_UNSET means "no char -matching encountered yet". It gets changed to REQ_NONE if we hit something that -matches a non-fixed char first char; reqbyte just remains unset if we never -find one. - -When we hit a repeat whose minimum is zero, we may have to adjust these values -to take the zero repeat into account. This is implemented by setting them to -zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual -item types that can be repeated set these backoff variables appropriately. */ - -firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET; - -/* The variable req_caseopt contains either the REQ_CASELESS value or zero, -according to the current setting of the caseless flag. REQ_CASELESS is a bit -value > 255. It is added into the firstbyte or reqbyte variables to record the -case status of the value. */ - -req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; - -/* Switch on next character until the end of the branch */ - -for (;; ptr++) - { - BOOL negate_class; - BOOL possessive_quantifier; - int class_charcount; - int class_lastchar; - int newoptions; - int recno; - int skipbytes; - int subreqbyte; - int subfirstbyte; - - c = *ptr; - if (inescq && c != 0) goto NORMAL_CHAR; - - if ((options & PCRE_EXTENDED) != 0) - { - if ((cd->ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != NEWLINE) ; - if (c != 0) continue; /* Else fall through to handle end of string */ - } - } - - switch(c) - { - /* The branch terminates at end of string, |, or ). */ - - case 0: - case '|': - case ')': - *firstbyteptr = firstbyte; - *reqbyteptr = reqbyte; - *codeptr = code; - *ptrptr = ptr; - return TRUE; - - /* Handle single-character metacharacters. In multiline mode, ^ disables - the setting of any following char as a first character. */ - - case '^': - if ((options & PCRE_MULTILINE) != 0) - { - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - } - previous = NULL; - *code++ = OP_CIRC; - break; - - case '$': - previous = NULL; - *code++ = OP_DOLL; - break; - - /* There can never be a first char if '.' is first, whatever happens about - repeats. The value of reqbyte doesn't change either. */ - - case '.': - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; - previous = code; - *code++ = OP_ANY; - break; - - /* Character classes. If the included characters are all < 255 in value, we - build a 32-byte bitmap of the permitted characters, except in the special - case where there is only one such character. For negated classes, we build - the map as usual, then invert it at the end. However, we use a different - opcode so that data characters > 255 can be handled correctly. - - If the class contains characters outside the 0-255 range, a different - opcode is compiled. It may optionally have a bit map for characters < 256, - but those above are are explicitly listed afterwards. A flag byte tells - whether the bitmap is present, and whether this is a negated class or not. - */ - - case '[': - previous = code; - - /* PCRE supports POSIX class stuff inside a class. Perl gives an error if - they are encountered at the top level, so we'll do that too. */ - - if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && - check_posix_syntax(ptr, &tempptr, cd)) - { - *errorptr = (ptr[1] == ':')? ERR13 : ERR31; - goto FAILED; - } - - /* If the first character is '^', set the negation flag and skip it. */ - - if ((c = *(++ptr)) == '^') - { - negate_class = TRUE; - c = *(++ptr); - } - else - { - negate_class = FALSE; - } - - /* Keep a count of chars with values < 256 so that we can optimize the case - of just a single character (as long as it's < 256). For higher valued UTF-8 - characters, we don't yet do any optimization. */ - - class_charcount = 0; - class_lastchar = -1; - -#ifdef SUPPORT_UTF8 - class_utf8 = FALSE; /* No chars >= 256 */ - class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */ -#endif - - /* Initialize the 32-char bit map to all zeros. We have to build the - map in a temporary bit of store, in case the class contains only 1 - character (< 256), because in that case the compiled code doesn't use the - bit map. */ - - memset(class, 0, 32 * sizeof(uschar)); - - /* Process characters until ] is reached. By writing this as a "do" it - means that an initial ] is taken as a data character. The first pass - through the regex checked the overall syntax, so we don't need to be very - strict here. At the start of the loop, c contains the first byte of the - character. */ - - do - { -#ifdef SUPPORT_UTF8 - if (utf8 && c > 127) - { /* Braces are required because the */ - GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ - } -#endif - - /* Inside \Q...\E everything is literal except \E */ - - if (inescq) - { - if (c == '\\' && ptr[1] == 'E') - { - inescq = FALSE; - ptr++; - continue; - } - else goto LONE_SINGLE_CHARACTER; - } - - /* Handle POSIX class names. Perl allows a negation extension of the - form [:^name:]. A square bracket that doesn't match the syntax is - treated as a literal. We also recognize the POSIX constructions - [.ch.] and [=ch=] ("collating elements") and fault them, as Perl - 5.6 and 5.8 do. */ - - if (c == '[' && - (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && - check_posix_syntax(ptr, &tempptr, cd)) - { - BOOL local_negate = FALSE; - int posix_class, i; - register const uschar *cbits = cd->cbits; - - if (ptr[1] != ':') - { - *errorptr = ERR31; - goto FAILED; - } - - ptr += 2; - if (*ptr == '^') - { - local_negate = TRUE; - ptr++; - } - - posix_class = check_posix_name(ptr, tempptr - ptr); - if (posix_class < 0) - { - *errorptr = ERR30; - goto FAILED; - } - - /* If matching is caseless, upper and lower are converted to - alpha. This relies on the fact that the class table starts with - alpha, lower, upper as the first 3 entries. */ - - if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) - posix_class = 0; - - /* Or into the map we are building up to 3 of the static class - tables, or their negations. The [:blank:] class sets up the same - chars as the [:space:] class (all white space). We remove the vertical - white space chars afterwards. */ - - posix_class *= 3; - for (i = 0; i < 3; i++) - { - BOOL isblank = strncmp((char *)ptr, "blank", 5) == 0; - int taboffset = posix_class_maps[posix_class + i]; - if (taboffset < 0) break; - if (local_negate) - { - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset]; - if (isblank) class[1] |= 0x3c; - } - else - { - for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset]; - if (isblank) class[1] &= ~0x3c; - } - } - - ptr = tempptr + 1; - class_charcount = 10; /* Set > 1; assumes more than 1 per class */ - continue; /* End of POSIX syntax handling */ - } - - /* Backslash may introduce a single character, or it may introduce one - of the specials, which just set a flag. Escaped items are checked for - validity in the pre-compiling pass. The sequence \b is a special case. - Inside a class (and only there) it is treated as backspace. Elsewhere - it marks a word boundary. Other escapes have preset maps ready to - or into the one we are building. We assume they have more than one - character in them, so set class_charcount bigger than one. */ - - if (c == '\\') - { - c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); - if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */ - - if (-c == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == '\\' && ptr[2] == 'E') - { - ptr += 2; /* avoid empty string */ - } - else inescq = TRUE; - continue; - } - - else if (c < 0) - { - register const uschar *cbits = cd->cbits; - class_charcount = 10; /* Greater than 1 is what matters */ - switch (-c) - { - case ESC_d: - for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; - continue; - - case ESC_D: - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; - continue; - - case ESC_w: - for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word]; - continue; - - case ESC_W: - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word]; - continue; - - case ESC_s: - for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; - class[1] &= ~0x08; /* Perl 5.004 onwards omits VT from \s */ - continue; - - case ESC_S: - for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; - class[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ - continue; - - /* Unrecognized escapes are faulted if PCRE is running in its - strict mode. By default, for compatibility with Perl, they are - treated as literals. */ - - default: - if ((options & PCRE_EXTRA) != 0) - { - *errorptr = ERR7; - goto FAILED; - } - c = *ptr; /* The final character */ - } - } - - /* Fall through if we have a single character (c >= 0). This may be - > 256 in UTF-8 mode. */ - - } /* End of backslash handling */ - - /* A single character may be followed by '-' to form a range. However, - Perl does not permit ']' to be the end of the range. A '-' character - here is treated as a literal. */ - - if (ptr[1] == '-' && ptr[2] != ']') - { - int d; - ptr += 2; - -#ifdef SUPPORT_UTF8 - if (utf8) - { /* Braces are required because the */ - GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ - } - else -#endif - d = *ptr; - - /* The second part of a range can be a single-character escape, but - not any of the other escapes. Perl 5.6 treats a hyphen as a literal - in such circumstances. */ - - if (d == '\\') - { - const uschar *oldptr = ptr; - d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); - - /* \b is backslash; any other special means the '-' was literal */ - - if (d < 0) - { - if (d == -ESC_b) d = '\b'; else - { - ptr = oldptr - 2; - goto LONE_SINGLE_CHARACTER; /* A few lines below */ - } - } - } - - /* Check that the two values are in the correct order */ - - if (d < c) - { - *errorptr = ERR8; - goto FAILED; - } - - /* If d is greater than 255, we can't just use the bit map, so set up - for the UTF-8 supporting class type. If we are not caseless, we can - just set up a single range. If we are caseless, the characters < 256 - are handled with a bitmap, in order to get the case-insensitive - handling. */ - -#ifdef SUPPORT_UTF8 - if (d > 255) - { - class_utf8 = TRUE; - *class_utf8data++ = XCL_RANGE; - if ((options & PCRE_CASELESS) == 0) - { - class_utf8data += ord2utf8(c, class_utf8data); - class_utf8data += ord2utf8(d, class_utf8data); - continue; /* Go get the next char in the class */ - } - class_utf8data += ord2utf8(256, class_utf8data); - class_utf8data += ord2utf8(d, class_utf8data); - d = 255; - /* Fall through */ - } -#endif - /* We use the bit map if the range is entirely < 255, or if part of it - is < 255 and matching is caseless. */ - - for (; c <= d; c++) - { - class[c/8] |= (1 << (c&7)); - if ((options & PCRE_CASELESS) != 0) - { - int uc = cd->fcc[c]; /* flip case */ - class[uc/8] |= (1 << (uc&7)); - } - class_charcount++; /* in case a one-char range */ - class_lastchar = c; - } - - continue; /* Go get the next char in the class */ - } - - /* Handle a lone single character - we can get here for a normal - non-escape char, or after \ that introduces a single character. */ - - LONE_SINGLE_CHARACTER: - - /* Handle a multibyte character */ - -#ifdef SUPPORT_UTF8 - if (utf8 && c > 255) - { - class_utf8 = TRUE; - *class_utf8data++ = XCL_SINGLE; - class_utf8data += ord2utf8(c, class_utf8data); - } - else -#endif - /* Handle a single-byte character */ - { - class [c/8] |= (1 << (c&7)); - if ((options & PCRE_CASELESS) != 0) - { - c = cd->fcc[c]; /* flip case */ - class[c/8] |= (1 << (c&7)); - } - class_charcount++; - class_lastchar = c; - } - } - - /* Loop until ']' reached; the check for end of string happens inside the - loop. This "while" is the end of the "do" above. */ - - while ((c = *(++ptr)) != ']' || inescq); - - /* If class_charcount is 1, we saw precisely one character with a value < - 256. In UTF-8 mode, we can optimize if there were no characters >= 256 and - the one character is < 128. In non-UTF-8 mode we can always optimize. - - The optimization throws away the bit map. We turn the item into a - 1-character OP_CHARS if it's positive, or OP_NOT if it's negative. Note - that OP_NOT does not support multibyte characters. In the positive case, it - can cause firstbyte to be set. Otherwise, there can be no first char if - this item is first, whatever repeat count may follow. In the case of - reqbyte, save the previous value for reinstating. */ - -#ifdef SUPPORT_UTF8 - if (class_charcount == 1 && - (!utf8 || - (!class_utf8 && class_lastchar < 128))) -#else - if (class_charcount == 1) -#endif - { - zeroreqbyte = reqbyte; - if (negate_class) - { - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; - *code++ = OP_NOT; - } - else - { - if (firstbyte == REQ_UNSET) - { - zerofirstbyte = REQ_NONE; - firstbyte = class_lastchar | req_caseopt; - } - else - { - zerofirstbyte = firstbyte; - reqbyte = class_lastchar | req_caseopt | cd->req_varyopt; - } - *code++ = OP_CHARS; - *code++ = 1; - } - *code++ = class_lastchar; - break; /* End of class handling */ - } /* End of 1-byte optimization */ - - /* Otherwise, if this is the first thing in the branch, there can be no - first char setting, whatever the repeat count. Any reqbyte setting must - remain unchanged after any kind of repeat. */ - - if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; - - /* If there are characters with values > 255, we have to compile an - extended class, with its own opcode. If there are no characters < 256, - we can omit the bitmap. */ - -#ifdef SUPPORT_UTF8 - if (class_utf8) - { - *class_utf8data++ = XCL_END; /* Marks the end of extra data */ - *code++ = OP_XCLASS; - code += LINK_SIZE; - *code = negate_class? XCL_NOT : 0; - - /* If the map is required, install it, and move on to the end of - the extra data */ - - if (class_charcount > 0) - { - *code++ |= XCL_MAP; - memcpy(code, class, 32); - code = class_utf8data; - } - - /* If the map is not required, slide down the extra data. */ - - else - { - int len = class_utf8data - (code + 33); - memmove(code + 1, code + 33, len); - code += len + 1; - } - - /* Now fill in the complete length of the item */ - - PUT(previous, 1, code - previous); - break; /* End of class handling */ - } -#endif - - /* If there are no characters > 255, negate the 32-byte map if necessary, - and copy it into the code vector. If this is the first thing in the branch, - there can be no first char setting, whatever the repeat count. Any reqbyte - setting must remain unchanged after any kind of repeat. */ - - if (negate_class) - { - *code++ = OP_NCLASS; - for (c = 0; c < 32; c++) code[c] = ~class[c]; - } - else - { - *code++ = OP_CLASS; - memcpy(code, class, 32); - } - code += 32; - break; - - /* Various kinds of repeat */ - - case '{': - if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); - if (*errorptr != NULL) goto FAILED; - goto REPEAT; - - case '*': - repeat_min = 0; - repeat_max = -1; - goto REPEAT; - - case '+': - repeat_min = 1; - repeat_max = -1; - goto REPEAT; - - case '?': - repeat_min = 0; - repeat_max = 1; - - REPEAT: - if (previous == NULL) - { - *errorptr = ERR9; - goto FAILED; - } - - if (repeat_min == 0) - { - firstbyte = zerofirstbyte; /* Adjust for zero repeat */ - reqbyte = zeroreqbyte; /* Ditto */ - } - - /* Remember whether this is a variable length repeat */ - - reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; - - op_type = 0; /* Default single-char op codes */ - possessive_quantifier = FALSE; /* Default not possessive quantifier */ - - /* Save start of previous item, in case we have to move it up to make space - for an inserted OP_ONCE for the additional '+' extension. */ - - tempcode = previous; - - /* If the next character is '+', we have a possessive quantifier. This - implies greediness, whatever the setting of the PCRE_UNGREEDY option. - If the next character is '?' this is a minimizing repeat, by default, - but if PCRE_UNGREEDY is set, it works the other way round. We change the - repeat type to the non-default. */ - - if (ptr[1] == '+') - { - repeat_type = 0; /* Force greedy */ - possessive_quantifier = TRUE; - ptr++; - } - else if (ptr[1] == '?') - { - repeat_type = greedy_non_default; - ptr++; - } - else repeat_type = greedy_default; - - /* If previous was a recursion, we need to wrap it inside brackets so that - it can be replicated if necessary. */ - - if (*previous == OP_RECURSE) - { - memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); - code += 1 + LINK_SIZE; - *previous = OP_BRA; - PUT(previous, 1, code - previous); - *code = OP_KET; - PUT(code, 1, code - previous); - code += 1 + LINK_SIZE; - } - - /* If previous was a string of characters, chop off the last one and use it - as the subject of the repeat. If there was only one character, we can - abolish the previous item altogether. If a one-char item has a minumum of - more than one, ensure that it is set in reqbyte - it might not be if a - sequence such as x{3} is the first thing in a branch because the x will - have gone into firstbyte instead. */ - - if (*previous == OP_CHARS) - { - /* Deal with UTF-8 characters that take up more than one byte. It's - easier to write this out separately than try to macrify it. Use c to - hold the length of the character in bytes, plus 0x80 to flag that it's a - length rather than a small character. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && (code[-1] & 0x80) != 0) - { - uschar *lastchar = code - 1; - while((*lastchar & 0xc0) == 0x80) lastchar--; - c = code - lastchar; /* Length of UTF-8 character */ - memcpy(utf8_char, lastchar, c); /* Save the char */ - if (lastchar == previous + 2) /* There was only one character */ - { - code = previous; /* Abolish the previous item */ - } - else - { - previous[1] -= c; /* Adjust length of previous */ - code = lastchar; /* Lost char off the end */ - tempcode = code; /* Adjust position to be moved for '+' */ - } - c |= 0x80; /* Flag c as a length */ - } - else -#endif - - /* Handle the case of a single byte - either with no UTF8 support, or - with UTF-8 disabled, or for a UTF-8 character < 128. */ - - { - c = *(--code); - if (code == previous + 2) /* There was only one character */ - { - code = previous; /* Abolish the previous item */ - if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt; - } - else - { - previous[1]--; /* adjust length */ - tempcode = code; /* Adjust position to be moved for '+' */ - } - } - - goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ - } - - /* If previous was a single negated character ([^a] or similar), we use - one of the special opcodes, replacing it. The code is shared with single- - character repeats by setting opt_type to add a suitable offset into - repeat_type. OP_NOT is currently used only for single-byte chars. */ - - else if (*previous == OP_NOT) - { - op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ - c = previous[1]; - code = previous; - goto OUTPUT_SINGLE_REPEAT; - } - - /* If previous was a character type match (\d or similar), abolish it and - create a suitable repeat item. The code is shared with single-character - repeats by setting op_type to add a suitable offset into repeat_type. */ - - else if (*previous < OP_EODN) - { - op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ - c = *previous; - code = previous; - - OUTPUT_SINGLE_REPEAT: - - /* If the maximum is zero then the minimum must also be zero; Perl allows - this case, so we do too - by simply omitting the item altogether. */ - - if (repeat_max == 0) goto END_REPEAT; - - /* Combine the op_type with the repeat_type */ - - repeat_type += op_type; - - /* A minimum of zero is handled either as the special case * or ?, or as - an UPTO, with the maximum given. */ - - if (repeat_min == 0) - { - if (repeat_max == -1) *code++ = OP_STAR + repeat_type; - else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; - else - { - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - - /* The case {1,} is handled as the special case + */ - - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_PLUS + repeat_type; - - /* The case {n,n} is just an EXACT, while the general case {n,m} is - handled as an EXACT followed by an UPTO. An EXACT of 1 is optimized. */ - - else - { - if (repeat_min != 1) - { - *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ - PUT2INC(code, 0, repeat_min); - } - - /* If the mininum is 1 and the previous item was a character string, - we either have to put back the item that got cancelled if the string - length was 1, or add the character back onto the end of a longer - string. For a character type nothing need be done; it will just get - put back naturally. Note that the final character is always going to - get added below, so we leave code ready for its insertion. */ - - else if (*previous == OP_CHARS) - { - if (code == previous) code += 2; else - - /* In UTF-8 mode, a multibyte char has its length in c, with the 0x80 - bit set as a flag. The length will always be between 2 and 6. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) previous[1] += c & 7; else -#endif - previous[1]++; - } - - /* For a single negated character we also have to put back the - item that got cancelled. At present this applies only to single byte - characters in any mode. */ - - else if (*previous == OP_NOT) code++; - - /* If the maximum is unlimited, insert an OP_STAR. Before doing so, - we have to insert the character for the previous code. In UTF-8 mode, - long characters have their length in c, with the 0x80 bit as a flag. */ - - if (repeat_max < 0) - { -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) - { - memcpy(code, utf8_char, c & 7); - code += c & 7; - } - else -#endif - *code++ = c; - *code++ = OP_STAR + repeat_type; - } - - /* Else insert an UPTO if the max is greater than the min, again - preceded by the character, for the previously inserted code. */ - - else if (repeat_max != repeat_min) - { -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) - { - memcpy(code, utf8_char, c & 7); - code += c & 7; - } - else -#endif - *code++ = c; - repeat_max -= repeat_min; - *code++ = OP_UPTO + repeat_type; - PUT2INC(code, 0, repeat_max); - } - } - - /* The character or character type itself comes last in all cases. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && c >= 128) - { - memcpy(code, utf8_char, c & 7); - code += c & 7; - } - else -#endif - - *code++ = c; - } - - /* If previous was a character class or a back reference, we put the repeat - stuff after it, but just skip the item if the repeat was {0,0}. */ - - else if (*previous == OP_CLASS || - *previous == OP_NCLASS || -#ifdef SUPPORT_UTF8 - *previous == OP_XCLASS || -#endif - *previous == OP_REF) - { - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - if (repeat_min == 0 && repeat_max == -1) - *code++ = OP_CRSTAR + repeat_type; - else if (repeat_min == 1 && repeat_max == -1) - *code++ = OP_CRPLUS + repeat_type; - else if (repeat_min == 0 && repeat_max == 1) - *code++ = OP_CRQUERY + repeat_type; - else - { - *code++ = OP_CRRANGE + repeat_type; - PUT2INC(code, 0, repeat_min); - if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ - PUT2INC(code, 0, repeat_max); - } - } - - /* If previous was a bracket group, we may have to replicate it in certain - cases. */ - - else if (*previous >= OP_BRA || *previous == OP_ONCE || - *previous == OP_COND) - { - register int i; - int ketoffset = 0; - int len = code - previous; - uschar *bralink = NULL; - - /* If the maximum repeat count is unlimited, find the end of the bracket - by scanning through from the start, and compute the offset back to it - from the current code pointer. There may be an OP_OPT setting following - the final KET, so we can't find the end just by going back from the code - pointer. */ - - if (repeat_max == -1) - { - register uschar *ket = previous; - do ket += GET(ket, 1); while (*ket != OP_KET); - ketoffset = code - ket; - } - - /* The case of a zero minimum is special because of the need to stick - OP_BRAZERO in front of it, and because the group appears once in the - data, whereas in other cases it appears the minimum number of times. For - this reason, it is simplest to treat this case separately, as otherwise - the code gets far too messy. There are several special subcases when the - minimum is zero. */ - - if (repeat_min == 0) - { - /* If the maximum is also zero, we just omit the group from the output - altogether. */ - - if (repeat_max == 0) - { - code = previous; - goto END_REPEAT; - } - - /* If the maximum is 1 or unlimited, we just have to stick in the - BRAZERO and do no more at this point. */ - - if (repeat_max <= 1) - { - memmove(previous+1, previous, len); - code++; - *previous++ = OP_BRAZERO + repeat_type; - } - - /* If the maximum is greater than 1 and limited, we have to replicate - in a nested fashion, sticking OP_BRAZERO before each set of brackets. - The first one has to be handled carefully because it's the original - copy, which has to be moved up. The remainder can be handled by code - that is common with the non-zero minimum case below. We just have to - adjust the value or repeat_max, since one less copy is required. */ - - else - { - int offset; - memmove(previous + 2 + LINK_SIZE, previous, len); - code += 2 + LINK_SIZE; - *previous++ = OP_BRAZERO + repeat_type; - *previous++ = OP_BRA; - - /* We chain together the bracket offset fields that have to be - filled in later when the ends of the brackets are reached. */ - - offset = (bralink == NULL)? 0 : previous - bralink; - bralink = previous; - PUTINC(previous, 0, offset); - } - - repeat_max--; - } - - /* If the minimum is greater than zero, replicate the group as many - times as necessary, and adjust the maximum to the number of subsequent - copies that we need. If we set a first char from the group, and didn't - set a required char, copy the latter from the former. */ - - else - { - if (repeat_min > 1) - { - if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte; - for (i = 1; i < repeat_min; i++) - { - memcpy(code, previous, len); - code += len; - } - } - if (repeat_max > 0) repeat_max -= repeat_min; - } - - /* This code is common to both the zero and non-zero minimum cases. If - the maximum is limited, it replicates the group in a nested fashion, - remembering the bracket starts on a stack. In the case of a zero minimum, - the first one was set up above. In all cases the repeat_max now specifies - the number of additional copies needed. */ - - if (repeat_max >= 0) - { - for (i = repeat_max - 1; i >= 0; i--) - { - *code++ = OP_BRAZERO + repeat_type; - - /* All but the final copy start a new nesting, maintaining the - chain of brackets outstanding. */ - - if (i != 0) - { - int offset; - *code++ = OP_BRA; - offset = (bralink == NULL)? 0 : code - bralink; - bralink = code; - PUTINC(code, 0, offset); - } - - memcpy(code, previous, len); - code += len; - } - - /* Now chain through the pending brackets, and fill in their length - fields (which are holding the chain links pro tem). */ - - while (bralink != NULL) - { - int oldlinkoffset; - int offset = code - bralink + 1; - uschar *bra = code - offset; - oldlinkoffset = GET(bra, 1); - bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; - *code++ = OP_KET; - PUTINC(code, 0, offset); - PUT(bra, 1, offset); - } - } - - /* If the maximum is unlimited, set a repeater in the final copy. We - can't just offset backwards from the current code point, because we - don't know if there's been an options resetting after the ket. The - correct offset was computed above. */ - - else code[-ketoffset] = OP_KETRMAX + repeat_type; - } - - /* Else there's some kind of shambles */ - - else - { - *errorptr = ERR11; - goto FAILED; - } - - /* If the character following a repeat is '+', we wrap the entire repeated - item inside OP_ONCE brackets. This is just syntactic sugar, taken from - Sun's Java package. The repeated item starts at tempcode, not at previous, - which might be the first part of a string whose (former) last char we - repeated. However, we don't support '+' after a greediness '?'. */ - - if (possessive_quantifier) - { - int len = code - tempcode; - memmove(tempcode + 1+LINK_SIZE, tempcode, len); - code += 1 + LINK_SIZE; - len += 1 + LINK_SIZE; - tempcode[0] = OP_ONCE; - *code++ = OP_KET; - PUTINC(code, 0, len); - PUT(tempcode, 1, len); - } - - /* In all case we no longer have a previous item. We also set the - "follows varying string" flag for subsequently encountered reqbytes if - it isn't already set and we have just passed a varying length item. */ - - END_REPEAT: - previous = NULL; - cd->req_varyopt |= reqvary; - break; - - - /* Start of nested bracket sub-expression, or comment or lookahead or - lookbehind or option setting or condition. First deal with special things - that can come after a bracket; all are introduced by ?, and the appearance - of any of them means that this is not a referencing group. They were - checked for validity in the first pass over the string, so we don't have to - check for syntax errors here. */ - - case '(': - newoptions = options; - skipbytes = 0; - - if (*(++ptr) == '?') - { - int set, unset; - int *optset; - - switch (*(++ptr)) - { - case '#': /* Comment; skip to ket */ - ptr++; - while (*ptr != ')') ptr++; - continue; - - case ':': /* Non-extracting bracket */ - bravalue = OP_BRA; - ptr++; - break; - - case '(': - bravalue = OP_COND; /* Conditional group */ - - /* Condition to test for recursion */ - - if (ptr[1] == 'R') - { - code[1+LINK_SIZE] = OP_CREF; - PUT2(code, 2+LINK_SIZE, CREF_RECURSE); - skipbytes = 3; - ptr += 3; - } - - /* Condition to test for a numbered subpattern match. We know that - if a digit follows ( then there will just be digits until ) because - the syntax was checked in the first pass. */ - - else if ((digitab[ptr[1]] && ctype_digit) != 0) - { - int condref; /* Don't amalgamate; some compilers */ - condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */ - while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; - if (condref == 0) - { - *errorptr = ERR35; - goto FAILED; - } - ptr++; - code[1+LINK_SIZE] = OP_CREF; - PUT2(code, 2+LINK_SIZE, condref); - skipbytes = 3; - } - /* For conditions that are assertions, we just fall through, having - set bravalue above. */ - break; - - case '=': /* Positive lookahead */ - bravalue = OP_ASSERT; - ptr++; - break; - - case '!': /* Negative lookahead */ - bravalue = OP_ASSERT_NOT; - ptr++; - break; - - case '<': /* Lookbehinds */ - switch (*(++ptr)) - { - case '=': /* Positive lookbehind */ - bravalue = OP_ASSERTBACK; - ptr++; - break; - - case '!': /* Negative lookbehind */ - bravalue = OP_ASSERTBACK_NOT; - ptr++; - break; - } - break; - - case '>': /* One-time brackets */ - bravalue = OP_ONCE; - ptr++; - break; - - case 'C': /* Callout - may be followed by digits */ - *code++ = OP_CALLOUT; - { - int n = 0; - while ((digitab[*(++ptr)] & ctype_digit) != 0) - n = n * 10 + *ptr - '0'; - if (n > 255) - { - *errorptr = ERR38; - goto FAILED; - } - *code++ = n; - } - previous = NULL; - continue; - - case 'P': /* Named subpattern handling */ - if (*(++ptr) == '<') /* Definition */ - { - int i, namelen; - uschar *slot = cd->name_table; - const uschar *name; /* Don't amalgamate; some compilers */ - name = ++ptr; /* grumble at autoincrement in declaration */ - - while (*ptr++ != '>'); - namelen = ptr - name - 1; - - for (i = 0; i < cd->names_found; i++) - { - int crc = memcmp(name, slot+2, namelen); - if (crc == 0) - { - if (slot[2+namelen] == 0) - { - *errorptr = ERR43; - goto FAILED; - } - crc = -1; /* Current name is substring */ - } - if (crc < 0) - { - memmove(slot + cd->name_entry_size, slot, - (cd->names_found - i) * cd->name_entry_size); - break; - } - slot += cd->name_entry_size; - } - - PUT2(slot, 0, *brackets + 1); - memcpy(slot + 2, name, namelen); - slot[2+namelen] = 0; - cd->names_found++; - goto NUMBERED_GROUP; - } - - if (*ptr == '=' || *ptr == '>') /* Reference or recursion */ - { - int i, namelen; - int type = *ptr++; - const uschar *name = ptr; - uschar *slot = cd->name_table; - - while (*ptr != ')') ptr++; - namelen = ptr - name; - - for (i = 0; i < cd->names_found; i++) - { - if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break; - slot += cd->name_entry_size; - } - if (i >= cd->names_found) - { - *errorptr = ERR15; - goto FAILED; - } - - recno = GET2(slot, 0); - - if (type == '>') goto HANDLE_RECURSION; /* A few lines below */ - - /* Back reference */ - - previous = code; - *code++ = OP_REF; - PUT2INC(code, 0, recno); - cd->backref_map |= (recno < 32)? (1 << recno) : 1; - if (recno > cd->top_backref) cd->top_backref = recno; - continue; - } - - /* Should never happen */ - break; - - case 'R': /* Pattern recursion */ - ptr++; /* Same as (?0) */ - /* Fall through */ - - /* Recursion or "subroutine" call */ - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - { - const uschar *called; - recno = 0; - while((digitab[*ptr] & ctype_digit) != 0) - recno = recno * 10 + *ptr++ - '0'; - - /* Come here from code above that handles a named recursion */ - - HANDLE_RECURSION: - - previous = code; - - /* Find the bracket that is being referenced. Temporarily end the - regex in case it doesn't exist. */ - - *code = OP_END; - called = (recno == 0)? - cd->start_code : find_bracket(cd->start_code, utf8, recno); - - if (called == NULL) - { - *errorptr = ERR15; - goto FAILED; - } - - /* If the subpattern is still open, this is a recursive call. We - check to see if this is a left recursion that could loop for ever, - and diagnose that case. */ - - if (GET(called, 1) == 0 && could_be_empty(called, code, bcptr, utf8)) - { - *errorptr = ERR40; - goto FAILED; - } - - /* Insert the recursion/subroutine item */ - - *code = OP_RECURSE; - PUT(code, 1, called - cd->start_code); - code += 1 + LINK_SIZE; - } - continue; - - /* Character after (? not specially recognized */ - - default: /* Option setting */ - set = unset = 0; - optset = &set; - - while (*ptr != ')' && *ptr != ':') - { - switch (*ptr++) - { - case '-': optset = &unset; break; - - case 'i': *optset |= PCRE_CASELESS; break; - case 'm': *optset |= PCRE_MULTILINE; break; - case 's': *optset |= PCRE_DOTALL; break; - case 'x': *optset |= PCRE_EXTENDED; break; - case 'U': *optset |= PCRE_UNGREEDY; break; - case 'X': *optset |= PCRE_EXTRA; break; - } - } - - /* Set up the changed option bits, but don't change anything yet. */ - - newoptions = (options | set) & (~unset); - - /* If the options ended with ')' this is not the start of a nested - group with option changes, so the options change at this level. Compile - code to change the ims options if this setting actually changes any of - them. We also pass the new setting back so that it can be put at the - start of any following branches, and when this group ends (if we are in - a group), a resetting item can be compiled. - - Note that if this item is right at the start of the pattern, the - options will have been abstracted and made global, so there will be no - change to compile. */ - - if (*ptr == ')') - { - if ((options & PCRE_IMS) != (newoptions & PCRE_IMS)) - { - *code++ = OP_OPT; - *code++ = newoptions & PCRE_IMS; - } - - /* Change options at this level, and pass them back for use - in subsequent branches. Reset the greedy defaults and the case - value for firstbyte and reqbyte. */ - - *optionsptr = options = newoptions; - greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); - greedy_non_default = greedy_default ^ 1; - req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; - - previous = NULL; /* This item can't be repeated */ - continue; /* It is complete */ - } - - /* If the options ended with ':' we are heading into a nested group - with possible change of options. Such groups are non-capturing and are - not assertions of any kind. All we need to do is skip over the ':'; - the newoptions value is handled below. */ - - bravalue = OP_BRA; - ptr++; - } - } - - /* If PCRE_NO_AUTO_CAPTURE is set, all unadorned brackets become - non-capturing and behave like (?:...) brackets */ - - else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) - { - bravalue = OP_BRA; - } - - /* Else we have a referencing group; adjust the opcode. If the bracket - number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and - arrange for the true number to follow later, in an OP_BRANUMBER item. */ - - else - { - NUMBERED_GROUP: - if (++(*brackets) > EXTRACT_BASIC_MAX) - { - bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; - code[1+LINK_SIZE] = OP_BRANUMBER; - PUT2(code, 2+LINK_SIZE, *brackets); - skipbytes = 3; - } - else bravalue = OP_BRA + *brackets; - } - - /* Process nested bracketed re. Assertions may not be repeated, but other - kinds can be. We copy code into a non-register variable in order to be able - to pass its address because some compilers complain otherwise. Pass in a - new setting for the ims options if they have changed. */ - - previous = (bravalue >= OP_ONCE)? code : NULL; - *code = bravalue; - tempcode = code; - tempreqvary = cd->req_varyopt; /* Save value before bracket */ - - if (!compile_regex( - newoptions, /* The complete new option state */ - options & PCRE_IMS, /* The previous ims option state */ - brackets, /* Extracting bracket count */ - &tempcode, /* Where to put code (updated) */ - &ptr, /* Input pointer (updated) */ - errorptr, /* Where to put an error message */ - (bravalue == OP_ASSERTBACK || - bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ - skipbytes, /* Skip over OP_COND/OP_BRANUMBER */ - &subfirstbyte, /* For possible first char */ - &subreqbyte, /* For possible last char */ - bcptr, /* Current branch chain */ - cd)) /* Tables block */ - goto FAILED; - - /* At the end of compiling, code is still pointing to the start of the - group, while tempcode has been updated to point past the end of the group - and any option resetting that may follow it. The pattern pointer (ptr) - is on the bracket. */ - - /* If this is a conditional bracket, check that there are no more than - two branches in the group. */ - - else if (bravalue == OP_COND) - { - uschar *tc = code; - condcount = 0; - - do { - condcount++; - tc += GET(tc,1); - } - while (*tc != OP_KET); - - if (condcount > 2) - { - *errorptr = ERR27; - goto FAILED; - } - - /* If there is just one branch, we must not make use of its firstbyte or - reqbyte, because this is equivalent to an empty second branch. */ - - if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE; - } - - /* Handle updating of the required and first characters. Update for normal - brackets of all kinds, and conditions with two branches (see code above). - If the bracket is followed by a quantifier with zero repeat, we have to - back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the - main loop so that they can be accessed for the back off. */ - - zeroreqbyte = reqbyte; - zerofirstbyte = firstbyte; - groupsetfirstbyte = FALSE; - - if (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_COND) - { - /* If we have not yet set a firstbyte in this branch, take it from the - subpattern, remembering that it was set here so that a repeat of more - than one can replicate it as reqbyte if necessary. If the subpattern has - no firstbyte, set "none" for the whole branch. In both cases, a zero - repeat forces firstbyte to "none". */ - - if (firstbyte == REQ_UNSET) - { - if (subfirstbyte >= 0) - { - firstbyte = subfirstbyte; - groupsetfirstbyte = TRUE; - } - else firstbyte = REQ_NONE; - zerofirstbyte = REQ_NONE; - } - - /* If firstbyte was previously set, convert the subpattern's firstbyte - into reqbyte if there wasn't one, using the vary flag that was in - existence beforehand. */ - - else if (subfirstbyte >= 0 && subreqbyte < 0) - subreqbyte = subfirstbyte | tempreqvary; - - /* If the subpattern set a required byte (or set a first byte that isn't - really the first byte - see above), set it. */ - - if (subreqbyte >= 0) reqbyte = subreqbyte; - } - - /* For a forward assertion, we take the reqbyte, if set. This can be - helpful if the pattern that follows the assertion doesn't set a different - char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte - for an assertion, however because it leads to incorrect effect for patterns - such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead - of a firstbyte. This is overcome by a scan at the end if there's no - firstbyte, looking for an asserted first char. */ - - else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte; - - /* Now update the main code pointer to the end of the group. */ - - code = tempcode; - - /* Error if hit end of pattern */ - - if (*ptr != ')') - { - *errorptr = ERR14; - goto FAILED; - } - break; - - /* Check \ for being a real metacharacter; if not, fall through and handle - it as a data character at the start of a string. Escape items are checked - for validity in the pre-compiling pass. */ - - case '\\': - tempptr = ptr; - c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); - - /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values - are arranged to be the negation of the corresponding OP_values. For the - back references, the values are ESC_REF plus the reference number. Only - back references and those types that consume a character may be repeated. - We can test for values between ESC_b and ESC_Z for the latter; this may - have to change if any new ones are ever created. */ - - if (c < 0) - { - if (-c == ESC_Q) /* Handle start of quoted string */ - { - if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */ - else inescq = TRUE; - continue; - } - - /* For metasequences that actually match a character, we disable the - setting of a first character if it hasn't already been set. */ - - if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z) - firstbyte = REQ_NONE; - - /* Set values to reset to if this is followed by a zero repeat. */ - - zerofirstbyte = firstbyte; - zeroreqbyte = reqbyte; - - /* Back references are handled specially */ - - if (-c >= ESC_REF) - { - int number = -c - ESC_REF; - previous = code; - *code++ = OP_REF; - PUT2INC(code, 0, number); - } - else - { - previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; - *code++ = -c; - } - continue; - } - - /* Data character: reset and fall through */ - - ptr = tempptr; - c = '\\'; - - /* Handle a run of data characters until a metacharacter is encountered. - The first character is guaranteed not to be whitespace or # when the - extended flag is set. */ - - NORMAL_CHAR: - default: - previous = code; - *code = OP_CHARS; - code += 2; - length = 0; - - do - { - /* If in \Q...\E, check for the end; if not, we always have a literal */ - - if (inescq) - { - if (c == '\\' && ptr[1] == 'E') - { - inescq = FALSE; - ptr++; - } - else - { - *code++ = c; - length++; - } - continue; - } - - /* Skip white space and comments for /x patterns */ - - if ((options & PCRE_EXTENDED) != 0) - { - if ((cd->ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != NEWLINE) ; - if (c == 0) break; - continue; - } - } - - /* Backslash may introduce a data char or a metacharacter. Escaped items - are checked for validity in the pre-compiling pass. Stop the string - before a metaitem. */ - - if (c == '\\') - { - tempptr = ptr; - c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); - if (c < 0) { ptr = tempptr; break; } - - /* If a character is > 127 in UTF-8 mode, we have to turn it into - two or more characters in the UTF-8 encoding. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && c > 127) - { - uschar buffer[8]; - int len = ord2utf8(c, buffer); - for (c = 0; c < len; c++) *code++ = buffer[c]; - length += len; - continue; - } -#endif - } - - /* Ordinary character or single-char escape */ - - *code++ = c; - length++; - } - - /* This "while" is the end of the "do" above. */ - - while (length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); - - /* Update the first and last requirements. These are always bytes, even in - UTF-8 mode. However, there is a special case to be considered when there - are only one or two characters. Because this gets messy in UTF-8 mode, the - code is kept separate. When we get here "length" contains the number of - bytes. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && length > 1) - { - uschar *t = previous + 3; /* After this code, t */ - while (t < code && (*t & 0xc0) == 0x80) t++; /* follows the 1st char */ - - /* Handle the case when there is only one multibyte character. It must - have at least two bytes because of the "length > 1" test above. */ - - if (t == code) - { - /* If no previous first byte, set it from this character, but revert to - none on a zero repeat. */ - - if (firstbyte == REQ_UNSET) - { - zerofirstbyte = REQ_NONE; - firstbyte = previous[2]; - } - - /* Otherwise, leave the first byte value alone, and don't change it on - a zero repeat */ - - else zerofirstbyte = firstbyte; - - /* In both cases, a zero repeat resets the previous required byte */ - - zeroreqbyte = reqbyte; - } - - /* Handle the case when there is more than one character. These may be - single-byte or multibyte characters */ - - else - { - t = code - 1; /* After this code, t is at the */ - while ((*t & 0xc0) == 0x80) t--; /* start of the last character */ - - /* If no previous first byte, set it from the first character, and - retain it on a zero repeat (of the last character). The required byte - is reset on a zero repeat, either to the byte before the last - character, unless this is the first byte of the string. In that case, - it reverts to its previous value. */ - - if (firstbyte == REQ_UNSET) - { - zerofirstbyte = firstbyte = previous[2] | req_caseopt; - zeroreqbyte = (t - 1 == previous + 2)? - reqbyte : t[-1] | req_caseopt | cd->req_varyopt; - } - - /* If there was a previous first byte, leave it alone, and don't change - it on a zero repeat. The required byte is reset on a zero repeat to the - byte before the last character. */ - - else - { - zerofirstbyte = firstbyte; - zeroreqbyte = t[-1] | req_caseopt | cd->req_varyopt; - } - } - - /* In all cases (we know length > 1), the new required byte is the last - byte of the string. */ - - reqbyte = code[-1] | req_caseopt | cd->req_varyopt; - } - - else /* End of UTF-8 coding */ -#endif - - /* This is the code for non-UTF-8 operation, either without UTF-8 support, - or when UTF-8 is not enabled. */ - - { - /* firstbyte was not previously set; take it from this string */ - - if (firstbyte == REQ_UNSET) - { - if (length == 1) - { - zerofirstbyte = REQ_NONE; - firstbyte = previous[2] | req_caseopt; - zeroreqbyte = reqbyte; - } - else - { - zerofirstbyte = firstbyte = previous[2] | req_caseopt; - zeroreqbyte = (length > 2)? - (code[-2] | req_caseopt | cd->req_varyopt) : reqbyte; - reqbyte = code[-1] | req_caseopt | cd->req_varyopt; - } - } - - /* firstbyte was previously set */ - - else - { - zerofirstbyte = firstbyte; - zeroreqbyte = (length == 1)? reqbyte : - code[-2] | req_caseopt | cd->req_varyopt; - reqbyte = code[-1] | req_caseopt | cd->req_varyopt; - } - } - - /* Set the length in the data vector, and advance to the next state. */ - - previous[1] = length; - if (length < MAXLIT) ptr--; - break; - } - } /* end of big loop */ - -/* Control never reaches here by falling through, only by a goto for all the -error states. Pass back the position in the pattern so that it can be displayed -to the user for diagnosing the error. */ - -FAILED: -*ptrptr = ptr; -return FALSE; -} - - - - -/************************************************* -* Compile sequence of alternatives * -*************************************************/ - -/* On entry, ptr is pointing past the bracket character, but on return -it points to the closing bracket, or vertical bar, or end of string. -The code variable is pointing at the byte into which the BRA operator has been -stored. If the ims options are changed at the start (for a (?ims: group) or -during any branch, we need to insert an OP_OPT item at the start of every -following branch to ensure they get set correctly at run time, and also pass -the new options into every subsequent branch compile. - -Argument: - options option bits, including any changes for this subpattern - oldims previous settings of ims option bits - brackets -> int containing the number of extracting brackets used - codeptr -> the address of the current code pointer - ptrptr -> the address of the current pattern pointer - errorptr -> pointer to error message - lookbehind TRUE if this is a lookbehind assertion - skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER) - firstbyteptr place to put the first required character, or a negative number - reqbyteptr place to put the last required character, or a negative number - bcptr pointer to the chain of currently open branches - cd points to the data block with tables pointers etc. - -Returns: TRUE on success -*/ - -static BOOL -compile_regex(int options, int oldims, int *brackets, uschar **codeptr, - const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes, - int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd) -{ -const uschar *ptr = *ptrptr; -uschar *code = *codeptr; -uschar *last_branch = code; -uschar *start_bracket = code; -uschar *reverse_count = NULL; -int firstbyte, reqbyte; -int branchfirstbyte, branchreqbyte; -branch_chain bc; - -bc.outer = bcptr; -bc.current = code; - -firstbyte = reqbyte = REQ_UNSET; - -/* Offset is set zero to mark that this bracket is still open */ - -PUT(code, 1, 0); -code += 1 + LINK_SIZE + skipbytes; - -/* Loop for each alternative branch */ - -for (;;) - { - /* Handle a change of ims options at the start of the branch */ - - if ((options & PCRE_IMS) != oldims) - { - *code++ = OP_OPT; - *code++ = options & PCRE_IMS; - } - - /* Set up dummy OP_REVERSE if lookbehind assertion */ - - if (lookbehind) - { - *code++ = OP_REVERSE; - reverse_count = code; - PUTINC(code, 0, 0); - } - - /* Now compile the branch */ - - if (!compile_branch(&options, brackets, &code, &ptr, errorptr, - &branchfirstbyte, &branchreqbyte, &bc, cd)) - { - *ptrptr = ptr; - return FALSE; - } - - /* If this is the first branch, the firstbyte and reqbyte values for the - branch become the values for the regex. */ - - if (*last_branch != OP_ALT) - { - firstbyte = branchfirstbyte; - reqbyte = branchreqbyte; - } - - /* If this is not the first branch, the first char and reqbyte have to - match the values from all the previous branches, except that if the previous - value for reqbyte didn't have REQ_VARY set, it can still match, and we set - REQ_VARY for the regex. */ - - else - { - /* If we previously had a firstbyte, but it doesn't match the new branch, - we have to abandon the firstbyte for the regex, but if there was previously - no reqbyte, it takes on the value of the old firstbyte. */ - - if (firstbyte >= 0 && firstbyte != branchfirstbyte) - { - if (reqbyte < 0) reqbyte = firstbyte; - firstbyte = REQ_NONE; - } - - /* If we (now or from before) have no firstbyte, a firstbyte from the - branch becomes a reqbyte if there isn't a branch reqbyte. */ - - if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0) - branchreqbyte = branchfirstbyte; - - /* Now ensure that the reqbytes match */ - - if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) - reqbyte = REQ_NONE; - else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ - } - - /* If lookbehind, check that this branch matches a fixed-length string, - and put the length into the OP_REVERSE item. Temporarily mark the end of - the branch with OP_END. */ - - if (lookbehind) - { - int length; - *code = OP_END; - length = find_fixedlength(last_branch, options); - DPRINTF(("fixed length = %d\n", length)); - if (length < 0) - { - *errorptr = (length == -2)? ERR36 : ERR25; - *ptrptr = ptr; - return FALSE; - } - PUT(reverse_count, 0, length); - } - - /* Reached end of expression, either ')' or end of pattern. Go back through - the alternative branches and reverse the chain of offsets, with the field in - the BRA item now becoming an offset to the first alternative. If there are - no alternatives, it points to the end of the group. The length in the - terminating ket is always the length of the whole bracketed item. If any of - the ims options were changed inside the group, compile a resetting op-code - following, except at the very end of the pattern. Return leaving the pointer - at the terminating char. */ - - if (*ptr != '|') - { - int length = code - last_branch; - do - { - int prev_length = GET(last_branch, 1); - PUT(last_branch, 1, length); - length = prev_length; - last_branch -= length; - } - while (length > 0); - - /* Fill in the ket */ - - *code = OP_KET; - PUT(code, 1, code - start_bracket); - code += 1 + LINK_SIZE; - - /* Resetting option if needed */ - - if ((options & PCRE_IMS) != oldims && *ptr == ')') - { - *code++ = OP_OPT; - *code++ = oldims; - } - - /* Set values to pass back */ - - *codeptr = code; - *ptrptr = ptr; - *firstbyteptr = firstbyte; - *reqbyteptr = reqbyte; - return TRUE; - } - - /* Another branch follows; insert an "or" node. Its length field points back - to the previous branch while the bracket remains open. At the end the chain - is reversed. It's done like this so that the start of the bracket has a - zero offset until it is closed, making it possible to detect recursion. */ - - *code = OP_ALT; - PUT(code, 1, code - last_branch); - bc.current = last_branch = code; - code += 1 + LINK_SIZE; - ptr++; - } -/* Control never reaches here */ -} - - - - -/************************************************* -* Check for anchored expression * -*************************************************/ - -/* Try to find out if this is an anchored regular expression. Consider each -alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket -all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then -it's anchored. However, if this is a multiline pattern, then only OP_SOD -counts, since OP_CIRC can match in the middle. - -We can also consider a regex to be anchored if OP_SOM starts all its branches. -This is the code for \G, which means "match at start of match position, taking -into account the match offset". - -A branch is also implicitly anchored if it starts with .* and DOTALL is set, -because that will try the rest of the pattern at all possible matching points, -so there is no point trying again.... er .... - -.... except when the .* appears inside capturing parentheses, and there is a -subsequent back reference to those parentheses. We haven't enough information -to catch that case precisely. - -At first, the best we could do was to detect when .* was in capturing brackets -and the highest back reference was greater than or equal to that level. -However, by keeping a bitmap of the first 31 back references, we can catch some -of the more common cases more precisely. - -Arguments: - code points to start of expression (the bracket) - options points to the options setting - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach - backref_map the back reference bitmap - -Returns: TRUE or FALSE -*/ - -static BOOL -is_anchored(register const uschar *code, int *options, unsigned int bracket_map, - unsigned int backref_map) -{ -do { - const uschar *scode = - first_significant_code(code + 1+LINK_SIZE, options, PCRE_MULTILINE); - register int op = *scode; - - /* Capturing brackets */ - - if (op > OP_BRA) - { - int new_map; - op -= OP_BRA; - if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE); - new_map = bracket_map | ((op < 32)? (1 << op) : 1); - if (!is_anchored(scode, options, new_map, backref_map)) return FALSE; - } - - /* Other brackets */ - - else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) - { - if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE; - } - - /* .* is not anchored unless DOTALL is set and it isn't in brackets that - are or may be referenced. */ - - else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && - (*options & PCRE_DOTALL) != 0) - { - if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE; - } - - /* Check for explicit anchoring */ - - else if (op != OP_SOD && op != OP_SOM && - ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) - return FALSE; - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for starting with ^ or .* * -*************************************************/ - -/* This is called to find out if every branch starts with ^ or .* so that -"first char" processing can be done to speed things up in multiline -matching and for non-DOTALL patterns that start with .* (which must start at -the beginning or after \n). As in the case of is_anchored() (see above), we -have to take account of back references to capturing brackets that contain .* -because in that case we can't make the assumption. - -Arguments: - code points to start of expression (the bracket) - bracket_map a bitmap of which brackets we are inside while testing; this - handles up to substring 31; after that we just have to take - the less precise approach - backref_map the back reference bitmap - -Returns: TRUE or FALSE -*/ - -static BOOL -is_startline(const uschar *code, unsigned int bracket_map, - unsigned int backref_map) -{ -do { - const uschar *scode = first_significant_code(code + 1+LINK_SIZE, NULL, 0); - register int op = *scode; - - /* Capturing brackets */ - - if (op > OP_BRA) - { - int new_map; - op -= OP_BRA; - if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE); - new_map = bracket_map | ((op < 32)? (1 << op) : 1); - if (!is_startline(scode, new_map, backref_map)) return FALSE; - } - - /* Other brackets */ - - else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) - { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; } - - /* .* is not anchored unless DOTALL is set and it isn't in brackets that - may be referenced. */ - - else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) - { - if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE; - } - - /* Check for explicit circumflex */ - - else if (op != OP_CIRC) return FALSE; - code += GET(code, 1); - } -while (*code == OP_ALT); /* Loop for each alternative */ -return TRUE; -} - - - -/************************************************* -* Check for asserted fixed first char * -*************************************************/ - -/* During compilation, the "first char" settings from forward assertions are -discarded, because they can cause conflicts with actual literals that follow. -However, if we end up without a first char setting for an unanchored pattern, -it is worth scanning the regex to see if there is an initial asserted first -char. If all branches start with the same asserted char, or with a bracket all -of whose alternatives start with the same asserted char (recurse ad lib), then -we return that char, otherwise -1. - -Arguments: - code points to start of expression (the bracket) - options pointer to the options (used to check casing changes) - inassert TRUE if in an assertion - -Returns: -1 or the fixed first char -*/ - -static int -find_firstassertedchar(const uschar *code, int *options, BOOL inassert) -{ -register int c = -1; -do { - int d; - const uschar *scode = - first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS); - register int op = *scode; - - if (op >= OP_BRA) op = OP_BRA; - - switch(op) - { - default: - return -1; - - case OP_BRA: - case OP_ASSERT: - case OP_ONCE: - case OP_COND: - if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0) - return -1; - if (c < 0) c = d; else if (c != d) return -1; - break; - - case OP_EXACT: /* Fall through */ - scode++; - - case OP_CHARS: /* Fall through */ - scode++; - - case OP_PLUS: - case OP_MINPLUS: - if (!inassert) return -1; - if (c < 0) - { - c = scode[1]; - if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS; - } - else if (c != scode[1]) return -1; - break; - } - - code += GET(code, 1); - } -while (*code == OP_ALT); -return c; -} - - - - -/************************************************* -* Compile a Regular Expression * -*************************************************/ - -/* This function takes a string and returns a pointer to a block of store -holding a compiled version of the expression. - -Arguments: - pattern the regular expression - options various option bits - errorptr pointer to pointer to error text - erroroffset ptr offset in pattern where error was detected - tables pointer to character tables or NULL - -Returns: pointer to compiled data block, or NULL on error, - with errorptr and erroroffset set -*/ - -pcre * -pcre_compile(const char *pattern, int options, const char **errorptr, - int *erroroffset, const unsigned char *tables) -{ -real_pcre *re; -int length = 1 + LINK_SIZE; /* For initial BRA plus length */ -int runlength; -int c, firstbyte, reqbyte; -int bracount = 0; -int branch_extra = 0; -int branch_newextra; -int item_count = -1; -int name_count = 0; -int max_name_size = 0; -#ifdef SUPPORT_UTF8 -int lastcharlength = 0; -BOOL utf8; -BOOL class_utf8; -#endif -BOOL inescq = FALSE; -unsigned int brastackptr = 0; -size_t size; -uschar *code; -const uschar *codestart; -const uschar *ptr; -compile_data compile_block; -int brastack[BRASTACK_SIZE]; -uschar bralenstack[BRASTACK_SIZE]; - -/* We can't pass back an error message if errorptr is NULL; I guess the best we -can do is just return NULL. */ - -if (errorptr == NULL) return NULL; -*errorptr = NULL; - -/* However, we can give a message for this error */ - -if (erroroffset == NULL) - { - *errorptr = ERR16; - return NULL; - } -*erroroffset = 0; - -/* Can't support UTF8 unless PCRE has been compiled to include the code. */ - -#ifdef SUPPORT_UTF8 -utf8 = (options & PCRE_UTF8) != 0; -#else -if ((options & PCRE_UTF8) != 0) - { - *errorptr = ERR32; - return NULL; - } -#endif - -if ((options & ~PUBLIC_OPTIONS) != 0) - { - *errorptr = ERR17; - return NULL; - } - -/* Set up pointers to the individual character tables */ - -if (tables == NULL) tables = pcre_default_tables; -compile_block.lcc = tables + lcc_offset; -compile_block.fcc = tables + fcc_offset; -compile_block.cbits = tables + cbits_offset; -compile_block.ctypes = tables + ctypes_offset; - -/* Maximum back reference and backref bitmap. This is updated for numeric -references during the first pass, but for named references during the actual -compile pass. The bitmap records up to 31 back references to help in deciding -whether (.*) can be treated as anchored or not. */ - -compile_block.top_backref = 0; -compile_block.backref_map = 0; - -/* Reflect pattern for debugging output */ - -DPRINTF(("------------------------------------------------------------------\n")); -DPRINTF(("%s\n", pattern)); - -/* The first thing to do is to make a pass over the pattern to compute the -amount of store required to hold the compiled code. This does not have to be -perfect as long as errors are overestimates. At the same time we can detect any -flag settings right at the start, and extract them. Make an attempt to correct -for any counted white space if an "extended" flag setting appears late in the -pattern. We can't be so clever for #-comments. */ - -ptr = (const uschar *)(pattern - 1); -while ((c = *(++ptr)) != 0) - { - int min, max; - int class_optcount; - int bracket_length; - int duplength; - - /* If we are inside a \Q...\E sequence, all chars are literal */ - - if (inescq) goto NORMAL_CHAR; - - /* Otherwise, first check for ignored whitespace and comments */ - - if ((options & PCRE_EXTENDED) != 0) - { - if ((compile_block.ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != NEWLINE) ; - if (c == 0) break; - continue; - } - } - - item_count++; /* Is zero for the first non-comment item */ - - switch(c) - { - /* A backslashed item may be an escaped "normal" character or a - character type. For a "normal" character, put the pointers and - character back so that tests for whitespace etc. in the input - are done correctly. */ - - case '\\': - { - const uschar *save_ptr = ptr; - c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if (c >= 0) - { - ptr = save_ptr; - c = '\\'; - goto NORMAL_CHAR; - } - } - - /* If \Q, enter "literal" mode */ - - if (-c == ESC_Q) - { - inescq = TRUE; - continue; - } - - /* Other escapes need one byte, and are of length one for repeats */ - - length++; -#ifdef SUPPORT_UTF8 - lastcharlength = 1; -#endif - - /* A back reference needs an additional 2 bytes, plus either one or 5 - bytes for a repeat. We also need to keep the value of the highest - back reference. */ - - if (c <= -ESC_REF) - { - int refnum = -c - ESC_REF; - compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1; - if (refnum > compile_block.top_backref) - compile_block.top_backref = refnum; - length += 2; /* For single back reference */ - if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) - { - ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if ((min == 0 && (max == 1 || max == -1)) || - (min == 1 && max == -1)) - length++; - else length += 5; - if (ptr[1] == '?') ptr++; - } - } - continue; - - case '^': /* Single-byte metacharacters */ - case '.': - case '$': - length++; -#ifdef SUPPORT_UTF8 - lastcharlength = 1; -#endif - continue; - - case '*': /* These repeats won't be after brackets; */ - case '+': /* those are handled separately */ - case '?': - length++; - goto POSESSIVE; /* A few lines below */ - - /* This covers the cases of braced repeats after a single char, metachar, - class, or back reference. */ - - case '{': - if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; - ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - - /* These special cases just insert one extra opcode */ - - if ((min == 0 && (max == 1 || max == -1)) || - (min == 1 && max == -1)) - length++; - - /* These cases might insert additional copies of a preceding character. */ - - else - { -#ifdef SUPPORT_UTF8 - /* In UTF-8 mode, we should find the length in lastcharlength */ - if (utf8) - { - if (min != 1) - { - length -= lastcharlength; /* Uncount the original char or metachar */ - if (min > 0) length += 3 + lastcharlength; - } - length += lastcharlength + ((max > 0)? 3 : 1); - } - else -#endif - - /* Not UTF-8 mode: all characters are one byte */ - { - if (min != 1) - { - length--; /* Uncount the original char or metachar */ - if (min > 0) length += 4; - } - - length += (max > 0)? 4 : 2; - } - } - - if (ptr[1] == '?') ptr++; /* Needs no extra length */ - - POSESSIVE: /* Test for possessive quantifier */ - if (ptr[1] == '+') - { - ptr++; - length += 2 + 2*LINK_SIZE; /* Allow for atomic brackets */ - } - continue; - - /* An alternation contains an offset to the next branch or ket. If any ims - options changed in the previous branch(es), and/or if we are in a - lookbehind assertion, extra space will be needed at the start of the - branch. This is handled by branch_extra. */ - - case '|': - length += 1 + LINK_SIZE + branch_extra; - continue; - - /* A character class uses 33 characters provided that all the character - values are less than 256. Otherwise, it uses a bit map for low valued - characters, and individual items for others. Don't worry about character - types that aren't allowed in classes - they'll get picked up during the - compile. A character class that contains only one single-byte character - uses 2 or 3 bytes, depending on whether it is negated or not. Notice this - where we can. (In UTF-8 mode we can do this only for chars < 128.) */ - - case '[': - class_optcount = 0; - -#ifdef SUPPORT_UTF8 - class_utf8 = FALSE; -#endif - - if (*(++ptr) == '^') ptr++; - - /* Written as a "do" so that an initial ']' is taken as data */ - - if (*ptr != 0) do - { - /* Inside \Q...\E everything is literal except \E */ - - if (inescq) - { - if (*ptr != '\\' || ptr[1] != 'E') goto NON_SPECIAL_CHARACTER; - inescq = FALSE; - ptr += 1; - continue; - } - - /* Outside \Q...\E, check for escapes */ - - if (*ptr == '\\') - { -#ifdef SUPPORT_UTF8 - int prevchar = ptr[-1]; -#endif - int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, - &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - - /* \b is backspace inside a class */ - - if (-ch == ESC_b) ch = '\b'; - - /* \Q enters quoting mode */ - - if (-ch == ESC_Q) - { - inescq = TRUE; - continue; - } - - /* Handle escapes that turn into characters */ - - if (ch >= 0) - { -#ifdef SUPPORT_UTF8 - if (utf8) - { - if (ch > 127) class_optcount = 10; /* Ensure > 1 */ - if (ch > 255) - { - uschar buffer[6]; - if (!class_utf8) - { - class_utf8 = TRUE; - length += LINK_SIZE + 1 + 1; - } - length += 1 + ord2utf8(ch, buffer); - - /* If this wide character is preceded by '-', add an extra 2 to - the length in case the previous character was < 128, because in - this case the whole range will be put into the list. */ - - if (prevchar == '-') length += 2; - } - } -#endif - class_optcount++; /* for possible optimization */ - } - else class_optcount = 10; /* \d, \s etc; make sure > 1 */ - } - - /* Check the syntax for POSIX stuff. The bits we actually handle are - checked during the real compile phase. */ - - else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block)) - { - ptr++; - class_optcount = 10; /* Make sure > 1 */ - } - - /* Anything else just increments the possible optimization count. If - there are wide characters, we are going to have to use an XCLASS. */ - - else - { - NON_SPECIAL_CHARACTER: - class_optcount++; - -#ifdef SUPPORT_UTF8 - if (utf8) - { - int ch; - int extra = 0; - GETCHARLEN(ch, ptr, extra); - if (ch > 127) class_optcount = 10; /* No optimization possible */ - if (ch > 255) - { - if (!class_utf8) - { - class_utf8 = TRUE; - length += LINK_SIZE + 1 + 1; - } - length += 2 + extra; - - /* If this wide character is preceded by '-', add an extra 2 to - the length in case the previous character was < 128, because in - this case the whole range will be put into the list. */ - - if (ptr[-1] == '-') length += 2; - - /* Advance to the end of this character */ - - ptr += extra; - } - } -#endif - } - } - while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */ - - if (*ptr == 0) /* Missing terminating ']' */ - { - *errorptr = ERR6; - goto PCRE_ERROR_RETURN; - } - - /* We can optimize when there was only one optimizable character. Repeats - for positive and negated single one-byte chars are handled by the general - code. Here, we handle repeats for the class opcodes. */ - - if (class_optcount == 1) length += 3; else - { - length += 33; - - /* A repeat needs either 1 or 5 bytes. */ - - if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) - { - ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if ((min == 0 && (max == 1 || max == -1)) || - (min == 1 && max == -1)) - length++; - else length += 5; - if (ptr[1] == '?') ptr++; - } - } - continue; - - /* Brackets may be genuine groups or special things */ - - case '(': - branch_newextra = 0; - bracket_length = 1 + LINK_SIZE; - - /* Handle special forms of bracket, which all start (? */ - - if (ptr[1] == '?') - { - int set, unset; - int *optset; - - switch (c = ptr[2]) - { - /* Skip over comments entirely */ - case '#': - ptr += 3; - while (*ptr != 0 && *ptr != ')') ptr++; - if (*ptr == 0) - { - *errorptr = ERR18; - goto PCRE_ERROR_RETURN; - } - continue; - - /* Non-referencing groups and lookaheads just move the pointer on, and - then behave like a non-special bracket, except that they don't increment - the count of extracting brackets. Ditto for the "once only" bracket, - which is in Perl from version 5.005. */ - - case ':': - case '=': - case '!': - case '>': - ptr += 2; - break; - - /* (?R) specifies a recursive call to the regex, which is an extension - to provide the facility which can be obtained by (?p{perl-code}) in - Perl 5.6. In Perl 5.8 this has become (??{perl-code}). - - From PCRE 4.00, items such as (?3) specify subroutine-like "calls" to - the appropriate numbered brackets. This includes both recursive and - non-recursive calls. (?R) is now synonymous with (?0). */ - - case 'R': - ptr++; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - ptr += 2; - if (c != 'R') - while ((digitab[*(++ptr)] & ctype_digit) != 0); - if (*ptr != ')') - { - *errorptr = ERR29; - goto PCRE_ERROR_RETURN; - } - length += 1 + LINK_SIZE; - - /* If this item is quantified, it will get wrapped inside brackets so - as to use the code for quantified brackets. We jump down and use the - code that handles this for real brackets. */ - - if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{') - { - length += 2 + 2 * LINK_SIZE; /* to make bracketed */ - duplength = 5 + 3 * LINK_SIZE; - goto HANDLE_QUANTIFIED_BRACKETS; - } - continue; - - /* (?C) is an extension which provides "callout" - to provide a bit of - the functionality of the Perl (?{...}) feature. An optional number may - follow (default is zero). */ - - case 'C': - ptr += 2; - while ((digitab[*(++ptr)] & ctype_digit) != 0); - if (*ptr != ')') - { - *errorptr = ERR39; - goto PCRE_ERROR_RETURN; - } - length += 2; - continue; - - /* Named subpatterns are an extension copied from Python */ - - case 'P': - ptr += 3; - if (*ptr == '<') - { - const uschar *p; /* Don't amalgamate; some compilers */ - p = ++ptr; /* grumble at autoincrement in declaration */ - while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++; - if (*ptr != '>') - { - *errorptr = ERR42; - goto PCRE_ERROR_RETURN; - } - name_count++; - if (ptr - p > max_name_size) max_name_size = (ptr - p); - break; - } - - if (*ptr == '=' || *ptr == '>') - { - while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0); - if (*ptr != ')') - { - *errorptr = ERR42; - goto PCRE_ERROR_RETURN; - } - break; - } - - /* Unknown character after (?P */ - - *errorptr = ERR41; - goto PCRE_ERROR_RETURN; - - /* Lookbehinds are in Perl from version 5.005 */ - - case '<': - ptr += 3; - if (*ptr == '=' || *ptr == '!') - { - branch_newextra = 1 + LINK_SIZE; - length += 1 + LINK_SIZE; /* For the first branch */ - break; - } - *errorptr = ERR24; - goto PCRE_ERROR_RETURN; - - /* Conditionals are in Perl from version 5.005. The bracket must either - be followed by a number (for bracket reference) or by an assertion - group, or (a PCRE extension) by 'R' for a recursion test. */ - - case '(': - if (ptr[3] == 'R' && ptr[4] == ')') - { - ptr += 4; - length += 3; - } - else if ((digitab[ptr[3]] & ctype_digit) != 0) - { - ptr += 4; - length += 3; - while ((digitab[*ptr] & ctype_digit) != 0) ptr++; - if (*ptr != ')') - { - *errorptr = ERR26; - goto PCRE_ERROR_RETURN; - } - } - else /* An assertion must follow */ - { - ptr++; /* Can treat like ':' as far as spacing is concerned */ - if (ptr[2] != '?' || - (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') ) - { - ptr += 2; /* To get right offset in message */ - *errorptr = ERR28; - goto PCRE_ERROR_RETURN; - } - } - break; - - /* Else loop checking valid options until ) is met. Anything else is an - error. If we are without any brackets, i.e. at top level, the settings - act as if specified in the options, so massage the options immediately. - This is for backward compatibility with Perl 5.004. */ - - default: - set = unset = 0; - optset = &set; - ptr += 2; - - for (;; ptr++) - { - c = *ptr; - switch (c) - { - case 'i': - *optset |= PCRE_CASELESS; - continue; - - case 'm': - *optset |= PCRE_MULTILINE; - continue; - - case 's': - *optset |= PCRE_DOTALL; - continue; - - case 'x': - *optset |= PCRE_EXTENDED; - continue; - - case 'X': - *optset |= PCRE_EXTRA; - continue; - - case 'U': - *optset |= PCRE_UNGREEDY; - continue; - - case '-': - optset = &unset; - continue; - - /* A termination by ')' indicates an options-setting-only item; if - this is at the very start of the pattern (indicated by item_count - being zero), we use it to set the global options. This is helpful - when analyzing the pattern for first characters, etc. Otherwise - nothing is done here and it is handled during the compiling - process. - - [Historical note: Up to Perl 5.8, options settings at top level - were always global settings, wherever they appeared in the pattern. - That is, they were equivalent to an external setting. From 5.8 - onwards, they apply only to what follows (which is what you might - expect).] */ - - case ')': - if (item_count == 0) - { - options = (options | set) & (~unset); - set = unset = 0; /* To save length */ - item_count--; /* To allow for several */ - } - - /* Fall through */ - - /* A termination by ':' indicates the start of a nested group with - the given options set. This is again handled at compile time, but - we must allow for compiled space if any of the ims options are - set. We also have to allow for resetting space at the end of - the group, which is why 4 is added to the length and not just 2. - If there are several changes of options within the same group, this - will lead to an over-estimate on the length, but this shouldn't - matter very much. We also have to allow for resetting options at - the start of any alternations, which we do by setting - branch_newextra to 2. Finally, we record whether the case-dependent - flag ever changes within the regex. This is used by the "required - character" code. */ - - case ':': - if (((set|unset) & PCRE_IMS) != 0) - { - length += 4; - branch_newextra = 2; - if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED; - } - goto END_OPTIONS; - - /* Unrecognized option character */ - - default: - *errorptr = ERR12; - goto PCRE_ERROR_RETURN; - } - } - - /* If we hit a closing bracket, that's it - this is a freestanding - option-setting. We need to ensure that branch_extra is updated if - necessary. The only values branch_newextra can have here are 0 or 2. - If the value is 2, then branch_extra must either be 2 or 5, depending - on whether this is a lookbehind group or not. */ - - END_OPTIONS: - if (c == ')') - { - if (branch_newextra == 2 && - (branch_extra == 0 || branch_extra == 1+LINK_SIZE)) - branch_extra += branch_newextra; - continue; - } - - /* If options were terminated by ':' control comes here. Fall through - to handle the group below. */ - } - } - - /* Extracting brackets must be counted so we can process escapes in a - Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to - need an additional 3 bytes of store per extracting bracket. However, if - PCRE_NO_AUTO)CAPTURE is set, unadorned brackets become non-capturing, so we - must leave the count alone (it will aways be zero). */ - - else if ((options & PCRE_NO_AUTO_CAPTURE) == 0) - { - bracount++; - if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3; - } - - /* Save length for computing whole length at end if there's a repeat that - requires duplication of the group. Also save the current value of - branch_extra, and start the new group with the new value. If non-zero, this - will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ - - if (brastackptr >= sizeof(brastack)/sizeof(int)) - { - *errorptr = ERR19; - goto PCRE_ERROR_RETURN; - } - - bralenstack[brastackptr] = branch_extra; - branch_extra = branch_newextra; - - brastack[brastackptr++] = length; - length += bracket_length; - continue; - - /* Handle ket. Look for subsequent max/min; for certain sets of values we - have to replicate this bracket up to that many times. If brastackptr is - 0 this is an unmatched bracket which will generate an error, but take care - not to try to access brastack[-1] when computing the length and restoring - the branch_extra value. */ - - case ')': - length += 1 + LINK_SIZE; - if (brastackptr > 0) - { - duplength = length - brastack[--brastackptr]; - branch_extra = bralenstack[brastackptr]; - } - else duplength = 0; - - /* The following code is also used when a recursion such as (?3) is - followed by a quantifier, because in that case, it has to be wrapped inside - brackets so that the quantifier works. The value of duplength must be - set before arrival. */ - - HANDLE_QUANTIFIED_BRACKETS: - - /* Leave ptr at the final char; for read_repeat_counts this happens - automatically; for the others we need an increment. */ - - if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) - { - ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - } - else if (c == '*') { min = 0; max = -1; ptr++; } - else if (c == '+') { min = 1; max = -1; ptr++; } - else if (c == '?') { min = 0; max = 1; ptr++; } - else { min = 1; max = 1; } - - /* If the minimum is zero, we have to allow for an OP_BRAZERO before the - group, and if the maximum is greater than zero, we have to replicate - maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting - bracket set. */ - - if (min == 0) - { - length++; - if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE); - } - - /* When the minimum is greater than zero, we have to replicate up to - minval-1 times, with no additions required in the copies. Then, if there - is a limited maximum we have to replicate up to maxval-1 times allowing - for a BRAZERO item before each optional copy and nesting brackets for all - but one of the optional copies. */ - - else - { - length += (min - 1) * duplength; - if (max > min) /* Need this test as max=-1 means no limit */ - length += (max - min) * (duplength + 3 + 2*LINK_SIZE) - - (2 + 2*LINK_SIZE); - } - - /* Allow space for once brackets for "possessive quantifier" */ - - if (ptr[1] == '+') - { - ptr++; - length += 2 + 2*LINK_SIZE; - } - continue; - - /* Non-special character. For a run of such characters the length required - is the number of characters + 2, except that the maximum run length is - MAXLIT. We won't get a skipped space or a non-data escape or the start of a - # comment as the first character, so the length can't be zero. */ - - NORMAL_CHAR: - default: - length += 2; - runlength = 0; - do - { -#ifdef SUPPORT_UTF8 - lastcharlength = 1; /* Need length of last char for UTF-8 repeats */ -#endif - - /* If in a \Q...\E sequence, check for end; otherwise it's a literal */ - if (inescq) - { - if (c == '\\' && ptr[1] == 'E') - { - inescq = FALSE; - ptr++; - } - else runlength++; - continue; - } - - /* Skip whitespace and comments for /x */ - - if ((options & PCRE_EXTENDED) != 0) - { - if ((compile_block.ctypes[c] & ctype_space) != 0) continue; - if (c == '#') - { - /* The space before the ; is to avoid a warning on a silly compiler - on the Macintosh. */ - while ((c = *(++ptr)) != 0 && c != NEWLINE) ; - continue; - } - } - - /* Backslash may introduce a data char or a metacharacter; stop the - string before the latter. */ - - if (c == '\\') - { - const uschar *saveptr = ptr; - c = check_escape(&ptr, errorptr, bracount, options, FALSE, - &compile_block); - if (*errorptr != NULL) goto PCRE_ERROR_RETURN; - if (c < 0) { ptr = saveptr; break; } - - /* In UTF-8 mode, add on the number of additional bytes needed to - encode this character, and save the total length in case this is a - final char that is repeated. */ - -#ifdef SUPPORT_UTF8 - if (utf8 && c > 127) - { - int i; - for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) - if (c <= utf8_table1[i]) break; - runlength += i; - lastcharlength += i; - } -#endif - } - - /* Ordinary character or single-char escape */ - - runlength++; - } - - /* This "while" is the end of the "do" above. */ - - while (runlength < MAXLIT && - (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); - - /* If we hit a meta-character, back off to point to it */ - - if (runlength < MAXLIT) ptr--; - - /* If the last char in the string is a UTF-8 multibyte character, we must - set lastcharlength correctly. If it was specified as an escape, this will - already have been done above. However, we also have to support in-line - UTF-8 characters, so check backwards from where we are. */ - -#ifdef SUPPORT_UTF8 - if (utf8) - { - const uschar *lastptr = ptr - 1; - if ((*lastptr & 0x80) != 0) - { - while((*lastptr & 0xc0) == 0x80) lastptr--; - lastcharlength = ptr - lastptr; - } - } -#endif - - length += runlength; - continue; - } - } - -length += 2 + LINK_SIZE; /* For final KET and END */ - -if (length > MAX_PATTERN_SIZE) - { - *errorptr = ERR20; - return NULL; - } - -/* Compute the size of data block needed and get it, either from malloc or -externally provided function. */ - -size = length + sizeof(real_pcre) + name_count * (max_name_size + 3); -re = (real_pcre *)(pcre_malloc)(size); - -if (re == NULL) - { - *errorptr = ERR21; - return NULL; - } - -/* Put in the magic number, and save the size, options, and table pointer */ - -re->magic_number = MAGIC_NUMBER; -re->size = size; -re->options = options; -re->tables = tables; -re->name_entry_size = max_name_size + 3; -re->name_count = name_count; - -/* The starting points of the name/number translation table and of the code are -passed around in the compile data block. */ - -compile_block.names_found = 0; -compile_block.name_entry_size = max_name_size + 3; -compile_block.name_table = (uschar *)re + sizeof(real_pcre); -codestart = compile_block.name_table + re->name_entry_size * re->name_count; -compile_block.start_code = codestart; -compile_block.req_varyopt = 0; - -/* Set up a starting, non-extracting bracket, then compile the expression. On -error, *errorptr will be set non-NULL, so we don't need to look at the result -of the function here. */ - -ptr = (const uschar *)pattern; -code = (uschar *)codestart; -*code = OP_BRA; -bracount = 0; -(void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr, - errorptr, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block); -re->top_bracket = bracount; -re->top_backref = compile_block.top_backref; - -/* If not reached end of pattern on success, there's an excess bracket. */ - -if (*errorptr == NULL && *ptr != 0) *errorptr = ERR22; - -/* Fill in the terminating state and check for disastrous overflow, but -if debugging, leave the test till after things are printed out. */ - -*code++ = OP_END; - -#ifndef DEBUG -if (code - codestart > length) *errorptr = ERR23; -#endif - -/* Give an error if there's back reference to a non-existent capturing -subpattern. */ - -if (re->top_backref > re->top_bracket) *errorptr = ERR15; - -/* Failed to compile, or error while post-processing */ - -if (*errorptr != NULL) - { - (pcre_free)(re); - PCRE_ERROR_RETURN: - *erroroffset = ptr - (const uschar *)pattern; - return NULL; - } - -/* If the anchored option was not passed, set the flag if we can determine that -the pattern is anchored by virtue of ^ characters or \A or anything else (such -as starting with .* when DOTALL is set). - -Otherwise, if we know what the first character has to be, save it, because that -speeds up unanchored matches no end. If not, see if we can set the -PCRE_STARTLINE flag. This is helpful for multiline matches when all branches -start with ^. and also when all branches start with .* for non-DOTALL matches. -*/ - -if ((options & PCRE_ANCHORED) == 0) - { - int temp_options = options; - if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map)) - re->options |= PCRE_ANCHORED; - else - { - if (firstbyte < 0) - firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE); - if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */ - { - int ch = firstbyte & 255; - re->first_byte = ((firstbyte & REQ_CASELESS) != 0 && - compile_block.fcc[ch] == ch)? ch : firstbyte; - re->options |= PCRE_FIRSTSET; - } - else if (is_startline(codestart, 0, compile_block.backref_map)) - re->options |= PCRE_STARTLINE; - } - } - -/* For an anchored pattern, we use the "required byte" only if it follows a -variable length item in the regex. Remove the caseless flag for non-caseable -chars. */ - -if (reqbyte >= 0 && - ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0)) - { - int ch = reqbyte & 255; - re->req_byte = ((reqbyte & REQ_CASELESS) != 0 && - compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte; - re->options |= PCRE_REQCHSET; - } - -/* Print out the compiled data for debugging */ - -#ifdef DEBUG - -printf("Length = %d top_bracket = %d top_backref = %d\n", - length, re->top_bracket, re->top_backref); - -if (re->options != 0) - { - printf("%s%s%s%s%s%s%s%s%s\n", - ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", - ((re->options & PCRE_CASELESS) != 0)? "caseless " : "", - ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "", - ((re->options & PCRE_EXTENDED) != 0)? "extended " : "", - ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", - ((re->options & PCRE_DOTALL) != 0)? "dotall " : "", - ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", - ((re->options & PCRE_EXTRA) != 0)? "extra " : "", - ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); - } - -if ((re->options & PCRE_FIRSTSET) != 0) - { - int ch = re->first_byte & 255; - char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; - if (isprint(ch)) printf("First char = %c%s\n", ch, caseless); - else printf("First char = \\x%02x%s\n", ch, caseless); - } - -if ((re->options & PCRE_REQCHSET) != 0) - { - int ch = re->req_byte & 255; - char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; - if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless); - else printf("Req char = \\x%02x%s\n", ch, caseless); - } - -print_internals(re, stdout); - -/* This check is done here in the debugging case so that the code that -was compiled can be seen. */ - -if (code - codestart > length) - { - *errorptr = ERR23; - (pcre_free)(re); - *erroroffset = ptr - (uschar *)pattern; - return NULL; - } -#endif - -return (pcre *)re; -} - - - -/************************************************* -* Match a back-reference * -*************************************************/ - -/* If a back reference hasn't been set, the length that is passed is greater -than the number of characters left in the string, so the match fails. - -Arguments: - offset index into the offset vector - eptr points into the subject - length length to be matched - md points to match data block - ims the ims flags - -Returns: TRUE if matched -*/ - -static BOOL -match_ref(int offset, register const uschar *eptr, int length, match_data *md, - unsigned long int ims) -{ -const uschar *p = md->start_subject + md->offset_vector[offset]; - -#ifdef DEBUG -if (eptr >= md->end_subject) - printf("matching subject "); -else - { - printf("matching subject "); - pchars(eptr, length, TRUE, md); - } -printf(" against backref "); -pchars(p, length, FALSE, md); -printf("\n"); -#endif - -/* Always fail if not enough characters left */ - -if (length > md->end_subject - eptr) return FALSE; - -/* Separate the caselesss case for speed */ - -if ((ims & PCRE_CASELESS) != 0) - { - while (length-- > 0) - if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; - } -else - { while (length-- > 0) if (*p++ != *eptr++) return FALSE; } - -return TRUE; -} - - -#ifdef SUPPORT_UTF8 -/************************************************* -* Match character against an XCLASS * -*************************************************/ - -/* This function is called from within the XCLASS code below, to match a -character against an extended class which might match values > 255. - -Arguments: - c the character - data points to the flag byte of the XCLASS data - -Returns: TRUE if character matches, else FALSE -*/ - -static BOOL -match_xclass(int c, const uschar *data) -{ -int t; -BOOL negated = (*data & XCL_NOT) != 0; - -/* Character values < 256 are matched against a bitmap, if one is present. If -not, we still carry on, because there may be ranges that start below 256 in the -additional data. */ - -if (c < 256) - { - if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) - return !negated; /* char found */ - } - -/* Now match against the list of large chars or ranges that end with a large -char. First skip the bit map if present. */ - -if ((*data++ & XCL_MAP) != 0) data += 32; - -while ((t = *data++) != XCL_END) - { - int x, y; - GETCHARINC(x, data); - if (t == XCL_SINGLE) - { - if (c == x) return !negated; - } - else - { - GETCHARINC(y, data); - if (c >= x && c <= y) return !negated; - } - } - -return negated; /* char was not found */ -} -#endif - - - - -/************************************************* -* Match from current position * -*************************************************/ - -/* On entry ecode points to the first opcode, and eptr to the first character -in the subject string, while eptrb holds the value of eptr at the start of the -last bracketed group - used for breaking infinite loops matching zero-length -strings. This function is called recursively in many circumstances. Whenever it -returns a negative (error) response, the outer incarnation must also return the -same response. - -Performance note: It might be tempting to extract commonly used fields from the -md structure (e.g. utf8, end_subject) into individual variables to improve -performance. Tests using gcc on a SPARC disproved this; in the first case, it -made performance worse. - -Arguments: - eptr pointer in subject - ecode position in code - offset_top current top pointer - md pointer to "static" info for the match - ims current /i, /m, and /s options - eptrb pointer to chain of blocks containing eptr at start of - brackets - for testing for empty matches - flags can contain - match_condassert - this is an assertion condition - match_isgroup - this is the start of a bracketed group - -Returns: MATCH_MATCH if matched ) these values are >= 0 - MATCH_NOMATCH if failed to match ) - a negative PCRE_ERROR_xxx value if aborted by an error condition - (e.g. stopped by recursion limit) -*/ - -static int -match(register const uschar *eptr, register const uschar *ecode, - int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, - int flags) -{ -unsigned long int original_ims = ims; /* Save for resetting on ')' */ -register int rrc; -eptrblock newptrb; - -if (md->match_call_count++ >= md->match_limit) return PCRE_ERROR_MATCHLIMIT; - -/* At the start of a bracketed group, add the current subject pointer to the -stack of such pointers, to be re-instated at the end of the group when we hit -the closing ket. When match() is called in other circumstances, we don't add to -the stack. */ - -if ((flags & match_isgroup) != 0) - { - newptrb.prev = eptrb; - newptrb.saved_eptr = eptr; - eptrb = &newptrb; - } - -/* Now start processing the operations. */ - -for (;;) - { - int op = (int)*ecode; - int min, max, ctype; - register int i; - register int c; - BOOL minimize = FALSE; - - /* Opening capturing bracket. If there is space in the offset vector, save - the current subject position in the working slot at the top of the vector. We - mustn't change the current values of the data slot, because they may be set - from a previous iteration of this group, and be referred to by a reference - inside the group. - - If the bracket fails to match, we need to restore this value and also the - values of the final offsets, in case they were set by a previous iteration of - the same bracket. - - If there isn't enough space in the offset vector, treat this as if it were a - non-capturing bracket. Don't worry about setting the flag for the error case - here; that is handled in the code for KET. */ - - if (op > OP_BRA) - { - int offset; - int number = op - OP_BRA; - - /* For extended extraction brackets (large number), we have to fish out the - number from a dummy opcode at the start. */ - - if (number > EXTRACT_BASIC_MAX) - number = GET2(ecode, 2+LINK_SIZE); - offset = number << 1; - -#ifdef DEBUG - printf("start bracket %d subject=", number); - pchars(eptr, 16, TRUE, md); - printf("\n"); -#endif - - if (offset < md->offset_max) - { - int save_offset1 = md->offset_vector[offset]; - int save_offset2 = md->offset_vector[offset+1]; - int save_offset3 = md->offset_vector[md->offset_end - number]; - int save_capture_last = md->capture_last; - - DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); - md->offset_vector[md->offset_end - number] = eptr - md->start_subject; - - do - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, - eptrb, match_isgroup)) != MATCH_NOMATCH) return rrc; - md->capture_last = save_capture_last; - ecode += GET(ecode, 1); - } - while (*ecode == OP_ALT); - - DPRINTF(("bracket %d failed\n", number)); - - md->offset_vector[offset] = save_offset1; - md->offset_vector[offset+1] = save_offset2; - md->offset_vector[md->offset_end - number] = save_offset3; - - return MATCH_NOMATCH; - } - - /* Insufficient room for saving captured contents */ - - else op = OP_BRA; - } - - /* Other types of node can be handled by a switch */ - - switch(op) - { - case OP_BRA: /* Non-capturing bracket: optimized */ - DPRINTF(("start bracket 0\n")); - do - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, - match_isgroup)) != MATCH_NOMATCH) return rrc; - ecode += GET(ecode, 1); - } - while (*ecode == OP_ALT); - DPRINTF(("bracket 0 failed\n")); - return MATCH_NOMATCH; - - /* Conditional group: compilation checked that there are no more than - two branches. If the condition is false, skipping the first branch takes us - past the end if there is only one branch, but that's OK because that is - exactly what going to the ket would do. */ - - case OP_COND: - if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */ - { - int offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ - BOOL condition = (offset == CREF_RECURSE * 2)? - (md->recursive != NULL) : - (offset < offset_top && md->offset_vector[offset] >= 0); - return match(eptr, ecode + (condition? - (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))), - offset_top, md, ims, eptrb, match_isgroup); - } - - /* The condition is an assertion. Call match() to evaluate it - setting - the final argument TRUE causes it to stop at the end of an assertion. */ - - else - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, - match_condassert | match_isgroup)) == MATCH_MATCH) - { - ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2); - while (*ecode == OP_ALT) ecode += GET(ecode, 1); - } - else if (rrc != MATCH_NOMATCH) return rrc; - else ecode += GET(ecode, 1); - return match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, - match_isgroup); - } - /* Control never reaches here */ - - /* Skip over conditional reference or large extraction number data if - encountered. */ - - case OP_CREF: - case OP_BRANUMBER: - ecode += 3; - break; - - /* End of the pattern. If we are in a recursion, we should restore the - offsets appropriately and continue from after the call. */ - - case OP_END: - if (md->recursive != NULL && md->recursive->group_num == 0) - { - recursion_info *rec = md->recursive; - DPRINTF(("Hit the end in a (?0) recursion\n")); - md->recursive = rec->prev; - memmove(md->offset_vector, rec->offset_save, - rec->saved_max * sizeof(int)); - md->start_match = rec->save_start; - ims = original_ims; - ecode = rec->after_call; - break; - } - - /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty - string - backtracking will then try other alternatives, if any. */ - - if (md->notempty && eptr == md->start_match) return MATCH_NOMATCH; - md->end_match_ptr = eptr; /* Record where we ended */ - md->end_offset_top = offset_top; /* and how many extracts were taken */ - return MATCH_MATCH; - - /* Change option settings */ - - case OP_OPT: - ims = ecode[1]; - ecode += 2; - DPRINTF(("ims set to %02lx\n", ims)); - break; - - /* Assertion brackets. Check the alternative branches in turn - the - matching won't pass the KET for an assertion. If any one branch matches, - the assertion is true. Lookbehind assertions have an OP_REVERSE item at the - start of each branch to move the current point backwards, so the code at - this level is identical to the lookahead case. */ - - case OP_ASSERT: - case OP_ASSERTBACK: - do - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, - match_isgroup)) == MATCH_MATCH) break; - if (rrc != MATCH_NOMATCH) return rrc; - ecode += GET(ecode, 1); - } - while (*ecode == OP_ALT); - if (*ecode == OP_KET) return MATCH_NOMATCH; - - /* If checking an assertion for a condition, return MATCH_MATCH. */ - - if ((flags & match_condassert) != 0) return MATCH_MATCH; - - /* Continue from after the assertion, updating the offsets high water - mark, since extracts may have been taken during the assertion. */ - - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - ecode += 1 + LINK_SIZE; - offset_top = md->end_offset_top; - continue; - - /* Negative assertion: all branches must fail to match */ - - case OP_ASSERT_NOT: - case OP_ASSERTBACK_NOT: - do - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, - match_isgroup)) == MATCH_MATCH) return MATCH_NOMATCH; - if (rrc != MATCH_NOMATCH) return rrc; - ecode += GET(ecode,1); - } - while (*ecode == OP_ALT); - - if ((flags & match_condassert) != 0) return MATCH_MATCH; - - ecode += 1 + LINK_SIZE; - continue; - - /* Move the subject pointer back. This occurs only at the start of - each branch of a lookbehind assertion. If we are too close to the start to - move back, this match function fails. When working with UTF-8 we move - back a number of characters, not bytes. */ - - case OP_REVERSE: -#ifdef SUPPORT_UTF8 - if (md->utf8) - { - c = GET(ecode,1); - for (i = 0; i < c; i++) - { - eptr--; - if (eptr < md->start_subject) return MATCH_NOMATCH; - BACKCHAR(eptr) - } - } - else -#endif - - /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ - - { - eptr -= GET(ecode,1); - if (eptr < md->start_subject) return MATCH_NOMATCH; - } - - /* Skip to next op code */ - - ecode += 1 + LINK_SIZE; - break; - - /* The callout item calls an external function, if one is provided, passing - details of the match so far. This is mainly for debugging, though the - function is able to force a failure. */ - - case OP_CALLOUT: - if (pcre_callout != NULL) - { - pcre_callout_block cb; - cb.version = 0; /* Version 0 of the callout block */ - cb.callout_number = ecode[1]; - cb.offset_vector = md->offset_vector; - cb.subject = (const char *)md->start_subject; - cb.subject_length = md->end_subject - md->start_subject; - cb.start_match = md->start_match - md->start_subject; - cb.current_position = eptr - md->start_subject; - cb.capture_top = offset_top/2; - cb.capture_last = md->capture_last; - cb.callout_data = md->callout_data; - if ((rrc = (*pcre_callout)(&cb)) > 0) return MATCH_NOMATCH; - if (rrc < 0) return rrc; - } - ecode += 2; - break; - - /* Recursion either matches the current regex, or some subexpression. The - offset data is the offset to the starting bracket from the start of the - whole pattern. However, it is possible that a BRAZERO was inserted before - this bracket after we took the offset - we just skip it if encountered. - - If there are any capturing brackets started but not finished, we have to - save their starting points and reinstate them after the recursion. However, - we don't know how many such there are (offset_top records the completed - total) so we just have to save all the potential data. There may be up to - 65535 such values, which is too large to put on the stack, but using malloc - for small numbers seems expensive. As a compromise, the stack is used when - there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc - is used. A problem is what to do if the malloc fails ... there is no way of - returning to the top level with an error. Save the top REC_STACK_SAVE_MAX - values on the stack, and accept that the rest may be wrong. - - There are also other values that have to be saved. We use a chained - sequence of blocks that actually live on the stack. Thanks to Robin Houston - for the original version of this logic. */ - - case OP_RECURSE: - { - int stacksave[REC_STACK_SAVE_MAX]; - recursion_info new_recursive; - const uschar *callpat = md->start_code + GET(ecode, 1); - - if (*callpat == OP_BRAZERO) callpat++; - - new_recursive.group_num = *callpat - OP_BRA; - - /* For extended extraction brackets (large number), we have to fish out - the number from a dummy opcode at the start. */ - - if (new_recursive.group_num > EXTRACT_BASIC_MAX) - new_recursive.group_num = GET2(callpat, 2+LINK_SIZE); - - /* Add to "recursing stack" */ - - new_recursive.prev = md->recursive; - md->recursive = &new_recursive; - - /* Find where to continue from afterwards */ - - ecode += 1 + LINK_SIZE; - new_recursive.after_call = ecode; - - /* Now save the offset data. */ - - new_recursive.saved_max = md->offset_end; - if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) - new_recursive.offset_save = stacksave; - else - { - new_recursive.offset_save = - (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); - if (new_recursive.offset_save == NULL) return PCRE_ERROR_NOMEMORY; - } - - memcpy(new_recursive.offset_save, md->offset_vector, - new_recursive.saved_max * sizeof(int)); - new_recursive.save_start = md->start_match; - md->start_match = eptr; - - /* OK, now we can do the recursion. For each top-level alternative we - restore the offset and recursion data. */ - - DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); - do - { - if ((rrc = match(eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims, - eptrb, match_isgroup)) == MATCH_MATCH) - { - md->recursive = new_recursive.prev; - if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); - return MATCH_MATCH; - } - else if (rrc != MATCH_NOMATCH) return rrc; - - md->recursive = &new_recursive; - memcpy(md->offset_vector, new_recursive.offset_save, - new_recursive.saved_max * sizeof(int)); - callpat += GET(callpat, 1); - } - while (*callpat == OP_ALT); - - DPRINTF(("Recursion didn't match\n")); - md->recursive = new_recursive.prev; - if (new_recursive.offset_save != stacksave) - (pcre_free)(new_recursive.offset_save); - return MATCH_NOMATCH; - } - /* Control never reaches here */ - - /* "Once" brackets are like assertion brackets except that after a match, - the point in the subject string is not moved back. Thus there can never be - a move back into the brackets. Friedl calls these "atomic" subpatterns. - Check the alternative branches in turn - the matching won't pass the KET - for this kind of subpattern. If any one branch matches, we carry on as at - the end of a normal bracket, leaving the subject pointer. */ - - case OP_ONCE: - { - const uschar *prev = ecode; - const uschar *saved_eptr = eptr; - - do - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, - eptrb, match_isgroup)) == MATCH_MATCH) break; - if (rrc != MATCH_NOMATCH) return rrc; - ecode += GET(ecode,1); - } - while (*ecode == OP_ALT); - - /* If hit the end of the group (which could be repeated), fail */ - - if (*ecode != OP_ONCE && *ecode != OP_ALT) return MATCH_NOMATCH; - - /* Continue as from after the assertion, updating the offsets high water - mark, since extracts may have been taken. */ - - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - - offset_top = md->end_offset_top; - eptr = md->end_match_ptr; - - /* For a non-repeating ket, just continue at this level. This also - happens for a repeating ket if no characters were matched in the group. - This is the forcible breaking of infinite loops as implemented in Perl - 5.005. If there is an options reset, it will get obeyed in the normal - course of events. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { - ecode += 1+LINK_SIZE; - break; - } - - /* The repeating kets try the rest of the pattern or restart from the - preceding bracket, in the appropriate order. We need to reset any options - that changed within the bracket before re-running it, so check the next - opcode. */ - - if (ecode[1+LINK_SIZE] == OP_OPT) - { - ims = (ims & ~PCRE_IMS) | ecode[4]; - DPRINTF(("ims set to %02lx at group repeat\n", ims)); - } - - if (*ecode == OP_KETRMIN) - { - if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, - eptrb, 0)) != MATCH_NOMATCH) return rrc; - if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, - match_isgroup)) != MATCH_NOMATCH) return rrc; - } - else /* OP_KETRMAX */ - { - if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, - match_isgroup)) != MATCH_NOMATCH) return rrc; - if ((rrc = match(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, - 0)) != MATCH_NOMATCH) return rrc; - } - } - return MATCH_NOMATCH; - - /* An alternation is the end of a branch; scan along to find the end of the - bracketed group and go to there. */ - - case OP_ALT: - do ecode += GET(ecode,1); while (*ecode == OP_ALT); - break; - - /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating - that it may occur zero times. It may repeat infinitely, or not at all - - i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper - repeat limits are compiled as a number of copies, with the optional ones - preceded by BRAZERO or BRAMINZERO. */ - - case OP_BRAZERO: - { - const uschar *next = ecode+1; - if ((rrc = match(eptr, next, offset_top, md, ims, eptrb, match_isgroup)) - != MATCH_NOMATCH) return rrc; - do next += GET(next,1); while (*next == OP_ALT); - ecode = next + 1+LINK_SIZE; - } - break; - - case OP_BRAMINZERO: - { - const uschar *next = ecode+1; - do next += GET(next,1); while (*next == OP_ALT); - if ((rrc = match(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, - match_isgroup)) != MATCH_NOMATCH) return rrc; - ecode++; - } - break; - - /* End of a group, repeated or non-repeating. If we are at the end of - an assertion "group", stop matching and return MATCH_MATCH, but record the - current high water mark for use by positive assertions. Do this also - for the "once" (not-backup up) groups. */ - - case OP_KET: - case OP_KETRMIN: - case OP_KETRMAX: - { - const uschar *prev = ecode - GET(ecode, 1); - const uschar *saved_eptr = eptrb->saved_eptr; - - eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */ - - if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || - *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || - *prev == OP_ONCE) - { - md->end_match_ptr = eptr; /* For ONCE */ - md->end_offset_top = offset_top; - return MATCH_MATCH; - } - - /* In all other cases except a conditional group we have to check the - group number back at the start and if necessary complete handling an - extraction by setting the offsets and bumping the high water mark. */ - - if (*prev != OP_COND) - { - int offset; - int number = *prev - OP_BRA; - - /* For extended extraction brackets (large number), we have to fish out - the number from a dummy opcode at the start. */ - - if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE); - offset = number << 1; - -#ifdef DEBUG - printf("end bracket %d", number); - printf("\n"); -#endif - - /* Test for a numbered group. This includes groups called as a result - of recursion. Note that whole-pattern recursion is coded as a recurse - into group 0, so it won't be picked up here. Instead, we catch it when - the OP_END is reached. */ - - if (number > 0) - { - md->capture_last = number; - if (offset >= md->offset_max) md->offset_overflow = TRUE; else - { - md->offset_vector[offset] = - md->offset_vector[md->offset_end - number]; - md->offset_vector[offset+1] = eptr - md->start_subject; - if (offset_top <= offset) offset_top = offset + 2; - } - - /* Handle a recursively called group. Restore the offsets - appropriately and continue from after the call. */ - - if (md->recursive != NULL && md->recursive->group_num == number) - { - recursion_info *rec = md->recursive; - DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); - md->recursive = rec->prev; - md->start_match = rec->save_start; - memcpy(md->offset_vector, rec->offset_save, - rec->saved_max * sizeof(int)); - ecode = rec->after_call; - ims = original_ims; - break; - } - } - } - - /* Reset the value of the ims flags, in case they got changed during - the group. */ - - ims = original_ims; - DPRINTF(("ims reset to %02lx\n", ims)); - - /* For a non-repeating ket, just continue at this level. This also - happens for a repeating ket if no characters were matched in the group. - This is the forcible breaking of infinite loops as implemented in Perl - 5.005. If there is an options reset, it will get obeyed in the normal - course of events. */ - - if (*ecode == OP_KET || eptr == saved_eptr) - { - ecode += 1 + LINK_SIZE; - break; - } - - /* The repeating kets try the rest of the pattern or restart from the - preceding bracket, in the appropriate order. */ - - if (*ecode == OP_KETRMIN) - { - if ((rrc = match(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, - 0)) != MATCH_NOMATCH) return rrc; - if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, - match_isgroup)) != MATCH_NOMATCH) return rrc; - } - else /* OP_KETRMAX */ - { - if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, - match_isgroup)) != MATCH_NOMATCH) return rrc; - if ((rrc = match(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, - 0)) != MATCH_NOMATCH) return rrc; - } - } - return MATCH_NOMATCH; - - /* Start of subject unless notbol, or after internal newline if multiline */ - - case OP_CIRC: - if (md->notbol && eptr == md->start_subject) return MATCH_NOMATCH; - if ((ims & PCRE_MULTILINE) != 0) - { - if (eptr != md->start_subject && eptr[-1] != NEWLINE) - return MATCH_NOMATCH; - ecode++; - break; - } - /* ... else fall through */ - - /* Start of subject assertion */ - - case OP_SOD: - if (eptr != md->start_subject) return MATCH_NOMATCH; - ecode++; - break; - - /* Start of match assertion */ - - case OP_SOM: - if (eptr != md->start_subject + md->start_offset) return MATCH_NOMATCH; - ecode++; - break; - - /* Assert before internal newline if multiline, or before a terminating - newline unless endonly is set, else end of subject unless noteol is set. */ - - case OP_DOLL: - if ((ims & PCRE_MULTILINE) != 0) - { - if (eptr < md->end_subject) - { if (*eptr != NEWLINE) return MATCH_NOMATCH; } - else - { if (md->noteol) return MATCH_NOMATCH; } - ecode++; - break; - } - else - { - if (md->noteol) return MATCH_NOMATCH; - if (!md->endonly) - { - if (eptr < md->end_subject - 1 || - (eptr == md->end_subject - 1 && *eptr != NEWLINE)) - return MATCH_NOMATCH; - ecode++; - break; - } - } - /* ... else fall through */ - - /* End of subject assertion (\z) */ - - case OP_EOD: - if (eptr < md->end_subject) return MATCH_NOMATCH; - ecode++; - break; - - /* End of subject or ending \n assertion (\Z) */ - - case OP_EODN: - if (eptr < md->end_subject - 1 || - (eptr == md->end_subject - 1 && *eptr != NEWLINE)) return MATCH_NOMATCH; - ecode++; - break; - - /* Word boundary assertions */ - - case OP_NOT_WORD_BOUNDARY: - case OP_WORD_BOUNDARY: - { - BOOL prev_is_word, cur_is_word; - - /* Find out if the previous and current characters are "word" characters. - It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to - be "non-word" characters. */ - -#ifdef SUPPORT_UTF8 - if (md->utf8) - { - if (eptr == md->start_subject) prev_is_word = FALSE; else - { - const uschar *lastptr = eptr - 1; - while((*lastptr & 0xc0) == 0x80) lastptr--; - GETCHAR(c, lastptr); - prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; - } - if (eptr >= md->end_subject) cur_is_word = FALSE; else - { - GETCHAR(c, eptr); - cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; - } - } - else -#endif - - /* More streamlined when not in UTF-8 mode */ - - { - prev_is_word = (eptr != md->start_subject) && - ((md->ctypes[eptr[-1]] & ctype_word) != 0); - cur_is_word = (eptr < md->end_subject) && - ((md->ctypes[*eptr] & ctype_word) != 0); - } - - /* Now see if the situation is what we want */ - - if ((*ecode++ == OP_WORD_BOUNDARY)? - cur_is_word == prev_is_word : cur_is_word != prev_is_word) - return MATCH_NOMATCH; - } - break; - - /* Match a single character type; inline for speed */ - - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE) - return MATCH_NOMATCH; - if (eptr++ >= md->end_subject) return MATCH_NOMATCH; -#ifdef SUPPORT_UTF8 - if (md->utf8) - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; -#endif - ecode++; - break; - - /* Match a single byte, even in UTF-8 mode. This opcode really does match - any byte, even newline, independent of the setting of PCRE_DOTALL. */ - - case OP_ANYBYTE: - if (eptr++ >= md->end_subject) return MATCH_NOMATCH; - ecode++; - break; - - case OP_NOT_DIGIT: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINCTEST(c, eptr); - if ( -#ifdef SUPPORT_UTF8 - c < 256 && -#endif - (md->ctypes[c] & ctype_digit) != 0 - ) - return MATCH_NOMATCH; - ecode++; - break; - - case OP_DIGIT: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINCTEST(c, eptr); - if ( -#ifdef SUPPORT_UTF8 - c >= 256 || -#endif - (md->ctypes[c] & ctype_digit) == 0 - ) - return MATCH_NOMATCH; - ecode++; - break; - - case OP_NOT_WHITESPACE: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINCTEST(c, eptr); - if ( -#ifdef SUPPORT_UTF8 - c < 256 && -#endif - (md->ctypes[c] & ctype_space) != 0 - ) - return MATCH_NOMATCH; - ecode++; - break; - - case OP_WHITESPACE: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINCTEST(c, eptr); - if ( -#ifdef SUPPORT_UTF8 - c >= 256 || -#endif - (md->ctypes[c] & ctype_space) == 0 - ) - return MATCH_NOMATCH; - ecode++; - break; - - case OP_NOT_WORDCHAR: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINCTEST(c, eptr); - if ( -#ifdef SUPPORT_UTF8 - c < 256 && -#endif - (md->ctypes[c] & ctype_word) != 0 - ) - return MATCH_NOMATCH; - ecode++; - break; - - case OP_WORDCHAR: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINCTEST(c, eptr); - if ( -#ifdef SUPPORT_UTF8 - c >= 256 || -#endif - (md->ctypes[c] & ctype_word) == 0 - ) - return MATCH_NOMATCH; - ecode++; - break; - - /* Match a back reference, possibly repeatedly. Look past the end of the - item to see if there is repeat information following. The code is similar - to that for character classes, but repeated for efficiency. Then obey - similar code to character type repeats - written out again for speed. - However, if the referenced string is the empty string, always treat - it as matched, any number of times (otherwise there could be infinite - loops). */ - - case OP_REF: - { - int length; - int offset = GET2(ecode, 1) << 1; /* Doubled ref number */ - ecode += 3; /* Advance past item */ - - /* If the reference is unset, set the length to be longer than the amount - of subject left; this ensures that every attempt at a match fails. We - can't just fail here, because of the possibility of quantifiers with zero - minima. */ - - length = (offset >= offset_top || md->offset_vector[offset] < 0)? - md->end_subject - eptr + 1 : - md->offset_vector[offset+1] - md->offset_vector[offset]; - - /* Set up for repetition, or handle the non-repeated case */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = GET2(ecode, 1); - max = GET2(ecode, 3); - if (max == 0) max = INT_MAX; - ecode += 5; - break; - - default: /* No repeat follows */ - if (!match_ref(offset, eptr, length, md, ims)) return MATCH_NOMATCH; - eptr += length; - continue; /* With the main loop */ - } - - /* If the length of the reference is zero, just continue with the - main loop. */ - - if (length == 0) continue; - - /* First, ensure the minimum number of matches are present. We get back - the length of the reference string explicitly rather than passing the - address of eptr, so that eptr can be a register variable. */ - - for (i = 1; i <= min; i++) - { - if (!match_ref(offset, eptr, length, md, ims)) return MATCH_NOMATCH; - eptr += length; - } - - /* If min = max, continue at the same level without recursion. - They are not both allowed to be zero. */ - - if (min == max) continue; - - /* If minimizing, keep trying and advancing the pointer */ - - if (minimize) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || !match_ref(offset, eptr, length, md, ims)) - return MATCH_NOMATCH; - eptr += length; - } - /* Control never gets here */ - } - - /* If maximizing, find the longest string and work backwards */ - - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (!match_ref(offset, eptr, length, md, ims)) break; - eptr += length; - } - while (eptr >= pp) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - eptr -= length; - } - return MATCH_NOMATCH; - } - } - /* Control never gets here */ - - - - /* Match a bit-mapped character class, possibly repeatedly. This op code is - used when all the characters in the class have values in the range 0-255. - The only difference between OP_CLASS and OP_NCLASS occurs when a data - character outside the range is encountered. - - First, look past the end of the item to see if there is repeat information - following. Then obey similar code to character type repeats - written out - again for speed. */ - - case OP_NCLASS: - case OP_CLASS: - { - const uschar *data = ecode + 1; /* Save for matching */ - ecode += 33; /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = GET2(ecode, 1); - max = GET2(ecode, 3); - if (max == 0) max = INT_MAX; - ecode += 5; - break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINC(c, eptr); - if (c > 255) - { - if (op == OP_CLASS) return MATCH_NOMATCH; - } - else - { - if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; - } - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) return MATCH_NOMATCH; - c = *eptr++; - if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; - } - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) - { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINC(c, eptr); - if (c > 255) - { - if (op == OP_CLASS) return MATCH_NOMATCH; - } - else - { - if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; - } - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; - c = *eptr++; - if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; - } - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. */ - - else - { - const uschar *pp = eptr; - -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c > 255) - { - if (op == OP_CLASS) break; - } - else - { - if ((data[c/8] & (1 << (c&7))) == 0) break; - } - eptr += len; - } - for (;;) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject) break; - c = *eptr; - if ((data[c/8] & (1 << (c&7))) == 0) break; - eptr++; - } - while (eptr >= pp) - { - if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - } - } - - return MATCH_NOMATCH; - } - } - /* Control never gets here */ - - - /* Match an extended character class. This opcode is encountered only - in UTF-8 mode, because that's the only time it is compiled. */ - -#ifdef SUPPORT_UTF8 - case OP_XCLASS: - { - const uschar *data = ecode + 1 + LINK_SIZE; /* Save for matching */ - ecode += GET(ecode, 1); /* Advance past the item */ - - switch (*ecode) - { - case OP_CRSTAR: - case OP_CRMINSTAR: - case OP_CRPLUS: - case OP_CRMINPLUS: - case OP_CRQUERY: - case OP_CRMINQUERY: - c = *ecode++ - OP_CRSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - break; - - case OP_CRRANGE: - case OP_CRMINRANGE: - minimize = (*ecode == OP_CRMINRANGE); - min = GET2(ecode, 1); - max = GET2(ecode, 3); - if (max == 0) max = INT_MAX; - ecode += 5; - break; - - default: /* No repeat follows */ - min = max = 1; - break; - } - - /* First, ensure the minimum number of matches are present. */ - - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINC(c, eptr); - if (!match_xclass(c, data)) return MATCH_NOMATCH; - } - - /* If max == min we can continue with the main loop without the - need to recurse. */ - - if (min == max) continue; - - /* If minimizing, keep testing the rest of the expression and advancing - the pointer while it matches the class. */ - - if (minimize) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINC(c, eptr); - if (!match_xclass(c, data)) return MATCH_NOMATCH; - } - /* Control never gets here */ - } - - /* If maximizing, find the longest possible run, then work backwards. */ - - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (!match_xclass(c, data)) break; - eptr += len; - } - for(;;) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr) - } - return MATCH_NOMATCH; - } - - /* Control never gets here */ - } -#endif /* End of XCLASS */ - - /* Match a run of characters */ - - case OP_CHARS: - { - register int length = ecode[1]; - ecode += 2; - -#ifdef DEBUG /* Sigh. Some compilers never learn. */ - if (eptr >= md->end_subject) - printf("matching subject against pattern "); - else - { - printf("matching subject "); - pchars(eptr, length, TRUE, md); - printf(" against pattern "); - } - pchars(ecode, length, FALSE, md); - printf("\n"); -#endif - - if (length > md->end_subject - eptr) return MATCH_NOMATCH; - if ((ims & PCRE_CASELESS) != 0) - { - while (length-- > 0) - if (md->lcc[*ecode++] != md->lcc[*eptr++]) - return MATCH_NOMATCH; - } - else - { - while (length-- > 0) if (*ecode++ != *eptr++) return MATCH_NOMATCH; - } - } - break; - - /* Match a single character repeatedly; different opcodes share code. */ - - case OP_EXACT: - min = max = GET2(ecode, 1); - ecode += 3; - goto REPEATCHAR; - - case OP_UPTO: - case OP_MINUPTO: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_MINUPTO; - ecode += 3; - goto REPEATCHAR; - - case OP_STAR: - case OP_MINSTAR: - case OP_PLUS: - case OP_MINPLUS: - case OP_QUERY: - case OP_MINQUERY: - c = *ecode++ - OP_STAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single-character matches. We can give - up quickly if there are fewer than the minimum number of characters left in - the subject. */ - - REPEATCHAR: -#ifdef SUPPORT_UTF8 - if (md->utf8) - { - int len = 1; - const uschar *charptr = ecode; - GETCHARLEN(c, ecode, len); - if (min * len > md->end_subject - eptr) return MATCH_NOMATCH; - ecode += len; - - /* Handle multibyte character matching specially here. There is no - support for any kind of casing for multibyte characters. */ - - if (len > 1) - { - for (i = 1; i <= min; i++) - { - if (memcmp(eptr, charptr, len) != 0) return MATCH_NOMATCH; - eptr += len; - } - - if (min == max) continue; - - if (minimize) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || - eptr >= md->end_subject || - memcmp(eptr, charptr, len) != 0) - return MATCH_NOMATCH; - eptr += len; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr > md->end_subject - len || - memcmp(eptr, charptr, len) != 0) - break; - eptr += len; - } - while (eptr >= pp) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - eptr -= len; - } - return MATCH_NOMATCH; - } - /* Control never gets here */ - } - - /* If the length of a UTF-8 character is 1, we fall through here, and - obey the code as for non-UTF-8 characters below, though in this case the - value of c will always be < 128. */ - } - else -#endif - - /* When not in UTF-8 mode, load a single-byte character. */ - { - if (min > md->end_subject - eptr) return MATCH_NOMATCH; - c = *ecode++; - } - - /* The value of c at this point is always less than 256, though we may or - may not be in UTF-8 mode. The code is duplicated for the caseless and - caseful cases, for speed, since matching characters is likely to be quite - common. First, ensure the minimum number of matches are present. If min = - max, continue at the same level without recursing. Otherwise, if - minimizing, keep trying the rest of the expression and advancing one - matching character if failing, up to the maximum. Alternatively, if - maximizing, find the maximum number of characters and work backwards. */ - - DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max, - max, eptr)); - - if ((ims & PCRE_CASELESS) != 0) - { - c = md->lcc[c]; - for (i = 1; i <= min; i++) - if (c != md->lcc[*eptr++]) return MATCH_NOMATCH; - if (min == max) continue; - if (minimize) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject || - c != md->lcc[*eptr++]) - return MATCH_NOMATCH; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; - eptr++; - } - while (eptr >= pp) - if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - return MATCH_NOMATCH; - } - /* Control never gets here */ - } - - /* Caseful comparisons (includes all multi-byte characters) */ - - else - { - for (i = 1; i <= min; i++) if (c != *eptr++) return MATCH_NOMATCH; - if (min == max) continue; - if (minimize) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject || c != *eptr++) - return MATCH_NOMATCH; - } - /* Control never gets here */ - } - else - { - const uschar *pp = eptr; - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c != *eptr) break; - eptr++; - } - while (eptr >= pp) - if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - return MATCH_NOMATCH; - } - } - /* Control never gets here */ - - /* Match a negated single one-byte character. The character we are - checking can be multibyte. */ - - case OP_NOT: - if (eptr >= md->end_subject) return MATCH_NOMATCH; - ecode++; - GETCHARINCTEST(c, eptr); - if ((ims & PCRE_CASELESS) != 0) - { -#ifdef SUPPORT_UTF8 - if (c < 256) -#endif - c = md->lcc[c]; - if (md->lcc[*ecode++] == c) return MATCH_NOMATCH; - } - else - { - if (*ecode++ == c) return MATCH_NOMATCH; - } - break; - - /* Match a negated single one-byte character repeatedly. This is almost a - repeat of the code for a repeated single character, but I haven't found a - nice way of commoning these up that doesn't require a test of the - positive/negative option for each character match. Maybe that wouldn't add - very much to the time taken, but character matching *is* what this is all - about... */ - - case OP_NOTEXACT: - min = max = GET2(ecode, 1); - ecode += 3; - goto REPEATNOTCHAR; - - case OP_NOTUPTO: - case OP_NOTMINUPTO: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_NOTMINUPTO; - ecode += 3; - goto REPEATNOTCHAR; - - case OP_NOTSTAR: - case OP_NOTMINSTAR: - case OP_NOTPLUS: - case OP_NOTMINPLUS: - case OP_NOTQUERY: - case OP_NOTMINQUERY: - c = *ecode++ - OP_NOTSTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single-character (less than 255) matches. - We can give up quickly if there are fewer than the minimum number of - characters left in the subject. */ - - REPEATNOTCHAR: - if (min > md->end_subject - eptr) return MATCH_NOMATCH; - c = *ecode++; - - /* The code is duplicated for the caseless and caseful cases, for speed, - since matching characters is likely to be quite common. First, ensure the - minimum number of matches are present. If min = max, continue at the same - level without recursing. Otherwise, if minimizing, keep trying the rest of - the expression and advancing one matching character if failing, up to the - maximum. Alternatively, if maximizing, find the maximum number of - characters and work backwards. */ - - DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, - max, eptr)); - - if ((ims & PCRE_CASELESS) != 0) - { - c = md->lcc[c]; - -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - register int d; - for (i = 1; i <= min; i++) - { - GETCHARINC(d, eptr); - if (d < 256) d = md->lcc[d]; - if (c == d) return MATCH_NOMATCH; - } - } - else -#endif - - /* Not UTF-8 mode */ - { - for (i = 1; i <= min; i++) - if (c == md->lcc[*eptr++]) return MATCH_NOMATCH; - } - - if (min == max) continue; - - if (minimize) - { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - register int d; - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - GETCHARINC(d, eptr); - if (d < 256) d = md->lcc[d]; - if (i >= max || eptr >= md->end_subject || c == d) - return MATCH_NOMATCH; - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject || c == md->lcc[*eptr++]) - return MATCH_NOMATCH; - } - } - /* Control never gets here */ - } - - /* Maximize case */ - - else - { - const uschar *pp = eptr; - -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - register int d; - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(d, eptr, len); - if (d < 256) d = md->lcc[d]; - if (c == d) break; - eptr += len; - } - for(;;) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; - eptr++; - } - while (eptr >= pp) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - eptr--; - } - } - - return MATCH_NOMATCH; - } - /* Control never gets here */ - } - - /* Caseful comparisons */ - - else - { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - register int d; - for (i = 1; i <= min; i++) - { - GETCHARINC(d, eptr); - if (c == d) return MATCH_NOMATCH; - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = 1; i <= min; i++) - if (c == *eptr++) return MATCH_NOMATCH; - } - - if (min == max) continue; - - if (minimize) - { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - register int d; - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - GETCHARINC(d, eptr); - if (i >= max || eptr >= md->end_subject || c == d) - return MATCH_NOMATCH; - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject || c == *eptr++) - return MATCH_NOMATCH; - } - } - /* Control never gets here */ - } - - /* Maximize case */ - - else - { - const uschar *pp = eptr; - -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - register int d; - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(d, eptr, len); - if (c == d) break; - eptr += len; - } - for(;;) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || c == *eptr) break; - eptr++; - } - while (eptr >= pp) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - eptr--; - } - } - - return MATCH_NOMATCH; - } - } - /* Control never gets here */ - - /* Match a single character type repeatedly; several different opcodes - share code. This is very similar to the code for single characters, but we - repeat it in the interests of efficiency. */ - - case OP_TYPEEXACT: - min = max = GET2(ecode, 1); - minimize = TRUE; - ecode += 3; - goto REPEATTYPE; - - case OP_TYPEUPTO: - case OP_TYPEMINUPTO: - min = 0; - max = GET2(ecode, 1); - minimize = *ecode == OP_TYPEMINUPTO; - ecode += 3; - goto REPEATTYPE; - - case OP_TYPESTAR: - case OP_TYPEMINSTAR: - case OP_TYPEPLUS: - case OP_TYPEMINPLUS: - case OP_TYPEQUERY: - case OP_TYPEMINQUERY: - c = *ecode++ - OP_TYPESTAR; - minimize = (c & 1) != 0; - min = rep_min[c]; /* Pick up values from tables; */ - max = rep_max[c]; /* zero for max => infinity */ - if (max == 0) max = INT_MAX; - - /* Common code for all repeated single character type matches. Note that - in UTF-8 mode, '.' matches a character of any length, but for the other - character types, the valid characters are all one-byte long. */ - - REPEATTYPE: - ctype = *ecode++; /* Code for the character type */ - - /* First, ensure the minimum number of matches are present. Use inline - code for maximizing the speed, and do the type test once at the start - (i.e. keep it out of the loop). Also we can test that there are at least - the minimum number of bytes before we start. This isn't as effective in - UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that - is tidier. */ - - if (min > md->end_subject - eptr) return MATCH_NOMATCH; - if (min > 0) - { -#ifdef SUPPORT_UTF8 - if (md->utf8) switch(ctype) - { - case OP_ANY: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0)) - return MATCH_NOMATCH; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - break; - - case OP_ANYBYTE: - eptr += min; - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject) return MATCH_NOMATCH; - GETCHARINC(c, eptr); - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) - return MATCH_NOMATCH; - } - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) - return MATCH_NOMATCH; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0)) - return MATCH_NOMATCH; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) - return MATCH_NOMATCH; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - - case OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0)) - return MATCH_NOMATCH; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) - { - if (eptr >= md->end_subject || - *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) - return MATCH_NOMATCH; - /* No need to skip more bytes - we know it's a 1-byte character */ - } - break; - } - else -#endif - - /* Code for the non-UTF-8 case for minimum matching */ - - switch(ctype) - { - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0) - { - for (i = 1; i <= min; i++) - if (*eptr++ == NEWLINE) return MATCH_NOMATCH; - } - else eptr += min; - break; - - case OP_ANYBYTE: - eptr += min; - break; - - case OP_NOT_DIGIT: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_digit) != 0) return MATCH_NOMATCH; - break; - - case OP_DIGIT: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_digit) == 0) return MATCH_NOMATCH; - break; - - case OP_NOT_WHITESPACE: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_space) != 0) return MATCH_NOMATCH; - break; - - case OP_WHITESPACE: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_space) == 0) return MATCH_NOMATCH; - break; - - case OP_NOT_WORDCHAR: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_word) != 0) - return MATCH_NOMATCH; - break; - - case OP_WORDCHAR: - for (i = 1; i <= min; i++) - if ((md->ctypes[*eptr++] & ctype_word) == 0) - return MATCH_NOMATCH; - break; - } - } - - /* If min = max, continue at the same level without recursing */ - - if (min == max) continue; - - /* If minimizing, we have to test the rest of the pattern before each - subsequent match. Again, separate the UTF-8 case for speed. */ - - if (minimize) - { -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - if (md->utf8) - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; - - GETCHARINC(c, eptr); - switch(ctype) - { - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return MATCH_NOMATCH; - break; - - case OP_ANYBYTE: - break; - - case OP_NOT_DIGIT: - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) - return MATCH_NOMATCH; - break; - - case OP_DIGIT: - if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) - return MATCH_NOMATCH; - break; - - case OP_NOT_WHITESPACE: - if (c < 256 && (md->ctypes[c] & ctype_space) != 0) - return MATCH_NOMATCH; - break; - - case OP_WHITESPACE: - if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) - return MATCH_NOMATCH; - break; - - case OP_NOT_WORDCHAR: - if (c < 256 && (md->ctypes[c] & ctype_word) != 0) - return MATCH_NOMATCH; - break; - - case OP_WORDCHAR: - if (c >= 256 && (md->ctypes[c] & ctype_word) == 0) - return MATCH_NOMATCH; - break; - } - } - } - else -#endif - /* Not UTF-8 mode */ - { - for (i = min;; i++) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; - c = *eptr++; - switch(ctype) - { - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return MATCH_NOMATCH; - break; - - case OP_ANYBYTE: - break; - - case OP_NOT_DIGIT: - if ((md->ctypes[c] & ctype_digit) != 0) return MATCH_NOMATCH; - break; - - case OP_DIGIT: - if ((md->ctypes[c] & ctype_digit) == 0) return MATCH_NOMATCH; - break; - - case OP_NOT_WHITESPACE: - if ((md->ctypes[c] & ctype_space) != 0) return MATCH_NOMATCH; - break; - - case OP_WHITESPACE: - if ((md->ctypes[c] & ctype_space) == 0) return MATCH_NOMATCH; - break; - - case OP_NOT_WORDCHAR: - if ((md->ctypes[c] & ctype_word) != 0) return MATCH_NOMATCH; - break; - - case OP_WORDCHAR: - if ((md->ctypes[c] & ctype_word) == 0) return MATCH_NOMATCH; - break; - } - } - } - /* Control never gets here */ - } - - /* If maximizing it is worth using inline code for speed, doing the type - test once at the start (i.e. keep it out of the loop). Again, keep the - UTF-8 stuff separate. */ - - else - { - const uschar *pp = eptr; - -#ifdef SUPPORT_UTF8 - /* UTF-8 mode */ - - if (md->utf8) - { - switch(ctype) - { - case OP_ANY: - - /* Special code is required for UTF8, but when the maximum is unlimited - we don't need it, so we repeat the non-UTF8 code. This is probably - worth it, because .* is quite a common idiom. */ - - if (max < INT_MAX) - { - if ((ims & PCRE_DOTALL) == 0) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || *eptr == NEWLINE) break; - eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - } - else - { - for (i = min; i < max; i++) - { - eptr++; - while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; - } - } - } - - /* Handle unlimited UTF-8 repeat */ - - else - { - if ((ims & PCRE_DOTALL) == 0) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || *eptr == NEWLINE) break; - eptr++; - } - break; - } - else - { - c = max - min; - if (c > md->end_subject - eptr) c = md->end_subject - eptr; - eptr += c; - } - } - break; - - /* The byte case is the same as non-UTF8 */ - - case OP_ANYBYTE: - c = max - min; - if (c > md->end_subject - eptr) c = md->end_subject - eptr; - eptr += c; - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; - eptr+= len; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; - eptr+= len; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; - eptr+= len; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; - eptr+= len; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; - eptr+= len; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { - int len = 1; - if (eptr >= md->end_subject) break; - GETCHARLEN(c, eptr, len); - if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; - eptr+= len; - } - break; - } - - /* eptr is now past the end of the maximum run */ - - for(;;) - { - if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - if (eptr-- == pp) break; /* Stop if tried at original pos */ - BACKCHAR(eptr); - } - } - else -#endif - - /* Not UTF-8 mode */ - { - switch(ctype) - { - case OP_ANY: - if ((ims & PCRE_DOTALL) == 0) - { - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || *eptr == NEWLINE) break; - eptr++; - } - break; - } - /* For DOTALL case, fall through and treat as \C */ - - case OP_ANYBYTE: - c = max - min; - if (c > md->end_subject - eptr) c = md->end_subject - eptr; - eptr += c; - break; - - case OP_NOT_DIGIT: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) - break; - eptr++; - } - break; - - case OP_DIGIT: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) - break; - eptr++; - } - break; - - case OP_NOT_WHITESPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) - break; - eptr++; - } - break; - - case OP_WHITESPACE: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) - break; - eptr++; - } - break; - - case OP_NOT_WORDCHAR: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) - break; - eptr++; - } - break; - - case OP_WORDCHAR: - for (i = min; i < max; i++) - { - if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) - break; - eptr++; - } - break; - } - - /* eptr is now past the end of the maximum run */ - - while (eptr >= pp) - { - if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != - MATCH_NOMATCH) return rrc; - } - } - - /* Get here if we can't make it match with any permitted repetitions */ - - return MATCH_NOMATCH; - } - /* Control never gets here */ - - /* There's been some horrible disaster. Since all codes > OP_BRA are - for capturing brackets, and there shouldn't be any gaps between 0 and - OP_BRA, arrival here can only mean there is something seriously wrong - in the code above or the OP_xxx definitions. */ - - default: - DPRINTF(("Unknown opcode %d\n", *ecode)); - return PCRE_ERROR_UNKNOWN_NODE; - } - - /* Do not stick any code in here without much thought; it is assumed - that "continue" in the code above comes out to here to repeat the main - loop. */ - - } /* End of main loop */ -/* Control never reaches here */ -} - - - - -/************************************************* -* Execute a Regular Expression * -*************************************************/ - -/* This function applies a compiled re to a subject string and picks out -portions of the string if it matches. Two elements in the vector are set for -each substring: the offsets to the start and end of the substring. - -Arguments: - external_re points to the compiled expression - extra_data points to extra data or is NULL - subject points to the subject string - length length of subject string (may contain binary zeros) - start_offset where to start in the subject string - options option bits - offsets points to a vector of ints to be filled in with offsets - offsetcount the number of elements in the vector - -Returns: > 0 => success; value is the number of elements filled in - = 0 => success, but offsets is not big enough - -1 => failed to match - < -1 => some kind of unexpected problem -*/ - -int -pcre_exec(const pcre *external_re, const pcre_extra *extra_data, - const char *subject, int length, int start_offset, int options, int *offsets, - int offsetcount) -{ -int rc, resetcount, ocount; -int first_byte = -1; -int req_byte = -1; -int req_byte2 = -1; -unsigned long int ims = 0; -BOOL using_temporary_offsets = FALSE; -BOOL anchored; -BOOL startline; -BOOL first_byte_caseless = FALSE; -BOOL req_byte_caseless = FALSE; -match_data match_block; -const uschar *start_bits = NULL; -const uschar *start_match = (const uschar *)subject + start_offset; -const uschar *end_subject; -const uschar *req_byte_ptr = start_match - 1; -const pcre_study_data *study; -const real_pcre *re = (const real_pcre *)external_re; - -/* Plausibility checks */ - -if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; -if (re == NULL || subject == NULL || - (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; - -/* Fish out the optional data from the extra_data structure, first setting -the default values. */ - -study = NULL; -match_block.match_limit = MATCH_LIMIT; -match_block.callout_data = NULL; - -if (extra_data != NULL) - { - register unsigned int flags = extra_data->flags; - if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) - study = extra_data->study_data; - if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) - match_block.match_limit = extra_data->match_limit; - if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) - match_block.callout_data = extra_data->callout_data; - } - -/* Now we have re supposedly pointing to the regex */ - -if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; - -anchored = ((re->options | options) & PCRE_ANCHORED) != 0; -startline = (re->options & PCRE_STARTLINE) != 0; - -match_block.start_code = - (const uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size; -match_block.start_subject = (const uschar *)subject; -match_block.start_offset = start_offset; -match_block.end_subject = match_block.start_subject + length; -end_subject = match_block.end_subject; - -match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; -match_block.utf8 = (re->options & PCRE_UTF8) != 0; - -match_block.notbol = (options & PCRE_NOTBOL) != 0; -match_block.noteol = (options & PCRE_NOTEOL) != 0; -match_block.notempty = (options & PCRE_NOTEMPTY) != 0; - -match_block.recursive = NULL; /* No recursion at top level */ - -match_block.lcc = re->tables + lcc_offset; -match_block.ctypes = re->tables + ctypes_offset; - -/* The ims options can vary during the matching as a result of the presence -of (?ims) items in the pattern. They are kept in a local variable so that -restoring at the exit of a group is easy. */ - -ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL); - -/* If the expression has got more back references than the offsets supplied can -hold, we get a temporary bit of working store to use during the matching. -Otherwise, we can use the vector supplied, rounding down its size to a multiple -of 3. */ - -ocount = offsetcount - (offsetcount % 3); - -if (re->top_backref > 0 && re->top_backref >= ocount/3) - { - ocount = re->top_backref * 3 + 3; - match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); - if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; - using_temporary_offsets = TRUE; - DPRINTF(("Got memory to hold back references\n")); - } -else match_block.offset_vector = offsets; - -match_block.offset_end = ocount; -match_block.offset_max = (2*ocount)/3; -match_block.offset_overflow = FALSE; -match_block.capture_last = -1; - -/* Compute the minimum number of offsets that we need to reset each time. Doing -this makes a huge difference to execution time when there aren't many brackets -in the pattern. */ - -resetcount = 2 + re->top_bracket * 2; -if (resetcount > offsetcount) resetcount = ocount; - -/* Reset the working variable associated with each extraction. These should -never be used unless previously set, but they get saved and restored, and so we -initialize them to avoid reading uninitialized locations. */ - -if (match_block.offset_vector != NULL) - { - register int *iptr = match_block.offset_vector + ocount; - register int *iend = iptr - resetcount/2 + 1; - while (--iptr >= iend) *iptr = -1; - } - -/* Set up the first character to match, if available. The first_byte value is -never set for an anchored regular expression, but the anchoring may be forced -at run time, so we have to test for anchoring. The first char may be unset for -an unanchored pattern, of course. If there's no first char and the pattern was -studied, there may be a bitmap of possible first characters. */ - -if (!anchored) - { - if ((re->options & PCRE_FIRSTSET) != 0) - { - first_byte = re->first_byte & 255; - if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) - first_byte = match_block.lcc[first_byte]; - } - else - if (!startline && study != NULL && - (study->options & PCRE_STUDY_MAPPED) != 0) - start_bits = study->start_bits; - } - -/* For anchored or unanchored matches, there may be a "last known required -character" set. */ - -if ((re->options & PCRE_REQCHSET) != 0) - { - req_byte = re->req_byte & 255; - req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; - req_byte2 = (re->tables + fcc_offset)[req_byte]; /* case flipped */ - } - -/* Loop for handling unanchored repeated matching attempts; for anchored regexs -the loop runs just once. */ - -do - { - register int *iptr = match_block.offset_vector; - register int *iend = iptr + resetcount; - - /* Reset the maximum number of extractions we might see. */ - - while (iptr < iend) *iptr++ = -1; - - /* Advance to a unique first char if possible */ - - if (first_byte >= 0) - { - if (first_byte_caseless) - while (start_match < end_subject && - match_block.lcc[*start_match] != first_byte) - start_match++; - else - while (start_match < end_subject && *start_match != first_byte) - start_match++; - } - - /* Or to just after \n for a multiline match if possible */ - - else if (startline) - { - if (start_match > match_block.start_subject + start_offset) - { - while (start_match < end_subject && start_match[-1] != NEWLINE) - start_match++; - } - } - - /* Or to a non-unique first char after study */ - - else if (start_bits != NULL) - { - while (start_match < end_subject) - { - register int c = *start_match; - if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break; - } - } - -#ifdef DEBUG /* Sigh. Some compilers never learn. */ - printf(">>>> Match against: "); - pchars(start_match, end_subject - start_match, TRUE, &match_block); - printf("\n"); -#endif - - /* If req_byte is set, we know that that character must appear in the subject - for the match to succeed. If the first character is set, req_byte must be - later in the subject; otherwise the test starts at the match point. This - optimization can save a huge amount of backtracking in patterns with nested - unlimited repeats that aren't going to match. Writing separate code for - cased/caseless versions makes it go faster, as does using an autoincrement - and backing off on a match. - - HOWEVER: when the subject string is very, very long, searching to its end can - take a long time, and give bad performance on quite ordinary patterns. This - showed up when somebody was matching /^C/ on a 32-megabyte string... so we - don't do this when the string is sufficiently long. */ - - if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX) - { - register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0); - - /* We don't need to repeat the search if we haven't yet reached the - place we found it at last time. */ - - if (p > req_byte_ptr) - { - if (req_byte_caseless) - { - while (p < end_subject) - { - register int pp = *p++; - if (pp == req_byte || pp == req_byte2) { p--; break; } - } - } - else - { - while (p < end_subject) - { - if (*p++ == req_byte) { p--; break; } - } - } - - /* If we can't find the required character, break the matching loop */ - - if (p >= end_subject) break; - - /* If we have found the required character, save the point where we - found it, so that we don't search again next time round the loop if - the start hasn't passed this character yet. */ - - req_byte_ptr = p; - } - } - - /* When a match occurs, substrings will be set for all internal extractions; - we just need to set up the whole thing as substring 0 before returning. If - there were too many extractions, set the return code to zero. In the case - where we had to get some local store to hold offsets for backreferences, copy - those back references that we can. In this case there need not be overflow - if certain parts of the pattern were not used. */ - - match_block.start_match = start_match; - match_block.match_call_count = 0; - - rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL, - match_isgroup); - - if (rc == MATCH_NOMATCH) - { - start_match++; -#ifdef SUPPORT_UTF8 - if (match_block.utf8) - while((*start_match & 0xc0) == 0x80) start_match++; -#endif - continue; - } - - if (rc != MATCH_MATCH) - { - DPRINTF((">>>> error: returning %d\n", rc)); - return rc; - } - - /* We have a match! Copy the offset information from temporary store if - necessary */ - - if (using_temporary_offsets) - { - if (offsetcount >= 4) - { - memcpy(offsets + 2, match_block.offset_vector + 2, - (offsetcount - 2) * sizeof(int)); - DPRINTF(("Copied offsets from temporary memory\n")); - } - if (match_block.end_offset_top > offsetcount) - match_block.offset_overflow = TRUE; - - DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(match_block.offset_vector); - } - - rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; - - if (offsetcount < 2) rc = 0; else - { - offsets[0] = start_match - match_block.start_subject; - offsets[1] = match_block.end_match_ptr - match_block.start_subject; - } - - DPRINTF((">>>> returning %d\n", rc)); - return rc; - } - -/* This "while" is the end of the "do" above */ - -while (!anchored && start_match <= end_subject); - -if (using_temporary_offsets) - { - DPRINTF(("Freeing temporary memory\n")); - (pcre_free)(match_block.offset_vector); - } - -DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); - -return PCRE_ERROR_NOMATCH; -} - -/* End of pcre.c */ diff --git a/libpcre/pcre.def b/libpcre/pcre.def deleted file mode 100644 index 4f6c4bff40..0000000000 --- a/libpcre/pcre.def +++ /dev/null @@ -1,22 +0,0 @@ -EXPORTS - -pcre_malloc DATA -pcre_free DATA - -pcre_compile -pcre_copy_substring -pcre_exec -pcre_get_substring -pcre_get_substring_list -pcre_free_substring -pcre_free_substring_list -pcre_info -pcre_fullinfo -pcre_maketables -pcre_study -pcre_version - -regcomp -regexec -regerror -regfree diff --git a/libpcre/pcre.h b/libpcre/pcre.h new file mode 100644 index 0000000000..b2596a83d0 --- /dev/null +++ b/libpcre/pcre.h @@ -0,0 +1,258 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* In its original form, this is the .in file that is transformed by +"configure" into pcre.h. + + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + +#ifndef _PCRE_H +#define _PCRE_H + +/* The file pcre.h is build by "configure". Do not edit it; instead +make changes to pcre.in. */ + +#define PCRE_MAJOR 6 +#define PCRE_MINOR 3 +#define PCRE_DATE 15-Aug-2005 + +/* Win32 uses DLL by default; it needs special stuff for exported functions. */ + +#ifdef _WIN32 +# ifdef PCRE_DEFINITION +# ifdef DLL_EXPORT +# define PCRE_DATA_SCOPE __declspec(dllexport) +# endif +# else +# ifndef PCRE_STATIC +# define PCRE_DATA_SCOPE extern __declspec(dllimport) +# endif +# endif +#endif + +/* For other operating systems, we use the standard "extern". */ + +#ifndef PCRE_DATA_SCOPE +# ifdef __cplusplus +# define PCRE_DATA_SCOPE extern "C" +# else +# define PCRE_DATA_SCOPE extern +# endif +#endif + +/* Have to include stdlib.h in order to ensure that size_t is defined; +it is needed here for malloc. */ + +#include + +/* Allow for C++ users */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* Options */ + +#define PCRE_CASELESS 0x00000001 +#define PCRE_MULTILINE 0x00000002 +#define PCRE_DOTALL 0x00000004 +#define PCRE_EXTENDED 0x00000008 +#define PCRE_ANCHORED 0x00000010 +#define PCRE_DOLLAR_ENDONLY 0x00000020 +#define PCRE_EXTRA 0x00000040 +#define PCRE_NOTBOL 0x00000080 +#define PCRE_NOTEOL 0x00000100 +#define PCRE_UNGREEDY 0x00000200 +#define PCRE_NOTEMPTY 0x00000400 +#define PCRE_UTF8 0x00000800 +#define PCRE_NO_AUTO_CAPTURE 0x00001000 +#define PCRE_NO_UTF8_CHECK 0x00002000 +#define PCRE_AUTO_CALLOUT 0x00004000 +#define PCRE_PARTIAL 0x00008000 +#define PCRE_DFA_SHORTEST 0x00010000 +#define PCRE_DFA_RESTART 0x00020000 +#define PCRE_FIRSTLINE 0x00040000 + +/* Exec-time and get/set-time error codes */ + +#define PCRE_ERROR_NOMATCH (-1) +#define PCRE_ERROR_NULL (-2) +#define PCRE_ERROR_BADOPTION (-3) +#define PCRE_ERROR_BADMAGIC (-4) +#define PCRE_ERROR_UNKNOWN_NODE (-5) +#define PCRE_ERROR_NOMEMORY (-6) +#define PCRE_ERROR_NOSUBSTRING (-7) +#define PCRE_ERROR_MATCHLIMIT (-8) +#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ +#define PCRE_ERROR_BADUTF8 (-10) +#define PCRE_ERROR_BADUTF8_OFFSET (-11) +#define PCRE_ERROR_PARTIAL (-12) +#define PCRE_ERROR_BADPARTIAL (-13) +#define PCRE_ERROR_INTERNAL (-14) +#define PCRE_ERROR_BADCOUNT (-15) +#define PCRE_ERROR_DFA_UITEM (-16) +#define PCRE_ERROR_DFA_UCOND (-17) +#define PCRE_ERROR_DFA_UMLIMIT (-18) +#define PCRE_ERROR_DFA_WSSIZE (-19) +#define PCRE_ERROR_DFA_RECURSE (-20) + +/* Request types for pcre_fullinfo() */ + +#define PCRE_INFO_OPTIONS 0 +#define PCRE_INFO_SIZE 1 +#define PCRE_INFO_CAPTURECOUNT 2 +#define PCRE_INFO_BACKREFMAX 3 +#define PCRE_INFO_FIRSTBYTE 4 +#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ +#define PCRE_INFO_FIRSTTABLE 5 +#define PCRE_INFO_LASTLITERAL 6 +#define PCRE_INFO_NAMEENTRYSIZE 7 +#define PCRE_INFO_NAMECOUNT 8 +#define PCRE_INFO_NAMETABLE 9 +#define PCRE_INFO_STUDYSIZE 10 +#define PCRE_INFO_DEFAULT_TABLES 11 + +/* Request types for pcre_config() */ + +#define PCRE_CONFIG_UTF8 0 +#define PCRE_CONFIG_NEWLINE 1 +#define PCRE_CONFIG_LINK_SIZE 2 +#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 +#define PCRE_CONFIG_MATCH_LIMIT 4 +#define PCRE_CONFIG_STACKRECURSE 5 +#define PCRE_CONFIG_UNICODE_PROPERTIES 6 + +/* Bit flags for the pcre_extra structure */ + +#define PCRE_EXTRA_STUDY_DATA 0x0001 +#define PCRE_EXTRA_MATCH_LIMIT 0x0002 +#define PCRE_EXTRA_CALLOUT_DATA 0x0004 +#define PCRE_EXTRA_TABLES 0x0008 + +/* Types */ + +struct real_pcre; /* declaration; the definition is private */ +typedef struct real_pcre pcre; + +/* The structure for passing additional data to pcre_exec(). This is defined in +such as way as to be extensible. Always add new fields at the end, in order to +remain compatible. */ + +typedef struct pcre_extra { + unsigned long int flags; /* Bits for which fields are set */ + void *study_data; /* Opaque data from pcre_study() */ + unsigned long int match_limit; /* Maximum number of calls to match() */ + void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ +} pcre_extra; + +/* The structure for passing out data via the pcre_callout_function. We use a +structure so that new fields can be added on the end in future versions, +without changing the API of the function, thereby allowing old clients to work +without modification. */ + +typedef struct pcre_callout_block { + int version; /* Identifies version of block */ + /* ------------------------ Version 0 ------------------------------- */ + int callout_number; /* Number compiled into pattern */ + int *offset_vector; /* The offset vector */ + const char *subject; /* The subject being matched */ + int subject_length; /* The length of the subject */ + int start_match; /* Offset to start of this match attempt */ + int current_position; /* Where we currently are in the subject */ + int capture_top; /* Max current capture */ + int capture_last; /* Most recently closed capture */ + void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ + /* ------------------------------------------------------------------ */ +} pcre_callout_block; + +/* Indirection for store get and free functions. These can be set to +alternative malloc/free functions if required. Special ones are used in the +non-recursive case for "frames". There is also an optional callout function +that is triggered by the (?) regex item. For Virtual Pascal, these definitions +have to take another form. */ + +#ifndef VPCOMPAT +PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t); +PCRE_DATA_SCOPE void (*pcre_free)(void *); +PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t); +PCRE_DATA_SCOPE void (*pcre_stack_free)(void *); +PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *); +#else /* VPCOMPAT */ +PCRE_DATA_SCOPE void *pcre_malloc(size_t); +PCRE_DATA_SCOPE void pcre_free(void *); +PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t); +PCRE_DATA_SCOPE void pcre_stack_free(void *); +PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *); +#endif /* VPCOMPAT */ + +/* Exported PCRE functions */ + +PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *, + const unsigned char *); +PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **, + int *, const unsigned char *); +PCRE_DATA_SCOPE int pcre_config(int, void *); +PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *, + int *, int, const char *, char *, int); +PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *, + int); +PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *, + const char *, int, int, int, int *, int , int *, int); +PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *, + int, int, int, int *, int); +PCRE_DATA_SCOPE void pcre_free_substring(const char *); +PCRE_DATA_SCOPE void pcre_free_substring_list(const char **); +PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int, + void *); +PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *, + int *, int, const char *, const char **); +PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *); +PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int, + const char **); +PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int, + const char ***); +PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *); +PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void); +PCRE_DATA_SCOPE int pcre_refcount(pcre *, int); +PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **); +PCRE_DATA_SCOPE const char *pcre_version(void); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* End of pcre.h */ diff --git a/libpcre/pcre.in b/libpcre/pcre.in index 2aa44b90a7..be1546c4e1 100644 --- a/libpcre/pcre.in +++ b/libpcre/pcre.in @@ -2,7 +2,39 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* Copyright (c) 1997-2003 University of Cambridge */ +/* In its original form, this is the .in file that is transformed by +"configure" into pcre.h. + + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ #ifndef _PCRE_H #define _PCRE_H @@ -14,7 +46,7 @@ make changes to pcre.in. */ #define PCRE_MINOR @PCRE_MINOR@ #define PCRE_DATE @PCRE_DATE@ -/* Win32 uses DLL by default */ +/* Win32 uses DLL by default; it needs special stuff for exported functions. */ #ifdef _WIN32 # ifdef PCRE_DEFINITION @@ -23,12 +55,19 @@ make changes to pcre.in. */ # endif # else # ifndef PCRE_STATIC -# define PCRE_DATA_SCOPE __declspec(dllimport) +# define PCRE_DATA_SCOPE extern __declspec(dllimport) # endif # endif #endif + +/* For other operating systems, we use the standard "extern". */ + #ifndef PCRE_DATA_SCOPE -# define PCRE_DATA_SCOPE extern +# ifdef __cplusplus +# define PCRE_DATA_SCOPE extern "C" +# else +# define PCRE_DATA_SCOPE extern +# endif #endif /* Have to include stdlib.h in order to ensure that size_t is defined; @@ -44,31 +83,48 @@ extern "C" { /* Options */ -#define PCRE_CASELESS 0x0001 -#define PCRE_MULTILINE 0x0002 -#define PCRE_DOTALL 0x0004 -#define PCRE_EXTENDED 0x0008 -#define PCRE_ANCHORED 0x0010 -#define PCRE_DOLLAR_ENDONLY 0x0020 -#define PCRE_EXTRA 0x0040 -#define PCRE_NOTBOL 0x0080 -#define PCRE_NOTEOL 0x0100 -#define PCRE_UNGREEDY 0x0200 -#define PCRE_NOTEMPTY 0x0400 -#define PCRE_UTF8 0x0800 -#define PCRE_NO_AUTO_CAPTURE 0x1000 +#define PCRE_CASELESS 0x00000001 +#define PCRE_MULTILINE 0x00000002 +#define PCRE_DOTALL 0x00000004 +#define PCRE_EXTENDED 0x00000008 +#define PCRE_ANCHORED 0x00000010 +#define PCRE_DOLLAR_ENDONLY 0x00000020 +#define PCRE_EXTRA 0x00000040 +#define PCRE_NOTBOL 0x00000080 +#define PCRE_NOTEOL 0x00000100 +#define PCRE_UNGREEDY 0x00000200 +#define PCRE_NOTEMPTY 0x00000400 +#define PCRE_UTF8 0x00000800 +#define PCRE_NO_AUTO_CAPTURE 0x00001000 +#define PCRE_NO_UTF8_CHECK 0x00002000 +#define PCRE_AUTO_CALLOUT 0x00004000 +#define PCRE_PARTIAL 0x00008000 +#define PCRE_DFA_SHORTEST 0x00010000 +#define PCRE_DFA_RESTART 0x00020000 +#define PCRE_FIRSTLINE 0x00040000 /* Exec-time and get/set-time error codes */ -#define PCRE_ERROR_NOMATCH (-1) -#define PCRE_ERROR_NULL (-2) -#define PCRE_ERROR_BADOPTION (-3) -#define PCRE_ERROR_BADMAGIC (-4) -#define PCRE_ERROR_UNKNOWN_NODE (-5) -#define PCRE_ERROR_NOMEMORY (-6) -#define PCRE_ERROR_NOSUBSTRING (-7) -#define PCRE_ERROR_MATCHLIMIT (-8) -#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ +#define PCRE_ERROR_NOMATCH (-1) +#define PCRE_ERROR_NULL (-2) +#define PCRE_ERROR_BADOPTION (-3) +#define PCRE_ERROR_BADMAGIC (-4) +#define PCRE_ERROR_UNKNOWN_NODE (-5) +#define PCRE_ERROR_NOMEMORY (-6) +#define PCRE_ERROR_NOSUBSTRING (-7) +#define PCRE_ERROR_MATCHLIMIT (-8) +#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ +#define PCRE_ERROR_BADUTF8 (-10) +#define PCRE_ERROR_BADUTF8_OFFSET (-11) +#define PCRE_ERROR_PARTIAL (-12) +#define PCRE_ERROR_BADPARTIAL (-13) +#define PCRE_ERROR_INTERNAL (-14) +#define PCRE_ERROR_BADCOUNT (-15) +#define PCRE_ERROR_DFA_UITEM (-16) +#define PCRE_ERROR_DFA_UCOND (-17) +#define PCRE_ERROR_DFA_UMLIMIT (-18) +#define PCRE_ERROR_DFA_WSSIZE (-19) +#define PCRE_ERROR_DFA_RECURSE (-20) /* Request types for pcre_fullinfo() */ @@ -84,6 +140,7 @@ extern "C" { #define PCRE_INFO_NAMECOUNT 8 #define PCRE_INFO_NAMETABLE 9 #define PCRE_INFO_STUDYSIZE 10 +#define PCRE_INFO_DEFAULT_TABLES 11 /* Request types for pcre_config() */ @@ -92,12 +149,15 @@ extern "C" { #define PCRE_CONFIG_LINK_SIZE 2 #define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 #define PCRE_CONFIG_MATCH_LIMIT 4 +#define PCRE_CONFIG_STACKRECURSE 5 +#define PCRE_CONFIG_UNICODE_PROPERTIES 6 /* Bit flags for the pcre_extra structure */ #define PCRE_EXTRA_STUDY_DATA 0x0001 #define PCRE_EXTRA_MATCH_LIMIT 0x0002 #define PCRE_EXTRA_CALLOUT_DATA 0x0004 +#define PCRE_EXTRA_TABLES 0x0008 /* Types */ @@ -105,13 +165,15 @@ struct real_pcre; /* declaration; the definition is private */ typedef struct real_pcre pcre; /* The structure for passing additional data to pcre_exec(). This is defined in -such as way as to be extensible. */ +such as way as to be extensible. Always add new fields at the end, in order to +remain compatible. */ typedef struct pcre_extra { unsigned long int flags; /* Bits for which fields are set */ void *study_data; /* Opaque data from pcre_study() */ unsigned long int match_limit; /* Maximum number of calls to match() */ void *callout_data; /* Data passed back in callouts */ + const unsigned char *tables; /* Pointer to character tables */ } pcre_extra; /* The structure for passing out data via the pcre_callout_function. We use a @@ -127,55 +189,67 @@ typedef struct pcre_callout_block { const char *subject; /* The subject being matched */ int subject_length; /* The length of the subject */ int start_match; /* Offset to start of this match attempt */ - int current_position; /* Where we currently are */ + int current_position; /* Where we currently are in the subject */ int capture_top; /* Max current capture */ int capture_last; /* Most recently closed capture */ void *callout_data; /* Data passed in with the call */ + /* ------------------- Added for Version 1 -------------------------- */ + int pattern_position; /* Offset to next item in the pattern */ + int next_item_length; /* Length of next item in the pattern */ /* ------------------------------------------------------------------ */ } pcre_callout_block; /* Indirection for store get and free functions. These can be set to -alternative malloc/free functions if required. There is also an optional -callout function that is triggered by the (?) regex item. Some magic is -required for Win32 DLL; it is null on other OS. For Virtual Pascal, these -have to be different again. */ +alternative malloc/free functions if required. Special ones are used in the +non-recursive case for "frames". There is also an optional callout function +that is triggered by the (?) regex item. For Virtual Pascal, these definitions +have to take another form. */ #ifndef VPCOMPAT PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t); PCRE_DATA_SCOPE void (*pcre_free)(void *); +PCRE_DATA_SCOPE void *(*pcre_stack_malloc)(size_t); +PCRE_DATA_SCOPE void (*pcre_stack_free)(void *); PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *); #else /* VPCOMPAT */ -extern void *pcre_malloc(size_t); -extern void pcre_free(void *); -extern int pcre_callout(pcre_callout_block *); +PCRE_DATA_SCOPE void *pcre_malloc(size_t); +PCRE_DATA_SCOPE void pcre_free(void *); +PCRE_DATA_SCOPE void *pcre_stack_malloc(size_t); +PCRE_DATA_SCOPE void pcre_stack_free(void *); +PCRE_DATA_SCOPE int pcre_callout(pcre_callout_block *); #endif /* VPCOMPAT */ /* Exported PCRE functions */ -extern pcre *pcre_compile(const char *, int, const char **, - int *, const unsigned char *); -extern int pcre_config(int, void *); -extern int pcre_copy_named_substring(const pcre *, const char *, - int *, int, const char *, char *, int); -extern int pcre_copy_substring(const char *, int *, int, int, - char *, int); -extern int pcre_exec(const pcre *, const pcre_extra *, - const char *, int, int, int, int *, int); -extern void pcre_free_substring(const char *); -extern void pcre_free_substring_list(const char **); -extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, - void *); -extern int pcre_get_named_substring(const pcre *, const char *, - int *, int, const char *, const char **); -extern int pcre_get_stringnumber(const pcre *, const char *); -extern int pcre_get_substring(const char *, int *, int, int, - const char **); -extern int pcre_get_substring_list(const char *, int *, int, - const char ***); -extern int pcre_info(const pcre *, int *, int *); -extern const unsigned char *pcre_maketables(void); -extern pcre_extra *pcre_study(const pcre *, int, const char **); -extern const char *pcre_version(void); +PCRE_DATA_SCOPE pcre *pcre_compile(const char *, int, const char **, int *, + const unsigned char *); +PCRE_DATA_SCOPE pcre *pcre_compile2(const char *, int, int *, const char **, + int *, const unsigned char *); +PCRE_DATA_SCOPE int pcre_config(int, void *); +PCRE_DATA_SCOPE int pcre_copy_named_substring(const pcre *, const char *, + int *, int, const char *, char *, int); +PCRE_DATA_SCOPE int pcre_copy_substring(const char *, int *, int, int, char *, + int); +PCRE_DATA_SCOPE int pcre_dfa_exec(const pcre *, const pcre_extra *, + const char *, int, int, int, int *, int , int *, int); +PCRE_DATA_SCOPE int pcre_exec(const pcre *, const pcre_extra *, const char *, + int, int, int, int *, int); +PCRE_DATA_SCOPE void pcre_free_substring(const char *); +PCRE_DATA_SCOPE void pcre_free_substring_list(const char **); +PCRE_DATA_SCOPE int pcre_fullinfo(const pcre *, const pcre_extra *, int, + void *); +PCRE_DATA_SCOPE int pcre_get_named_substring(const pcre *, const char *, + int *, int, const char *, const char **); +PCRE_DATA_SCOPE int pcre_get_stringnumber(const pcre *, const char *); +PCRE_DATA_SCOPE int pcre_get_substring(const char *, int *, int, int, + const char **); +PCRE_DATA_SCOPE int pcre_get_substring_list(const char *, int *, int, + const char ***); +PCRE_DATA_SCOPE int pcre_info(const pcre *, int *, int *); +PCRE_DATA_SCOPE const unsigned char *pcre_maketables(void); +PCRE_DATA_SCOPE int pcre_refcount(pcre *, int); +PCRE_DATA_SCOPE pcre_extra *pcre_study(const pcre *, int, const char **); +PCRE_DATA_SCOPE const char *pcre_version(void); #ifdef __cplusplus } /* extern "C" */ diff --git a/libpcre/chartables.c b/libpcre/pcre_chartables.c similarity index 96% rename from libpcre/chartables.c rename to libpcre/pcre_chartables.c index 55e413cfca..d491433e25 100644 --- a/libpcre/chartables.c +++ b/libpcre/pcre_chartables.c @@ -6,11 +6,11 @@ program. If you edit it by hand, you might like to edit the Makefile to prevent its ever being regenerated. -This file is #included in the compilation of pcre.c to build the default -character tables which are used when no tables are passed to the compile -function. */ +This file contains the default tables for characters with codes less than +128 (ASCII characters). These tables are used when no external tables are +passed to PCRE. */ -static unsigned char pcre_default_tables[] = { +const unsigned char _pcre_default_tables[] = { /* This table is a lower casing table. */ diff --git a/libpcre/pcre_compile.c b/libpcre/pcre_compile.c new file mode 100644 index 0000000000..2289952525 --- /dev/null +++ b/libpcre/pcre_compile.c @@ -0,0 +1,5059 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_compile(), along with +supporting internal functions that are not used by other modules. */ + + +#include "pcre_internal.h" + + +/************************************************* +* Code parameters and static tables * +*************************************************/ + +/* Maximum number of items on the nested bracket stacks at compile time. This +applies to the nesting of all kinds of parentheses. It does not limit +un-nested, non-capturing parentheses. This number can be made bigger if +necessary - it is used to dimension one int and one unsigned char vector at +compile time. */ + +#define BRASTACK_SIZE 200 + + +/* Table for handling escaped characters in the range '0'-'z'. Positive returns +are simple data values; negative values are for special things like \d and so +on. Zero means further processing is needed (for things like \x), or the escape +is invalid. */ + +#if !EBCDIC /* This is the "normal" table for ASCII systems */ +static const short int escapes[] = { + 0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ + 0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ + '@', -ESC_A, -ESC_B, -ESC_C, -ESC_D, -ESC_E, 0, -ESC_G, /* @ - G */ + 0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ +-ESC_P, -ESC_Q, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ +-ESC_X, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ + '`', 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, /* ` - g */ + 0, 0, 0, 0, 0, 0, ESC_n, 0, /* h - o */ +-ESC_p, 0, ESC_r, -ESC_s, ESC_tee, 0, 0, -ESC_w, /* p - w */ + 0, 0, -ESC_z /* x - z */ +}; + +#else /* This is the "abnormal" table for EBCDIC systems */ +static const short int escapes[] = { +/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', +/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, +/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', +/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, +/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', +/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', +/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, +/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0, +/* 90 */ 0, 0, 0, 'l', 0, ESC_n, 0, -ESC_p, +/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, +/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0, +/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, +/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', +/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, +/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* D0 */ '}', 0, 0, 0, 0, 0, 0, -ESC_P, +/* D8 */-ESC_Q, 0, 0, 0, 0, 0, 0, 0, +/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, -ESC_X, +/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, +/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, +/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 +}; +#endif + + +/* Tables of names of POSIX character classes and their lengths. The list is +terminated by a zero length entry. The first three must be alpha, upper, lower, +as this is assumed for handling case independence. */ + +static const char *const posix_names[] = { + "alpha", "lower", "upper", + "alnum", "ascii", "blank", "cntrl", "digit", "graph", + "print", "punct", "space", "word", "xdigit" }; + +static const uschar posix_name_lengths[] = { + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; + +/* Table of class bit maps for each POSIX class; up to three may be combined +to form the class. The table for [:blank:] is dynamically modified to remove +the vertical space characters. */ + +static const int posix_class_maps[] = { + cbit_lower, cbit_upper, -1, /* alpha */ + cbit_lower, -1, -1, /* lower */ + cbit_upper, -1, -1, /* upper */ + cbit_digit, cbit_lower, cbit_upper, /* alnum */ + cbit_print, cbit_cntrl, -1, /* ascii */ + cbit_space, -1, -1, /* blank - a GNU extension */ + cbit_cntrl, -1, -1, /* cntrl */ + cbit_digit, -1, -1, /* digit */ + cbit_graph, -1, -1, /* graph */ + cbit_print, -1, -1, /* print */ + cbit_punct, -1, -1, /* punct */ + cbit_space, -1, -1, /* space */ + cbit_word, -1, -1, /* word - a Perl extension */ + cbit_xdigit,-1, -1 /* xdigit */ +}; + + +/* The texts of compile-time error messages. These are "char *" because they +are passed to the outside world. */ + +static const char *error_texts[] = { + "no error", + "\\ at end of pattern", + "\\c at end of pattern", + "unrecognized character follows \\", + "numbers out of order in {} quantifier", + /* 5 */ + "number too big in {} quantifier", + "missing terminating ] for character class", + "invalid escape sequence in character class", + "range out of order in character class", + "nothing to repeat", + /* 10 */ + "operand of unlimited repeat could match the empty string", + "internal error: unexpected repeat", + "unrecognized character after (?", + "POSIX named classes are supported only within a class", + "missing )", + /* 15 */ + "reference to non-existent subpattern", + "erroffset passed as NULL", + "unknown option bit(s) set", + "missing ) after comment", + "parentheses nested too deeply", + /* 20 */ + "regular expression too large", + "failed to get memory", + "unmatched parentheses", + "internal error: code overflow", + "unrecognized character after (?<", + /* 25 */ + "lookbehind assertion is not fixed length", + "malformed number after (?(", + "conditional group contains more than two branches", + "assertion expected after (?(", + "(?R or (?digits must be followed by )", + /* 30 */ + "unknown POSIX class name", + "POSIX collating elements are not supported", + "this version of PCRE is not compiled with PCRE_UTF8 support", + "spare error", + "character value in \\x{...} sequence is too large", + /* 35 */ + "invalid condition (?(0)", + "\\C not allowed in lookbehind assertion", + "PCRE does not support \\L, \\l, \\N, \\U, or \\u", + "number after (?C is > 255", + "closing ) for (?C expected", + /* 40 */ + "recursive call could loop indefinitely", + "unrecognized character after (?P", + "syntax error after (?P", + "two named groups have the same name", + "invalid UTF-8 string", + /* 45 */ + "support for \\P, \\p, and \\X has not been compiled", + "malformed \\P or \\p sequence", + "unknown property name after \\P or \\p" +}; + + +/* Table to identify digits and hex digits. This is used when compiling +patterns. Note that the tables in chartables are dependent on the locale, and +may mark arbitrary characters as digits - but the PCRE compiling code expects +to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have +a private table here. It costs 256 bytes, but it is a lot faster than doing +character value tests (at least in some simple cases I timed), and in some +applications one wants PCRE to compile efficiently as well as match +efficiently. + +For convenience, we use the same bit definitions as in chartables: + + 0x04 decimal digit + 0x08 hexadecimal digit + +Then we can use ctype_digit and ctype_xdigit in the code. */ + +#if !EBCDIC /* This is the "normal" case, for ASCII systems */ +static const unsigned char digitab[] = + { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */ + 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 */ + 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* @ - G */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H - O */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* P - W */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* X - _ */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* ` - g */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h - o */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* p - w */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* x -127 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ + +#else /* This is the "abnormal" case, for EBCDIC systems */ +static const unsigned char digitab[] = + { + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ + 0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ + 0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ + +static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */ + 0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ + 0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ + 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ + 0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ + 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ + 0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ + 0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ + 0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ + 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ + 0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ + 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ + 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ + 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ + 0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ + 0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ + 0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ + 0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ + 0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ + 0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ + 0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ +#endif + + +/* Definition to allow mutual recursion */ + +static BOOL + compile_regex(int, int, int *, uschar **, const uschar **, int *, BOOL, int, + int *, int *, branch_chain *, compile_data *); + + + +/************************************************* +* Handle escapes * +*************************************************/ + +/* This function is called when a \ has been encountered. It either returns a +positive value for a simple escape such as \n, or a negative value which +encodes one of the more complicated things such as \d. When UTF-8 is enabled, +a positive value greater than 255 may be returned. On entry, ptr is pointing at +the \. On exit, it is on the final character of the escape sequence. + +Arguments: + ptrptr points to the pattern position pointer + errorcodeptr points to the errorcode variable + bracount number of previous extracting brackets + options the options bits + isclass TRUE if inside a character class + +Returns: zero or positive => a data character + negative => a special escape sequence + on error, errorptr is set +*/ + +static int +check_escape(const uschar **ptrptr, int *errorcodeptr, int bracount, + int options, BOOL isclass) +{ +const uschar *ptr = *ptrptr; +int c, i; + +/* If backslash is at the end of the pattern, it's an error. */ + +c = *(++ptr); +if (c == 0) *errorcodeptr = ERR1; + +/* Non-alphamerics are literals. For digits or letters, do an initial lookup in +a table. A non-zero result is something that can be returned immediately. +Otherwise further processing may be required. */ + +#if !EBCDIC /* ASCII coding */ +else if (c < '0' || c > 'z') {} /* Not alphameric */ +else if ((i = escapes[c - '0']) != 0) c = i; + +#else /* EBCDIC coding */ +else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */ +else if ((i = escapes[c - 0x48]) != 0) c = i; +#endif + +/* Escapes that need further processing, or are illegal. */ + +else + { + const uschar *oldptr; + switch (c) + { + /* A number of Perl escapes are not handled by PCRE. We give an explicit + error. */ + + case 'l': + case 'L': + case 'N': + case 'u': + case 'U': + *errorcodeptr = ERR37; + break; + + /* The handling of escape sequences consisting of a string of digits + starting with one that is not zero is not straightforward. By experiment, + the way Perl works seems to be as follows: + + Outside a character class, the digits are read as a decimal number. If the + number is less than 10, or if there are that many previous extracting + left brackets, then it is a back reference. Otherwise, up to three octal + digits are read to form an escaped byte. Thus \123 is likely to be octal + 123 (cf \0123, which is octal 012 followed by the literal 3). If the octal + value is greater than 377, the least significant 8 bits are taken. Inside a + character class, \ followed by a digit is always an octal number. */ + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + + if (!isclass) + { + oldptr = ptr; + c -= '0'; + while ((digitab[ptr[1]] & ctype_digit) != 0) + c = c * 10 + *(++ptr) - '0'; + if (c < 10 || c <= bracount) + { + c = -(ESC_REF + c); + break; + } + ptr = oldptr; /* Put the pointer back and fall through */ + } + + /* Handle an octal number following \. If the first digit is 8 or 9, Perl + generates a binary zero byte and treats the digit as a following literal. + Thus we have to pull back the pointer by one. */ + + if ((c = *ptr) >= '8') + { + ptr--; + c = 0; + break; + } + + /* \0 always starts an octal number, but we may drop through to here with a + larger first octal digit. */ + + case '0': + c -= '0'; + while(i++ < 2 && ptr[1] >= '0' && ptr[1] <= '7') + c = c * 8 + *(++ptr) - '0'; + c &= 255; /* Take least significant 8 bits */ + break; + + /* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number + which can be greater than 0xff, but only if the ddd are hex digits. */ + + case 'x': +#ifdef SUPPORT_UTF8 + if (ptr[1] == '{' && (options & PCRE_UTF8) != 0) + { + const uschar *pt = ptr + 2; + register int count = 0; + c = 0; + while ((digitab[*pt] & ctype_xdigit) != 0) + { + int cc = *pt++; + count++; +#if !EBCDIC /* ASCII coding */ + if (cc >= 'a') cc -= 32; /* Convert to upper case */ + c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); +#else /* EBCDIC coding */ + if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */ + c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10)); +#endif + } + if (*pt == '}') + { + if (c < 0 || count > 8) *errorcodeptr = ERR34; + ptr = pt; + break; + } + /* If the sequence of hex digits does not end with '}', then we don't + recognize this construct; fall through to the normal \x handling. */ + } +#endif + + /* Read just a single hex char */ + + c = 0; + while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) + { + int cc; /* Some compilers don't like ++ */ + cc = *(++ptr); /* in initializers */ +#if !EBCDIC /* ASCII coding */ + if (cc >= 'a') cc -= 32; /* Convert to upper case */ + c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); +#else /* EBCDIC coding */ + if (cc <= 'z') cc += 64; /* Convert to upper case */ + c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10)); +#endif + } + break; + + /* Other special escapes not starting with a digit are straightforward */ + + case 'c': + c = *(++ptr); + if (c == 0) + { + *errorcodeptr = ERR2; + return 0; + } + + /* A letter is upper-cased; then the 0x40 bit is flipped. This coding + is ASCII-specific, but then the whole concept of \cx is ASCII-specific. + (However, an EBCDIC equivalent has now been added.) */ + +#if !EBCDIC /* ASCII coding */ + if (c >= 'a' && c <= 'z') c -= 32; + c ^= 0x40; +#else /* EBCDIC coding */ + if (c >= 'a' && c <= 'z') c += 64; + c ^= 0xC0; +#endif + break; + + /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any + other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, + for Perl compatibility, it is a literal. This code looks a bit odd, but + there used to be some cases other than the default, and there may be again + in future, so I haven't "optimized" it. */ + + default: + if ((options & PCRE_EXTRA) != 0) switch(c) + { + default: + *errorcodeptr = ERR3; + break; + } + break; + } + } + +*ptrptr = ptr; +return c; +} + + + +#ifdef SUPPORT_UCP +/************************************************* +* Handle \P and \p * +*************************************************/ + +/* This function is called after \P or \p has been encountered, provided that +PCRE is compiled with support for Unicode properties. On entry, ptrptr is +pointing at the P or p. On exit, it is pointing at the final character of the +escape sequence. + +Argument: + ptrptr points to the pattern position pointer + negptr points to a boolean that is set TRUE for negation else FALSE + errorcodeptr points to the error code variable + +Returns: value from ucp_type_table, or -1 for an invalid type +*/ + +static int +get_ucp(const uschar **ptrptr, BOOL *negptr, int *errorcodeptr) +{ +int c, i, bot, top; +const uschar *ptr = *ptrptr; +char name[4]; + +c = *(++ptr); +if (c == 0) goto ERROR_RETURN; + +*negptr = FALSE; + +/* \P or \p can be followed by a one- or two-character name in {}, optionally +preceded by ^ for negation. */ + +if (c == '{') + { + if (ptr[1] == '^') + { + *negptr = TRUE; + ptr++; + } + for (i = 0; i <= 2; i++) + { + c = *(++ptr); + if (c == 0) goto ERROR_RETURN; + if (c == '}') break; + name[i] = c; + } + if (c !='}') /* Try to distinguish error cases */ + { + while (*(++ptr) != 0 && *ptr != '}'); + if (*ptr == '}') goto UNKNOWN_RETURN; else goto ERROR_RETURN; + } + name[i] = 0; + } + +/* Otherwise there is just one following character */ + +else + { + name[0] = c; + name[1] = 0; + } + +*ptrptr = ptr; + +/* Search for a recognized property name using binary chop */ + +bot = 0; +top = _pcre_utt_size; + +while (bot < top) + { + i = (bot + top)/2; + c = strcmp(name, _pcre_utt[i].name); + if (c == 0) return _pcre_utt[i].value; + if (c > 0) bot = i + 1; else top = i; + } + +UNKNOWN_RETURN: +*errorcodeptr = ERR47; +*ptrptr = ptr; +return -1; + +ERROR_RETURN: +*errorcodeptr = ERR46; +*ptrptr = ptr; +return -1; +} +#endif + + + + +/************************************************* +* Check for counted repeat * +*************************************************/ + +/* This function is called when a '{' is encountered in a place where it might +start a quantifier. It looks ahead to see if it really is a quantifier or not. +It is only a quantifier if it is one of the forms {ddd} {ddd,} or {ddd,ddd} +where the ddds are digits. + +Arguments: + p pointer to the first char after '{' + +Returns: TRUE or FALSE +*/ + +static BOOL +is_counted_repeat(const uschar *p) +{ +if ((digitab[*p++] & ctype_digit) == 0) return FALSE; +while ((digitab[*p] & ctype_digit) != 0) p++; +if (*p == '}') return TRUE; + +if (*p++ != ',') return FALSE; +if (*p == '}') return TRUE; + +if ((digitab[*p++] & ctype_digit) == 0) return FALSE; +while ((digitab[*p] & ctype_digit) != 0) p++; + +return (*p == '}'); +} + + + +/************************************************* +* Read repeat counts * +*************************************************/ + +/* Read an item of the form {n,m} and return the values. This is called only +after is_counted_repeat() has confirmed that a repeat-count quantifier exists, +so the syntax is guaranteed to be correct, but we need to check the values. + +Arguments: + p pointer to first char after '{' + minp pointer to int for min + maxp pointer to int for max + returned as -1 if no max + errorcodeptr points to error code variable + +Returns: pointer to '}' on success; + current ptr on error, with errorcodeptr set non-zero +*/ + +static const uschar * +read_repeat_counts(const uschar *p, int *minp, int *maxp, int *errorcodeptr) +{ +int min = 0; +int max = -1; + +/* Read the minimum value and do a paranoid check: a negative value indicates +an integer overflow. */ + +while ((digitab[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; +if (min < 0 || min > 65535) + { + *errorcodeptr = ERR5; + return p; + } + +/* Read the maximum value if there is one, and again do a paranoid on its size. +Also, max must not be less than min. */ + +if (*p == '}') max = min; else + { + if (*(++p) != '}') + { + max = 0; + while((digitab[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; + if (max < 0 || max > 65535) + { + *errorcodeptr = ERR5; + return p; + } + if (max < min) + { + *errorcodeptr = ERR4; + return p; + } + } + } + +/* Fill in the required variables, and pass back the pointer to the terminating +'}'. */ + +*minp = min; +*maxp = max; +return p; +} + + + +/************************************************* +* Find first significant op code * +*************************************************/ + +/* This is called by several functions that scan a compiled expression looking +for a fixed first character, or an anchoring op code etc. It skips over things +that do not influence this. For some calls, a change of option is important. +For some calls, it makes sense to skip negative forward and all backward +assertions, and also the \b assertion; for others it does not. + +Arguments: + code pointer to the start of the group + options pointer to external options + optbit the option bit whose changing is significant, or + zero if none are + skipassert TRUE if certain assertions are to be skipped + +Returns: pointer to the first significant opcode +*/ + +static const uschar* +first_significant_code(const uschar *code, int *options, int optbit, + BOOL skipassert) +{ +for (;;) + { + switch ((int)*code) + { + case OP_OPT: + if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) + *options = (int)code[1]; + code += 2; + break; + + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + if (!skipassert) return code; + do code += GET(code, 1); while (*code == OP_ALT); + code += _pcre_OP_lengths[*code]; + break; + + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + if (!skipassert) return code; + /* Fall through */ + + case OP_CALLOUT: + case OP_CREF: + case OP_BRANUMBER: + code += _pcre_OP_lengths[*code]; + break; + + default: + return code; + } + } +/* Control never reaches here */ +} + + + + +/************************************************* +* Find the fixed length of a pattern * +*************************************************/ + +/* Scan a pattern and compute the fixed length of subject that will match it, +if the length is fixed. This is needed for dealing with backward assertions. +In UTF8 mode, the result is in characters rather than bytes. + +Arguments: + code points to the start of the pattern (the bracket) + options the compiling options + +Returns: the fixed length, or -1 if there is no fixed length, + or -2 if \C was encountered +*/ + +static int +find_fixedlength(uschar *code, int options) +{ +int length = -1; + +register int branchlength = 0; +register uschar *cc = code + 1 + LINK_SIZE; + +/* Scan along the opcodes for this branch. If we get to the end of the +branch, check the length against that of the other branches. */ + +for (;;) + { + int d; + register int op = *cc; + if (op >= OP_BRA) op = OP_BRA; + + switch (op) + { + case OP_BRA: + case OP_ONCE: + case OP_COND: + d = find_fixedlength(cc, options); + if (d < 0) return d; + branchlength += d; + do cc += GET(cc, 1); while (*cc == OP_ALT); + cc += 1 + LINK_SIZE; + break; + + /* Reached end of a branch; if it's a ket it is the end of a nested + call. If it's ALT it is an alternation in a nested call. If it is + END it's the end of the outer call. All can be handled by the same code. */ + + case OP_ALT: + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_END: + if (length < 0) length = branchlength; + else if (length != branchlength) return -1; + if (*cc != OP_ALT) return length; + cc += 1 + LINK_SIZE; + branchlength = 0; + break; + + /* Skip over assertive subpatterns */ + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + do cc += GET(cc, 1); while (*cc == OP_ALT); + /* Fall through */ + + /* Skip over things that don't match chars */ + + case OP_REVERSE: + case OP_BRANUMBER: + case OP_CREF: + case OP_OPT: + case OP_CALLOUT: + case OP_SOD: + case OP_SOM: + case OP_EOD: + case OP_EODN: + case OP_CIRC: + case OP_DOLL: + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + cc += _pcre_OP_lengths[*cc]; + break; + + /* Handle literal characters */ + + case OP_CHAR: + case OP_CHARNC: + branchlength++; + cc += 2; +#ifdef SUPPORT_UTF8 + if ((options & PCRE_UTF8) != 0) + { + while ((*cc & 0xc0) == 0x80) cc++; + } +#endif + break; + + /* Handle exact repetitions. The count is already in characters, but we + need to skip over a multibyte character in UTF8 mode. */ + + case OP_EXACT: + branchlength += GET2(cc,1); + cc += 4; +#ifdef SUPPORT_UTF8 + if ((options & PCRE_UTF8) != 0) + { + while((*cc & 0x80) == 0x80) cc++; + } +#endif + break; + + case OP_TYPEEXACT: + branchlength += GET2(cc,1); + cc += 4; + break; + + /* Handle single-char matchers */ + + case OP_PROP: + case OP_NOTPROP: + cc++; + /* Fall through */ + + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + branchlength++; + cc++; + break; + + /* The single-byte matcher isn't allowed */ + + case OP_ANYBYTE: + return -2; + + /* Check a class for variable quantification */ + +#ifdef SUPPORT_UTF8 + case OP_XCLASS: + cc += GET(cc, 1) - 33; + /* Fall through */ +#endif + + case OP_CLASS: + case OP_NCLASS: + cc += 33; + + switch (*cc) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + return -1; + + case OP_CRRANGE: + case OP_CRMINRANGE: + if (GET2(cc,1) != GET2(cc,3)) return -1; + branchlength += GET2(cc,1); + cc += 5; + break; + + default: + branchlength++; + } + break; + + /* Anything else is variable length */ + + default: + return -1; + } + } +/* Control never gets here */ +} + + + + +/************************************************* +* Scan compiled regex for numbered bracket * +*************************************************/ + +/* This little function scans through a compiled pattern until it finds a +capturing bracket with the given number. + +Arguments: + code points to start of expression + utf8 TRUE in UTF-8 mode + number the required bracket number + +Returns: pointer to the opcode for the bracket, or NULL if not found +*/ + +static const uschar * +find_bracket(const uschar *code, BOOL utf8, int number) +{ +#ifndef SUPPORT_UTF8 +utf8 = utf8; /* Stop pedantic compilers complaining */ +#endif + +for (;;) + { + register int c = *code; + if (c == OP_END) return NULL; + else if (c > OP_BRA) + { + int n = c - OP_BRA; + if (n > EXTRACT_BASIC_MAX) n = GET2(code, 2+LINK_SIZE); + if (n == number) return (uschar *)code; + code += _pcre_OP_lengths[OP_BRA]; + } + else + { + code += _pcre_OP_lengths[c]; + +#ifdef SUPPORT_UTF8 + + /* In UTF-8 mode, opcodes that are followed by a character may be followed + by a multi-byte character. The length in the table is a minimum, so we have + to scan along to skip the extra bytes. All opcodes are less than 128, so we + can use relatively efficient code. */ + + if (utf8) switch(c) + { + case OP_CHAR: + case OP_CHARNC: + case OP_EXACT: + case OP_UPTO: + case OP_MINUPTO: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + while ((*code & 0xc0) == 0x80) code++; + break; + + /* XCLASS is used for classes that cannot be represented just by a bit + map. This includes negated single high-valued characters. The length in + the table is zero; the actual length is stored in the compiled code. */ + + case OP_XCLASS: + code += GET(code, 1) + 1; + break; + } +#endif + } + } +} + + + +/************************************************* +* Scan compiled regex for recursion reference * +*************************************************/ + +/* This little function scans through a compiled pattern until it finds an +instance of OP_RECURSE. + +Arguments: + code points to start of expression + utf8 TRUE in UTF-8 mode + +Returns: pointer to the opcode for OP_RECURSE, or NULL if not found +*/ + +static const uschar * +find_recurse(const uschar *code, BOOL utf8) +{ +#ifndef SUPPORT_UTF8 +utf8 = utf8; /* Stop pedantic compilers complaining */ +#endif + +for (;;) + { + register int c = *code; + if (c == OP_END) return NULL; + else if (c == OP_RECURSE) return code; + else if (c > OP_BRA) + { + code += _pcre_OP_lengths[OP_BRA]; + } + else + { + code += _pcre_OP_lengths[c]; + +#ifdef SUPPORT_UTF8 + + /* In UTF-8 mode, opcodes that are followed by a character may be followed + by a multi-byte character. The length in the table is a minimum, so we have + to scan along to skip the extra bytes. All opcodes are less than 128, so we + can use relatively efficient code. */ + + if (utf8) switch(c) + { + case OP_CHAR: + case OP_CHARNC: + case OP_EXACT: + case OP_UPTO: + case OP_MINUPTO: + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + while ((*code & 0xc0) == 0x80) code++; + break; + + /* XCLASS is used for classes that cannot be represented just by a bit + map. This includes negated single high-valued characters. The length in + the table is zero; the actual length is stored in the compiled code. */ + + case OP_XCLASS: + code += GET(code, 1) + 1; + break; + } +#endif + } + } +} + + + +/************************************************* +* Scan compiled branch for non-emptiness * +*************************************************/ + +/* This function scans through a branch of a compiled pattern to see whether it +can match the empty string or not. It is called only from could_be_empty() +below. Note that first_significant_code() skips over assertions. If we hit an +unclosed bracket, we return "empty" - this means we've struck an inner bracket +whose current branch will already have been scanned. + +Arguments: + code points to start of search + endcode points to where to stop + utf8 TRUE if in UTF8 mode + +Returns: TRUE if what is matched could be empty +*/ + +static BOOL +could_be_empty_branch(const uschar *code, const uschar *endcode, BOOL utf8) +{ +register int c; +for (code = first_significant_code(code + 1 + LINK_SIZE, NULL, 0, TRUE); + code < endcode; + code = first_significant_code(code + _pcre_OP_lengths[c], NULL, 0, TRUE)) + { + const uschar *ccode; + + c = *code; + + if (c >= OP_BRA) + { + BOOL empty_branch; + if (GET(code, 1) == 0) return TRUE; /* Hit unclosed bracket */ + + /* Scan a closed bracket */ + + empty_branch = FALSE; + do + { + if (!empty_branch && could_be_empty_branch(code, endcode, utf8)) + empty_branch = TRUE; + code += GET(code, 1); + } + while (*code == OP_ALT); + if (!empty_branch) return FALSE; /* All branches are non-empty */ + code += 1 + LINK_SIZE; + c = *code; + } + + else switch (c) + { + /* Check for quantifiers after a class */ + +#ifdef SUPPORT_UTF8 + case OP_XCLASS: + ccode = code + GET(code, 1); + goto CHECK_CLASS_REPEAT; +#endif + + case OP_CLASS: + case OP_NCLASS: + ccode = code + 33; + +#ifdef SUPPORT_UTF8 + CHECK_CLASS_REPEAT: +#endif + + switch (*ccode) + { + case OP_CRSTAR: /* These could be empty; continue */ + case OP_CRMINSTAR: + case OP_CRQUERY: + case OP_CRMINQUERY: + break; + + default: /* Non-repeat => class must match */ + case OP_CRPLUS: /* These repeats aren't empty */ + case OP_CRMINPLUS: + return FALSE; + + case OP_CRRANGE: + case OP_CRMINRANGE: + if (GET2(ccode, 1) > 0) return FALSE; /* Minimum > 0 */ + break; + } + break; + + /* Opcodes that must match a character */ + + case OP_PROP: + case OP_NOTPROP: + case OP_EXTUNI: + case OP_NOT_DIGIT: + case OP_DIGIT: + case OP_NOT_WHITESPACE: + case OP_WHITESPACE: + case OP_NOT_WORDCHAR: + case OP_WORDCHAR: + case OP_ANY: + case OP_ANYBYTE: + case OP_CHAR: + case OP_CHARNC: + case OP_NOT: + case OP_PLUS: + case OP_MINPLUS: + case OP_EXACT: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTEXACT: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEEXACT: + return FALSE; + + /* End of branch */ + + case OP_KET: + case OP_KETRMAX: + case OP_KETRMIN: + case OP_ALT: + return TRUE; + + /* In UTF-8 mode, STAR, MINSTAR, QUERY, MINQUERY, UPTO, and MINUPTO may be + followed by a multibyte character */ + +#ifdef SUPPORT_UTF8 + case OP_STAR: + case OP_MINSTAR: + case OP_QUERY: + case OP_MINQUERY: + case OP_UPTO: + case OP_MINUPTO: + if (utf8) while ((code[2] & 0xc0) == 0x80) code++; + break; +#endif + } + } + +return TRUE; +} + + + +/************************************************* +* Scan compiled regex for non-emptiness * +*************************************************/ + +/* This function is called to check for left recursive calls. We want to check +the current branch of the current pattern to see if it could match the empty +string. If it could, we must look outwards for branches at other levels, +stopping when we pass beyond the bracket which is the subject of the recursion. + +Arguments: + code points to start of the recursion + endcode points to where to stop (current RECURSE item) + bcptr points to the chain of current (unclosed) branch starts + utf8 TRUE if in UTF-8 mode + +Returns: TRUE if what is matched could be empty +*/ + +static BOOL +could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr, + BOOL utf8) +{ +while (bcptr != NULL && bcptr->current >= code) + { + if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE; + bcptr = bcptr->outer; + } +return TRUE; +} + + + +/************************************************* +* Check for POSIX class syntax * +*************************************************/ + +/* This function is called when the sequence "[:" or "[." or "[=" is +encountered in a character class. It checks whether this is followed by an +optional ^ and then a sequence of letters, terminated by a matching ":]" or +".]" or "=]". + +Argument: + ptr pointer to the initial [ + endptr where to return the end pointer + cd pointer to compile data + +Returns: TRUE or FALSE +*/ + +static BOOL +check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd) +{ +int terminator; /* Don't combine these lines; the Solaris cc */ +terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ +if (*(++ptr) == '^') ptr++; +while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; +if (*ptr == terminator && ptr[1] == ']') + { + *endptr = ptr; + return TRUE; + } +return FALSE; +} + + + + +/************************************************* +* Check POSIX class name * +*************************************************/ + +/* This function is called to check the name given in a POSIX-style class entry +such as [:alnum:]. + +Arguments: + ptr points to the first letter + len the length of the name + +Returns: a value representing the name, or -1 if unknown +*/ + +static int +check_posix_name(const uschar *ptr, int len) +{ +register int yield = 0; +while (posix_name_lengths[yield] != 0) + { + if (len == posix_name_lengths[yield] && + strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield; + yield++; + } +return -1; +} + + +/************************************************* +* Adjust OP_RECURSE items in repeated group * +*************************************************/ + +/* OP_RECURSE items contain an offset from the start of the regex to the group +that is referenced. This means that groups can be replicated for fixed +repetition simply by copying (because the recursion is allowed to refer to +earlier groups that are outside the current group). However, when a group is +optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before +it, after it has been compiled. This means that any OP_RECURSE items within it +that refer to the group itself or any contained groups have to have their +offsets adjusted. That is the job of this function. Before it is called, the +partially compiled regex must be temporarily terminated with OP_END. + +Arguments: + group points to the start of the group + adjust the amount by which the group is to be moved + utf8 TRUE in UTF-8 mode + cd contains pointers to tables etc. + +Returns: nothing +*/ + +static void +adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd) +{ +uschar *ptr = group; +while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL) + { + int offset = GET(ptr, 1); + if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); + ptr += 1 + LINK_SIZE; + } +} + + + +/************************************************* +* Insert an automatic callout point * +*************************************************/ + +/* This function is called when the PCRE_AUTO_CALLOUT option is set, to insert +callout points before each pattern item. + +Arguments: + code current code pointer + ptr current pattern pointer + cd pointers to tables etc + +Returns: new code pointer +*/ + +static uschar * +auto_callout(uschar *code, const uschar *ptr, compile_data *cd) +{ +*code++ = OP_CALLOUT; +*code++ = 255; +PUT(code, 0, ptr - cd->start_pattern); /* Pattern offset */ +PUT(code, LINK_SIZE, 0); /* Default length */ +return code + 2*LINK_SIZE; +} + + + +/************************************************* +* Complete a callout item * +*************************************************/ + +/* A callout item contains the length of the next item in the pattern, which +we can't fill in till after we have reached the relevant point. This is used +for both automatic and manual callouts. + +Arguments: + previous_callout points to previous callout item + ptr current pattern pointer + cd pointers to tables etc + +Returns: nothing +*/ + +static void +complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd) +{ +int length = ptr - cd->start_pattern - GET(previous_callout, 2); +PUT(previous_callout, 2 + LINK_SIZE, length); +} + + + +#ifdef SUPPORT_UCP +/************************************************* +* Get othercase range * +*************************************************/ + +/* This function is passed the start and end of a class range, in UTF-8 mode +with UCP support. It searches up the characters, looking for internal ranges of +characters in the "other" case. Each call returns the next one, updating the +start address. + +Arguments: + cptr points to starting character value; updated + d end value + ocptr where to put start of othercase range + odptr where to put end of othercase range + +Yield: TRUE when range returned; FALSE when no more +*/ + +static BOOL +get_othercase_range(int *cptr, int d, int *ocptr, int *odptr) +{ +int c, chartype, othercase, next; + +for (c = *cptr; c <= d; c++) + { + if (_pcre_ucp_findchar(c, &chartype, &othercase) == ucp_L && othercase != 0) + break; + } + +if (c > d) return FALSE; + +*ocptr = othercase; +next = othercase + 1; + +for (++c; c <= d; c++) + { + if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_L || + othercase != next) + break; + next++; + } + +*odptr = next - 1; +*cptr = c; + +return TRUE; +} +#endif /* SUPPORT_UCP */ + + +/************************************************* +* Compile one branch * +*************************************************/ + +/* Scan the pattern, compiling it into the code vector. If the options are +changed during the branch, the pointer is used to change the external options +bits. + +Arguments: + optionsptr pointer to the option bits + brackets points to number of extracting brackets used + codeptr points to the pointer to the current code point + ptrptr points to the current pattern pointer + errorcodeptr points to error code variable + firstbyteptr set to initial literal character, or < 0 (REQ_UNSET, REQ_NONE) + reqbyteptr set to the last literal character required, else < 0 + bcptr points to current branch chain + cd contains pointers to tables etc. + +Returns: TRUE on success + FALSE, with *errorcodeptr set non-zero on error +*/ + +static BOOL +compile_branch(int *optionsptr, int *brackets, uschar **codeptr, + const uschar **ptrptr, int *errorcodeptr, int *firstbyteptr, + int *reqbyteptr, branch_chain *bcptr, compile_data *cd) +{ +int repeat_type, op_type; +int repeat_min = 0, repeat_max = 0; /* To please picky compilers */ +int bravalue = 0; +int greedy_default, greedy_non_default; +int firstbyte, reqbyte; +int zeroreqbyte, zerofirstbyte; +int req_caseopt, reqvary, tempreqvary; +int condcount = 0; +int options = *optionsptr; +int after_manual_callout = 0; +register int c; +register uschar *code = *codeptr; +uschar *tempcode; +BOOL inescq = FALSE; +BOOL groupsetfirstbyte = FALSE; +const uschar *ptr = *ptrptr; +const uschar *tempptr; +uschar *previous = NULL; +uschar *previous_callout = NULL; +uschar classbits[32]; + +#ifdef SUPPORT_UTF8 +BOOL class_utf8; +BOOL utf8 = (options & PCRE_UTF8) != 0; +uschar *class_utf8data; +uschar utf8_char[6]; +#else +BOOL utf8 = FALSE; +#endif + +/* Set up the default and non-default settings for greediness */ + +greedy_default = ((options & PCRE_UNGREEDY) != 0); +greedy_non_default = greedy_default ^ 1; + +/* Initialize no first byte, no required byte. REQ_UNSET means "no char +matching encountered yet". It gets changed to REQ_NONE if we hit something that +matches a non-fixed char first char; reqbyte just remains unset if we never +find one. + +When we hit a repeat whose minimum is zero, we may have to adjust these values +to take the zero repeat into account. This is implemented by setting them to +zerofirstbyte and zeroreqbyte when such a repeat is encountered. The individual +item types that can be repeated set these backoff variables appropriately. */ + +firstbyte = reqbyte = zerofirstbyte = zeroreqbyte = REQ_UNSET; + +/* The variable req_caseopt contains either the REQ_CASELESS value or zero, +according to the current setting of the caseless flag. REQ_CASELESS is a bit +value > 255. It is added into the firstbyte or reqbyte variables to record the +case status of the value. This is used only for ASCII characters. */ + +req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; + +/* Switch on next character until the end of the branch */ + +for (;; ptr++) + { + BOOL negate_class; + BOOL possessive_quantifier; + BOOL is_quantifier; + int class_charcount; + int class_lastchar; + int newoptions; + int recno; + int skipbytes; + int subreqbyte; + int subfirstbyte; + int mclength; + uschar mcbuffer[8]; + + /* Next byte in the pattern */ + + c = *ptr; + + /* If in \Q...\E, check for the end; if not, we have a literal */ + + if (inescq && c != 0) + { + if (c == '\\' && ptr[1] == 'E') + { + inescq = FALSE; + ptr++; + continue; + } + else + { + if (previous_callout != NULL) + { + complete_callout(previous_callout, ptr, cd); + previous_callout = NULL; + } + if ((options & PCRE_AUTO_CALLOUT) != 0) + { + previous_callout = code; + code = auto_callout(code, ptr, cd); + } + goto NORMAL_CHAR; + } + } + + /* Fill in length of a previous callout, except when the next thing is + a quantifier. */ + + is_quantifier = c == '*' || c == '+' || c == '?' || + (c == '{' && is_counted_repeat(ptr+1)); + + if (!is_quantifier && previous_callout != NULL && + after_manual_callout-- <= 0) + { + complete_callout(previous_callout, ptr, cd); + previous_callout = NULL; + } + + /* In extended mode, skip white space and comments */ + + if ((options & PCRE_EXTENDED) != 0) + { + if ((cd->ctypes[c] & ctype_space) != 0) continue; + if (c == '#') + { + /* The space before the ; is to avoid a warning on a silly compiler + on the Macintosh. */ + while ((c = *(++ptr)) != 0 && c != NEWLINE) ; + if (c != 0) continue; /* Else fall through to handle end of string */ + } + } + + /* No auto callout for quantifiers. */ + + if ((options & PCRE_AUTO_CALLOUT) != 0 && !is_quantifier) + { + previous_callout = code; + code = auto_callout(code, ptr, cd); + } + + switch(c) + { + /* The branch terminates at end of string, |, or ). */ + + case 0: + case '|': + case ')': + *firstbyteptr = firstbyte; + *reqbyteptr = reqbyte; + *codeptr = code; + *ptrptr = ptr; + return TRUE; + + /* Handle single-character metacharacters. In multiline mode, ^ disables + the setting of any following char as a first character. */ + + case '^': + if ((options & PCRE_MULTILINE) != 0) + { + if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + } + previous = NULL; + *code++ = OP_CIRC; + break; + + case '$': + previous = NULL; + *code++ = OP_DOLL; + break; + + /* There can never be a first char if '.' is first, whatever happens about + repeats. The value of reqbyte doesn't change either. */ + + case '.': + if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + zerofirstbyte = firstbyte; + zeroreqbyte = reqbyte; + previous = code; + *code++ = OP_ANY; + break; + + /* Character classes. If the included characters are all < 255 in value, we + build a 32-byte bitmap of the permitted characters, except in the special + case where there is only one such character. For negated classes, we build + the map as usual, then invert it at the end. However, we use a different + opcode so that data characters > 255 can be handled correctly. + + If the class contains characters outside the 0-255 range, a different + opcode is compiled. It may optionally have a bit map for characters < 256, + but those above are are explicitly listed afterwards. A flag byte tells + whether the bitmap is present, and whether this is a negated class or not. + */ + + case '[': + previous = code; + + /* PCRE supports POSIX class stuff inside a class. Perl gives an error if + they are encountered at the top level, so we'll do that too. */ + + if ((ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && + check_posix_syntax(ptr, &tempptr, cd)) + { + *errorcodeptr = (ptr[1] == ':')? ERR13 : ERR31; + goto FAILED; + } + + /* If the first character is '^', set the negation flag and skip it. */ + + if ((c = *(++ptr)) == '^') + { + negate_class = TRUE; + c = *(++ptr); + } + else + { + negate_class = FALSE; + } + + /* Keep a count of chars with values < 256 so that we can optimize the case + of just a single character (as long as it's < 256). For higher valued UTF-8 + characters, we don't yet do any optimization. */ + + class_charcount = 0; + class_lastchar = -1; + +#ifdef SUPPORT_UTF8 + class_utf8 = FALSE; /* No chars >= 256 */ + class_utf8data = code + LINK_SIZE + 34; /* For UTF-8 items */ +#endif + + /* Initialize the 32-char bit map to all zeros. We have to build the + map in a temporary bit of store, in case the class contains only 1 + character (< 256), because in that case the compiled code doesn't use the + bit map. */ + + memset(classbits, 0, 32 * sizeof(uschar)); + + /* Process characters until ] is reached. By writing this as a "do" it + means that an initial ] is taken as a data character. The first pass + through the regex checked the overall syntax, so we don't need to be very + strict here. At the start of the loop, c contains the first byte of the + character. */ + + do + { +#ifdef SUPPORT_UTF8 + if (utf8 && c > 127) + { /* Braces are required because the */ + GETCHARLEN(c, ptr, ptr); /* macro generates multiple statements */ + } +#endif + + /* Inside \Q...\E everything is literal except \E */ + + if (inescq) + { + if (c == '\\' && ptr[1] == 'E') + { + inescq = FALSE; + ptr++; + continue; + } + else goto LONE_SINGLE_CHARACTER; + } + + /* Handle POSIX class names. Perl allows a negation extension of the + form [:^name:]. A square bracket that doesn't match the syntax is + treated as a literal. We also recognize the POSIX constructions + [.ch.] and [=ch=] ("collating elements") and fault them, as Perl + 5.6 and 5.8 do. */ + + if (c == '[' && + (ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && + check_posix_syntax(ptr, &tempptr, cd)) + { + BOOL local_negate = FALSE; + int posix_class, i; + register const uschar *cbits = cd->cbits; + + if (ptr[1] != ':') + { + *errorcodeptr = ERR31; + goto FAILED; + } + + ptr += 2; + if (*ptr == '^') + { + local_negate = TRUE; + ptr++; + } + + posix_class = check_posix_name(ptr, tempptr - ptr); + if (posix_class < 0) + { + *errorcodeptr = ERR30; + goto FAILED; + } + + /* If matching is caseless, upper and lower are converted to + alpha. This relies on the fact that the class table starts with + alpha, lower, upper as the first 3 entries. */ + + if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) + posix_class = 0; + + /* Or into the map we are building up to 3 of the static class + tables, or their negations. The [:blank:] class sets up the same + chars as the [:space:] class (all white space). We remove the vertical + white space chars afterwards. */ + + posix_class *= 3; + for (i = 0; i < 3; i++) + { + BOOL blankclass = strncmp((char *)ptr, "blank", 5) == 0; + int taboffset = posix_class_maps[posix_class + i]; + if (taboffset < 0) break; + if (local_negate) + { + if (i == 0) + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+taboffset]; + else + for (c = 0; c < 32; c++) classbits[c] &= ~cbits[c+taboffset]; + if (blankclass) classbits[1] |= 0x3c; + } + else + { + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+taboffset]; + if (blankclass) classbits[1] &= ~0x3c; + } + } + + ptr = tempptr + 1; + class_charcount = 10; /* Set > 1; assumes more than 1 per class */ + continue; /* End of POSIX syntax handling */ + } + + /* Backslash may introduce a single character, or it may introduce one + of the specials, which just set a flag. Escaped items are checked for + validity in the pre-compiling pass. The sequence \b is a special case. + Inside a class (and only there) it is treated as backspace. Elsewhere + it marks a word boundary. Other escapes have preset maps ready to + or into the one we are building. We assume they have more than one + character in them, so set class_charcount bigger than one. */ + + if (c == '\\') + { + c = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE); + + if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */ + else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */ + else if (-c == ESC_Q) /* Handle start of quoted string */ + { + if (ptr[1] == '\\' && ptr[2] == 'E') + { + ptr += 2; /* avoid empty string */ + } + else inescq = TRUE; + continue; + } + + if (c < 0) + { + register const uschar *cbits = cd->cbits; + class_charcount += 2; /* Greater than 1 is what matters */ + switch (-c) + { + case ESC_d: + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit]; + continue; + + case ESC_D: + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; + continue; + + case ESC_w: + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_word]; + continue; + + case ESC_W: + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; + continue; + + case ESC_s: + for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space]; + classbits[1] &= ~0x08; /* Perl 5.004 onwards omits VT from \s */ + continue; + + case ESC_S: + for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; + classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ + continue; + +#ifdef SUPPORT_UCP + case ESC_p: + case ESC_P: + { + BOOL negated; + int property = get_ucp(&ptr, &negated, errorcodeptr); + if (property < 0) goto FAILED; + class_utf8 = TRUE; + *class_utf8data++ = ((-c == ESC_p) != negated)? + XCL_PROP : XCL_NOTPROP; + *class_utf8data++ = property; + class_charcount -= 2; /* Not a < 256 character */ + } + continue; +#endif + + /* Unrecognized escapes are faulted if PCRE is running in its + strict mode. By default, for compatibility with Perl, they are + treated as literals. */ + + default: + if ((options & PCRE_EXTRA) != 0) + { + *errorcodeptr = ERR7; + goto FAILED; + } + c = *ptr; /* The final character */ + class_charcount -= 2; /* Undo the default count from above */ + } + } + + /* Fall through if we have a single character (c >= 0). This may be + > 256 in UTF-8 mode. */ + + } /* End of backslash handling */ + + /* A single character may be followed by '-' to form a range. However, + Perl does not permit ']' to be the end of the range. A '-' character + here is treated as a literal. */ + + if (ptr[1] == '-' && ptr[2] != ']') + { + int d; + ptr += 2; + +#ifdef SUPPORT_UTF8 + if (utf8) + { /* Braces are required because the */ + GETCHARLEN(d, ptr, ptr); /* macro generates multiple statements */ + } + else +#endif + d = *ptr; /* Not UTF-8 mode */ + + /* The second part of a range can be a single-character escape, but + not any of the other escapes. Perl 5.6 treats a hyphen as a literal + in such circumstances. */ + + if (d == '\\') + { + const uschar *oldptr = ptr; + d = check_escape(&ptr, errorcodeptr, *brackets, options, TRUE); + + /* \b is backslash; \X is literal X; any other special means the '-' + was literal */ + + if (d < 0) + { + if (d == -ESC_b) d = '\b'; + else if (d == -ESC_X) d = 'X'; else + { + ptr = oldptr - 2; + goto LONE_SINGLE_CHARACTER; /* A few lines below */ + } + } + } + + /* The check that the two values are in the correct order happens in + the pre-pass. Optimize one-character ranges */ + + if (d == c) goto LONE_SINGLE_CHARACTER; /* A few lines below */ + + /* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless + matching, we have to use an XCLASS with extra data items. Caseless + matching for characters > 127 is available only if UCP support is + available. */ + +#ifdef SUPPORT_UTF8 + if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127))) + { + class_utf8 = TRUE; + + /* With UCP support, we can find the other case equivalents of + the relevant characters. There may be several ranges. Optimize how + they fit with the basic range. */ + +#ifdef SUPPORT_UCP + if ((options & PCRE_CASELESS) != 0) + { + int occ, ocd; + int cc = c; + int origd = d; + while (get_othercase_range(&cc, origd, &occ, &ocd)) + { + if (occ >= c && ocd <= d) continue; /* Skip embedded ranges */ + + if (occ < c && ocd >= c - 1) /* Extend the basic range */ + { /* if there is overlap, */ + c = occ; /* noting that if occ < c */ + continue; /* we can't have ocd > d */ + } /* because a subrange is */ + if (ocd > d && occ <= d + 1) /* always shorter than */ + { /* the basic range. */ + d = ocd; + continue; + } + + if (occ == ocd) + { + *class_utf8data++ = XCL_SINGLE; + } + else + { + *class_utf8data++ = XCL_RANGE; + class_utf8data += _pcre_ord2utf8(occ, class_utf8data); + } + class_utf8data += _pcre_ord2utf8(ocd, class_utf8data); + } + } +#endif /* SUPPORT_UCP */ + + /* Now record the original range, possibly modified for UCP caseless + overlapping ranges. */ + + *class_utf8data++ = XCL_RANGE; + class_utf8data += _pcre_ord2utf8(c, class_utf8data); + class_utf8data += _pcre_ord2utf8(d, class_utf8data); + + /* With UCP support, we are done. Without UCP support, there is no + caseless matching for UTF-8 characters > 127; we can use the bit map + for the smaller ones. */ + +#ifdef SUPPORT_UCP + continue; /* With next character in the class */ +#else + if ((options & PCRE_CASELESS) == 0 || c > 127) continue; + + /* Adjust upper limit and fall through to set up the map */ + + d = 127; + +#endif /* SUPPORT_UCP */ + } +#endif /* SUPPORT_UTF8 */ + + /* We use the bit map for all cases when not in UTF-8 mode; else + ranges that lie entirely within 0-127 when there is UCP support; else + for partial ranges without UCP support. */ + + for (; c <= d; c++) + { + classbits[c/8] |= (1 << (c&7)); + if ((options & PCRE_CASELESS) != 0) + { + int uc = cd->fcc[c]; /* flip case */ + classbits[uc/8] |= (1 << (uc&7)); + } + class_charcount++; /* in case a one-char range */ + class_lastchar = c; + } + + continue; /* Go get the next char in the class */ + } + + /* Handle a lone single character - we can get here for a normal + non-escape char, or after \ that introduces a single character or for an + apparent range that isn't. */ + + LONE_SINGLE_CHARACTER: + + /* Handle a character that cannot go in the bit map */ + +#ifdef SUPPORT_UTF8 + if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127))) + { + class_utf8 = TRUE; + *class_utf8data++ = XCL_SINGLE; + class_utf8data += _pcre_ord2utf8(c, class_utf8data); + +#ifdef SUPPORT_UCP + if ((options & PCRE_CASELESS) != 0) + { + int chartype; + int othercase; + if (_pcre_ucp_findchar(c, &chartype, &othercase) >= 0 && + othercase > 0) + { + *class_utf8data++ = XCL_SINGLE; + class_utf8data += _pcre_ord2utf8(othercase, class_utf8data); + } + } +#endif /* SUPPORT_UCP */ + + } + else +#endif /* SUPPORT_UTF8 */ + + /* Handle a single-byte character */ + { + classbits[c/8] |= (1 << (c&7)); + if ((options & PCRE_CASELESS) != 0) + { + c = cd->fcc[c]; /* flip case */ + classbits[c/8] |= (1 << (c&7)); + } + class_charcount++; + class_lastchar = c; + } + } + + /* Loop until ']' reached; the check for end of string happens inside the + loop. This "while" is the end of the "do" above. */ + + while ((c = *(++ptr)) != ']' || inescq); + + /* If class_charcount is 1, we saw precisely one character whose value is + less than 256. In non-UTF-8 mode we can always optimize. In UTF-8 mode, we + can optimize the negative case only if there were no characters >= 128 + because OP_NOT and the related opcodes like OP_NOTSTAR operate on + single-bytes only. This is an historical hangover. Maybe one day we can + tidy these opcodes to handle multi-byte characters. + + The optimization throws away the bit map. We turn the item into a + 1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note + that OP_NOT does not support multibyte characters. In the positive case, it + can cause firstbyte to be set. Otherwise, there can be no first char if + this item is first, whatever repeat count may follow. In the case of + reqbyte, save the previous value for reinstating. */ + +#ifdef SUPPORT_UTF8 + if (class_charcount == 1 && + (!utf8 || + (!class_utf8 && (!negate_class || class_lastchar < 128)))) + +#else + if (class_charcount == 1) +#endif + { + zeroreqbyte = reqbyte; + + /* The OP_NOT opcode works on one-byte characters only. */ + + if (negate_class) + { + if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + zerofirstbyte = firstbyte; + *code++ = OP_NOT; + *code++ = class_lastchar; + break; + } + + /* For a single, positive character, get the value into mcbuffer, and + then we can handle this with the normal one-character code. */ + +#ifdef SUPPORT_UTF8 + if (utf8 && class_lastchar > 127) + mclength = _pcre_ord2utf8(class_lastchar, mcbuffer); + else +#endif + { + mcbuffer[0] = class_lastchar; + mclength = 1; + } + goto ONE_CHAR; + } /* End of 1-char optimization */ + + /* The general case - not the one-char optimization. If this is the first + thing in the branch, there can be no first char setting, whatever the + repeat count. Any reqbyte setting must remain unchanged after any kind of + repeat. */ + + if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; + zerofirstbyte = firstbyte; + zeroreqbyte = reqbyte; + + /* If there are characters with values > 255, we have to compile an + extended class, with its own opcode. If there are no characters < 256, + we can omit the bitmap. */ + +#ifdef SUPPORT_UTF8 + if (class_utf8) + { + *class_utf8data++ = XCL_END; /* Marks the end of extra data */ + *code++ = OP_XCLASS; + code += LINK_SIZE; + *code = negate_class? XCL_NOT : 0; + + /* If the map is required, install it, and move on to the end of + the extra data */ + + if (class_charcount > 0) + { + *code++ |= XCL_MAP; + memcpy(code, classbits, 32); + code = class_utf8data; + } + + /* If the map is not required, slide down the extra data. */ + + else + { + int len = class_utf8data - (code + 33); + memmove(code + 1, code + 33, len); + code += len + 1; + } + + /* Now fill in the complete length of the item */ + + PUT(previous, 1, code - previous); + break; /* End of class handling */ + } +#endif + + /* If there are no characters > 255, negate the 32-byte map if necessary, + and copy it into the code vector. If this is the first thing in the branch, + there can be no first char setting, whatever the repeat count. Any reqbyte + setting must remain unchanged after any kind of repeat. */ + + if (negate_class) + { + *code++ = OP_NCLASS; + for (c = 0; c < 32; c++) code[c] = ~classbits[c]; + } + else + { + *code++ = OP_CLASS; + memcpy(code, classbits, 32); + } + code += 32; + break; + + /* Various kinds of repeat; '{' is not necessarily a quantifier, but this + has been tested above. */ + + case '{': + if (!is_quantifier) goto NORMAL_CHAR; + ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorcodeptr); + if (*errorcodeptr != 0) goto FAILED; + goto REPEAT; + + case '*': + repeat_min = 0; + repeat_max = -1; + goto REPEAT; + + case '+': + repeat_min = 1; + repeat_max = -1; + goto REPEAT; + + case '?': + repeat_min = 0; + repeat_max = 1; + + REPEAT: + if (previous == NULL) + { + *errorcodeptr = ERR9; + goto FAILED; + } + + if (repeat_min == 0) + { + firstbyte = zerofirstbyte; /* Adjust for zero repeat */ + reqbyte = zeroreqbyte; /* Ditto */ + } + + /* Remember whether this is a variable length repeat */ + + reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; + + op_type = 0; /* Default single-char op codes */ + possessive_quantifier = FALSE; /* Default not possessive quantifier */ + + /* Save start of previous item, in case we have to move it up to make space + for an inserted OP_ONCE for the additional '+' extension. */ + + tempcode = previous; + + /* If the next character is '+', we have a possessive quantifier. This + implies greediness, whatever the setting of the PCRE_UNGREEDY option. + If the next character is '?' this is a minimizing repeat, by default, + but if PCRE_UNGREEDY is set, it works the other way round. We change the + repeat type to the non-default. */ + + if (ptr[1] == '+') + { + repeat_type = 0; /* Force greedy */ + possessive_quantifier = TRUE; + ptr++; + } + else if (ptr[1] == '?') + { + repeat_type = greedy_non_default; + ptr++; + } + else repeat_type = greedy_default; + + /* If previous was a recursion, we need to wrap it inside brackets so that + it can be replicated if necessary. */ + + if (*previous == OP_RECURSE) + { + memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); + code += 1 + LINK_SIZE; + *previous = OP_BRA; + PUT(previous, 1, code - previous); + *code = OP_KET; + PUT(code, 1, code - previous); + code += 1 + LINK_SIZE; + } + + /* If previous was a character match, abolish the item and generate a + repeat item instead. If a char item has a minumum of more than one, ensure + that it is set in reqbyte - it might not be if a sequence such as x{3} is + the first thing in a branch because the x will have gone into firstbyte + instead. */ + + if (*previous == OP_CHAR || *previous == OP_CHARNC) + { + /* Deal with UTF-8 characters that take up more than one byte. It's + easier to write this out separately than try to macrify it. Use c to + hold the length of the character in bytes, plus 0x80 to flag that it's a + length rather than a small character. */ + +#ifdef SUPPORT_UTF8 + if (utf8 && (code[-1] & 0x80) != 0) + { + uschar *lastchar = code - 1; + while((*lastchar & 0xc0) == 0x80) lastchar--; + c = code - lastchar; /* Length of UTF-8 character */ + memcpy(utf8_char, lastchar, c); /* Save the char */ + c |= 0x80; /* Flag c as a length */ + } + else +#endif + + /* Handle the case of a single byte - either with no UTF8 support, or + with UTF-8 disabled, or for a UTF-8 character < 128. */ + + { + c = code[-1]; + if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt; + } + + goto OUTPUT_SINGLE_REPEAT; /* Code shared with single character types */ + } + + /* If previous was a single negated character ([^a] or similar), we use + one of the special opcodes, replacing it. The code is shared with single- + character repeats by setting opt_type to add a suitable offset into + repeat_type. OP_NOT is currently used only for single-byte chars. */ + + else if (*previous == OP_NOT) + { + op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ + c = previous[1]; + goto OUTPUT_SINGLE_REPEAT; + } + + /* If previous was a character type match (\d or similar), abolish it and + create a suitable repeat item. The code is shared with single-character + repeats by setting op_type to add a suitable offset into repeat_type. Note + the the Unicode property types will be present only when SUPPORT_UCP is + defined, but we don't wrap the little bits of code here because it just + makes it horribly messy. */ + + else if (*previous < OP_EODN) + { + uschar *oldcode; + int prop_type; + op_type = OP_TYPESTAR - OP_STAR; /* Use type opcodes */ + c = *previous; + + OUTPUT_SINGLE_REPEAT: + prop_type = (*previous == OP_PROP || *previous == OP_NOTPROP)? + previous[1] : -1; + + oldcode = code; + code = previous; /* Usually overwrite previous item */ + + /* If the maximum is zero then the minimum must also be zero; Perl allows + this case, so we do too - by simply omitting the item altogether. */ + + if (repeat_max == 0) goto END_REPEAT; + + /* All real repeats make it impossible to handle partial matching (maybe + one day we will be able to remove this restriction). */ + + if (repeat_max != 1) cd->nopartial = TRUE; + + /* Combine the op_type with the repeat_type */ + + repeat_type += op_type; + + /* A minimum of zero is handled either as the special case * or ?, or as + an UPTO, with the maximum given. */ + + if (repeat_min == 0) + { + if (repeat_max == -1) *code++ = OP_STAR + repeat_type; + else if (repeat_max == 1) *code++ = OP_QUERY + repeat_type; + else + { + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + + /* A repeat minimum of 1 is optimized into some special cases. If the + maximum is unlimited, we use OP_PLUS. Otherwise, the original item it + left in place and, if the maximum is greater than 1, we use OP_UPTO with + one less than the maximum. */ + + else if (repeat_min == 1) + { + if (repeat_max == -1) + *code++ = OP_PLUS + repeat_type; + else + { + code = oldcode; /* leave previous item in place */ + if (repeat_max == 1) goto END_REPEAT; + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max - 1); + } + } + + /* The case {n,n} is just an EXACT, while the general case {n,m} is + handled as an EXACT followed by an UPTO. */ + + else + { + *code++ = OP_EXACT + op_type; /* NB EXACT doesn't have repeat_type */ + PUT2INC(code, 0, repeat_min); + + /* If the maximum is unlimited, insert an OP_STAR. Before doing so, + we have to insert the character for the previous code. For a repeated + Unicode property match, there is an extra byte that defines the + required property. In UTF-8 mode, long characters have their length in + c, with the 0x80 bit as a flag. */ + + if (repeat_max < 0) + { +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { + memcpy(code, utf8_char, c & 7); + code += c & 7; + } + else +#endif + { + *code++ = c; + if (prop_type >= 0) *code++ = prop_type; + } + *code++ = OP_STAR + repeat_type; + } + + /* Else insert an UPTO if the max is greater than the min, again + preceded by the character, for the previously inserted code. */ + + else if (repeat_max != repeat_min) + { +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { + memcpy(code, utf8_char, c & 7); + code += c & 7; + } + else +#endif + *code++ = c; + if (prop_type >= 0) *code++ = prop_type; + repeat_max -= repeat_min; + *code++ = OP_UPTO + repeat_type; + PUT2INC(code, 0, repeat_max); + } + } + + /* The character or character type itself comes last in all cases. */ + +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { + memcpy(code, utf8_char, c & 7); + code += c & 7; + } + else +#endif + *code++ = c; + + /* For a repeated Unicode property match, there is an extra byte that + defines the required property. */ + +#ifdef SUPPORT_UCP + if (prop_type >= 0) *code++ = prop_type; +#endif + } + + /* If previous was a character class or a back reference, we put the repeat + stuff after it, but just skip the item if the repeat was {0,0}. */ + + else if (*previous == OP_CLASS || + *previous == OP_NCLASS || +#ifdef SUPPORT_UTF8 + *previous == OP_XCLASS || +#endif + *previous == OP_REF) + { + if (repeat_max == 0) + { + code = previous; + goto END_REPEAT; + } + + /* All real repeats make it impossible to handle partial matching (maybe + one day we will be able to remove this restriction). */ + + if (repeat_max != 1) cd->nopartial = TRUE; + + if (repeat_min == 0 && repeat_max == -1) + *code++ = OP_CRSTAR + repeat_type; + else if (repeat_min == 1 && repeat_max == -1) + *code++ = OP_CRPLUS + repeat_type; + else if (repeat_min == 0 && repeat_max == 1) + *code++ = OP_CRQUERY + repeat_type; + else + { + *code++ = OP_CRRANGE + repeat_type; + PUT2INC(code, 0, repeat_min); + if (repeat_max == -1) repeat_max = 0; /* 2-byte encoding for max */ + PUT2INC(code, 0, repeat_max); + } + } + + /* If previous was a bracket group, we may have to replicate it in certain + cases. */ + + else if (*previous >= OP_BRA || *previous == OP_ONCE || + *previous == OP_COND) + { + register int i; + int ketoffset = 0; + int len = code - previous; + uschar *bralink = NULL; + + /* If the maximum repeat count is unlimited, find the end of the bracket + by scanning through from the start, and compute the offset back to it + from the current code pointer. There may be an OP_OPT setting following + the final KET, so we can't find the end just by going back from the code + pointer. */ + + if (repeat_max == -1) + { + register uschar *ket = previous; + do ket += GET(ket, 1); while (*ket != OP_KET); + ketoffset = code - ket; + } + + /* The case of a zero minimum is special because of the need to stick + OP_BRAZERO in front of it, and because the group appears once in the + data, whereas in other cases it appears the minimum number of times. For + this reason, it is simplest to treat this case separately, as otherwise + the code gets far too messy. There are several special subcases when the + minimum is zero. */ + + if (repeat_min == 0) + { + /* If the maximum is also zero, we just omit the group from the output + altogether. */ + + if (repeat_max == 0) + { + code = previous; + goto END_REPEAT; + } + + /* If the maximum is 1 or unlimited, we just have to stick in the + BRAZERO and do no more at this point. However, we do need to adjust + any OP_RECURSE calls inside the group that refer to the group itself or + any internal group, because the offset is from the start of the whole + regex. Temporarily terminate the pattern while doing this. */ + + if (repeat_max <= 1) + { + *code = OP_END; + adjust_recurse(previous, 1, utf8, cd); + memmove(previous+1, previous, len); + code++; + *previous++ = OP_BRAZERO + repeat_type; + } + + /* If the maximum is greater than 1 and limited, we have to replicate + in a nested fashion, sticking OP_BRAZERO before each set of brackets. + The first one has to be handled carefully because it's the original + copy, which has to be moved up. The remainder can be handled by code + that is common with the non-zero minimum case below. We have to + adjust the value or repeat_max, since one less copy is required. Once + again, we may have to adjust any OP_RECURSE calls inside the group. */ + + else + { + int offset; + *code = OP_END; + adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd); + memmove(previous + 2 + LINK_SIZE, previous, len); + code += 2 + LINK_SIZE; + *previous++ = OP_BRAZERO + repeat_type; + *previous++ = OP_BRA; + + /* We chain together the bracket offset fields that have to be + filled in later when the ends of the brackets are reached. */ + + offset = (bralink == NULL)? 0 : previous - bralink; + bralink = previous; + PUTINC(previous, 0, offset); + } + + repeat_max--; + } + + /* If the minimum is greater than zero, replicate the group as many + times as necessary, and adjust the maximum to the number of subsequent + copies that we need. If we set a first char from the group, and didn't + set a required char, copy the latter from the former. */ + + else + { + if (repeat_min > 1) + { + if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte; + for (i = 1; i < repeat_min; i++) + { + memcpy(code, previous, len); + code += len; + } + } + if (repeat_max > 0) repeat_max -= repeat_min; + } + + /* This code is common to both the zero and non-zero minimum cases. If + the maximum is limited, it replicates the group in a nested fashion, + remembering the bracket starts on a stack. In the case of a zero minimum, + the first one was set up above. In all cases the repeat_max now specifies + the number of additional copies needed. */ + + if (repeat_max >= 0) + { + for (i = repeat_max - 1; i >= 0; i--) + { + *code++ = OP_BRAZERO + repeat_type; + + /* All but the final copy start a new nesting, maintaining the + chain of brackets outstanding. */ + + if (i != 0) + { + int offset; + *code++ = OP_BRA; + offset = (bralink == NULL)? 0 : code - bralink; + bralink = code; + PUTINC(code, 0, offset); + } + + memcpy(code, previous, len); + code += len; + } + + /* Now chain through the pending brackets, and fill in their length + fields (which are holding the chain links pro tem). */ + + while (bralink != NULL) + { + int oldlinkoffset; + int offset = code - bralink + 1; + uschar *bra = code - offset; + oldlinkoffset = GET(bra, 1); + bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset; + *code++ = OP_KET; + PUTINC(code, 0, offset); + PUT(bra, 1, offset); + } + } + + /* If the maximum is unlimited, set a repeater in the final copy. We + can't just offset backwards from the current code point, because we + don't know if there's been an options resetting after the ket. The + correct offset was computed above. */ + + else code[-ketoffset] = OP_KETRMAX + repeat_type; + } + + /* Else there's some kind of shambles */ + + else + { + *errorcodeptr = ERR11; + goto FAILED; + } + + /* If the character following a repeat is '+', we wrap the entire repeated + item inside OP_ONCE brackets. This is just syntactic sugar, taken from + Sun's Java package. The repeated item starts at tempcode, not at previous, + which might be the first part of a string whose (former) last char we + repeated. However, we don't support '+' after a greediness '?'. */ + + if (possessive_quantifier) + { + int len = code - tempcode; + memmove(tempcode + 1+LINK_SIZE, tempcode, len); + code += 1 + LINK_SIZE; + len += 1 + LINK_SIZE; + tempcode[0] = OP_ONCE; + *code++ = OP_KET; + PUTINC(code, 0, len); + PUT(tempcode, 1, len); + } + + /* In all case we no longer have a previous item. We also set the + "follows varying string" flag for subsequently encountered reqbytes if + it isn't already set and we have just passed a varying length item. */ + + END_REPEAT: + previous = NULL; + cd->req_varyopt |= reqvary; + break; + + + /* Start of nested bracket sub-expression, or comment or lookahead or + lookbehind or option setting or condition. First deal with special things + that can come after a bracket; all are introduced by ?, and the appearance + of any of them means that this is not a referencing group. They were + checked for validity in the first pass over the string, so we don't have to + check for syntax errors here. */ + + case '(': + newoptions = options; + skipbytes = 0; + + if (*(++ptr) == '?') + { + int set, unset; + int *optset; + + switch (*(++ptr)) + { + case '#': /* Comment; skip to ket */ + ptr++; + while (*ptr != ')') ptr++; + continue; + + case ':': /* Non-extracting bracket */ + bravalue = OP_BRA; + ptr++; + break; + + case '(': + bravalue = OP_COND; /* Conditional group */ + + /* Condition to test for recursion */ + + if (ptr[1] == 'R') + { + code[1+LINK_SIZE] = OP_CREF; + PUT2(code, 2+LINK_SIZE, CREF_RECURSE); + skipbytes = 3; + ptr += 3; + } + + /* Condition to test for a numbered subpattern match. We know that + if a digit follows ( then there will just be digits until ) because + the syntax was checked in the first pass. */ + + else if ((digitab[ptr[1]] && ctype_digit) != 0) + { + int condref; /* Don't amalgamate; some compilers */ + condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */ + while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; + if (condref == 0) + { + *errorcodeptr = ERR35; + goto FAILED; + } + ptr++; + code[1+LINK_SIZE] = OP_CREF; + PUT2(code, 2+LINK_SIZE, condref); + skipbytes = 3; + } + /* For conditions that are assertions, we just fall through, having + set bravalue above. */ + break; + + case '=': /* Positive lookahead */ + bravalue = OP_ASSERT; + ptr++; + break; + + case '!': /* Negative lookahead */ + bravalue = OP_ASSERT_NOT; + ptr++; + break; + + case '<': /* Lookbehinds */ + switch (*(++ptr)) + { + case '=': /* Positive lookbehind */ + bravalue = OP_ASSERTBACK; + ptr++; + break; + + case '!': /* Negative lookbehind */ + bravalue = OP_ASSERTBACK_NOT; + ptr++; + break; + } + break; + + case '>': /* One-time brackets */ + bravalue = OP_ONCE; + ptr++; + break; + + case 'C': /* Callout - may be followed by digits; */ + previous_callout = code; /* Save for later completion */ + after_manual_callout = 1; /* Skip one item before completing */ + *code++ = OP_CALLOUT; /* Already checked that the terminating */ + { /* closing parenthesis is present. */ + int n = 0; + while ((digitab[*(++ptr)] & ctype_digit) != 0) + n = n * 10 + *ptr - '0'; + if (n > 255) + { + *errorcodeptr = ERR38; + goto FAILED; + } + *code++ = n; + PUT(code, 0, ptr - cd->start_pattern + 1); /* Pattern offset */ + PUT(code, LINK_SIZE, 0); /* Default length */ + code += 2 * LINK_SIZE; + } + previous = NULL; + continue; + + case 'P': /* Named subpattern handling */ + if (*(++ptr) == '<') /* Definition */ + { + int i, namelen; + uschar *slot = cd->name_table; + const uschar *name; /* Don't amalgamate; some compilers */ + name = ++ptr; /* grumble at autoincrement in declaration */ + + while (*ptr++ != '>'); + namelen = ptr - name - 1; + + for (i = 0; i < cd->names_found; i++) + { + int crc = memcmp(name, slot+2, namelen); + if (crc == 0) + { + if (slot[2+namelen] == 0) + { + *errorcodeptr = ERR43; + goto FAILED; + } + crc = -1; /* Current name is substring */ + } + if (crc < 0) + { + memmove(slot + cd->name_entry_size, slot, + (cd->names_found - i) * cd->name_entry_size); + break; + } + slot += cd->name_entry_size; + } + + PUT2(slot, 0, *brackets + 1); + memcpy(slot + 2, name, namelen); + slot[2+namelen] = 0; + cd->names_found++; + goto NUMBERED_GROUP; + } + + if (*ptr == '=' || *ptr == '>') /* Reference or recursion */ + { + int i, namelen; + int type = *ptr++; + const uschar *name = ptr; + uschar *slot = cd->name_table; + + while (*ptr != ')') ptr++; + namelen = ptr - name; + + for (i = 0; i < cd->names_found; i++) + { + if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break; + slot += cd->name_entry_size; + } + if (i >= cd->names_found) + { + *errorcodeptr = ERR15; + goto FAILED; + } + + recno = GET2(slot, 0); + + if (type == '>') goto HANDLE_RECURSION; /* A few lines below */ + + /* Back reference */ + + previous = code; + *code++ = OP_REF; + PUT2INC(code, 0, recno); + cd->backref_map |= (recno < 32)? (1 << recno) : 1; + if (recno > cd->top_backref) cd->top_backref = recno; + continue; + } + + /* Should never happen */ + break; + + case 'R': /* Pattern recursion */ + ptr++; /* Same as (?0) */ + /* Fall through */ + + /* Recursion or "subroutine" call */ + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + { + const uschar *called; + recno = 0; + while((digitab[*ptr] & ctype_digit) != 0) + recno = recno * 10 + *ptr++ - '0'; + + /* Come here from code above that handles a named recursion */ + + HANDLE_RECURSION: + + previous = code; + + /* Find the bracket that is being referenced. Temporarily end the + regex in case it doesn't exist. */ + + *code = OP_END; + called = (recno == 0)? + cd->start_code : find_bracket(cd->start_code, utf8, recno); + + if (called == NULL) + { + *errorcodeptr = ERR15; + goto FAILED; + } + + /* If the subpattern is still open, this is a recursive call. We + check to see if this is a left recursion that could loop for ever, + and diagnose that case. */ + + if (GET(called, 1) == 0 && could_be_empty(called, code, bcptr, utf8)) + { + *errorcodeptr = ERR40; + goto FAILED; + } + + /* Insert the recursion/subroutine item */ + + *code = OP_RECURSE; + PUT(code, 1, called - cd->start_code); + code += 1 + LINK_SIZE; + } + continue; + + /* Character after (? not specially recognized */ + + default: /* Option setting */ + set = unset = 0; + optset = &set; + + while (*ptr != ')' && *ptr != ':') + { + switch (*ptr++) + { + case '-': optset = &unset; break; + + case 'i': *optset |= PCRE_CASELESS; break; + case 'm': *optset |= PCRE_MULTILINE; break; + case 's': *optset |= PCRE_DOTALL; break; + case 'x': *optset |= PCRE_EXTENDED; break; + case 'U': *optset |= PCRE_UNGREEDY; break; + case 'X': *optset |= PCRE_EXTRA; break; + } + } + + /* Set up the changed option bits, but don't change anything yet. */ + + newoptions = (options | set) & (~unset); + + /* If the options ended with ')' this is not the start of a nested + group with option changes, so the options change at this level. Compile + code to change the ims options if this setting actually changes any of + them. We also pass the new setting back so that it can be put at the + start of any following branches, and when this group ends (if we are in + a group), a resetting item can be compiled. + + Note that if this item is right at the start of the pattern, the + options will have been abstracted and made global, so there will be no + change to compile. */ + + if (*ptr == ')') + { + if ((options & PCRE_IMS) != (newoptions & PCRE_IMS)) + { + *code++ = OP_OPT; + *code++ = newoptions & PCRE_IMS; + } + + /* Change options at this level, and pass them back for use + in subsequent branches. Reset the greedy defaults and the case + value for firstbyte and reqbyte. */ + + *optionsptr = options = newoptions; + greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); + greedy_non_default = greedy_default ^ 1; + req_caseopt = ((options & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; + + previous = NULL; /* This item can't be repeated */ + continue; /* It is complete */ + } + + /* If the options ended with ':' we are heading into a nested group + with possible change of options. Such groups are non-capturing and are + not assertions of any kind. All we need to do is skip over the ':'; + the newoptions value is handled below. */ + + bravalue = OP_BRA; + ptr++; + } + } + + /* If PCRE_NO_AUTO_CAPTURE is set, all unadorned brackets become + non-capturing and behave like (?:...) brackets */ + + else if ((options & PCRE_NO_AUTO_CAPTURE) != 0) + { + bravalue = OP_BRA; + } + + /* Else we have a referencing group; adjust the opcode. If the bracket + number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and + arrange for the true number to follow later, in an OP_BRANUMBER item. */ + + else + { + NUMBERED_GROUP: + if (++(*brackets) > EXTRACT_BASIC_MAX) + { + bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; + code[1+LINK_SIZE] = OP_BRANUMBER; + PUT2(code, 2+LINK_SIZE, *brackets); + skipbytes = 3; + } + else bravalue = OP_BRA + *brackets; + } + + /* Process nested bracketed re. Assertions may not be repeated, but other + kinds can be. We copy code into a non-register variable in order to be able + to pass its address because some compilers complain otherwise. Pass in a + new setting for the ims options if they have changed. */ + + previous = (bravalue >= OP_ONCE)? code : NULL; + *code = bravalue; + tempcode = code; + tempreqvary = cd->req_varyopt; /* Save value before bracket */ + + if (!compile_regex( + newoptions, /* The complete new option state */ + options & PCRE_IMS, /* The previous ims option state */ + brackets, /* Extracting bracket count */ + &tempcode, /* Where to put code (updated) */ + &ptr, /* Input pointer (updated) */ + errorcodeptr, /* Where to put an error message */ + (bravalue == OP_ASSERTBACK || + bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ + skipbytes, /* Skip over OP_COND/OP_BRANUMBER */ + &subfirstbyte, /* For possible first char */ + &subreqbyte, /* For possible last char */ + bcptr, /* Current branch chain */ + cd)) /* Tables block */ + goto FAILED; + + /* At the end of compiling, code is still pointing to the start of the + group, while tempcode has been updated to point past the end of the group + and any option resetting that may follow it. The pattern pointer (ptr) + is on the bracket. */ + + /* If this is a conditional bracket, check that there are no more than + two branches in the group. */ + + else if (bravalue == OP_COND) + { + uschar *tc = code; + condcount = 0; + + do { + condcount++; + tc += GET(tc,1); + } + while (*tc != OP_KET); + + if (condcount > 2) + { + *errorcodeptr = ERR27; + goto FAILED; + } + + /* If there is just one branch, we must not make use of its firstbyte or + reqbyte, because this is equivalent to an empty second branch. */ + + if (condcount == 1) subfirstbyte = subreqbyte = REQ_NONE; + } + + /* Handle updating of the required and first characters. Update for normal + brackets of all kinds, and conditions with two branches (see code above). + If the bracket is followed by a quantifier with zero repeat, we have to + back off. Hence the definition of zeroreqbyte and zerofirstbyte outside the + main loop so that they can be accessed for the back off. */ + + zeroreqbyte = reqbyte; + zerofirstbyte = firstbyte; + groupsetfirstbyte = FALSE; + + if (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_COND) + { + /* If we have not yet set a firstbyte in this branch, take it from the + subpattern, remembering that it was set here so that a repeat of more + than one can replicate it as reqbyte if necessary. If the subpattern has + no firstbyte, set "none" for the whole branch. In both cases, a zero + repeat forces firstbyte to "none". */ + + if (firstbyte == REQ_UNSET) + { + if (subfirstbyte >= 0) + { + firstbyte = subfirstbyte; + groupsetfirstbyte = TRUE; + } + else firstbyte = REQ_NONE; + zerofirstbyte = REQ_NONE; + } + + /* If firstbyte was previously set, convert the subpattern's firstbyte + into reqbyte if there wasn't one, using the vary flag that was in + existence beforehand. */ + + else if (subfirstbyte >= 0 && subreqbyte < 0) + subreqbyte = subfirstbyte | tempreqvary; + + /* If the subpattern set a required byte (or set a first byte that isn't + really the first byte - see above), set it. */ + + if (subreqbyte >= 0) reqbyte = subreqbyte; + } + + /* For a forward assertion, we take the reqbyte, if set. This can be + helpful if the pattern that follows the assertion doesn't set a different + char. For example, it's useful for /(?=abcde).+/. We can't set firstbyte + for an assertion, however because it leads to incorrect effect for patterns + such as /(?=a)a.+/ when the "real" "a" would then become a reqbyte instead + of a firstbyte. This is overcome by a scan at the end if there's no + firstbyte, looking for an asserted first char. */ + + else if (bravalue == OP_ASSERT && subreqbyte >= 0) reqbyte = subreqbyte; + + /* Now update the main code pointer to the end of the group. */ + + code = tempcode; + + /* Error if hit end of pattern */ + + if (*ptr != ')') + { + *errorcodeptr = ERR14; + goto FAILED; + } + break; + + /* Check \ for being a real metacharacter; if not, fall through and handle + it as a data character at the start of a string. Escape items are checked + for validity in the pre-compiling pass. */ + + case '\\': + tempptr = ptr; + c = check_escape(&ptr, errorcodeptr, *brackets, options, FALSE); + + /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values + are arranged to be the negation of the corresponding OP_values. For the + back references, the values are ESC_REF plus the reference number. Only + back references and those types that consume a character may be repeated. + We can test for values between ESC_b and ESC_Z for the latter; this may + have to change if any new ones are ever created. */ + + if (c < 0) + { + if (-c == ESC_Q) /* Handle start of quoted string */ + { + if (ptr[1] == '\\' && ptr[2] == 'E') ptr += 2; /* avoid empty string */ + else inescq = TRUE; + continue; + } + + /* For metasequences that actually match a character, we disable the + setting of a first character if it hasn't already been set. */ + + if (firstbyte == REQ_UNSET && -c > ESC_b && -c < ESC_Z) + firstbyte = REQ_NONE; + + /* Set values to reset to if this is followed by a zero repeat. */ + + zerofirstbyte = firstbyte; + zeroreqbyte = reqbyte; + + /* Back references are handled specially */ + + if (-c >= ESC_REF) + { + int number = -c - ESC_REF; + previous = code; + *code++ = OP_REF; + PUT2INC(code, 0, number); + } + + /* So are Unicode property matches, if supported. We know that get_ucp + won't fail because it was tested in the pre-pass. */ + +#ifdef SUPPORT_UCP + else if (-c == ESC_P || -c == ESC_p) + { + BOOL negated; + int value = get_ucp(&ptr, &negated, errorcodeptr); + previous = code; + *code++ = ((-c == ESC_p) != negated)? OP_PROP : OP_NOTPROP; + *code++ = value; + } +#endif + + /* For the rest, we can obtain the OP value by negating the escape + value */ + + else + { + previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; + *code++ = -c; + } + continue; + } + + /* We have a data character whose value is in c. In UTF-8 mode it may have + a value > 127. We set its representation in the length/buffer, and then + handle it as a data character. */ + +#ifdef SUPPORT_UTF8 + if (utf8 && c > 127) + mclength = _pcre_ord2utf8(c, mcbuffer); + else +#endif + + { + mcbuffer[0] = c; + mclength = 1; + } + + goto ONE_CHAR; + + /* Handle a literal character. It is guaranteed not to be whitespace or # + when the extended flag is set. If we are in UTF-8 mode, it may be a + multi-byte literal character. */ + + default: + NORMAL_CHAR: + mclength = 1; + mcbuffer[0] = c; + +#ifdef SUPPORT_UTF8 + if (utf8 && (c & 0xc0) == 0xc0) + { + while ((ptr[1] & 0xc0) == 0x80) + mcbuffer[mclength++] = *(++ptr); + } +#endif + + /* At this point we have the character's bytes in mcbuffer, and the length + in mclength. When not in UTF-8 mode, the length is always 1. */ + + ONE_CHAR: + previous = code; + *code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR; + for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; + + /* Set the first and required bytes appropriately. If no previous first + byte, set it from this character, but revert to none on a zero repeat. + Otherwise, leave the firstbyte value alone, and don't change it on a zero + repeat. */ + + if (firstbyte == REQ_UNSET) + { + zerofirstbyte = REQ_NONE; + zeroreqbyte = reqbyte; + + /* If the character is more than one byte long, we can set firstbyte + only if it is not to be matched caselessly. */ + + if (mclength == 1 || req_caseopt == 0) + { + firstbyte = mcbuffer[0] | req_caseopt; + if (mclength != 1) reqbyte = code[-1] | cd->req_varyopt; + } + else firstbyte = reqbyte = REQ_NONE; + } + + /* firstbyte was previously set; we can set reqbyte only the length is + 1 or the matching is caseful. */ + + else + { + zerofirstbyte = firstbyte; + zeroreqbyte = reqbyte; + if (mclength == 1 || req_caseopt == 0) + reqbyte = code[-1] | req_caseopt | cd->req_varyopt; + } + + break; /* End of literal character handling */ + } + } /* end of big loop */ + +/* Control never reaches here by falling through, only by a goto for all the +error states. Pass back the position in the pattern so that it can be displayed +to the user for diagnosing the error. */ + +FAILED: +*ptrptr = ptr; +return FALSE; +} + + + + +/************************************************* +* Compile sequence of alternatives * +*************************************************/ + +/* On entry, ptr is pointing past the bracket character, but on return +it points to the closing bracket, or vertical bar, or end of string. +The code variable is pointing at the byte into which the BRA operator has been +stored. If the ims options are changed at the start (for a (?ims: group) or +during any branch, we need to insert an OP_OPT item at the start of every +following branch to ensure they get set correctly at run time, and also pass +the new options into every subsequent branch compile. + +Argument: + options option bits, including any changes for this subpattern + oldims previous settings of ims option bits + brackets -> int containing the number of extracting brackets used + codeptr -> the address of the current code pointer + ptrptr -> the address of the current pattern pointer + errorcodeptr -> pointer to error code variable + lookbehind TRUE if this is a lookbehind assertion + skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER) + firstbyteptr place to put the first required character, or a negative number + reqbyteptr place to put the last required character, or a negative number + bcptr pointer to the chain of currently open branches + cd points to the data block with tables pointers etc. + +Returns: TRUE on success +*/ + +static BOOL +compile_regex(int options, int oldims, int *brackets, uschar **codeptr, + const uschar **ptrptr, int *errorcodeptr, BOOL lookbehind, int skipbytes, + int *firstbyteptr, int *reqbyteptr, branch_chain *bcptr, compile_data *cd) +{ +const uschar *ptr = *ptrptr; +uschar *code = *codeptr; +uschar *last_branch = code; +uschar *start_bracket = code; +uschar *reverse_count = NULL; +int firstbyte, reqbyte; +int branchfirstbyte, branchreqbyte; +branch_chain bc; + +bc.outer = bcptr; +bc.current = code; + +firstbyte = reqbyte = REQ_UNSET; + +/* Offset is set zero to mark that this bracket is still open */ + +PUT(code, 1, 0); +code += 1 + LINK_SIZE + skipbytes; + +/* Loop for each alternative branch */ + +for (;;) + { + /* Handle a change of ims options at the start of the branch */ + + if ((options & PCRE_IMS) != oldims) + { + *code++ = OP_OPT; + *code++ = options & PCRE_IMS; + } + + /* Set up dummy OP_REVERSE if lookbehind assertion */ + + if (lookbehind) + { + *code++ = OP_REVERSE; + reverse_count = code; + PUTINC(code, 0, 0); + } + + /* Now compile the branch */ + + if (!compile_branch(&options, brackets, &code, &ptr, errorcodeptr, + &branchfirstbyte, &branchreqbyte, &bc, cd)) + { + *ptrptr = ptr; + return FALSE; + } + + /* If this is the first branch, the firstbyte and reqbyte values for the + branch become the values for the regex. */ + + if (*last_branch != OP_ALT) + { + firstbyte = branchfirstbyte; + reqbyte = branchreqbyte; + } + + /* If this is not the first branch, the first char and reqbyte have to + match the values from all the previous branches, except that if the previous + value for reqbyte didn't have REQ_VARY set, it can still match, and we set + REQ_VARY for the regex. */ + + else + { + /* If we previously had a firstbyte, but it doesn't match the new branch, + we have to abandon the firstbyte for the regex, but if there was previously + no reqbyte, it takes on the value of the old firstbyte. */ + + if (firstbyte >= 0 && firstbyte != branchfirstbyte) + { + if (reqbyte < 0) reqbyte = firstbyte; + firstbyte = REQ_NONE; + } + + /* If we (now or from before) have no firstbyte, a firstbyte from the + branch becomes a reqbyte if there isn't a branch reqbyte. */ + + if (firstbyte < 0 && branchfirstbyte >= 0 && branchreqbyte < 0) + branchreqbyte = branchfirstbyte; + + /* Now ensure that the reqbytes match */ + + if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) + reqbyte = REQ_NONE; + else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ + } + + /* If lookbehind, check that this branch matches a fixed-length string, + and put the length into the OP_REVERSE item. Temporarily mark the end of + the branch with OP_END. */ + + if (lookbehind) + { + int length; + *code = OP_END; + length = find_fixedlength(last_branch, options); + DPRINTF(("fixed length = %d\n", length)); + if (length < 0) + { + *errorcodeptr = (length == -2)? ERR36 : ERR25; + *ptrptr = ptr; + return FALSE; + } + PUT(reverse_count, 0, length); + } + + /* Reached end of expression, either ')' or end of pattern. Go back through + the alternative branches and reverse the chain of offsets, with the field in + the BRA item now becoming an offset to the first alternative. If there are + no alternatives, it points to the end of the group. The length in the + terminating ket is always the length of the whole bracketed item. If any of + the ims options were changed inside the group, compile a resetting op-code + following, except at the very end of the pattern. Return leaving the pointer + at the terminating char. */ + + if (*ptr != '|') + { + int length = code - last_branch; + do + { + int prev_length = GET(last_branch, 1); + PUT(last_branch, 1, length); + length = prev_length; + last_branch -= length; + } + while (length > 0); + + /* Fill in the ket */ + + *code = OP_KET; + PUT(code, 1, code - start_bracket); + code += 1 + LINK_SIZE; + + /* Resetting option if needed */ + + if ((options & PCRE_IMS) != oldims && *ptr == ')') + { + *code++ = OP_OPT; + *code++ = oldims; + } + + /* Set values to pass back */ + + *codeptr = code; + *ptrptr = ptr; + *firstbyteptr = firstbyte; + *reqbyteptr = reqbyte; + return TRUE; + } + + /* Another branch follows; insert an "or" node. Its length field points back + to the previous branch while the bracket remains open. At the end the chain + is reversed. It's done like this so that the start of the bracket has a + zero offset until it is closed, making it possible to detect recursion. */ + + *code = OP_ALT; + PUT(code, 1, code - last_branch); + bc.current = last_branch = code; + code += 1 + LINK_SIZE; + ptr++; + } +/* Control never reaches here */ +} + + + + +/************************************************* +* Check for anchored expression * +*************************************************/ + +/* Try to find out if this is an anchored regular expression. Consider each +alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket +all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then +it's anchored. However, if this is a multiline pattern, then only OP_SOD +counts, since OP_CIRC can match in the middle. + +We can also consider a regex to be anchored if OP_SOM starts all its branches. +This is the code for \G, which means "match at start of match position, taking +into account the match offset". + +A branch is also implicitly anchored if it starts with .* and DOTALL is set, +because that will try the rest of the pattern at all possible matching points, +so there is no point trying again.... er .... + +.... except when the .* appears inside capturing parentheses, and there is a +subsequent back reference to those parentheses. We haven't enough information +to catch that case precisely. + +At first, the best we could do was to detect when .* was in capturing brackets +and the highest back reference was greater than or equal to that level. +However, by keeping a bitmap of the first 31 back references, we can catch some +of the more common cases more precisely. + +Arguments: + code points to start of expression (the bracket) + options points to the options setting + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach + backref_map the back reference bitmap + +Returns: TRUE or FALSE +*/ + +static BOOL +is_anchored(register const uschar *code, int *options, unsigned int bracket_map, + unsigned int backref_map) +{ +do { + const uschar *scode = + first_significant_code(code + 1+LINK_SIZE, options, PCRE_MULTILINE, FALSE); + register int op = *scode; + + /* Capturing brackets */ + + if (op > OP_BRA) + { + int new_map; + op -= OP_BRA; + if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE); + new_map = bracket_map | ((op < 32)? (1 << op) : 1); + if (!is_anchored(scode, options, new_map, backref_map)) return FALSE; + } + + /* Other brackets */ + + else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) + { + if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE; + } + + /* .* is not anchored unless DOTALL is set and it isn't in brackets that + are or may be referenced. */ + + else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && + (*options & PCRE_DOTALL) != 0) + { + if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE; + } + + /* Check for explicit anchoring */ + + else if (op != OP_SOD && op != OP_SOM && + ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) + return FALSE; + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Check for starting with ^ or .* * +*************************************************/ + +/* This is called to find out if every branch starts with ^ or .* so that +"first char" processing can be done to speed things up in multiline +matching and for non-DOTALL patterns that start with .* (which must start at +the beginning or after \n). As in the case of is_anchored() (see above), we +have to take account of back references to capturing brackets that contain .* +because in that case we can't make the assumption. + +Arguments: + code points to start of expression (the bracket) + bracket_map a bitmap of which brackets we are inside while testing; this + handles up to substring 31; after that we just have to take + the less precise approach + backref_map the back reference bitmap + +Returns: TRUE or FALSE +*/ + +static BOOL +is_startline(const uschar *code, unsigned int bracket_map, + unsigned int backref_map) +{ +do { + const uschar *scode = first_significant_code(code + 1+LINK_SIZE, NULL, 0, + FALSE); + register int op = *scode; + + /* Capturing brackets */ + + if (op > OP_BRA) + { + int new_map; + op -= OP_BRA; + if (op > EXTRACT_BASIC_MAX) op = GET2(scode, 2+LINK_SIZE); + new_map = bracket_map | ((op < 32)? (1 << op) : 1); + if (!is_startline(scode, new_map, backref_map)) return FALSE; + } + + /* Other brackets */ + + else if (op == OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) + { if (!is_startline(scode, bracket_map, backref_map)) return FALSE; } + + /* .* means "start at start or after \n" if it isn't in brackets that + may be referenced. */ + + else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) + { + if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE; + } + + /* Check for explicit circumflex */ + + else if (op != OP_CIRC) return FALSE; + + /* Move on to the next alternative */ + + code += GET(code, 1); + } +while (*code == OP_ALT); /* Loop for each alternative */ +return TRUE; +} + + + +/************************************************* +* Check for asserted fixed first char * +*************************************************/ + +/* During compilation, the "first char" settings from forward assertions are +discarded, because they can cause conflicts with actual literals that follow. +However, if we end up without a first char setting for an unanchored pattern, +it is worth scanning the regex to see if there is an initial asserted first +char. If all branches start with the same asserted char, or with a bracket all +of whose alternatives start with the same asserted char (recurse ad lib), then +we return that char, otherwise -1. + +Arguments: + code points to start of expression (the bracket) + options pointer to the options (used to check casing changes) + inassert TRUE if in an assertion + +Returns: -1 or the fixed first char +*/ + +static int +find_firstassertedchar(const uschar *code, int *options, BOOL inassert) +{ +register int c = -1; +do { + int d; + const uschar *scode = + first_significant_code(code + 1+LINK_SIZE, options, PCRE_CASELESS, TRUE); + register int op = *scode; + + if (op >= OP_BRA) op = OP_BRA; + + switch(op) + { + default: + return -1; + + case OP_BRA: + case OP_ASSERT: + case OP_ONCE: + case OP_COND: + if ((d = find_firstassertedchar(scode, options, op == OP_ASSERT)) < 0) + return -1; + if (c < 0) c = d; else if (c != d) return -1; + break; + + case OP_EXACT: /* Fall through */ + scode += 2; + + case OP_CHAR: + case OP_CHARNC: + case OP_PLUS: + case OP_MINPLUS: + if (!inassert) return -1; + if (c < 0) + { + c = scode[1]; + if ((*options & PCRE_CASELESS) != 0) c |= REQ_CASELESS; + } + else if (c != scode[1]) return -1; + break; + } + + code += GET(code, 1); + } +while (*code == OP_ALT); +return c; +} + + + +/************************************************* +* Compile a Regular Expression * +*************************************************/ + +/* This function takes a string and returns a pointer to a block of store +holding a compiled version of the expression. The original API for this +function had no error code return variable; it is retained for backwards +compatibility. The new function is given a new name. + +Arguments: + pattern the regular expression + options various option bits + errorcodeptr pointer to error code variable (pcre_compile2() only) + can be NULL if you don't want a code value + errorptr pointer to pointer to error text + erroroffset ptr offset in pattern where error was detected + tables pointer to character tables or NULL + +Returns: pointer to compiled data block, or NULL on error, + with errorptr and erroroffset set +*/ + +EXPORT pcre * +pcre_compile(const char *pattern, int options, const char **errorptr, + int *erroroffset, const unsigned char *tables) +{ +return pcre_compile2(pattern, options, NULL, errorptr, erroroffset, tables); +} + + +EXPORT pcre * +pcre_compile2(const char *pattern, int options, int *errorcodeptr, + const char **errorptr, int *erroroffset, const unsigned char *tables) +{ +real_pcre *re; +int length = 1 + LINK_SIZE; /* For initial BRA plus length */ +int c, firstbyte, reqbyte; +int bracount = 0; +int branch_extra = 0; +int branch_newextra; +int item_count = -1; +int name_count = 0; +int max_name_size = 0; +int lastitemlength = 0; +int errorcode = 0; +#ifdef SUPPORT_UTF8 +BOOL utf8; +BOOL class_utf8; +#endif +BOOL inescq = FALSE; +BOOL capturing; +unsigned int brastackptr = 0; +size_t size; +uschar *code; +const uschar *codestart; +const uschar *ptr; +compile_data compile_block; +int brastack[BRASTACK_SIZE]; +uschar bralenstack[BRASTACK_SIZE]; + +/* We can't pass back an error message if errorptr is NULL; I guess the best we +can do is just return NULL, but we can set a code value if there is a code +pointer. */ + +if (errorptr == NULL) + { + if (errorcodeptr != NULL) *errorcodeptr = 99; + return NULL; + } + +*errorptr = NULL; +if (errorcodeptr != NULL) *errorcodeptr = ERR0; + +/* However, we can give a message for this error */ + +if (erroroffset == NULL) + { + errorcode = ERR16; + goto PCRE_EARLY_ERROR_RETURN; + } + +*erroroffset = 0; + +/* Can't support UTF8 unless PCRE has been compiled to include the code. */ + +#ifdef SUPPORT_UTF8 +utf8 = (options & PCRE_UTF8) != 0; +if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && + (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) + { + errorcode = ERR44; + goto PCRE_EARLY_ERROR_RETURN; + } +#else +if ((options & PCRE_UTF8) != 0) + { + errorcode = ERR32; + goto PCRE_EARLY_ERROR_RETURN; + } +#endif + +if ((options & ~PUBLIC_OPTIONS) != 0) + { + errorcode = ERR17; + goto PCRE_EARLY_ERROR_RETURN; + } + +/* Set up pointers to the individual character tables */ + +if (tables == NULL) tables = _pcre_default_tables; +compile_block.lcc = tables + lcc_offset; +compile_block.fcc = tables + fcc_offset; +compile_block.cbits = tables + cbits_offset; +compile_block.ctypes = tables + ctypes_offset; + +/* Maximum back reference and backref bitmap. This is updated for numeric +references during the first pass, but for named references during the actual +compile pass. The bitmap records up to 31 back references to help in deciding +whether (.*) can be treated as anchored or not. */ + +compile_block.top_backref = 0; +compile_block.backref_map = 0; + +/* Reflect pattern for debugging output */ + +DPRINTF(("------------------------------------------------------------------\n")); +DPRINTF(("%s\n", pattern)); + +/* The first thing to do is to make a pass over the pattern to compute the +amount of store required to hold the compiled code. This does not have to be +perfect as long as errors are overestimates. At the same time we can detect any +flag settings right at the start, and extract them. Make an attempt to correct +for any counted white space if an "extended" flag setting appears late in the +pattern. We can't be so clever for #-comments. */ + +ptr = (const uschar *)(pattern - 1); +while ((c = *(++ptr)) != 0) + { + int min, max; + int class_optcount; + int bracket_length; + int duplength; + + /* If we are inside a \Q...\E sequence, all chars are literal */ + + if (inescq) + { + if ((options & PCRE_AUTO_CALLOUT) != 0) length += 2 + 2*LINK_SIZE; + goto NORMAL_CHAR; + } + + /* Otherwise, first check for ignored whitespace and comments */ + + if ((options & PCRE_EXTENDED) != 0) + { + if ((compile_block.ctypes[c] & ctype_space) != 0) continue; + if (c == '#') + { + /* The space before the ; is to avoid a warning on a silly compiler + on the Macintosh. */ + while ((c = *(++ptr)) != 0 && c != NEWLINE) ; + if (c == 0) break; + continue; + } + } + + item_count++; /* Is zero for the first non-comment item */ + + /* Allow space for auto callout before every item except quantifiers. */ + + if ((options & PCRE_AUTO_CALLOUT) != 0 && + c != '*' && c != '+' && c != '?' && + (c != '{' || !is_counted_repeat(ptr + 1))) + length += 2 + 2*LINK_SIZE; + + switch(c) + { + /* A backslashed item may be an escaped data character or it may be a + character type. */ + + case '\\': + c = check_escape(&ptr, &errorcode, bracount, options, FALSE); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + + lastitemlength = 1; /* Default length of last item for repeats */ + + if (c >= 0) /* Data character */ + { + length += 2; /* For a one-byte character */ + +#ifdef SUPPORT_UTF8 + if (utf8 && c > 127) + { + int i; + for (i = 0; i < _pcre_utf8_table1_size; i++) + if (c <= _pcre_utf8_table1[i]) break; + length += i; + lastitemlength += i; + } +#endif + + continue; + } + + /* If \Q, enter "literal" mode */ + + if (-c == ESC_Q) + { + inescq = TRUE; + continue; + } + + /* \X is supported only if Unicode property support is compiled */ + +#ifndef SUPPORT_UCP + if (-c == ESC_X) + { + errorcode = ERR45; + goto PCRE_ERROR_RETURN; + } +#endif + + /* \P and \p are for Unicode properties, but only when the support has + been compiled. Each item needs 2 bytes. */ + + else if (-c == ESC_P || -c == ESC_p) + { +#ifdef SUPPORT_UCP + BOOL negated; + length += 2; + lastitemlength = 2; + if (get_ucp(&ptr, &negated, &errorcode) < 0) goto PCRE_ERROR_RETURN; + continue; +#else + errorcode = ERR45; + goto PCRE_ERROR_RETURN; +#endif + } + + /* Other escapes need one byte */ + + length++; + + /* A back reference needs an additional 2 bytes, plus either one or 5 + bytes for a repeat. We also need to keep the value of the highest + back reference. */ + + if (c <= -ESC_REF) + { + int refnum = -c - ESC_REF; + compile_block.backref_map |= (refnum < 32)? (1 << refnum) : 1; + if (refnum > compile_block.top_backref) + compile_block.top_backref = refnum; + length += 2; /* For single back reference */ + if (ptr[1] == '{' && is_counted_repeat(ptr+2)) + { + ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + if ((min == 0 && (max == 1 || max == -1)) || + (min == 1 && max == -1)) + length++; + else length += 5; + if (ptr[1] == '?') ptr++; + } + } + continue; + + case '^': /* Single-byte metacharacters */ + case '.': + case '$': + length++; + lastitemlength = 1; + continue; + + case '*': /* These repeats won't be after brackets; */ + case '+': /* those are handled separately */ + case '?': + length++; + goto POSESSIVE; /* A few lines below */ + + /* This covers the cases of braced repeats after a single char, metachar, + class, or back reference. */ + + case '{': + if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; + ptr = read_repeat_counts(ptr+1, &min, &max, &errorcode); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + + /* These special cases just insert one extra opcode */ + + if ((min == 0 && (max == 1 || max == -1)) || + (min == 1 && max == -1)) + length++; + + /* These cases might insert additional copies of a preceding character. */ + + else + { + if (min != 1) + { + length -= lastitemlength; /* Uncount the original char or metachar */ + if (min > 0) length += 3 + lastitemlength; + } + length += lastitemlength + ((max > 0)? 3 : 1); + } + + if (ptr[1] == '?') ptr++; /* Needs no extra length */ + + POSESSIVE: /* Test for possessive quantifier */ + if (ptr[1] == '+') + { + ptr++; + length += 2 + 2*LINK_SIZE; /* Allow for atomic brackets */ + } + continue; + + /* An alternation contains an offset to the next branch or ket. If any ims + options changed in the previous branch(es), and/or if we are in a + lookbehind assertion, extra space will be needed at the start of the + branch. This is handled by branch_extra. */ + + case '|': + length += 1 + LINK_SIZE + branch_extra; + continue; + + /* A character class uses 33 characters provided that all the character + values are less than 256. Otherwise, it uses a bit map for low valued + characters, and individual items for others. Don't worry about character + types that aren't allowed in classes - they'll get picked up during the + compile. A character class that contains only one single-byte character + uses 2 or 3 bytes, depending on whether it is negated or not. Notice this + where we can. (In UTF-8 mode we can do this only for chars < 128.) */ + + case '[': + if (*(++ptr) == '^') + { + class_optcount = 10; /* Greater than one */ + ptr++; + } + else class_optcount = 0; + +#ifdef SUPPORT_UTF8 + class_utf8 = FALSE; +#endif + + /* Written as a "do" so that an initial ']' is taken as data */ + + if (*ptr != 0) do + { + /* Inside \Q...\E everything is literal except \E */ + + if (inescq) + { + if (*ptr != '\\' || ptr[1] != 'E') goto GET_ONE_CHARACTER; + inescq = FALSE; + ptr += 1; + continue; + } + + /* Outside \Q...\E, check for escapes */ + + if (*ptr == '\\') + { + c = check_escape(&ptr, &errorcode, bracount, options, TRUE); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + + /* \b is backspace inside a class; \X is literal */ + + if (-c == ESC_b) c = '\b'; + else if (-c == ESC_X) c = 'X'; + + /* \Q enters quoting mode */ + + else if (-c == ESC_Q) + { + inescq = TRUE; + continue; + } + + /* Handle escapes that turn into characters */ + + if (c >= 0) goto NON_SPECIAL_CHARACTER; + + /* Escapes that are meta-things. The normal ones just affect the + bit map, but Unicode properties require an XCLASS extended item. */ + + else + { + class_optcount = 10; /* \d, \s etc; make sure > 1 */ +#ifdef SUPPORT_UTF8 + if (-c == ESC_p || -c == ESC_P) + { + if (!class_utf8) + { + class_utf8 = TRUE; + length += LINK_SIZE + 2; + } + length += 2; + } +#endif + } + } + + /* Check the syntax for POSIX stuff. The bits we actually handle are + checked during the real compile phase. */ + + else if (*ptr == '[' && check_posix_syntax(ptr, &ptr, &compile_block)) + { + ptr++; + class_optcount = 10; /* Make sure > 1 */ + } + + /* Anything else increments the possible optimization count. We have to + detect ranges here so that we can compute the number of extra ranges for + caseless wide characters when UCP support is available. If there are wide + characters, we are going to have to use an XCLASS, even for single + characters. */ + + else + { + int d; + + GET_ONE_CHARACTER: + +#ifdef SUPPORT_UTF8 + if (utf8) + { + int extra = 0; + GETCHARLEN(c, ptr, extra); + ptr += extra; + } + else c = *ptr; +#else + c = *ptr; +#endif + + /* Come here from handling \ above when it escapes to a char value */ + + NON_SPECIAL_CHARACTER: + class_optcount++; + + d = -1; + if (ptr[1] == '-') + { + uschar const *hyptr = ptr++; + if (ptr[1] == '\\') + { + ptr++; + d = check_escape(&ptr, &errorcode, bracount, options, TRUE); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + if (-d == ESC_b) d = '\b'; /* backspace */ + else if (-d == ESC_X) d = 'X'; /* literal X in a class */ + } + else if (ptr[1] != 0 && ptr[1] != ']') + { + ptr++; +#ifdef SUPPORT_UTF8 + if (utf8) + { + int extra = 0; + GETCHARLEN(d, ptr, extra); + ptr += extra; + } + else +#endif + d = *ptr; + } + if (d < 0) ptr = hyptr; /* go back to hyphen as data */ + } + + /* If d >= 0 we have a range. In UTF-8 mode, if the end is > 255, or > + 127 for caseless matching, we will need to use an XCLASS. */ + + if (d >= 0) + { + class_optcount = 10; /* Ensure > 1 */ + if (d < c) + { + errorcode = ERR8; + goto PCRE_ERROR_RETURN; + } + +#ifdef SUPPORT_UTF8 + if (utf8 && (d > 255 || ((options & PCRE_CASELESS) != 0 && d > 127))) + { + uschar buffer[6]; + if (!class_utf8) /* Allow for XCLASS overhead */ + { + class_utf8 = TRUE; + length += LINK_SIZE + 2; + } + +#ifdef SUPPORT_UCP + /* If we have UCP support, find out how many extra ranges are + needed to map the other case of characters within this range. We + have to mimic the range optimization here, because extending the + range upwards might push d over a boundary that makes is use + another byte in the UTF-8 representation. */ + + if ((options & PCRE_CASELESS) != 0) + { + int occ, ocd; + int cc = c; + int origd = d; + while (get_othercase_range(&cc, origd, &occ, &ocd)) + { + if (occ >= c && ocd <= d) continue; /* Skip embedded */ + + if (occ < c && ocd >= c - 1) /* Extend the basic range */ + { /* if there is overlap, */ + c = occ; /* noting that if occ < c */ + continue; /* we can't have ocd > d */ + } /* because a subrange is */ + if (ocd > d && occ <= d + 1) /* always shorter than */ + { /* the basic range. */ + d = ocd; + continue; + } + + /* An extra item is needed */ + + length += 1 + _pcre_ord2utf8(occ, buffer) + + ((occ == ocd)? 0 : _pcre_ord2utf8(ocd, buffer)); + } + } +#endif /* SUPPORT_UCP */ + + /* The length of the (possibly extended) range */ + + length += 1 + _pcre_ord2utf8(c, buffer) + _pcre_ord2utf8(d, buffer); + } +#endif /* SUPPORT_UTF8 */ + + } + + /* We have a single character. There is nothing to be done unless we + are in UTF-8 mode. If the char is > 255, or 127 when caseless, we must + allow for an XCL_SINGLE item, doubled for caselessness if there is UCP + support. */ + + else + { +#ifdef SUPPORT_UTF8 + if (utf8 && (c > 255 || ((options & PCRE_CASELESS) != 0 && c > 127))) + { + uschar buffer[6]; + class_optcount = 10; /* Ensure > 1 */ + if (!class_utf8) /* Allow for XCLASS overhead */ + { + class_utf8 = TRUE; + length += LINK_SIZE + 2; + } +#ifdef SUPPORT_UCP + length += (((options & PCRE_CASELESS) != 0)? 2 : 1) * + (1 + _pcre_ord2utf8(c, buffer)); +#else /* SUPPORT_UCP */ + length += 1 + _pcre_ord2utf8(c, buffer); +#endif /* SUPPORT_UCP */ + } +#endif /* SUPPORT_UTF8 */ + } + } + } + while (*(++ptr) != 0 && (inescq || *ptr != ']')); /* Concludes "do" above */ + + if (*ptr == 0) /* Missing terminating ']' */ + { + errorcode = ERR6; + goto PCRE_ERROR_RETURN; + } + + /* We can optimize when there was only one optimizable character. Repeats + for positive and negated single one-byte chars are handled by the general + code. Here, we handle repeats for the class opcodes. */ + + if (class_optcount == 1) length += 3; else + { + length += 33; + + /* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier, + we also need extra for wrapping the whole thing in a sub-pattern. */ + + if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) + { + ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + if ((min == 0 && (max == 1 || max == -1)) || + (min == 1 && max == -1)) + length++; + else length += 5; + if (ptr[1] == '+') + { + ptr++; + length += 2 + 2*LINK_SIZE; + } + else if (ptr[1] == '?') ptr++; + } + } + continue; + + /* Brackets may be genuine groups or special things */ + + case '(': + branch_newextra = 0; + bracket_length = 1 + LINK_SIZE; + capturing = FALSE; + + /* Handle special forms of bracket, which all start (? */ + + if (ptr[1] == '?') + { + int set, unset; + int *optset; + + switch (c = ptr[2]) + { + /* Skip over comments entirely */ + case '#': + ptr += 3; + while (*ptr != 0 && *ptr != ')') ptr++; + if (*ptr == 0) + { + errorcode = ERR18; + goto PCRE_ERROR_RETURN; + } + continue; + + /* Non-referencing groups and lookaheads just move the pointer on, and + then behave like a non-special bracket, except that they don't increment + the count of extracting brackets. Ditto for the "once only" bracket, + which is in Perl from version 5.005. */ + + case ':': + case '=': + case '!': + case '>': + ptr += 2; + break; + + /* (?R) specifies a recursive call to the regex, which is an extension + to provide the facility which can be obtained by (?p{perl-code}) in + Perl 5.6. In Perl 5.8 this has become (??{perl-code}). + + From PCRE 4.00, items such as (?3) specify subroutine-like "calls" to + the appropriate numbered brackets. This includes both recursive and + non-recursive calls. (?R) is now synonymous with (?0). */ + + case 'R': + ptr++; + + case '0': case '1': case '2': case '3': case '4': + case '5': case '6': case '7': case '8': case '9': + ptr += 2; + if (c != 'R') + while ((digitab[*(++ptr)] & ctype_digit) != 0); + if (*ptr != ')') + { + errorcode = ERR29; + goto PCRE_ERROR_RETURN; + } + length += 1 + LINK_SIZE; + + /* If this item is quantified, it will get wrapped inside brackets so + as to use the code for quantified brackets. We jump down and use the + code that handles this for real brackets. */ + + if (ptr[1] == '+' || ptr[1] == '*' || ptr[1] == '?' || ptr[1] == '{') + { + length += 2 + 2 * LINK_SIZE; /* to make bracketed */ + duplength = 5 + 3 * LINK_SIZE; + goto HANDLE_QUANTIFIED_BRACKETS; + } + continue; + + /* (?C) is an extension which provides "callout" - to provide a bit of + the functionality of the Perl (?{...}) feature. An optional number may + follow (default is zero). */ + + case 'C': + ptr += 2; + while ((digitab[*(++ptr)] & ctype_digit) != 0); + if (*ptr != ')') + { + errorcode = ERR39; + goto PCRE_ERROR_RETURN; + } + length += 2 + 2*LINK_SIZE; + continue; + + /* Named subpatterns are an extension copied from Python */ + + case 'P': + ptr += 3; + + /* Handle the definition of a named subpattern */ + + if (*ptr == '<') + { + const uschar *p; /* Don't amalgamate; some compilers */ + p = ++ptr; /* grumble at autoincrement in declaration */ + while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++; + if (*ptr != '>') + { + errorcode = ERR42; + goto PCRE_ERROR_RETURN; + } + name_count++; + if (ptr - p > max_name_size) max_name_size = (ptr - p); + capturing = TRUE; /* Named parentheses are always capturing */ + break; + } + + /* Handle back references and recursive calls to named subpatterns */ + + if (*ptr == '=' || *ptr == '>') + { + while ((compile_block.ctypes[*(++ptr)] & ctype_word) != 0); + if (*ptr != ')') + { + errorcode = ERR42; + goto PCRE_ERROR_RETURN; + } + break; + } + + /* Unknown character after (?P */ + + errorcode = ERR41; + goto PCRE_ERROR_RETURN; + + /* Lookbehinds are in Perl from version 5.005 */ + + case '<': + ptr += 3; + if (*ptr == '=' || *ptr == '!') + { + branch_newextra = 1 + LINK_SIZE; + length += 1 + LINK_SIZE; /* For the first branch */ + break; + } + errorcode = ERR24; + goto PCRE_ERROR_RETURN; + + /* Conditionals are in Perl from version 5.005. The bracket must either + be followed by a number (for bracket reference) or by an assertion + group, or (a PCRE extension) by 'R' for a recursion test. */ + + case '(': + if (ptr[3] == 'R' && ptr[4] == ')') + { + ptr += 4; + length += 3; + } + else if ((digitab[ptr[3]] & ctype_digit) != 0) + { + ptr += 4; + length += 3; + while ((digitab[*ptr] & ctype_digit) != 0) ptr++; + if (*ptr != ')') + { + errorcode = ERR26; + goto PCRE_ERROR_RETURN; + } + } + else /* An assertion must follow */ + { + ptr++; /* Can treat like ':' as far as spacing is concerned */ + if (ptr[2] != '?' || + (ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') ) + { + ptr += 2; /* To get right offset in message */ + errorcode = ERR28; + goto PCRE_ERROR_RETURN; + } + } + break; + + /* Else loop checking valid options until ) is met. Anything else is an + error. If we are without any brackets, i.e. at top level, the settings + act as if specified in the options, so massage the options immediately. + This is for backward compatibility with Perl 5.004. */ + + default: + set = unset = 0; + optset = &set; + ptr += 2; + + for (;; ptr++) + { + c = *ptr; + switch (c) + { + case 'i': + *optset |= PCRE_CASELESS; + continue; + + case 'm': + *optset |= PCRE_MULTILINE; + continue; + + case 's': + *optset |= PCRE_DOTALL; + continue; + + case 'x': + *optset |= PCRE_EXTENDED; + continue; + + case 'X': + *optset |= PCRE_EXTRA; + continue; + + case 'U': + *optset |= PCRE_UNGREEDY; + continue; + + case '-': + optset = &unset; + continue; + + /* A termination by ')' indicates an options-setting-only item; if + this is at the very start of the pattern (indicated by item_count + being zero), we use it to set the global options. This is helpful + when analyzing the pattern for first characters, etc. Otherwise + nothing is done here and it is handled during the compiling + process. + + We allow for more than one options setting at the start. If such + settings do not change the existing options, nothing is compiled. + However, we must leave space just in case something is compiled. + This can happen for pathological sequences such as (?i)(?-i) + because the global options will end up with -i set. The space is + small and not significant. (Before I did this there was a reported + bug with (?i)(?-i) in a machine-generated pattern.) + + [Historical note: Up to Perl 5.8, options settings at top level + were always global settings, wherever they appeared in the pattern. + That is, they were equivalent to an external setting. From 5.8 + onwards, they apply only to what follows (which is what you might + expect).] */ + + case ')': + if (item_count == 0) + { + options = (options | set) & (~unset); + set = unset = 0; /* To save length */ + item_count--; /* To allow for several */ + length += 2; + } + + /* Fall through */ + + /* A termination by ':' indicates the start of a nested group with + the given options set. This is again handled at compile time, but + we must allow for compiled space if any of the ims options are + set. We also have to allow for resetting space at the end of + the group, which is why 4 is added to the length and not just 2. + If there are several changes of options within the same group, this + will lead to an over-estimate on the length, but this shouldn't + matter very much. We also have to allow for resetting options at + the start of any alternations, which we do by setting + branch_newextra to 2. Finally, we record whether the case-dependent + flag ever changes within the regex. This is used by the "required + character" code. */ + + case ':': + if (((set|unset) & PCRE_IMS) != 0) + { + length += 4; + branch_newextra = 2; + if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED; + } + goto END_OPTIONS; + + /* Unrecognized option character */ + + default: + errorcode = ERR12; + goto PCRE_ERROR_RETURN; + } + } + + /* If we hit a closing bracket, that's it - this is a freestanding + option-setting. We need to ensure that branch_extra is updated if + necessary. The only values branch_newextra can have here are 0 or 2. + If the value is 2, then branch_extra must either be 2 or 5, depending + on whether this is a lookbehind group or not. */ + + END_OPTIONS: + if (c == ')') + { + if (branch_newextra == 2 && + (branch_extra == 0 || branch_extra == 1+LINK_SIZE)) + branch_extra += branch_newextra; + continue; + } + + /* If options were terminated by ':' control comes here. This is a + non-capturing group with an options change. There is nothing more that + needs to be done because "capturing" is already set FALSE by default; + we can just fall through. */ + + } + } + + /* Ordinary parentheses, not followed by '?', are capturing unless + PCRE_NO_AUTO_CAPTURE is set. */ + + else capturing = (options & PCRE_NO_AUTO_CAPTURE) == 0; + + /* Capturing brackets must be counted so we can process escapes in a + Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to need + an additional 3 bytes of memory per capturing bracket. */ + + if (capturing) + { + bracount++; + if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3; + } + + /* Save length for computing whole length at end if there's a repeat that + requires duplication of the group. Also save the current value of + branch_extra, and start the new group with the new value. If non-zero, this + will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ + + if (brastackptr >= sizeof(brastack)/sizeof(int)) + { + errorcode = ERR19; + goto PCRE_ERROR_RETURN; + } + + bralenstack[brastackptr] = branch_extra; + branch_extra = branch_newextra; + + brastack[brastackptr++] = length; + length += bracket_length; + continue; + + /* Handle ket. Look for subsequent max/min; for certain sets of values we + have to replicate this bracket up to that many times. If brastackptr is + 0 this is an unmatched bracket which will generate an error, but take care + not to try to access brastack[-1] when computing the length and restoring + the branch_extra value. */ + + case ')': + length += 1 + LINK_SIZE; + if (brastackptr > 0) + { + duplength = length - brastack[--brastackptr]; + branch_extra = bralenstack[brastackptr]; + } + else duplength = 0; + + /* The following code is also used when a recursion such as (?3) is + followed by a quantifier, because in that case, it has to be wrapped inside + brackets so that the quantifier works. The value of duplength must be + set before arrival. */ + + HANDLE_QUANTIFIED_BRACKETS: + + /* Leave ptr at the final char; for read_repeat_counts this happens + automatically; for the others we need an increment. */ + + if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) + { + ptr = read_repeat_counts(ptr+2, &min, &max, &errorcode); + if (errorcode != 0) goto PCRE_ERROR_RETURN; + } + else if (c == '*') { min = 0; max = -1; ptr++; } + else if (c == '+') { min = 1; max = -1; ptr++; } + else if (c == '?') { min = 0; max = 1; ptr++; } + else { min = 1; max = 1; } + + /* If the minimum is zero, we have to allow for an OP_BRAZERO before the + group, and if the maximum is greater than zero, we have to replicate + maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting + bracket set. */ + + if (min == 0) + { + length++; + if (max > 0) length += (max - 1) * (duplength + 3 + 2*LINK_SIZE); + } + + /* When the minimum is greater than zero, we have to replicate up to + minval-1 times, with no additions required in the copies. Then, if there + is a limited maximum we have to replicate up to maxval-1 times allowing + for a BRAZERO item before each optional copy and nesting brackets for all + but one of the optional copies. */ + + else + { + length += (min - 1) * duplength; + if (max > min) /* Need this test as max=-1 means no limit */ + length += (max - min) * (duplength + 3 + 2*LINK_SIZE) + - (2 + 2*LINK_SIZE); + } + + /* Allow space for once brackets for "possessive quantifier" */ + + if (ptr[1] == '+') + { + ptr++; + length += 2 + 2*LINK_SIZE; + } + continue; + + /* Non-special character. It won't be space or # in extended mode, so it is + always a genuine character. If we are in a \Q...\E sequence, check for the + end; if not, we have a literal. */ + + default: + NORMAL_CHAR: + + if (inescq && c == '\\' && ptr[1] == 'E') + { + inescq = FALSE; + ptr++; + continue; + } + + length += 2; /* For a one-byte character */ + lastitemlength = 1; /* Default length of last item for repeats */ + + /* In UTF-8 mode, check for additional bytes. */ + +#ifdef SUPPORT_UTF8 + if (utf8 && (c & 0xc0) == 0xc0) + { + while ((ptr[1] & 0xc0) == 0x80) /* Can't flow over the end */ + { /* because the end is marked */ + lastitemlength++; /* by a zero byte. */ + length++; + ptr++; + } + } +#endif + + continue; + } + } + +length += 2 + LINK_SIZE; /* For final KET and END */ + +if ((options & PCRE_AUTO_CALLOUT) != 0) + length += 2 + 2*LINK_SIZE; /* For final callout */ + +if (length > MAX_PATTERN_SIZE) + { + errorcode = ERR20; + goto PCRE_EARLY_ERROR_RETURN; + } + +/* Compute the size of data block needed and get it, either from malloc or +externally provided function. */ + +size = length + sizeof(real_pcre) + name_count * (max_name_size + 3); +re = (real_pcre *)(pcre_malloc)(size); + +if (re == NULL) + { + errorcode = ERR21; + goto PCRE_EARLY_ERROR_RETURN; + } + +/* Put in the magic number, and save the sizes, options, and character table +pointer. NULL is used for the default character tables. The nullpad field is at +the end; it's there to help in the case when a regex compiled on a system with +4-byte pointers is run on another with 8-byte pointers. */ + +re->magic_number = MAGIC_NUMBER; +re->size = size; +re->options = options; +re->dummy1 = 0; +re->name_table_offset = sizeof(real_pcre); +re->name_entry_size = max_name_size + 3; +re->name_count = name_count; +re->ref_count = 0; +re->tables = (tables == _pcre_default_tables)? NULL : tables; +re->nullpad = NULL; + +/* The starting points of the name/number translation table and of the code are +passed around in the compile data block. */ + +compile_block.names_found = 0; +compile_block.name_entry_size = max_name_size + 3; +compile_block.name_table = (uschar *)re + re->name_table_offset; +codestart = compile_block.name_table + re->name_entry_size * re->name_count; +compile_block.start_code = codestart; +compile_block.start_pattern = (const uschar *)pattern; +compile_block.req_varyopt = 0; +compile_block.nopartial = FALSE; + +/* Set up a starting, non-extracting bracket, then compile the expression. On +error, errorcode will be set non-zero, so we don't need to look at the result +of the function here. */ + +ptr = (const uschar *)pattern; +code = (uschar *)codestart; +*code = OP_BRA; +bracount = 0; +(void)compile_regex(options, options & PCRE_IMS, &bracount, &code, &ptr, + &errorcode, FALSE, 0, &firstbyte, &reqbyte, NULL, &compile_block); +re->top_bracket = bracount; +re->top_backref = compile_block.top_backref; + +if (compile_block.nopartial) re->options |= PCRE_NOPARTIAL; + +/* If not reached end of pattern on success, there's an excess bracket. */ + +if (errorcode == 0 && *ptr != 0) errorcode = ERR22; + +/* Fill in the terminating state and check for disastrous overflow, but +if debugging, leave the test till after things are printed out. */ + +*code++ = OP_END; + +#ifndef DEBUG +if (code - codestart > length) errorcode = ERR23; +#endif + +/* Give an error if there's back reference to a non-existent capturing +subpattern. */ + +if (re->top_backref > re->top_bracket) errorcode = ERR15; + +/* Failed to compile, or error while post-processing */ + +if (errorcode != 0) + { + (pcre_free)(re); + PCRE_ERROR_RETURN: + *erroroffset = ptr - (const uschar *)pattern; + PCRE_EARLY_ERROR_RETURN: + *errorptr = error_texts[errorcode]; + if (errorcodeptr != NULL) *errorcodeptr = errorcode; + return NULL; + } + +/* If the anchored option was not passed, set the flag if we can determine that +the pattern is anchored by virtue of ^ characters or \A or anything else (such +as starting with .* when DOTALL is set). + +Otherwise, if we know what the first character has to be, save it, because that +speeds up unanchored matches no end. If not, see if we can set the +PCRE_STARTLINE flag. This is helpful for multiline matches when all branches +start with ^. and also when all branches start with .* for non-DOTALL matches. +*/ + +if ((options & PCRE_ANCHORED) == 0) + { + int temp_options = options; + if (is_anchored(codestart, &temp_options, 0, compile_block.backref_map)) + re->options |= PCRE_ANCHORED; + else + { + if (firstbyte < 0) + firstbyte = find_firstassertedchar(codestart, &temp_options, FALSE); + if (firstbyte >= 0) /* Remove caseless flag for non-caseable chars */ + { + int ch = firstbyte & 255; + re->first_byte = ((firstbyte & REQ_CASELESS) != 0 && + compile_block.fcc[ch] == ch)? ch : firstbyte; + re->options |= PCRE_FIRSTSET; + } + else if (is_startline(codestart, 0, compile_block.backref_map)) + re->options |= PCRE_STARTLINE; + } + } + +/* For an anchored pattern, we use the "required byte" only if it follows a +variable length item in the regex. Remove the caseless flag for non-caseable +bytes. */ + +if (reqbyte >= 0 && + ((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0)) + { + int ch = reqbyte & 255; + re->req_byte = ((reqbyte & REQ_CASELESS) != 0 && + compile_block.fcc[ch] == ch)? (reqbyte & ~REQ_CASELESS) : reqbyte; + re->options |= PCRE_REQCHSET; + } + +/* Print out the compiled data for debugging */ + +#ifdef DEBUG + +printf("Length = %d top_bracket = %d top_backref = %d\n", + length, re->top_bracket, re->top_backref); + +if (re->options != 0) + { + printf("%s%s%s%s%s%s%s%s%s%s\n", + ((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "", + ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", + ((re->options & PCRE_CASELESS) != 0)? "caseless " : "", + ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "", + ((re->options & PCRE_EXTENDED) != 0)? "extended " : "", + ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", + ((re->options & PCRE_DOTALL) != 0)? "dotall " : "", + ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", + ((re->options & PCRE_EXTRA) != 0)? "extra " : "", + ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); + } + +if ((re->options & PCRE_FIRSTSET) != 0) + { + int ch = re->first_byte & 255; + const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; + if (isprint(ch)) printf("First char = %c%s\n", ch, caseless); + else printf("First char = \\x%02x%s\n", ch, caseless); + } + +if ((re->options & PCRE_REQCHSET) != 0) + { + int ch = re->req_byte & 255; + const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; + if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless); + else printf("Req char = \\x%02x%s\n", ch, caseless); + } + +_pcre_printint(re, stdout); + +/* This check is done here in the debugging case so that the code that +was compiled can be seen. */ + +if (code - codestart > length) + { + (pcre_free)(re); + *errorptr = error_texts[ERR23]; + *erroroffset = ptr - (uschar *)pattern; + if (errorcodeptr != NULL) *errorcodeptr = ERR23; + return NULL; + } +#endif + +return (pcre *)re; +} + +/* End of pcre_compile.c */ diff --git a/libpcre/pcre_config.c b/libpcre/pcre_config.c new file mode 100644 index 0000000000..04029a9464 --- /dev/null +++ b/libpcre/pcre_config.c @@ -0,0 +1,112 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_config(). */ + + +#include "pcre_internal.h" + + +/************************************************* +* Return info about what features are configured * +*************************************************/ + +/* This function has an extensible interface so that additional items can be +added compatibly. + +Arguments: + what what information is required + where where to put the information + +Returns: 0 if data returned, negative on error +*/ + +EXPORT int +pcre_config(int what, void *where) +{ +switch (what) + { + case PCRE_CONFIG_UTF8: +#ifdef SUPPORT_UTF8 + *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; + + case PCRE_CONFIG_UNICODE_PROPERTIES: +#ifdef SUPPORT_UCP + *((int *)where) = 1; +#else + *((int *)where) = 0; +#endif + break; + + case PCRE_CONFIG_NEWLINE: + *((int *)where) = NEWLINE; + break; + + case PCRE_CONFIG_LINK_SIZE: + *((int *)where) = LINK_SIZE; + break; + + case PCRE_CONFIG_POSIX_MALLOC_THRESHOLD: + *((int *)where) = POSIX_MALLOC_THRESHOLD; + break; + + case PCRE_CONFIG_MATCH_LIMIT: + *((unsigned int *)where) = MATCH_LIMIT; + break; + + case PCRE_CONFIG_STACKRECURSE: +#ifdef NO_RECURSE + *((int *)where) = 0; +#else + *((int *)where) = 1; +#endif + break; + + default: return PCRE_ERROR_BADOPTION; + } + +return 0; +} + +/* End of pcre_config.c */ diff --git a/libpcre/pcre_dfa_exec.c b/libpcre/pcre_dfa_exec.c new file mode 100644 index 0000000000..7101570434 --- /dev/null +++ b/libpcre/pcre_dfa_exec.c @@ -0,0 +1,1922 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_dfa_exec(), which is an +alternative matching function that uses a DFA algorithm. This is NOT Perl- +compatible, but it has advantages in certain applications. */ + + +#include "pcre_internal.h" + + +/* For use to indent debugging output */ + +#define SP " " + + + +/************************************************* +* Code parameters and static tables * +*************************************************/ + +/* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes +into others, under special conditions. A gap of 10 between the blocks should be +enough. */ + +#define OP_PROP_EXTRA (EXTRACT_BASIC_MAX+1) +#define OP_EXTUNI_EXTRA (EXTRACT_BASIC_MAX+11) + + +/* This table identifies those opcodes that are followed immediately by a +character that is to be tested in some way. This makes is possible to +centralize the loading of these characters. In the case of Type * etc, the +"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a +small value. */ + +static uschar coptable[] = { + 0, /* End */ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ + 0, 0, /* Any, Anybyte */ + 0, 0, 0, /* NOTPROP, PROP, EXTUNI */ + 0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ + 1, /* Char */ + 1, /* Charnc */ + 1, /* not */ + /* Positive single-char repeats */ + 1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ + 3, 3, 3, /* upto, minupto, exact */ + /* Negative single-char repeats - only for chars < 256 */ + 1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ + 3, 3, 3, /* NOT upto, minupto, exact */ + /* Positive type repeats */ + 1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ + 3, 3, 3, /* Type upto, minupto, exact */ + /* Character class & ref repeats */ + 0, 0, 0, 0, 0, 0, /* *, *?, +, +?, ?, ?? */ + 0, 0, /* CRRANGE, CRMINRANGE */ + 0, /* CLASS */ + 0, /* NCLASS */ + 0, /* XCLASS - variable length */ + 0, /* REF */ + 0, /* RECURSE */ + 0, /* CALLOUT */ + 0, /* Alt */ + 0, /* Ket */ + 0, /* KetRmax */ + 0, /* KetRmin */ + 0, /* Assert */ + 0, /* Assert not */ + 0, /* Assert behind */ + 0, /* Assert behind not */ + 0, /* Reverse */ + 0, /* Once */ + 0, /* COND */ + 0, /* CREF */ + 0, 0, /* BRAZERO, BRAMINZERO */ + 0, /* BRANUMBER */ + 0 /* BRA */ +}; + +/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, +and \w */ + +static uschar toptable1[] = { + 0, 0, 0, 0, 0, + ctype_digit, ctype_digit, + ctype_space, ctype_space, + ctype_word, ctype_word, + 0 /* OP_ANY */ +}; + +static uschar toptable2[] = { + 0, 0, 0, 0, 0, + ctype_digit, 0, + ctype_space, 0, + ctype_word, 0, + 1 /* OP_ANY */ +}; + + +/* Structure for holding data about a particular state, which is in effect the +current data for an active path through the match tree. It must consist +entirely of ints because the working vector we are passed, and which we put +these structures in, is a vector of ints. */ + +typedef struct stateblock { + int offset; /* Offset to opcode */ + int count; /* Count for repeats */ + int ims; /* ims flag bits */ + int data; /* Some use extra data */ +} stateblock; + +#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) + + +#ifdef DEBUG +/************************************************* +* Print character string * +*************************************************/ + +/* Character string printing function for debugging. + +Arguments: + p points to string + length number of bytes + f where to print + +Returns: nothing +*/ + +static void +pchars(unsigned char *p, int length, FILE *f) +{ +int c; +while (length-- > 0) + { + if (isprint(c = *(p++))) + fprintf(f, "%c", c); + else + fprintf(f, "\\x%02x", c); + } +} +#endif + + + +/************************************************* +* Execute a Regular Expression - DFA engine * +*************************************************/ + +/* This internal function applies a compiled pattern to a subject string, +starting at a given point, using a DFA engine. This function is called from the +external one, possibly multiple times if the pattern is not anchored. The +function calls itself recursively for some kinds of subpattern. + +Arguments: + md the match_data block with fixed information + this_start_code the opening bracket of this subexpression's code + current_subject where we currently are in the subject string + start_offset start offset in the subject string + offsets vector to contain the matching string offsets + offsetcount size of same + workspace vector of workspace + wscount size of same + ims the current ims flags + rlevel function call recursion level + recursing regex recursive call level + +Returns: > 0 => + = 0 => + -1 => failed to match + < -1 => some kind of unexpected problem + +The following macros are used for adding states to the two state vectors (one +for the current character, one for the following character). */ + +#define ADD_ACTIVE(x,y) \ + if (active_count++ < wscount) \ + { \ + next_active_state->offset = (x); \ + next_active_state->count = (y); \ + next_active_state->ims = ims; \ + next_active_state++; \ + DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +#define ADD_ACTIVE_DATA(x,y,z) \ + if (active_count++ < wscount) \ + { \ + next_active_state->offset = (x); \ + next_active_state->count = (y); \ + next_active_state->ims = ims; \ + next_active_state->data = (z); \ + next_active_state++; \ + DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +#define ADD_NEW(x,y) \ + if (new_count++ < wscount) \ + { \ + next_new_state->offset = (x); \ + next_new_state->count = (y); \ + next_new_state->ims = ims; \ + next_new_state++; \ + DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +#define ADD_NEW_DATA(x,y,z) \ + if (new_count++ < wscount) \ + { \ + next_new_state->offset = (x); \ + next_new_state->count = (y); \ + next_new_state->ims = ims; \ + next_new_state->data = (z); \ + next_new_state++; \ + DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ + } \ + else return PCRE_ERROR_DFA_WSSIZE + +/* And now, here is the code */ + +static int +internal_dfa_exec( + dfa_match_data *md, + const uschar *this_start_code, + const uschar *current_subject, + int start_offset, + int *offsets, + int offsetcount, + int *workspace, + int wscount, + int ims, + int rlevel, + int recursing) +{ +stateblock *active_states, *new_states, *temp_states; +stateblock *next_active_state, *next_new_state; + +const uschar *ctypes, *lcc, *fcc; +const uschar *ptr; +const uschar *end_code; + +int active_count, new_count, match_count; + +/* Some fields in the md block are frequently referenced, so we load them into +independent variables in the hope that this will perform better. */ + +const uschar *start_subject = md->start_subject; +const uschar *end_subject = md->end_subject; +const uschar *start_code = md->start_code; + +BOOL utf8 = (md->poptions & PCRE_UTF8) != 0; + +rlevel++; +offsetcount &= (-2); + +wscount -= 2; +wscount = (wscount - (wscount % (INTS_PER_STATEBLOCK * 2))) / + (2 * INTS_PER_STATEBLOCK); + +DPRINTF(("\n%.*s---------------------\n" + "%.*sCall to internal_dfa_exec f=%d r=%d\n", + rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing)); + +ctypes = md->tables + ctypes_offset; +lcc = md->tables + lcc_offset; +fcc = md->tables + fcc_offset; + +match_count = PCRE_ERROR_NOMATCH; /* A negative number */ + +active_states = (stateblock *)(workspace + 2); +next_new_state = new_states = active_states + wscount; +new_count = 0; + +/* The first thing in any (sub) pattern is a bracket of some sort. Push all +the alternative states onto the list, and find out where the end is. This +makes is possible to use this function recursively, when we want to stop at a +matching internal ket rather than at the end. + +If the first opcode in the first alternative is OP_REVERSE, we are dealing with +a backward assertion. In that case, we have to find out the maximum amount to +move back, and set up each alternative appropriately. */ + +if (this_start_code[1+LINK_SIZE] == OP_REVERSE) + { + int max_back = 0; + int gone_back; + + end_code = this_start_code; + do + { + int back = GET(end_code, 2+LINK_SIZE); + if (back > max_back) max_back = back; + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + + /* If we can't go back the amount required for the longest lookbehind + pattern, go back as far as we can; some alternatives may still be viable. */ + +#ifdef SUPPORT_UTF8 + /* In character mode we have to step back character by character */ + + if (utf8) + { + for (gone_back = 0; gone_back < max_back; gone_back++) + { + if (current_subject <= start_subject) break; + current_subject--; + while (current_subject > start_subject && + (*current_subject & 0xc0) == 0x80) + current_subject--; + } + } + else +#endif + + /* In byte-mode we can do this quickly. */ + + { + gone_back = (current_subject - max_back < start_subject)? + current_subject - start_subject : max_back; + current_subject -= gone_back; + } + + /* Now we can process the individual branches. */ + + end_code = this_start_code; + do + { + int back = GET(end_code, 2+LINK_SIZE); + if (back <= gone_back) + { + int bstate = end_code - start_code + 2 + 2*LINK_SIZE; + ADD_NEW_DATA(-bstate, 0, gone_back - back); + } + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + } + +/* This is the code for a "normal" subpattern (not a backward assertion). The +start of a whole pattern is always one of these. If we are at the top level, +we may be asked to restart matching from the same point that we reached for a +previous partial match. We still have to scan through the top-level branches to +find the end state. */ + +else + { + end_code = this_start_code; + + /* Restarting */ + + if (rlevel == 1 && (md->moptions & PCRE_DFA_RESTART) != 0) + { + do { end_code += GET(end_code, 1); } while (*end_code == OP_ALT); + new_count = workspace[1]; + if (!workspace[0]) + memcpy(new_states, active_states, new_count * sizeof(stateblock)); + } + + /* Not restarting */ + + else + { + do + { + ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0); + end_code += GET(end_code, 1); + } + while (*end_code == OP_ALT); + } + } + +workspace[0] = 0; /* Bit indicating which vector is current */ + +DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code)); + +/* Loop for scanning the subject */ + +ptr = current_subject; +for (;;) + { + int i, j; + int c, d, clen, dlen; + + /* Make the new state list into the active state list and empty the + new state list. */ + + temp_states = active_states; + active_states = new_states; + new_states = temp_states; + active_count = new_count; + new_count = 0; + + workspace[0] ^= 1; /* Remember for the restarting feature */ + workspace[1] = active_count; + +#ifdef DEBUG + printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); + pchars((uschar *)ptr, strlen((char *)ptr), stdout); + printf("\"\n"); + + printf("%.*sActive states: ", rlevel*2-2, SP); + for (i = 0; i < active_count; i++) + printf("%d/%d ", active_states[i].offset, active_states[i].count); + printf("\n"); +#endif + + /* Set the pointers for adding new states */ + + next_active_state = active_states + active_count; + next_new_state = new_states; + + /* Load the current character from the subject outside the loop, as many + different states may want to look at it, and we assume that at least one + will. */ + + if (ptr < end_subject) + { + clen = 1; +#ifdef SUPPORT_UTF8 + if (utf8) { GETCHARLEN(c, ptr, clen); } else +#endif /* SUPPORT_UTF8 */ + c = *ptr; + } + else + { + clen = 0; /* At end subject */ + c = -1; + } + + /* Scan up the active states and act on each one. The result of an action + may be to add more states to the currently active list (e.g. on hitting a + parenthesis) or it may be to put states on the new list, for considering + when we move the character pointer on. */ + + for (i = 0; i < active_count; i++) + { + stateblock *current_state = active_states + i; + const uschar *code; + int state_offset = current_state->offset; + int count, codevalue; + int chartype, othercase; + +#ifdef DEBUG + printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); + if (c < 0) printf("-1\n"); + else if (c > 32 && c < 127) printf("'%c'\n", c); + else printf("0x%02x\n", c); +#endif + + /* This variable is referred to implicity in the ADD_xxx macros. */ + + ims = current_state->ims; + + /* A negative offset is a special case meaning "hold off going to this + (negated) state until the number of characters in the data field have + been skipped". */ + + if (state_offset < 0) + { + if (current_state->data > 0) + { + DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP)); + ADD_NEW_DATA(state_offset, current_state->count, + current_state->data - 1); + continue; + } + else + { + current_state->offset = state_offset = -state_offset; + } + } + + /* Check for a duplicate state with the same count, and skip if found. */ + + for (j = 0; j < i; j++) + { + if (active_states[j].offset == state_offset && + active_states[j].count == current_state->count) + { + DPRINTF(("%.*sDuplicate state: skipped\n", rlevel*2-2, SP)); + goto NEXT_ACTIVE_STATE; + } + } + + /* The state offset is the offset to the opcode */ + + code = start_code + state_offset; + codevalue = *code; + if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */ + + /* If this opcode is followed by an inline character, load it. It is + tempting to test for the presence of a subject character here, but that + is wrong, because sometimes zero repetitions of the subject are + permitted. + + We also use this mechanism for opcodes such as OP_TYPEPLUS that take an + argument that is not a data character - but is always one byte long. + Unfortunately, we have to take special action to deal with \P, \p, and + \X in this case. To keep the other cases fast, convert these ones to new + opcodes. */ + + if (coptable[codevalue] > 0) + { + dlen = 1; +#ifdef SUPPORT_UTF8 + if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else +#endif /* SUPPORT_UTF8 */ + d = code[coptable[codevalue]]; + if (codevalue >= OP_TYPESTAR) + { + if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM; + if (d >= OP_NOTPROP) + codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA; + } + } + else + { + dlen = 0; /* Not strictly necessary, but compilers moan */ + d = -1; /* if these variables are not set. */ + } + + + /* Now process the individual opcodes */ + + switch (codevalue) + { + +/* ========================================================================== */ + /* Reached a closing bracket. If not at the end of the pattern, carry + on with the next opcode. Otherwise, unless we have an empty string and + PCRE_NOTEMPTY is set, save the match data, shifting up all previous + matches so we always have the longest first. */ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: + if (code != end_code) + { + ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); + if (codevalue != OP_KET) + { + ADD_ACTIVE(state_offset - GET(code, 1), 0); + } + } + else if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0) + { + if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0; + else if (match_count > 0 && ++match_count * 2 >= offsetcount) + match_count = 0; + count = ((match_count == 0)? offsetcount : match_count * 2) - 2; + if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int)); + if (offsetcount >= 2) + { + offsets[0] = current_subject - start_subject; + offsets[1] = ptr - start_subject; + DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP, + offsets[1] - offsets[0], current_subject)); + } + if ((md->moptions & PCRE_DFA_SHORTEST) != 0) + { + DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n" + "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, + match_count, rlevel*2-2, SP)); + return match_count; + } + } + break; + +/* ========================================================================== */ + /* These opcodes add to the current list of states without looking + at the current character. */ + + /*-----------------------------------------------------------------*/ + case OP_ALT: + do { code += GET(code, 1); } while (*code == OP_ALT); + ADD_ACTIVE(code - start_code, 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_BRA: + do + { + ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0); + code += GET(code, 1); + } + while (*code == OP_ALT); + break; + + /*-----------------------------------------------------------------*/ + case OP_BRAZERO: + case OP_BRAMINZERO: + ADD_ACTIVE(state_offset + 1, 0); + code += 1 + GET(code, 2); + while (*code == OP_ALT) code += GET(code, 1); + ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_BRANUMBER: + ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_CIRC: + if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) || + ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE)) + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_EOD: + if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_OPT: + ims = code[1]; + ADD_ACTIVE(state_offset + 2, 0); + break; + + /*-----------------------------------------------------------------*/ + case OP_SOD: + if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_SOM: + if (ptr == start_subject + start_offset) { ADD_ACTIVE(state_offset + 1, 0); } + break; + + +/* ========================================================================== */ + /* These opcodes inspect the next subject character, and sometimes + the previous one as well, but do not have an argument. The variable + clen contains the length of the current character and is zero if we are + at the end of the subject. */ + + /*-----------------------------------------------------------------*/ + case OP_ANY: + if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0)) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_EODN: + if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject)) + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_DOLL: + if ((md->moptions & PCRE_NOTEOL) == 0) + { + if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject || + (ims & PCRE_MULTILINE) != 0))) + { ADD_ACTIVE(state_offset + 1, 0); } + } + else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0) + { ADD_ACTIVE(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + + case OP_DIGIT: + case OP_WHITESPACE: + case OP_WORDCHAR: + if (clen > 0 && c < 256 && + ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_NOT_DIGIT: + case OP_NOT_WHITESPACE: + case OP_NOT_WORDCHAR: + if (clen > 0 && (c >= 256 || + ((ctypes[c] & toptable1[codevalue]) ^ toptable2[codevalue]) != 0)) + { ADD_NEW(state_offset + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_WORD_BOUNDARY: + case OP_NOT_WORD_BOUNDARY: + { + int left_word, right_word; + + if (ptr > start_subject) + { + const uschar *temp = ptr - 1; +#ifdef SUPPORT_UTF8 + if (utf8) BACKCHAR(temp); +#endif + GETCHARTEST(d, temp); + left_word = d < 256 && (ctypes[d] & ctype_word) != 0; + } + else left_word = 0; + + if (clen > 0) right_word = c < 256 && (ctypes[c] & ctype_word) != 0; + else right_word = 0; + + if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY)) + { ADD_ACTIVE(state_offset + 1, 0); } + } + break; + + +#ifdef SUPPORT_UCP + + /*-----------------------------------------------------------------*/ + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. + */ + + case OP_PROP: + case OP_NOTPROP: + if (clen > 0) + { + int rqdtype, category; + category = ucp_findchar(c, &chartype, &othercase); + rqdtype = code[1]; + if (rqdtype >= 128) + { + if ((rqdtype - 128 == category) == (codevalue == OP_PROP)) + { ADD_NEW(state_offset + 2, 0); } + } + else + { + if ((rqdtype == chartype) == (codevalue == OP_PROP)) + { ADD_NEW(state_offset + 2, 0); } + } + } + break; +#endif + + + +/* ========================================================================== */ + /* These opcodes likewise inspect the subject character, but have an + argument that is not a data character. It is one of these opcodes: + OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR, + OP_NOT_WORDCHAR. The value is loaded into d. */ + + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0) + { + if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + count++; + ADD_NEW(state_offset, count); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + ADD_NEW(state_offset + 2, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0) + { + if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + ADD_NEW(state_offset, 0); + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_TYPEEXACT: + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + if (codevalue != OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 4, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) || + (c < 256 && + (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) && + ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0)) + { + if (++count >= GET2(code, 1)) + { ADD_NEW(state_offset + 4, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + +/* ========================================================================== */ + /* These are virtual opcodes that are used when something like + OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It + keeps the code above fast for the other cases. The argument is in the + d variable. */ + + case OP_PROP_EXTRA + OP_TYPEPLUS: + case OP_PROP_EXTRA + OP_TYPEMINPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); } + if (clen > 0) + { + int category = ucp_findchar(c, &chartype, &othercase); + int rqdtype = code[2]; + if ((d == OP_PROP) == + (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype))) + { count++; ADD_NEW(state_offset, count); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEPLUS: + case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } + if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M) + { + const uschar *nptr = ptr + clen; + int ncount = 0; + while (nptr < end_subject) + { + int nd; + int ndlen = 1; + GETCHARLEN(nd, nptr, ndlen); + if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break; + ncount++; + nptr += ndlen; + } + count++; + ADD_NEW_DATA(-state_offset, count, ncount); + } + break; + + /*-----------------------------------------------------------------*/ + case OP_PROP_EXTRA + OP_TYPEQUERY: + case OP_PROP_EXTRA + OP_TYPEMINQUERY: + count = 3; + goto QS1; + + case OP_PROP_EXTRA + OP_TYPESTAR: + case OP_PROP_EXTRA + OP_TYPEMINSTAR: + count = 0; + + QS1: + + ADD_ACTIVE(state_offset + 3, 0); + if (clen > 0) + { + int category = ucp_findchar(c, &chartype, &othercase); + int rqdtype = code[2]; + if ((d == OP_PROP) == + (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype))) + { ADD_NEW(state_offset + count, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEQUERY: + case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY: + count = 2; + goto QS2; + + case OP_EXTUNI_EXTRA + OP_TYPESTAR: + case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR: + count = 0; + + QS2: + + ADD_ACTIVE(state_offset + 2, 0); + if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M) + { + const uschar *nptr = ptr + clen; + int ncount = 0; + while (nptr < end_subject) + { + int nd; + int ndlen = 1; + GETCHARLEN(nd, nptr, ndlen); + if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break; + ncount++; + nptr += ndlen; + } + ADD_NEW_DATA(-(state_offset + count), 0, ncount); + } + break; + + /*-----------------------------------------------------------------*/ + case OP_PROP_EXTRA + OP_TYPEEXACT: + case OP_PROP_EXTRA + OP_TYPEUPTO: + case OP_PROP_EXTRA + OP_TYPEMINUPTO: + if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 5, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + int category = ucp_findchar(c, &chartype, &othercase); + int rqdtype = code[4]; + if ((d == OP_PROP) == + (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype))) + { + if (++count >= GET2(code, 1)) + { ADD_NEW(state_offset + 5, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXTUNI_EXTRA + OP_TYPEEXACT: + case OP_EXTUNI_EXTRA + OP_TYPEUPTO: + case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO: + if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) + { ADD_ACTIVE(state_offset + 4, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M) + { + const uschar *nptr = ptr + clen; + int ncount = 0; + while (nptr < end_subject) + { + int nd; + int ndlen = 1; + GETCHARLEN(nd, nptr, ndlen); + if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break; + ncount++; + nptr += ndlen; + } + if (++count >= GET2(code, 1)) + { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); } + else + { ADD_NEW_DATA(-state_offset, count, ncount); } + } + break; + +/* ========================================================================== */ + /* These opcodes are followed by a character that is usually compared + to the current subject character; it is loaded into d. We still get + here even if there is no subject character, because in some cases zero + repetitions are permitted. */ + + /*-----------------------------------------------------------------*/ + case OP_CHAR: + if (clen > 0 && c == d) { ADD_NEW(state_offset + dlen + 1, 0); } + break; + + /*-----------------------------------------------------------------*/ + case OP_CHARNC: + if (clen == 0) break; + +#ifdef SUPPORT_UTF8 + if (utf8) + { + if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else + { + if (c < 128) othercase = fcc[c]; else + + /* If we have Unicode property support, we can use it to test the + other case of the character, if there is one. The result of + ucp_findchar() is < 0 if the char isn't found, and othercase is + returned as zero if there isn't another case. */ + +#ifdef SUPPORT_UCP + if (ucp_findchar(c, &chartype, &othercase) < 0) +#endif + othercase = -1; + + if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); } + } + } + else +#endif /* SUPPORT_UTF8 */ + + /* Non-UTF-8 mode */ + { + if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); } + } + break; + + +#ifdef SUPPORT_UCP + /*-----------------------------------------------------------------*/ + /* This is a tricky one because it can match more than one character. + Find out how many characters to skip, and then set up a negative state + to wait for them to pass before continuing. */ + + case OP_EXTUNI: + if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M) + { + const uschar *nptr = ptr + clen; + int ncount = 0; + while (nptr < end_subject) + { + int nclen = 1; + GETCHARLEN(c, nptr, nclen); + if (ucp_findchar(c, &chartype, &othercase) != ucp_M) break; + ncount++; + nptr += nclen; + } + ADD_NEW_DATA(-(state_offset + 1), 0, ncount); + } + break; +#endif + + /*-----------------------------------------------------------------*/ + /* Match a negated single character. This is only used for one-byte + characters, that is, we know that d < 256. The character we are + checking (c) can be multibyte. */ + + case OP_NOT: + if (clen > 0) + { + int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d; + if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_PLUS: + case OP_MINPLUS: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); } + if (clen > 0) + { + int otherd = -1; + if ((ims & PCRE_CASELESS) != 0) + { +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { +#ifdef SUPPORT_UCP + if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1; +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF8 */ + otherd = fcc[d]; + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { count++; ADD_NEW(state_offset, count); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_QUERY: + case OP_MINQUERY: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + ADD_ACTIVE(state_offset + dlen + 1, 0); + if (clen > 0) + { + int otherd = -1; + if ((ims && PCRE_CASELESS) != 0) + { +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { +#ifdef SUPPORT_UCP + if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1; +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF8 */ + otherd = fcc[d]; + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { ADD_NEW(state_offset + dlen + 1, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_STAR: + case OP_MINSTAR: + case OP_NOTSTAR: + case OP_NOTMINSTAR: + ADD_ACTIVE(state_offset + dlen + 1, 0); + if (clen > 0) + { + int otherd = -1; + if ((ims && PCRE_CASELESS) != 0) + { +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { +#ifdef SUPPORT_UCP + if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1; +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF8 */ + otherd = fcc[d]; + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { ADD_NEW(state_offset, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_EXACT: + case OP_UPTO: + case OP_MINUPTO: + case OP_NOTEXACT: + case OP_NOTUPTO: + case OP_NOTMINUPTO: + if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT) + { ADD_ACTIVE(state_offset + dlen + 3, 0); } + count = current_state->count; /* Number already matched */ + if (clen > 0) + { + int otherd = -1; + if ((ims & PCRE_CASELESS) != 0) + { +#ifdef SUPPORT_UTF8 + if (utf8 && c >= 128) + { +#ifdef SUPPORT_UCP + if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1; +#endif /* SUPPORT_UCP */ + } + else +#endif /* SUPPORT_UTF8 */ + otherd = fcc[d]; + } + if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR)) + { + if (++count >= GET2(code, 1)) + { ADD_NEW(state_offset + dlen + 3, 0); } + else + { ADD_NEW(state_offset, count); } + } + } + break; + + +/* ========================================================================== */ + /* These are the class-handling opcodes */ + + case OP_CLASS: + case OP_NCLASS: + case OP_XCLASS: + { + BOOL isinclass = FALSE; + int next_state_offset; + const uschar *ecode; + + /* For a simple class, there is always just a 32-byte table, and we + can set isinclass from it. */ + + if (codevalue != OP_XCLASS) + { + ecode = code + 33; + if (clen > 0) + { + isinclass = (c > 255)? (codevalue == OP_NCLASS) : + ((code[1 + c/8] & (1 << (c&7))) != 0); + } + } + + /* An extended class may have a table or a list of single characters, + ranges, or both, and it may be positive or negative. There's a + function that sorts all this out. */ + + else + { + ecode = code + GET(code, 1); + if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE); + } + + /* At this point, isinclass is set for all kinds of class, and ecode + points to the byte after the end of the class. If there is a + quantifier, this is where it will be. */ + + next_state_offset = ecode - start_code; + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + ADD_ACTIVE(next_state_offset + 1, 0); + if (isinclass) { ADD_NEW(state_offset, 0); } + break; + + case OP_CRPLUS: + case OP_CRMINPLUS: + count = current_state->count; /* Already matched */ + if (count > 0) { ADD_ACTIVE(next_state_offset + 1, 0); } + if (isinclass) { count++; ADD_NEW(state_offset, count); } + break; + + case OP_CRQUERY: + case OP_CRMINQUERY: + ADD_ACTIVE(next_state_offset + 1, 0); + if (isinclass) { ADD_NEW(next_state_offset + 1, 0); } + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + count = current_state->count; /* Already matched */ + if (count >= GET2(ecode, 1)) + { ADD_ACTIVE(next_state_offset + 5, 0); } + if (isinclass) + { + if (++count >= GET2(ecode, 3)) + { ADD_NEW(next_state_offset + 5, 0); } + else + { ADD_NEW(state_offset, count); } + } + break; + + default: + if (isinclass) { ADD_NEW(next_state_offset, 0); } + break; + } + } + break; + +/* ========================================================================== */ + /* These are the opcodes for fancy brackets of various kinds. We have + to use recursion in order to handle them. */ + + case OP_ASSERT: + case OP_ASSERT_NOT: + case OP_ASSERTBACK: + case OP_ASSERTBACK_NOT: + { + int rc; + int local_offsets[2]; + int local_workspace[1000]; + const uschar *endasscode = code + GET(code, 1); + + while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); + + rc = internal_dfa_exec( + md, /* static match data */ + code, /* this subexpression's code */ + ptr, /* where we currently are */ + ptr - start_subject, /* start offset */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ + ims, /* the current ims flags */ + rlevel, /* function recursion level */ + recursing); /* pass on regex recursion */ + + if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) + { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_COND: + { + int local_offsets[1000]; + int local_workspace[1000]; + int condcode = code[LINK_SIZE+1]; + + /* The only supported version of OP_CREF is for the value 0xffff, which + means "test if in a recursion". */ + + if (condcode == OP_CREF) + { + int value = GET2(code, LINK_SIZE+2); + if (value != 0xffff) return PCRE_ERROR_DFA_UCOND; + if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); } + else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); } + } + + /* Otherwise, the condition is an assertion */ + + else + { + int rc; + const uschar *asscode = code + LINK_SIZE + 1; + const uschar *endasscode = asscode + GET(asscode, 1); + + while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1); + + rc = internal_dfa_exec( + md, /* fixed match data */ + asscode, /* this subexpression's code */ + ptr, /* where we currently are */ + ptr - start_subject, /* start offset */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ + ims, /* the current ims flags */ + rlevel, /* function recursion level */ + recursing); /* pass on regex recursion */ + + if ((rc >= 0) == + (condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) + { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); } + else + { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); } + } + } + break; + + /*-----------------------------------------------------------------*/ + case OP_RECURSE: + { + int local_offsets[1000]; + int local_workspace[1000]; + int rc; + + DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP, + recursing + 1)); + + rc = internal_dfa_exec( + md, /* fixed match data */ + start_code + GET(code, 1), /* this subexpression's code */ + ptr, /* where we currently are */ + ptr - start_subject, /* start offset */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ + ims, /* the current ims flags */ + rlevel, /* function recursion level */ + recursing + 1); /* regex recurse level */ + + DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP, + recursing + 1, rc)); + + /* Ran out of internal offsets */ + + if (rc == 0) return PCRE_ERROR_DFA_RECURSE; + + /* For each successful matched substring, set up the next state with a + count of characters to skip before trying it. Note that the count is in + characters, not bytes. */ + + if (rc > 0) + { + for (rc = rc*2 - 2; rc >= 0; rc -= 2) + { + const uschar *p = start_subject + local_offsets[rc]; + const uschar *pp = start_subject + local_offsets[rc+1]; + int charcount = local_offsets[rc+1] - local_offsets[rc]; + while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; + if (charcount > 0) + { + ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1)); + } + else + { + ADD_ACTIVE(state_offset + LINK_SIZE + 1, 0); + } + } + } + else if (rc != PCRE_ERROR_NOMATCH) return rc; + } + break; + + /*-----------------------------------------------------------------*/ + case OP_ONCE: + { + const uschar *endcode; + int local_offsets[2]; + int local_workspace[1000]; + + int rc = internal_dfa_exec( + md, /* fixed match data */ + code, /* this subexpression's code */ + ptr, /* where we currently are */ + ptr - start_subject, /* start offset */ + local_offsets, /* offset vector */ + sizeof(local_offsets)/sizeof(int), /* size of same */ + local_workspace, /* workspace vector */ + sizeof(local_workspace)/sizeof(int), /* size of same */ + ims, /* the current ims flags */ + rlevel, /* function recursion level */ + recursing); /* pass on regex recursion */ + + if (rc >= 0) + { + const uschar *end_subpattern = code; + int charcount = local_offsets[1] - local_offsets[0]; + int next_state_offset, repeat_state_offset; + BOOL is_repeated; + + do { end_subpattern += GET(end_subpattern, 1); } + while (*end_subpattern == OP_ALT); + next_state_offset = end_subpattern - start_code + LINK_SIZE + 1; + + /* If the end of this subpattern is KETRMAX or KETRMIN, we must + arrange for the repeat state also to be added to the relevant list. + Calculate the offset, or set -1 for no repeat. */ + + repeat_state_offset = (*end_subpattern == OP_KETRMAX || + *end_subpattern == OP_KETRMIN)? + end_subpattern - start_code - GET(end_subpattern, 1) : -1; + + /* If we have matched an empty string, add the next state at the + current character pointer. This is important so that the duplicate + checking kicks in, which is what breaks infinite loops that match an + empty string. */ + + if (charcount == 0) + { + ADD_ACTIVE(next_state_offset, 0); + } + + /* Optimization: if there are no more active states, and there + are no new states yet set up, then skip over the subject string + right here, to save looping. Otherwise, set up the new state to swing + into action when the end of the substring is reached. */ + + else if (i + 1 >= active_count && new_count == 0) + { + ptr += charcount; + clen = 0; + ADD_NEW(next_state_offset, 0); + + /* If we are adding a repeat state at the new character position, + we must fudge things so that it is the only current state. + Otherwise, it might be a duplicate of one we processed before, and + that would cause it to be skipped. */ + + if (repeat_state_offset >= 0) + { + next_active_state = active_states; + active_count = 0; + i = -1; + ADD_ACTIVE(repeat_state_offset, 0); + } + } + else + { + const uschar *p = start_subject + local_offsets[0]; + const uschar *pp = start_subject + local_offsets[1]; + while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; + ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); + if (repeat_state_offset >= 0) + { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } + } + + } + else if (rc != PCRE_ERROR_NOMATCH) return rc; + } + break; + + +/* ========================================================================== */ + /* Handle callouts */ + + case OP_CALLOUT: + if (pcre_callout != NULL) + { + int rrc; + pcre_callout_block cb; + cb.version = 1; /* Version 1 of the callout block */ + cb.callout_number = code[1]; + cb.offset_vector = offsets; + cb.subject = (char *)start_subject; + cb.subject_length = end_subject - start_subject; + cb.start_match = current_subject - start_subject; + cb.current_position = ptr - start_subject; + cb.pattern_position = GET(code, 2); + cb.next_item_length = GET(code, 2 + LINK_SIZE); + cb.capture_top = 1; + cb.capture_last = -1; + cb.callout_data = md->callout_data; + if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ + if (rrc == 0) { ADD_ACTIVE(state_offset + 2 + 2*LINK_SIZE, 0); } + } + break; + + +/* ========================================================================== */ + default: /* Unsupported opcode */ + return PCRE_ERROR_DFA_UITEM; + } + + NEXT_ACTIVE_STATE: continue; + + } /* End of loop scanning active states */ + + /* We have finished the processing at the current subject character. If no + new states have been set for the next character, we have found all the + matches that we are going to find. If we are at the top level and partial + matching has been requested, check for appropriate conditions. */ + + if (new_count <= 0) + { + if (match_count < 0 && /* No matches found */ + rlevel == 1 && /* Top level match function */ + (md->moptions & PCRE_PARTIAL) != 0 && /* Want partial matching */ + ptr >= end_subject && /* Reached end of subject */ + ptr > current_subject) /* Matched non-empty string */ + { + if (offsetcount >= 2) + { + offsets[0] = current_subject - start_subject; + offsets[1] = end_subject - start_subject; + } + match_count = PCRE_ERROR_PARTIAL; + } + + DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n" + "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count, + rlevel*2-2, SP)); + return match_count; + } + + /* One or more states are active for the next character. */ + + ptr += clen; /* Advance to next subject character */ + } /* Loop to move along the subject string */ + +/* Control never gets here, but we must keep the compiler happy. */ + +DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n" + "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP)); +return PCRE_ERROR_NOMATCH; +} + + + + +/************************************************* +* Execute a Regular Expression - DFA engine * +*************************************************/ + +/* This external function applies a compiled re to a subject string using a DFA +engine. This function calls the internal function multiple times if the pattern +is not anchored. + +Arguments: + argument_re points to the compiled expression + extra_data points to extra data or is NULL (not currently used) + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + offsets vector of match offsets + offsetcount size of same + workspace workspace vector + wscount size of same + +Returns: > 0 => number of match offset pairs placed in offsets + = 0 => offsets overflowed; longest matches are present + -1 => failed to match + < -1 => some kind of unexpected problem +*/ + +EXPORT int +pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, + const char *subject, int length, int start_offset, int options, int *offsets, + int offsetcount, int *workspace, int wscount) +{ +real_pcre *re = (real_pcre *)argument_re; +dfa_match_data match_block; +BOOL utf8, anchored, startline, firstline; +const uschar *current_subject, *end_subject, *lcc; + +pcre_study_data internal_study; +const pcre_study_data *study = NULL; +real_pcre internal_re; + +const uschar *req_byte_ptr; +const uschar *start_bits = NULL; +BOOL first_byte_caseless = FALSE; +BOOL req_byte_caseless = FALSE; +int first_byte = -1; +int req_byte = -1; +int req_byte2 = -1; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_DFA_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; +if (re == NULL || subject == NULL || workspace == NULL || + (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; +if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; +if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE; + +/* We need to find the pointer to any study data before we test for byte +flipping, so we scan the extra_data block first. This may set two fields in the +match block, so we must initialize them beforehand. However, the other fields +in the match block must not be set until after the byte flipping. */ + +match_block.tables = re->tables; +match_block.callout_data = NULL; + +if (extra_data != NULL) + { + unsigned int flags = extra_data->flags; + if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) + study = (const pcre_study_data *)extra_data->study_data; + if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT; + if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) + match_block.callout_data = extra_data->callout_data; + if ((flags & PCRE_EXTRA_TABLES) != 0) + match_block.tables = extra_data->tables; + } + +/* Check that the first field in the block is the magic number. If it is not, +test for a regex that was compiled on a host of opposite endianness. If this is +the case, flipped values are put in internal_re and internal_study if there was +study data too. */ + +if (re->magic_number != MAGIC_NUMBER) + { + re = _pcre_try_flipped(re, &internal_re, study, &internal_study); + if (re == NULL) return PCRE_ERROR_BADMAGIC; + if (study != NULL) study = &internal_study; + } + +/* Set some local values */ + +current_subject = (const unsigned char *)subject + start_offset; +end_subject = (const unsigned char *)subject + length; +req_byte_ptr = current_subject - 1; + +utf8 = (re->options & PCRE_UTF8) != 0; +anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0; + +/* The remaining fixed data for passing around. */ + +match_block.start_code = (const uschar *)argument_re + + re->name_table_offset + re->name_count * re->name_entry_size; +match_block.start_subject = (const unsigned char *)subject; +match_block.end_subject = end_subject; +match_block.moptions = options; +match_block.poptions = re->options; + +/* Check a UTF-8 string if required. Unfortunately there's no way of passing +back the character offset. */ + +#ifdef SUPPORT_UTF8 +if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) + { + if (_pcre_valid_utf8((uschar *)subject, length) >= 0) + return PCRE_ERROR_BADUTF8; + if (start_offset > 0 && start_offset < length) + { + int tb = ((uschar *)subject)[start_offset]; + if (tb > 127) + { + tb &= 0xc0; + if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET; + } + } + } +#endif + +/* If the exec call supplied NULL for tables, use the inbuilt ones. This +is a feature that makes it possible to save compiled regex and re-use them +in other programs later. */ + +if (match_block.tables == NULL) match_block.tables = _pcre_default_tables; + +/* The lower casing table and the "must be at the start of a line" flag are +used in a loop when finding where to start. */ + +lcc = match_block.tables + lcc_offset; +startline = (re->options & PCRE_STARTLINE) != 0; +firstline = (re->options & PCRE_FIRSTLINE) != 0; + +/* Set up the first character to match, if available. The first_byte value is +never set for an anchored regular expression, but the anchoring may be forced +at run time, so we have to test for anchoring. The first char may be unset for +an unanchored pattern, of course. If there's no first char and the pattern was +studied, there may be a bitmap of possible first characters. */ + +if (!anchored) + { + if ((re->options & PCRE_FIRSTSET) != 0) + { + first_byte = re->first_byte & 255; + if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) + first_byte = lcc[first_byte]; + } + else + { + if (startline && study != NULL && + (study->options & PCRE_STUDY_MAPPED) != 0) + start_bits = study->start_bits; + } + } + +/* For anchored or unanchored matches, there may be a "last known required +character" set. */ + +if ((re->options & PCRE_REQCHSET) != 0) + { + req_byte = re->req_byte & 255; + req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; + req_byte2 = (match_block.tables + fcc_offset)[req_byte]; /* case flipped */ + } + +/* Call the main matching function, looping for a non-anchored regex after a +failed match. Unless restarting, optimize by moving to the first match +character if possible, when not anchored. Then unless wanting a partial match, +check for a required later character. */ + +for (;;) + { + int rc; + + if ((options & PCRE_DFA_RESTART) == 0) + { + const uschar *save_end_subject = end_subject; + + /* Advance to a unique first char if possible. If firstline is TRUE, the + start of the match is constrained to the first line of a multiline string. + Implement this by temporarily adjusting end_subject so that we stop scanning + at a newline. If the match fails at the newline, later code breaks this loop. + */ + + if (firstline) + { + const uschar *t = current_subject; + while (t < save_end_subject && *t != '\n') t++; + end_subject = t; + } + + if (first_byte >= 0) + { + if (first_byte_caseless) + while (current_subject < end_subject && + lcc[*current_subject] != first_byte) + current_subject++; + else + while (current_subject < end_subject && *current_subject != first_byte) + current_subject++; + } + + /* Or to just after \n for a multiline match if possible */ + + else if (startline) + { + if (current_subject > match_block.start_subject + start_offset) + { + while (current_subject < end_subject && current_subject[-1] != NEWLINE) + current_subject++; + } + } + + /* Or to a non-unique first char after study */ + + else if (start_bits != NULL) + { + while (current_subject < end_subject) + { + register unsigned int c = *current_subject; + if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++; + else break; + } + } + + /* Restore fudged end_subject */ + + end_subject = save_end_subject; + } + + /* If req_byte is set, we know that that character must appear in the subject + for the match to succeed. If the first character is set, req_byte must be + later in the subject; otherwise the test starts at the match point. This + optimization can save a huge amount of work in patterns with nested unlimited + repeats that aren't going to match. Writing separate code for cased/caseless + versions makes it go faster, as does using an autoincrement and backing off + on a match. + + HOWEVER: when the subject string is very, very long, searching to its end can + take a long time, and give bad performance on quite ordinary patterns. This + showed up when somebody was matching /^C/ on a 32-megabyte string... so we + don't do this when the string is sufficiently long. + + ALSO: this processing is disabled when partial matching is requested. + */ + + if (req_byte >= 0 && + end_subject - current_subject < REQ_BYTE_MAX && + (options & PCRE_PARTIAL) == 0) + { + register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0); + + /* We don't need to repeat the search if we haven't yet reached the + place we found it at last time. */ + + if (p > req_byte_ptr) + { + if (req_byte_caseless) + { + while (p < end_subject) + { + register int pp = *p++; + if (pp == req_byte || pp == req_byte2) { p--; break; } + } + } + else + { + while (p < end_subject) + { + if (*p++ == req_byte) { p--; break; } + } + } + + /* If we can't find the required character, break the matching loop, + which will cause a return or PCRE_ERROR_NOMATCH. */ + + if (p >= end_subject) break; + + /* If we have found the required character, save the point where we + found it, so that we don't search again next time round the loop if + the start hasn't passed this character yet. */ + + req_byte_ptr = p; + } + } + + /* OK, now we can do the business */ + + rc = internal_dfa_exec( + &match_block, /* fixed match data */ + match_block.start_code, /* this subexpression's code */ + current_subject, /* where we currently are */ + start_offset, /* start offset in subject */ + offsets, /* offset vector */ + offsetcount, /* size of same */ + workspace, /* workspace vector */ + wscount, /* size of same */ + re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */ + 0, /* function recurse level */ + 0); /* regex recurse level */ + + /* Anything other than "no match" means we are done, always; otherwise, carry + on only if not anchored. */ + + if (rc != PCRE_ERROR_NOMATCH || anchored) return rc; + + /* Advance to the next subject character unless we are at the end of a line + and firstline is set. */ + + if (firstline && *current_subject == NEWLINE) break; + current_subject++; + +#ifdef SUPPORT_UTF8 + if (utf8) + { + while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80) + current_subject++; + } +#endif + + if (current_subject > end_subject) break; + } + +return PCRE_ERROR_NOMATCH; +} + +/* End of pcre_dfa_exec.c */ diff --git a/libpcre/pcre_exec.c b/libpcre/pcre_exec.c new file mode 100644 index 0000000000..65173e2ba2 --- /dev/null +++ b/libpcre/pcre_exec.c @@ -0,0 +1,3632 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains pcre_exec(), the externally visible function that does +pattern matching using an NFA algorithm, trying to mimic Perl as closely as +possible. There are also some static supporting functions. */ + + +#include "pcre_internal.h" + + +/* Structure for building a chain of data that actually lives on the +stack, for holding the values of the subject pointer at the start of each +subpattern, so as to detect when an empty string has been matched by a +subpattern - to break infinite loops. When NO_RECURSE is set, these blocks +are on the heap, not on the stack. */ + +typedef struct eptrblock { + struct eptrblock *epb_prev; + const uschar *epb_saved_eptr; +} eptrblock; + +/* Flag bits for the match() function */ + +#define match_condassert 0x01 /* Called to check a condition assertion */ +#define match_isgroup 0x02 /* Set if start of bracketed group */ + +/* Non-error returns from the match() function. Error returns are externally +defined PCRE_ERROR_xxx codes, which are all negative. */ + +#define MATCH_MATCH 1 +#define MATCH_NOMATCH 0 + +/* Maximum number of ints of offset to save on the stack for recursive calls. +If the offset vector is bigger, malloc is used. This should be a multiple of 3, +because the offset vector is always a multiple of 3 long. */ + +#define REC_STACK_SAVE_MAX 30 + +/* Min and max values for the common repeats; for the maxima, 0 => infinity */ + +static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; +static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; + + + +#ifdef DEBUG +/************************************************* +* Debugging function to print chars * +*************************************************/ + +/* Print a sequence of chars in printable format, stopping at the end of the +subject if the requested. + +Arguments: + p points to characters + length number to print + is_subject TRUE if printing from within md->start_subject + md pointer to matching data block, if is_subject is TRUE + +Returns: nothing +*/ + +static void +pchars(const uschar *p, int length, BOOL is_subject, match_data *md) +{ +int c; +if (is_subject && length > md->end_subject - p) length = md->end_subject - p; +while (length-- > 0) + if (isprint(c = *(p++))) printf("%c", c); else printf("\\x%02x", c); +} +#endif + + + +/************************************************* +* Match a back-reference * +*************************************************/ + +/* If a back reference hasn't been set, the length that is passed is greater +than the number of characters left in the string, so the match fails. + +Arguments: + offset index into the offset vector + eptr points into the subject + length length to be matched + md points to match data block + ims the ims flags + +Returns: TRUE if matched +*/ + +static BOOL +match_ref(int offset, register const uschar *eptr, int length, match_data *md, + unsigned long int ims) +{ +const uschar *p = md->start_subject + md->offset_vector[offset]; + +#ifdef DEBUG +if (eptr >= md->end_subject) + printf("matching subject "); +else + { + printf("matching subject "); + pchars(eptr, length, TRUE, md); + } +printf(" against backref "); +pchars(p, length, FALSE, md); +printf("\n"); +#endif + +/* Always fail if not enough characters left */ + +if (length > md->end_subject - eptr) return FALSE; + +/* Separate the caselesss case for speed */ + +if ((ims & PCRE_CASELESS) != 0) + { + while (length-- > 0) + if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; + } +else + { while (length-- > 0) if (*p++ != *eptr++) return FALSE; } + +return TRUE; +} + + + +/*************************************************************************** +**************************************************************************** + RECURSION IN THE match() FUNCTION + +The match() function is highly recursive. Some regular expressions can cause +it to recurse thousands of times. I was writing for Unix, so I just let it +call itself recursively. This uses the stack for saving everything that has +to be saved for a recursive call. On Unix, the stack can be large, and this +works fine. + +It turns out that on non-Unix systems there are problems with programs that +use a lot of stack. (This despite the fact that every last chip has oodles +of memory these days, and techniques for extending the stack have been known +for decades.) So.... + +There is a fudge, triggered by defining NO_RECURSE, which avoids recursive +calls by keeping local variables that need to be preserved in blocks of memory +obtained from malloc instead instead of on the stack. Macros are used to +achieve this so that the actual code doesn't look very different to what it +always used to. +**************************************************************************** +***************************************************************************/ + + +/* These versions of the macros use the stack, as normal */ + +#ifndef NO_RECURSE +#define REGISTER register +#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg) +#define RRETURN(ra) return ra +#else + + +/* These versions of the macros manage a private stack on the heap. Note +that the rd argument of RMATCH isn't actually used. It's the md argument of +match(), which never changes. */ + +#define REGISTER + +#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\ + {\ + heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\ + if (setjmp(frame->Xwhere) == 0)\ + {\ + newframe->Xeptr = ra;\ + newframe->Xecode = rb;\ + newframe->Xoffset_top = rc;\ + newframe->Xims = re;\ + newframe->Xeptrb = rf;\ + newframe->Xflags = rg;\ + newframe->Xprevframe = frame;\ + frame = newframe;\ + DPRINTF(("restarting from line %d\n", __LINE__));\ + goto HEAP_RECURSE;\ + }\ + else\ + {\ + DPRINTF(("longjumped back to line %d\n", __LINE__));\ + frame = md->thisframe;\ + rx = frame->Xresult;\ + }\ + } + +#define RRETURN(ra)\ + {\ + heapframe *newframe = frame;\ + frame = newframe->Xprevframe;\ + (pcre_stack_free)(newframe);\ + if (frame != NULL)\ + {\ + frame->Xresult = ra;\ + md->thisframe = frame;\ + longjmp(frame->Xwhere, 1);\ + }\ + return ra;\ + } + + +/* Structure for remembering the local variables in a private frame */ + +typedef struct heapframe { + struct heapframe *Xprevframe; + + /* Function arguments that may change */ + + const uschar *Xeptr; + const uschar *Xecode; + int Xoffset_top; + long int Xims; + eptrblock *Xeptrb; + int Xflags; + + /* Function local variables */ + + const uschar *Xcallpat; + const uschar *Xcharptr; + const uschar *Xdata; + const uschar *Xnext; + const uschar *Xpp; + const uschar *Xprev; + const uschar *Xsaved_eptr; + + recursion_info Xnew_recursive; + + BOOL Xcur_is_word; + BOOL Xcondition; + BOOL Xminimize; + BOOL Xprev_is_word; + + unsigned long int Xoriginal_ims; + +#ifdef SUPPORT_UCP + int Xprop_type; + int Xprop_fail_result; + int Xprop_category; + int Xprop_chartype; + int Xprop_othercase; + int Xprop_test_against; + int *Xprop_test_variable; +#endif + + int Xctype; + int Xfc; + int Xfi; + int Xlength; + int Xmax; + int Xmin; + int Xnumber; + int Xoffset; + int Xop; + int Xsave_capture_last; + int Xsave_offset1, Xsave_offset2, Xsave_offset3; + int Xstacksave[REC_STACK_SAVE_MAX]; + + eptrblock Xnewptrb; + + /* Place to pass back result, and where to jump back to */ + + int Xresult; + jmp_buf Xwhere; + +} heapframe; + +#endif + + +/*************************************************************************** +***************************************************************************/ + + + +/************************************************* +* Match from current position * +*************************************************/ + +/* On entry ecode points to the first opcode, and eptr to the first character +in the subject string, while eptrb holds the value of eptr at the start of the +last bracketed group - used for breaking infinite loops matching zero-length +strings. This function is called recursively in many circumstances. Whenever it +returns a negative (error) response, the outer incarnation must also return the +same response. + +Performance note: It might be tempting to extract commonly used fields from the +md structure (e.g. utf8, end_subject) into individual variables to improve +performance. Tests using gcc on a SPARC disproved this; in the first case, it +made performance worse. + +Arguments: + eptr pointer in subject + ecode position in code + offset_top current top pointer + md pointer to "static" info for the match + ims current /i, /m, and /s options + eptrb pointer to chain of blocks containing eptr at start of + brackets - for testing for empty matches + flags can contain + match_condassert - this is an assertion condition + match_isgroup - this is the start of a bracketed group + +Returns: MATCH_MATCH if matched ) these values are >= 0 + MATCH_NOMATCH if failed to match ) + a negative PCRE_ERROR_xxx value if aborted by an error condition + (e.g. stopped by recursion limit) +*/ + +static int +match(REGISTER const uschar *eptr, REGISTER const uschar *ecode, + int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, + int flags) +{ +/* These variables do not need to be preserved over recursion in this function, +so they can be ordinary variables in all cases. Mark them with "register" +because they are used a lot in loops. */ + +register int rrc; /* Returns from recursive calls */ +register int i; /* Used for loops not involving calls to RMATCH() */ +register int c; /* Character values not kept over RMATCH() calls */ +register BOOL utf8; /* Local copy of UTF-8 flag for speed */ + +/* When recursion is not being used, all "local" variables that have to be +preserved over calls to RMATCH() are part of a "frame" which is obtained from +heap storage. Set up the top-level frame here; others are obtained from the +heap whenever RMATCH() does a "recursion". See the macro definitions above. */ + +#ifdef NO_RECURSE +heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe)); +frame->Xprevframe = NULL; /* Marks the top level */ + +/* Copy in the original argument variables */ + +frame->Xeptr = eptr; +frame->Xecode = ecode; +frame->Xoffset_top = offset_top; +frame->Xims = ims; +frame->Xeptrb = eptrb; +frame->Xflags = flags; + +/* This is where control jumps back to to effect "recursion" */ + +HEAP_RECURSE: + +/* Macros make the argument variables come from the current frame */ + +#define eptr frame->Xeptr +#define ecode frame->Xecode +#define offset_top frame->Xoffset_top +#define ims frame->Xims +#define eptrb frame->Xeptrb +#define flags frame->Xflags + +/* Ditto for the local variables */ + +#ifdef SUPPORT_UTF8 +#define charptr frame->Xcharptr +#endif +#define callpat frame->Xcallpat +#define data frame->Xdata +#define next frame->Xnext +#define pp frame->Xpp +#define prev frame->Xprev +#define saved_eptr frame->Xsaved_eptr + +#define new_recursive frame->Xnew_recursive + +#define cur_is_word frame->Xcur_is_word +#define condition frame->Xcondition +#define minimize frame->Xminimize +#define prev_is_word frame->Xprev_is_word + +#define original_ims frame->Xoriginal_ims + +#ifdef SUPPORT_UCP +#define prop_type frame->Xprop_type +#define prop_fail_result frame->Xprop_fail_result +#define prop_category frame->Xprop_category +#define prop_chartype frame->Xprop_chartype +#define prop_othercase frame->Xprop_othercase +#define prop_test_against frame->Xprop_test_against +#define prop_test_variable frame->Xprop_test_variable +#endif + +#define ctype frame->Xctype +#define fc frame->Xfc +#define fi frame->Xfi +#define length frame->Xlength +#define max frame->Xmax +#define min frame->Xmin +#define number frame->Xnumber +#define offset frame->Xoffset +#define op frame->Xop +#define save_capture_last frame->Xsave_capture_last +#define save_offset1 frame->Xsave_offset1 +#define save_offset2 frame->Xsave_offset2 +#define save_offset3 frame->Xsave_offset3 +#define stacksave frame->Xstacksave + +#define newptrb frame->Xnewptrb + +/* When recursion is being used, local variables are allocated on the stack and +get preserved during recursion in the normal way. In this environment, fi and +i, and fc and c, can be the same variables. */ + +#else +#define fi i +#define fc c + + +#ifdef SUPPORT_UTF8 /* Many of these variables are used ony */ +const uschar *charptr; /* small blocks of the code. My normal */ +#endif /* style of coding would have declared */ +const uschar *callpat; /* them within each of those blocks. */ +const uschar *data; /* However, in order to accommodate the */ +const uschar *next; /* version of this code that uses an */ +const uschar *pp; /* external "stack" implemented on the */ +const uschar *prev; /* heap, it is easier to declare them */ +const uschar *saved_eptr; /* all here, so the declarations can */ + /* be cut out in a block. The only */ +recursion_info new_recursive; /* declarations within blocks below are */ + /* for variables that do not have to */ +BOOL cur_is_word; /* be preserved over a recursive call */ +BOOL condition; /* to RMATCH(). */ +BOOL minimize; +BOOL prev_is_word; + +unsigned long int original_ims; + +#ifdef SUPPORT_UCP +int prop_type; +int prop_fail_result; +int prop_category; +int prop_chartype; +int prop_othercase; +int prop_test_against; +int *prop_test_variable; +#endif + +int ctype; +int length; +int max; +int min; +int number; +int offset; +int op; +int save_capture_last; +int save_offset1, save_offset2, save_offset3; +int stacksave[REC_STACK_SAVE_MAX]; + +eptrblock newptrb; +#endif + +/* These statements are here to stop the compiler complaining about unitialized +variables. */ + +#ifdef SUPPORT_UCP +prop_fail_result = 0; +prop_test_against = 0; +prop_test_variable = NULL; +#endif + +/* OK, now we can get on with the real code of the function. Recursion is +specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined, +these just turn into a recursive call to match() and a "return", respectively. +However, RMATCH isn't like a function call because it's quite a complicated +macro. It has to be used in one particular way. This shouldn't, however, impact +performance when true recursion is being used. */ + +if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT); + +original_ims = ims; /* Save for resetting on ')' */ +utf8 = md->utf8; /* Local copy of the flag */ + +/* At the start of a bracketed group, add the current subject pointer to the +stack of such pointers, to be re-instated at the end of the group when we hit +the closing ket. When match() is called in other circumstances, we don't add to +this stack. */ + +if ((flags & match_isgroup) != 0) + { + newptrb.epb_prev = eptrb; + newptrb.epb_saved_eptr = eptr; + eptrb = &newptrb; + } + +/* Now start processing the operations. */ + +for (;;) + { + op = *ecode; + minimize = FALSE; + + /* For partial matching, remember if we ever hit the end of the subject after + matching at least one subject character. */ + + if (md->partial && + eptr >= md->end_subject && + eptr > md->start_match) + md->hitend = TRUE; + + /* Opening capturing bracket. If there is space in the offset vector, save + the current subject position in the working slot at the top of the vector. We + mustn't change the current values of the data slot, because they may be set + from a previous iteration of this group, and be referred to by a reference + inside the group. + + If the bracket fails to match, we need to restore this value and also the + values of the final offsets, in case they were set by a previous iteration of + the same bracket. + + If there isn't enough space in the offset vector, treat this as if it were a + non-capturing bracket. Don't worry about setting the flag for the error case + here; that is handled in the code for KET. */ + + if (op > OP_BRA) + { + number = op - OP_BRA; + + /* For extended extraction brackets (large number), we have to fish out the + number from a dummy opcode at the start. */ + + if (number > EXTRACT_BASIC_MAX) + number = GET2(ecode, 2+LINK_SIZE); + offset = number << 1; + +#ifdef DEBUG + printf("start bracket %d subject=", number); + pchars(eptr, 16, TRUE, md); + printf("\n"); +#endif + + if (offset < md->offset_max) + { + save_offset1 = md->offset_vector[offset]; + save_offset2 = md->offset_vector[offset+1]; + save_offset3 = md->offset_vector[md->offset_end - number]; + save_capture_last = md->capture_last; + + DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); + md->offset_vector[md->offset_end - number] = eptr - md->start_subject; + + do + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, + match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + md->capture_last = save_capture_last; + ecode += GET(ecode, 1); + } + while (*ecode == OP_ALT); + + DPRINTF(("bracket %d failed\n", number)); + + md->offset_vector[offset] = save_offset1; + md->offset_vector[offset+1] = save_offset2; + md->offset_vector[md->offset_end - number] = save_offset3; + + RRETURN(MATCH_NOMATCH); + } + + /* Insufficient room for saving captured contents */ + + else op = OP_BRA; + } + + /* Other types of node can be handled by a switch */ + + switch(op) + { + case OP_BRA: /* Non-capturing bracket: optimized */ + DPRINTF(("start bracket 0\n")); + do + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, + match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + ecode += GET(ecode, 1); + } + while (*ecode == OP_ALT); + DPRINTF(("bracket 0 failed\n")); + RRETURN(MATCH_NOMATCH); + + /* Conditional group: compilation checked that there are no more than + two branches. If the condition is false, skipping the first branch takes us + past the end if there is only one branch, but that's OK because that is + exactly what going to the ket would do. */ + + case OP_COND: + if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */ + { + offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ + condition = (offset == CREF_RECURSE * 2)? + (md->recursive != NULL) : + (offset < offset_top && md->offset_vector[offset] >= 0); + RMATCH(rrc, eptr, ecode + (condition? + (LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))), + offset_top, md, ims, eptrb, match_isgroup); + RRETURN(rrc); + } + + /* The condition is an assertion. Call match() to evaluate it - setting + the final argument TRUE causes it to stop at the end of an assertion. */ + + else + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, + match_condassert | match_isgroup); + if (rrc == MATCH_MATCH) + { + ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2); + while (*ecode == OP_ALT) ecode += GET(ecode, 1); + } + else if (rrc != MATCH_NOMATCH) + { + RRETURN(rrc); /* Need braces because of following else */ + } + else ecode += GET(ecode, 1); + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, + match_isgroup); + RRETURN(rrc); + } + /* Control never reaches here */ + + /* Skip over conditional reference or large extraction number data if + encountered. */ + + case OP_CREF: + case OP_BRANUMBER: + ecode += 3; + break; + + /* End of the pattern. If we are in a recursion, we should restore the + offsets appropriately and continue from after the call. */ + + case OP_END: + if (md->recursive != NULL && md->recursive->group_num == 0) + { + recursion_info *rec = md->recursive; + DPRINTF(("Hit the end in a (?0) recursion\n")); + md->recursive = rec->prevrec; + memmove(md->offset_vector, rec->offset_save, + rec->saved_max * sizeof(int)); + md->start_match = rec->save_start; + ims = original_ims; + ecode = rec->after_call; + break; + } + + /* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty + string - backtracking will then try other alternatives, if any. */ + + if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH); + md->end_match_ptr = eptr; /* Record where we ended */ + md->end_offset_top = offset_top; /* and how many extracts were taken */ + RRETURN(MATCH_MATCH); + + /* Change option settings */ + + case OP_OPT: + ims = ecode[1]; + ecode += 2; + DPRINTF(("ims set to %02lx\n", ims)); + break; + + /* Assertion brackets. Check the alternative branches in turn - the + matching won't pass the KET for an assertion. If any one branch matches, + the assertion is true. Lookbehind assertions have an OP_REVERSE item at the + start of each branch to move the current point backwards, so the code at + this level is identical to the lookahead case. */ + + case OP_ASSERT: + case OP_ASSERTBACK: + do + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, + match_isgroup); + if (rrc == MATCH_MATCH) break; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + ecode += GET(ecode, 1); + } + while (*ecode == OP_ALT); + if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); + + /* If checking an assertion for a condition, return MATCH_MATCH. */ + + if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH); + + /* Continue from after the assertion, updating the offsets high water + mark, since extracts may have been taken during the assertion. */ + + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + ecode += 1 + LINK_SIZE; + offset_top = md->end_offset_top; + continue; + + /* Negative assertion: all branches must fail to match */ + + case OP_ASSERT_NOT: + case OP_ASSERTBACK_NOT: + do + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, + match_isgroup); + if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + ecode += GET(ecode,1); + } + while (*ecode == OP_ALT); + + if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH); + + ecode += 1 + LINK_SIZE; + continue; + + /* Move the subject pointer back. This occurs only at the start of + each branch of a lookbehind assertion. If we are too close to the start to + move back, this match function fails. When working with UTF-8 we move + back a number of characters, not bytes. */ + + case OP_REVERSE: +#ifdef SUPPORT_UTF8 + if (utf8) + { + c = GET(ecode,1); + for (i = 0; i < c; i++) + { + eptr--; + if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); + BACKCHAR(eptr) + } + } + else +#endif + + /* No UTF-8 support, or not in UTF-8 mode: count is byte count */ + + { + eptr -= GET(ecode,1); + if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); + } + + /* Skip to next op code */ + + ecode += 1 + LINK_SIZE; + break; + + /* The callout item calls an external function, if one is provided, passing + details of the match so far. This is mainly for debugging, though the + function is able to force a failure. */ + + case OP_CALLOUT: + if (pcre_callout != NULL) + { + pcre_callout_block cb; + cb.version = 1; /* Version 1 of the callout block */ + cb.callout_number = ecode[1]; + cb.offset_vector = md->offset_vector; + cb.subject = (const char *)md->start_subject; + cb.subject_length = md->end_subject - md->start_subject; + cb.start_match = md->start_match - md->start_subject; + cb.current_position = eptr - md->start_subject; + cb.pattern_position = GET(ecode, 2); + cb.next_item_length = GET(ecode, 2 + LINK_SIZE); + cb.capture_top = offset_top/2; + cb.capture_last = md->capture_last; + cb.callout_data = md->callout_data; + if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); + if (rrc < 0) RRETURN(rrc); + } + ecode += 2 + 2*LINK_SIZE; + break; + + /* Recursion either matches the current regex, or some subexpression. The + offset data is the offset to the starting bracket from the start of the + whole pattern. (This is so that it works from duplicated subpatterns.) + + If there are any capturing brackets started but not finished, we have to + save their starting points and reinstate them after the recursion. However, + we don't know how many such there are (offset_top records the completed + total) so we just have to save all the potential data. There may be up to + 65535 such values, which is too large to put on the stack, but using malloc + for small numbers seems expensive. As a compromise, the stack is used when + there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc + is used. A problem is what to do if the malloc fails ... there is no way of + returning to the top level with an error. Save the top REC_STACK_SAVE_MAX + values on the stack, and accept that the rest may be wrong. + + There are also other values that have to be saved. We use a chained + sequence of blocks that actually live on the stack. Thanks to Robin Houston + for the original version of this logic. */ + + case OP_RECURSE: + { + callpat = md->start_code + GET(ecode, 1); + new_recursive.group_num = *callpat - OP_BRA; + + /* For extended extraction brackets (large number), we have to fish out + the number from a dummy opcode at the start. */ + + if (new_recursive.group_num > EXTRACT_BASIC_MAX) + new_recursive.group_num = GET2(callpat, 2+LINK_SIZE); + + /* Add to "recursing stack" */ + + new_recursive.prevrec = md->recursive; + md->recursive = &new_recursive; + + /* Find where to continue from afterwards */ + + ecode += 1 + LINK_SIZE; + new_recursive.after_call = ecode; + + /* Now save the offset data. */ + + new_recursive.saved_max = md->offset_end; + if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) + new_recursive.offset_save = stacksave; + else + { + new_recursive.offset_save = + (int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); + if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); + } + + memcpy(new_recursive.offset_save, md->offset_vector, + new_recursive.saved_max * sizeof(int)); + new_recursive.save_start = md->start_match; + md->start_match = eptr; + + /* OK, now we can do the recursion. For each top-level alternative we + restore the offset and recursion data. */ + + DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); + do + { + RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims, + eptrb, match_isgroup); + if (rrc == MATCH_MATCH) + { + md->recursive = new_recursive.prevrec; + if (new_recursive.offset_save != stacksave) + (pcre_free)(new_recursive.offset_save); + RRETURN(MATCH_MATCH); + } + else if (rrc != MATCH_NOMATCH) RRETURN(rrc); + + md->recursive = &new_recursive; + memcpy(md->offset_vector, new_recursive.offset_save, + new_recursive.saved_max * sizeof(int)); + callpat += GET(callpat, 1); + } + while (*callpat == OP_ALT); + + DPRINTF(("Recursion didn't match\n")); + md->recursive = new_recursive.prevrec; + if (new_recursive.offset_save != stacksave) + (pcre_free)(new_recursive.offset_save); + RRETURN(MATCH_NOMATCH); + } + /* Control never reaches here */ + + /* "Once" brackets are like assertion brackets except that after a match, + the point in the subject string is not moved back. Thus there can never be + a move back into the brackets. Friedl calls these "atomic" subpatterns. + Check the alternative branches in turn - the matching won't pass the KET + for this kind of subpattern. If any one branch matches, we carry on as at + the end of a normal bracket, leaving the subject pointer. */ + + case OP_ONCE: + { + prev = ecode; + saved_eptr = eptr; + + do + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, + eptrb, match_isgroup); + if (rrc == MATCH_MATCH) break; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + ecode += GET(ecode,1); + } + while (*ecode == OP_ALT); + + /* If hit the end of the group (which could be repeated), fail */ + + if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); + + /* Continue as from after the assertion, updating the offsets high water + mark, since extracts may have been taken. */ + + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + + offset_top = md->end_offset_top; + eptr = md->end_match_ptr; + + /* For a non-repeating ket, just continue at this level. This also + happens for a repeating ket if no characters were matched in the group. + This is the forcible breaking of infinite loops as implemented in Perl + 5.005. If there is an options reset, it will get obeyed in the normal + course of events. */ + + if (*ecode == OP_KET || eptr == saved_eptr) + { + ecode += 1+LINK_SIZE; + break; + } + + /* The repeating kets try the rest of the pattern or restart from the + preceding bracket, in the appropriate order. We need to reset any options + that changed within the bracket before re-running it, so check the next + opcode. */ + + if (ecode[1+LINK_SIZE] == OP_OPT) + { + ims = (ims & ~PCRE_IMS) | ecode[4]; + DPRINTF(("ims set to %02lx at group repeat\n", ims)); + } + + if (*ecode == OP_KETRMIN) + { + RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + else /* OP_KETRMAX */ + { + RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + RRETURN(MATCH_NOMATCH); + + /* An alternation is the end of a branch; scan along to find the end of the + bracketed group and go to there. */ + + case OP_ALT: + do ecode += GET(ecode,1); while (*ecode == OP_ALT); + break; + + /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating + that it may occur zero times. It may repeat infinitely, or not at all - + i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper + repeat limits are compiled as a number of copies, with the optional ones + preceded by BRAZERO or BRAMINZERO. */ + + case OP_BRAZERO: + { + next = ecode+1; + RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + do next += GET(next,1); while (*next == OP_ALT); + ecode = next + 1+LINK_SIZE; + } + break; + + case OP_BRAMINZERO: + { + next = ecode+1; + do next += GET(next,1); while (*next == OP_ALT); + RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, + match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + ecode++; + } + break; + + /* End of a group, repeated or non-repeating. If we are at the end of + an assertion "group", stop matching and return MATCH_MATCH, but record the + current high water mark for use by positive assertions. Do this also + for the "once" (not-backup up) groups. */ + + case OP_KET: + case OP_KETRMIN: + case OP_KETRMAX: + { + prev = ecode - GET(ecode, 1); + saved_eptr = eptrb->epb_saved_eptr; + + /* Back up the stack of bracket start pointers. */ + + eptrb = eptrb->epb_prev; + + if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || + *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || + *prev == OP_ONCE) + { + md->end_match_ptr = eptr; /* For ONCE */ + md->end_offset_top = offset_top; + RRETURN(MATCH_MATCH); + } + + /* In all other cases except a conditional group we have to check the + group number back at the start and if necessary complete handling an + extraction by setting the offsets and bumping the high water mark. */ + + if (*prev != OP_COND) + { + number = *prev - OP_BRA; + + /* For extended extraction brackets (large number), we have to fish out + the number from a dummy opcode at the start. */ + + if (number > EXTRACT_BASIC_MAX) number = GET2(prev, 2+LINK_SIZE); + offset = number << 1; + +#ifdef DEBUG + printf("end bracket %d", number); + printf("\n"); +#endif + + /* Test for a numbered group. This includes groups called as a result + of recursion. Note that whole-pattern recursion is coded as a recurse + into group 0, so it won't be picked up here. Instead, we catch it when + the OP_END is reached. */ + + if (number > 0) + { + md->capture_last = number; + if (offset >= md->offset_max) md->offset_overflow = TRUE; else + { + md->offset_vector[offset] = + md->offset_vector[md->offset_end - number]; + md->offset_vector[offset+1] = eptr - md->start_subject; + if (offset_top <= offset) offset_top = offset + 2; + } + + /* Handle a recursively called group. Restore the offsets + appropriately and continue from after the call. */ + + if (md->recursive != NULL && md->recursive->group_num == number) + { + recursion_info *rec = md->recursive; + DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); + md->recursive = rec->prevrec; + md->start_match = rec->save_start; + memcpy(md->offset_vector, rec->offset_save, + rec->saved_max * sizeof(int)); + ecode = rec->after_call; + ims = original_ims; + break; + } + } + } + + /* Reset the value of the ims flags, in case they got changed during + the group. */ + + ims = original_ims; + DPRINTF(("ims reset to %02lx\n", ims)); + + /* For a non-repeating ket, just continue at this level. This also + happens for a repeating ket if no characters were matched in the group. + This is the forcible breaking of infinite loops as implemented in Perl + 5.005. If there is an options reset, it will get obeyed in the normal + course of events. */ + + if (*ecode == OP_KET || eptr == saved_eptr) + { + ecode += 1 + LINK_SIZE; + break; + } + + /* The repeating kets try the rest of the pattern or restart from the + preceding bracket, in the appropriate order. */ + + if (*ecode == OP_KETRMIN) + { + RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + else /* OP_KETRMAX */ + { + RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + + RRETURN(MATCH_NOMATCH); + + /* Start of subject unless notbol, or after internal newline if multiline */ + + case OP_CIRC: + if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); + if ((ims & PCRE_MULTILINE) != 0) + { + if (eptr != md->start_subject && eptr[-1] != NEWLINE) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + } + /* ... else fall through */ + + /* Start of subject assertion */ + + case OP_SOD: + if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); + ecode++; + break; + + /* Start of match assertion */ + + case OP_SOM: + if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); + ecode++; + break; + + /* Assert before internal newline if multiline, or before a terminating + newline unless endonly is set, else end of subject unless noteol is set. */ + + case OP_DOLL: + if ((ims & PCRE_MULTILINE) != 0) + { + if (eptr < md->end_subject) + { if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); } + else + { if (md->noteol) RRETURN(MATCH_NOMATCH); } + ecode++; + break; + } + else + { + if (md->noteol) RRETURN(MATCH_NOMATCH); + if (!md->endonly) + { + if (eptr < md->end_subject - 1 || + (eptr == md->end_subject - 1 && *eptr != NEWLINE)) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + } + } + /* ... else fall through */ + + /* End of subject assertion (\z) */ + + case OP_EOD: + if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); + ecode++; + break; + + /* End of subject or ending \n assertion (\Z) */ + + case OP_EODN: + if (eptr < md->end_subject - 1 || + (eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH); + ecode++; + break; + + /* Word boundary assertions */ + + case OP_NOT_WORD_BOUNDARY: + case OP_WORD_BOUNDARY: + { + + /* Find out if the previous and current characters are "word" characters. + It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to + be "non-word" characters. */ + +#ifdef SUPPORT_UTF8 + if (utf8) + { + if (eptr == md->start_subject) prev_is_word = FALSE; else + { + const uschar *lastptr = eptr - 1; + while((*lastptr & 0xc0) == 0x80) lastptr--; + GETCHAR(c, lastptr); + prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; + } + if (eptr >= md->end_subject) cur_is_word = FALSE; else + { + GETCHAR(c, eptr); + cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; + } + } + else +#endif + + /* More streamlined when not in UTF-8 mode */ + + { + prev_is_word = (eptr != md->start_subject) && + ((md->ctypes[eptr[-1]] & ctype_word) != 0); + cur_is_word = (eptr < md->end_subject) && + ((md->ctypes[*eptr] & ctype_word) != 0); + } + + /* Now see if the situation is what we want */ + + if ((*ecode++ == OP_WORD_BOUNDARY)? + cur_is_word == prev_is_word : cur_is_word != prev_is_word) + RRETURN(MATCH_NOMATCH); + } + break; + + /* Match a single character type; inline for speed */ + + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE) + RRETURN(MATCH_NOMATCH); + if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); +#ifdef SUPPORT_UTF8 + if (utf8) + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; +#endif + ecode++; + break; + + /* Match a single byte, even in UTF-8 mode. This opcode really does match + any byte, even newline, independent of the setting of PCRE_DOTALL. */ + + case OP_ANYBYTE: + if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_NOT_DIGIT: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + if ( +#ifdef SUPPORT_UTF8 + c < 256 && +#endif + (md->ctypes[c] & ctype_digit) != 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_DIGIT: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + if ( +#ifdef SUPPORT_UTF8 + c >= 256 || +#endif + (md->ctypes[c] & ctype_digit) == 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_NOT_WHITESPACE: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + if ( +#ifdef SUPPORT_UTF8 + c < 256 && +#endif + (md->ctypes[c] & ctype_space) != 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_WHITESPACE: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + if ( +#ifdef SUPPORT_UTF8 + c >= 256 || +#endif + (md->ctypes[c] & ctype_space) == 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_NOT_WORDCHAR: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + if ( +#ifdef SUPPORT_UTF8 + c < 256 && +#endif + (md->ctypes[c] & ctype_word) != 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + + case OP_WORDCHAR: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + if ( +#ifdef SUPPORT_UTF8 + c >= 256 || +#endif + (md->ctypes[c] & ctype_word) == 0 + ) + RRETURN(MATCH_NOMATCH); + ecode++; + break; + +#ifdef SUPPORT_UCP + /* Check the next character by Unicode property. We will get here only + if the support is in the binary; otherwise a compile-time error occurs. */ + + case OP_PROP: + case OP_NOTPROP: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + { + int chartype, rqdtype; + int othercase; + int category = ucp_findchar(c, &chartype, &othercase); + + rqdtype = *(++ecode); + ecode++; + + if (rqdtype >= 128) + { + if ((rqdtype - 128 != category) == (op == OP_PROP)) + RRETURN(MATCH_NOMATCH); + } + else + { + if ((rqdtype != chartype) == (op == OP_PROP)) + RRETURN(MATCH_NOMATCH); + } + } + break; + + /* Match an extended Unicode sequence. We will get here only if the support + is in the binary; otherwise a compile-time error occurs. */ + + case OP_EXTUNI: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + { + int chartype; + int othercase; + int category = ucp_findchar(c, &chartype, &othercase); + if (category == ucp_M) RRETURN(MATCH_NOMATCH); + while (eptr < md->end_subject) + { + int len = 1; + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } + category = ucp_findchar(c, &chartype, &othercase); + if (category != ucp_M) break; + eptr += len; + } + } + ecode++; + break; +#endif + + + /* Match a back reference, possibly repeatedly. Look past the end of the + item to see if there is repeat information following. The code is similar + to that for character classes, but repeated for efficiency. Then obey + similar code to character type repeats - written out again for speed. + However, if the referenced string is the empty string, always treat + it as matched, any number of times (otherwise there could be infinite + loops). */ + + case OP_REF: + { + offset = GET2(ecode, 1) << 1; /* Doubled ref number */ + ecode += 3; /* Advance past item */ + + /* If the reference is unset, set the length to be longer than the amount + of subject left; this ensures that every attempt at a match fails. We + can't just fail here, because of the possibility of quantifiers with zero + minima. */ + + length = (offset >= offset_top || md->offset_vector[offset] < 0)? + md->end_subject - eptr + 1 : + md->offset_vector[offset+1] - md->offset_vector[offset]; + + /* Set up for repetition, or handle the non-repeated case */ + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + c = *ecode++ - OP_CRSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + minimize = (*ecode == OP_CRMINRANGE); + min = GET2(ecode, 1); + max = GET2(ecode, 3); + if (max == 0) max = INT_MAX; + ecode += 5; + break; + + default: /* No repeat follows */ + if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); + eptr += length; + continue; /* With the main loop */ + } + + /* If the length of the reference is zero, just continue with the + main loop. */ + + if (length == 0) continue; + + /* First, ensure the minimum number of matches are present. We get back + the length of the reference string explicitly rather than passing the + address of eptr, so that eptr can be a register variable. */ + + for (i = 1; i <= min; i++) + { + if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); + eptr += length; + } + + /* If min = max, continue at the same level without recursion. + They are not both allowed to be zero. */ + + if (min == max) continue; + + /* If minimizing, keep trying and advancing the pointer */ + + if (minimize) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || !match_ref(offset, eptr, length, md, ims)) + RRETURN(MATCH_NOMATCH); + eptr += length; + } + /* Control never gets here */ + } + + /* If maximizing, find the longest string and work backwards */ + + else + { + pp = eptr; + for (i = min; i < max; i++) + { + if (!match_ref(offset, eptr, length, md, ims)) break; + eptr += length; + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr -= length; + } + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + + + + /* Match a bit-mapped character class, possibly repeatedly. This op code is + used when all the characters in the class have values in the range 0-255, + and either the matching is caseful, or the characters are in the range + 0-127 when UTF-8 processing is enabled. The only difference between + OP_CLASS and OP_NCLASS occurs when a data character outside the range is + encountered. + + First, look past the end of the item to see if there is repeat information + following. Then obey similar code to character type repeats - written out + again for speed. */ + + case OP_NCLASS: + case OP_CLASS: + { + data = ecode + 1; /* Save for matching */ + ecode += 33; /* Advance past the item */ + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + c = *ecode++ - OP_CRSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + minimize = (*ecode == OP_CRMINRANGE); + min = GET2(ecode, 1); + max = GET2(ecode, 3); + if (max == 0) max = INT_MAX; + ecode += 5; + break; + + default: /* No repeat follows */ + min = max = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + if (c > 255) + { + if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else + { + if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + c = *eptr++; + if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + + /* If max == min we can continue with the main loop without the + need to recurse. */ + + if (min == max) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (minimize) + { +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + if (c > 255) + { + if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); + } + else + { + if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + c = *eptr++; + if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + pp = eptr; + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c > 255) + { + if (op == OP_CLASS) break; + } + else + { + if ((data[c/8] & (1 << (c&7))) == 0) break; + } + eptr += len; + } + for (;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + BACKCHAR(eptr); + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject) break; + c = *eptr; + if ((data[c/8] & (1 << (c&7))) == 0) break; + eptr++; + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + eptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + + + /* Match an extended character class. This opcode is encountered only + in UTF-8 mode, because that's the only time it is compiled. */ + +#ifdef SUPPORT_UTF8 + case OP_XCLASS: + { + data = ecode + 1 + LINK_SIZE; /* Save for matching */ + ecode += GET(ecode, 1); /* Advance past the item */ + + switch (*ecode) + { + case OP_CRSTAR: + case OP_CRMINSTAR: + case OP_CRPLUS: + case OP_CRMINPLUS: + case OP_CRQUERY: + case OP_CRMINQUERY: + c = *ecode++ - OP_CRSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + break; + + case OP_CRRANGE: + case OP_CRMINRANGE: + minimize = (*ecode == OP_CRMINRANGE); + min = GET2(ecode, 1); + max = GET2(ecode, 3); + if (max == 0) max = INT_MAX; + ecode += 5; + break; + + default: /* No repeat follows */ + min = max = 1; + break; + } + + /* First, ensure the minimum number of matches are present. */ + + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); + } + + /* If max == min we can continue with the main loop without the + need to recurse. */ + + if (min == max) continue; + + /* If minimizing, keep testing the rest of the expression and advancing + the pointer while it matches the class. */ + + if (minimize) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + if (!_pcre_xclass(c, data)) RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + + /* If maximizing, find the longest possible run, then work backwards. */ + + else + { + pp = eptr; + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (!_pcre_xclass(c, data)) break; + eptr += len; + } + for(;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + BACKCHAR(eptr) + } + RRETURN(MATCH_NOMATCH); + } + + /* Control never gets here */ + } +#endif /* End of XCLASS */ + + /* Match a single character, casefully */ + + case OP_CHAR: +#ifdef SUPPORT_UTF8 + if (utf8) + { + length = 1; + ecode++; + GETCHARLEN(fc, ecode, length); + if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + while (length-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); + } + else +#endif + + /* Non-UTF-8 mode */ + { + if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); + if (ecode[1] != *eptr++) RRETURN(MATCH_NOMATCH); + ecode += 2; + } + break; + + /* Match a single character, caselessly */ + + case OP_CHARNC: +#ifdef SUPPORT_UTF8 + if (utf8) + { + length = 1; + ecode++; + GETCHARLEN(fc, ecode, length); + + if (length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + + /* If the pattern character's value is < 128, we have only one byte, and + can use the fast lookup table. */ + + if (fc < 128) + { + if (md->lcc[*ecode++] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + } + + /* Otherwise we must pick up the subject character */ + + else + { + int dc; + GETCHARINC(dc, eptr); + ecode += length; + + /* If we have Unicode property support, we can use it to test the other + case of the character, if there is one. The result of ucp_findchar() is + < 0 if the char isn't found, and othercase is returned as zero if there + isn't one. */ + + if (fc != dc) + { +#ifdef SUPPORT_UCP + int chartype; + int othercase; + if (ucp_findchar(fc, &chartype, &othercase) < 0 || dc != othercase) +#endif + RRETURN(MATCH_NOMATCH); + } + } + } + else +#endif /* SUPPORT_UTF8 */ + + /* Non-UTF-8 mode */ + { + if (md->end_subject - eptr < 1) RRETURN(MATCH_NOMATCH); + if (md->lcc[ecode[1]] != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + ecode += 2; + } + break; + + /* Match a single character repeatedly; different opcodes share code. */ + + case OP_EXACT: + min = max = GET2(ecode, 1); + ecode += 3; + goto REPEATCHAR; + + case OP_UPTO: + case OP_MINUPTO: + min = 0; + max = GET2(ecode, 1); + minimize = *ecode == OP_MINUPTO; + ecode += 3; + goto REPEATCHAR; + + case OP_STAR: + case OP_MINSTAR: + case OP_PLUS: + case OP_MINPLUS: + case OP_QUERY: + case OP_MINQUERY: + c = *ecode++ - OP_STAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + + /* Common code for all repeated single-character matches. We can give + up quickly if there are fewer than the minimum number of characters left in + the subject. */ + + REPEATCHAR: +#ifdef SUPPORT_UTF8 + if (utf8) + { + length = 1; + charptr = ecode; + GETCHARLEN(fc, ecode, length); + if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + ecode += length; + + /* Handle multibyte character matching specially here. There is + support for caseless matching if UCP support is present. */ + + if (length > 1) + { + int oclength = 0; + uschar occhars[8]; + +#ifdef SUPPORT_UCP + int othercase; + int chartype; + if ((ims & PCRE_CASELESS) != 0 && + ucp_findchar(fc, &chartype, &othercase) >= 0 && + othercase > 0) + oclength = _pcre_ord2utf8(othercase, occhars); +#endif /* SUPPORT_UCP */ + + for (i = 1; i <= min; i++) + { + if (memcmp(eptr, charptr, length) == 0) eptr += length; + /* Need braces because of following else */ + else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } + else + { + if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); + eptr += oclength; + } + } + + if (min == max) continue; + + if (minimize) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + if (memcmp(eptr, charptr, length) == 0) eptr += length; + /* Need braces because of following else */ + else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } + else + { + if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); + eptr += oclength; + } + } + /* Control never gets here */ + } + else + { + pp = eptr; + for (i = min; i < max; i++) + { + if (eptr > md->end_subject - length) break; + if (memcmp(eptr, charptr, length) == 0) eptr += length; + else if (oclength == 0) break; + else + { + if (memcmp(eptr, occhars, oclength) != 0) break; + eptr += oclength; + } + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr -= length; + } + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + + /* If the length of a UTF-8 character is 1, we fall through here, and + obey the code as for non-UTF-8 characters below, though in this case the + value of fc will always be < 128. */ + } + else +#endif /* SUPPORT_UTF8 */ + + /* When not in UTF-8 mode, load a single-byte character. */ + { + if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + fc = *ecode++; + } + + /* The value of fc at this point is always less than 256, though we may or + may not be in UTF-8 mode. The code is duplicated for the caseless and + caseful cases, for speed, since matching characters is likely to be quite + common. First, ensure the minimum number of matches are present. If min = + max, continue at the same level without recursing. Otherwise, if + minimizing, keep trying the rest of the expression and advancing one + matching character if failing, up to the maximum. Alternatively, if + maximizing, find the maximum number of characters and work backwards. */ + + DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, + max, eptr)); + + if ((ims & PCRE_CASELESS) != 0) + { + fc = md->lcc[fc]; + for (i = 1; i <= min; i++) + if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + if (min == max) continue; + if (minimize) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject || + fc != md->lcc[*eptr++]) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + else + { + pp = eptr; + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break; + eptr++; + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + eptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + + /* Caseful comparisons (includes all multi-byte characters) */ + + else + { + for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH); + if (min == max) continue; + if (minimize) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject || fc != *eptr++) + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + else + { + pp = eptr; + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || fc != *eptr) break; + eptr++; + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + eptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + + /* Match a negated single one-byte character. The character we are + checking can be multibyte. */ + + case OP_NOT: + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + ecode++; + GETCHARINCTEST(c, eptr); + if ((ims & PCRE_CASELESS) != 0) + { +#ifdef SUPPORT_UTF8 + if (c < 256) +#endif + c = md->lcc[c]; + if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH); + } + else + { + if (*ecode++ == c) RRETURN(MATCH_NOMATCH); + } + break; + + /* Match a negated single one-byte character repeatedly. This is almost a + repeat of the code for a repeated single character, but I haven't found a + nice way of commoning these up that doesn't require a test of the + positive/negative option for each character match. Maybe that wouldn't add + very much to the time taken, but character matching *is* what this is all + about... */ + + case OP_NOTEXACT: + min = max = GET2(ecode, 1); + ecode += 3; + goto REPEATNOTCHAR; + + case OP_NOTUPTO: + case OP_NOTMINUPTO: + min = 0; + max = GET2(ecode, 1); + minimize = *ecode == OP_NOTMINUPTO; + ecode += 3; + goto REPEATNOTCHAR; + + case OP_NOTSTAR: + case OP_NOTMINSTAR: + case OP_NOTPLUS: + case OP_NOTMINPLUS: + case OP_NOTQUERY: + case OP_NOTMINQUERY: + c = *ecode++ - OP_NOTSTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + + /* Common code for all repeated single-byte matches. We can give up quickly + if there are fewer than the minimum number of bytes left in the + subject. */ + + REPEATNOTCHAR: + if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + fc = *ecode++; + + /* The code is duplicated for the caseless and caseful cases, for speed, + since matching characters is likely to be quite common. First, ensure the + minimum number of matches are present. If min = max, continue at the same + level without recursing. Otherwise, if minimizing, keep trying the rest of + the expression and advancing one matching character if failing, up to the + maximum. Alternatively, if maximizing, find the maximum number of + characters and work backwards. */ + + DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, + max, eptr)); + + if ((ims & PCRE_CASELESS) != 0) + { + fc = md->lcc[fc]; + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + register int d; + for (i = 1; i <= min; i++) + { + GETCHARINC(d, eptr); + if (d < 256) d = md->lcc[d]; + if (fc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + + /* Not UTF-8 mode */ + { + for (i = 1; i <= min; i++) + if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); + } + + if (min == max) continue; + + if (minimize) + { +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + register int d; + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + GETCHARINC(d, eptr); + if (d < 256) d = md->lcc[d]; + if (fi >= max || eptr >= md->end_subject || fc == d) + RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++]) + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + } + + /* Maximize case */ + + else + { + pp = eptr; + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + register int d; + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(d, eptr, len); + if (d < 256) d = md->lcc[d]; + if (fc == d) break; + eptr += len; + } + for(;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + BACKCHAR(eptr); + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break; + eptr++; + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr--; + } + } + + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + } + + /* Caseful comparisons */ + + else + { +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + register int d; + for (i = 1; i <= min; i++) + { + GETCHARINC(d, eptr); + if (fc == d) RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (i = 1; i <= min; i++) + if (fc == *eptr++) RRETURN(MATCH_NOMATCH); + } + + if (min == max) continue; + + if (minimize) + { +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + register int d; + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + GETCHARINC(d, eptr); + if (fi >= max || eptr >= md->end_subject || fc == d) + RRETURN(MATCH_NOMATCH); + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject || fc == *eptr++) + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + } + + /* Maximize case */ + + else + { + pp = eptr; + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + register int d; + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(d, eptr, len); + if (fc == d) break; + eptr += len; + } + for(;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + BACKCHAR(eptr); + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || fc == *eptr) break; + eptr++; + } + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + eptr--; + } + } + + RRETURN(MATCH_NOMATCH); + } + } + /* Control never gets here */ + + /* Match a single character type repeatedly; several different opcodes + share code. This is very similar to the code for single characters, but we + repeat it in the interests of efficiency. */ + + case OP_TYPEEXACT: + min = max = GET2(ecode, 1); + minimize = TRUE; + ecode += 3; + goto REPEATTYPE; + + case OP_TYPEUPTO: + case OP_TYPEMINUPTO: + min = 0; + max = GET2(ecode, 1); + minimize = *ecode == OP_TYPEMINUPTO; + ecode += 3; + goto REPEATTYPE; + + case OP_TYPESTAR: + case OP_TYPEMINSTAR: + case OP_TYPEPLUS: + case OP_TYPEMINPLUS: + case OP_TYPEQUERY: + case OP_TYPEMINQUERY: + c = *ecode++ - OP_TYPESTAR; + minimize = (c & 1) != 0; + min = rep_min[c]; /* Pick up values from tables; */ + max = rep_max[c]; /* zero for max => infinity */ + if (max == 0) max = INT_MAX; + + /* Common code for all repeated single character type matches. Note that + in UTF-8 mode, '.' matches a character of any length, but for the other + character types, the valid characters are all one-byte long. */ + + REPEATTYPE: + ctype = *ecode++; /* Code for the character type */ + +#ifdef SUPPORT_UCP + if (ctype == OP_PROP || ctype == OP_NOTPROP) + { + prop_fail_result = ctype == OP_NOTPROP; + prop_type = *ecode++; + if (prop_type >= 128) + { + prop_test_against = prop_type - 128; + prop_test_variable = &prop_category; + } + else + { + prop_test_against = prop_type; + prop_test_variable = &prop_chartype; + } + } + else prop_type = -1; +#endif + + /* First, ensure the minimum number of matches are present. Use inline + code for maximizing the speed, and do the type test once at the start + (i.e. keep it out of the loop). Also we can test that there are at least + the minimum number of bytes before we start. This isn't as effective in + UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that + is tidier. Also separate the UCP code, which can be the same for both UTF-8 + and single-bytes. */ + + if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); + if (min > 0) + { +#ifdef SUPPORT_UCP + if (prop_type > 0) + { + for (i = 1; i <= min; i++) + { + GETCHARINC(c, eptr); + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if ((*prop_test_variable == prop_test_against) == prop_fail_result) + RRETURN(MATCH_NOMATCH); + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (ctype == OP_EXTUNI) + { + for (i = 1; i <= min; i++) + { + GETCHARINCTEST(c, eptr); + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH); + while (eptr < md->end_subject) + { + int len = 1; + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category != ucp_M) break; + eptr += len; + } + } + } + + else +#endif /* SUPPORT_UCP */ + +/* Handle all other cases when the coding is UTF-8 */ + +#ifdef SUPPORT_UTF8 + if (utf8) switch(ctype) + { + case OP_ANY: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject || + (*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0)) + RRETURN(MATCH_NOMATCH); + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + } + break; + + case OP_ANYBYTE: + eptr += min; + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + if (c < 128 && (md->ctypes[c] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + } + break; + + case OP_DIGIT: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject || + *eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + /* No need to skip more bytes - we know it's a 1-byte character */ + } + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject || + (*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0)) + RRETURN(MATCH_NOMATCH); + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + } + break; + + case OP_WHITESPACE: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject || + *eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + /* No need to skip more bytes - we know it's a 1-byte character */ + } + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject || + (*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0)) + RRETURN(MATCH_NOMATCH); + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + } + break; + + case OP_WORDCHAR: + for (i = 1; i <= min; i++) + { + if (eptr >= md->end_subject || + *eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + /* No need to skip more bytes - we know it's a 1-byte character */ + } + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } /* End switch(ctype) */ + + else +#endif /* SUPPORT_UTF8 */ + + /* Code for the non-UTF-8 case for minimum matching of operators other + than OP_PROP and OP_NOTPROP. */ + + switch(ctype) + { + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0) + { + for (i = 1; i <= min; i++) + if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH); + } + else eptr += min; + break; + + case OP_ANYBYTE: + eptr += min; + break; + + case OP_NOT_DIGIT: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + for (i = 1; i <= min; i++) + if ((md->ctypes[*eptr++] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + + /* If min = max, continue at the same level without recursing */ + + if (min == max) continue; + + /* If minimizing, we have to test the rest of the pattern before each + subsequent match. Again, separate the UTF-8 case for speed, and also + separate the UCP cases. */ + + if (minimize) + { +#ifdef SUPPORT_UCP + if (prop_type > 0) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINC(c, eptr); + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if ((*prop_test_variable == prop_test_against) == prop_fail_result) + RRETURN(MATCH_NOMATCH); + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (ctype == OP_EXTUNI) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + GETCHARINCTEST(c, eptr); + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category == ucp_M) RRETURN(MATCH_NOMATCH); + while (eptr < md->end_subject) + { + int len = 1; + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category != ucp_M) break; + eptr += len; + } + } + } + + else +#endif /* SUPPORT_UCP */ + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + if (utf8) + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + + GETCHARINC(c, eptr); + switch(ctype) + { + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH); + break; + + case OP_ANYBYTE: + break; + + case OP_NOT_DIGIT: + if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if (c < 256 && (md->ctypes[c] & ctype_space) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + if (c < 256 && (md->ctypes[c] & ctype_word) != 0) + RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) + RRETURN(MATCH_NOMATCH); + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + } + else +#endif + /* Not UTF-8 mode */ + { + for (fi = min;; fi++) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); + c = *eptr++; + switch(ctype) + { + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH); + break; + + case OP_ANYBYTE: + break; + + case OP_NOT_DIGIT: + if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_DIGIT: + if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WHITESPACE: + if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_WHITESPACE: + if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_NOT_WORDCHAR: + if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); + break; + + case OP_WORDCHAR: + if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + } + } + /* Control never gets here */ + } + + /* If maximizing it is worth using inline code for speed, doing the type + test once at the start (i.e. keep it out of the loop). Again, keep the + UTF-8 and UCP stuff separate. */ + + else + { + pp = eptr; /* Remember where we started */ + +#ifdef SUPPORT_UCP + if (prop_type > 0) + { + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if ((*prop_test_variable == prop_test_against) == prop_fail_result) + break; + eptr+= len; + } + + /* eptr is now past the end of the maximum run */ + + for(;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + BACKCHAR(eptr); + } + } + + /* Match extended Unicode sequences. We will get here only if the + support is in the binary; otherwise a compile-time error occurs. */ + + else if (ctype == OP_EXTUNI) + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject) break; + GETCHARINCTEST(c, eptr); + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category == ucp_M) break; + while (eptr < md->end_subject) + { + int len = 1; + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category != ucp_M) break; + eptr += len; + } + } + + /* eptr is now past the end of the maximum run */ + + for(;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + for (;;) /* Move back over one extended */ + { + int len = 1; + BACKCHAR(eptr); + if (!utf8) c = *eptr; else + { + GETCHARLEN(c, eptr, len); + } + prop_category = ucp_findchar(c, &prop_chartype, &prop_othercase); + if (prop_category != ucp_M) break; + eptr--; + } + } + } + + else +#endif /* SUPPORT_UCP */ + +#ifdef SUPPORT_UTF8 + /* UTF-8 mode */ + + if (utf8) + { + switch(ctype) + { + case OP_ANY: + + /* Special code is required for UTF8, but when the maximum is unlimited + we don't need it, so we repeat the non-UTF8 code. This is probably + worth it, because .* is quite a common idiom. */ + + if (max < INT_MAX) + { + if ((ims & PCRE_DOTALL) == 0) + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || *eptr == NEWLINE) break; + eptr++; + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + } + } + else + { + for (i = min; i < max; i++) + { + eptr++; + while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; + } + } + } + + /* Handle unlimited UTF-8 repeat */ + + else + { + if ((ims & PCRE_DOTALL) == 0) + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || *eptr == NEWLINE) break; + eptr++; + } + break; + } + else + { + c = max - min; + if (c > md->end_subject - eptr) c = md->end_subject - eptr; + eptr += c; + } + } + break; + + /* The byte case is the same as non-UTF8 */ + + case OP_ANYBYTE: + c = max - min; + if (c > md->end_subject - eptr) c = md->end_subject - eptr; + eptr += c; + break; + + case OP_NOT_DIGIT: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) break; + eptr+= len; + } + break; + + case OP_DIGIT: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c >= 256 ||(md->ctypes[c] & ctype_digit) == 0) break; + eptr+= len; + } + break; + + case OP_NOT_WHITESPACE: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c < 256 && (md->ctypes[c] & ctype_space) != 0) break; + eptr+= len; + } + break; + + case OP_WHITESPACE: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c >= 256 ||(md->ctypes[c] & ctype_space) == 0) break; + eptr+= len; + } + break; + + case OP_NOT_WORDCHAR: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c < 256 && (md->ctypes[c] & ctype_word) != 0) break; + eptr+= len; + } + break; + + case OP_WORDCHAR: + for (i = min; i < max; i++) + { + int len = 1; + if (eptr >= md->end_subject) break; + GETCHARLEN(c, eptr, len); + if (c >= 256 || (md->ctypes[c] & ctype_word) == 0) break; + eptr+= len; + } + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + + /* eptr is now past the end of the maximum run */ + + for(;;) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr-- == pp) break; /* Stop if tried at original pos */ + BACKCHAR(eptr); + } + } + else +#endif + + /* Not UTF-8 mode */ + { + switch(ctype) + { + case OP_ANY: + if ((ims & PCRE_DOTALL) == 0) + { + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || *eptr == NEWLINE) break; + eptr++; + } + break; + } + /* For DOTALL case, fall through and treat as \C */ + + case OP_ANYBYTE: + c = max - min; + if (c > md->end_subject - eptr) c = md->end_subject - eptr; + eptr += c; + break; + + case OP_NOT_DIGIT: + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) + break; + eptr++; + } + break; + + case OP_DIGIT: + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) + break; + eptr++; + } + break; + + case OP_NOT_WHITESPACE: + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) + break; + eptr++; + } + break; + + case OP_WHITESPACE: + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) + break; + eptr++; + } + break; + + case OP_NOT_WORDCHAR: + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) + break; + eptr++; + } + break; + + case OP_WORDCHAR: + for (i = min; i < max; i++) + { + if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) + break; + eptr++; + } + break; + + default: + RRETURN(PCRE_ERROR_INTERNAL); + } + + /* eptr is now past the end of the maximum run */ + + while (eptr >= pp) + { + RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); + eptr--; + if (rrc != MATCH_NOMATCH) RRETURN(rrc); + } + } + + /* Get here if we can't make it match with any permitted repetitions */ + + RRETURN(MATCH_NOMATCH); + } + /* Control never gets here */ + + /* There's been some horrible disaster. Since all codes > OP_BRA are + for capturing brackets, and there shouldn't be any gaps between 0 and + OP_BRA, arrival here can only mean there is something seriously wrong + in the code above or the OP_xxx definitions. */ + + default: + DPRINTF(("Unknown opcode %d\n", *ecode)); + RRETURN(PCRE_ERROR_UNKNOWN_NODE); + } + + /* Do not stick any code in here without much thought; it is assumed + that "continue" in the code above comes out to here to repeat the main + loop. */ + + } /* End of main loop */ +/* Control never reaches here */ +} + + +/*************************************************************************** +**************************************************************************** + RECURSION IN THE match() FUNCTION + +Undefine all the macros that were defined above to handle this. */ + +#ifdef NO_RECURSE +#undef eptr +#undef ecode +#undef offset_top +#undef ims +#undef eptrb +#undef flags + +#undef callpat +#undef charptr +#undef data +#undef next +#undef pp +#undef prev +#undef saved_eptr + +#undef new_recursive + +#undef cur_is_word +#undef condition +#undef minimize +#undef prev_is_word + +#undef original_ims + +#undef ctype +#undef length +#undef max +#undef min +#undef number +#undef offset +#undef op +#undef save_capture_last +#undef save_offset1 +#undef save_offset2 +#undef save_offset3 +#undef stacksave + +#undef newptrb + +#endif + +/* These two are defined as macros in both cases */ + +#undef fc +#undef fi + +/*************************************************************************** +***************************************************************************/ + + + +/************************************************* +* Execute a Regular Expression * +*************************************************/ + +/* This function applies a compiled re to a subject string and picks out +portions of the string if it matches. Two elements in the vector are set for +each substring: the offsets to the start and end of the substring. + +Arguments: + argument_re points to the compiled expression + extra_data points to extra data or is NULL + subject points to the subject string + length length of subject string (may contain binary zeros) + start_offset where to start in the subject string + options option bits + offsets points to a vector of ints to be filled in with offsets + offsetcount the number of elements in the vector + +Returns: > 0 => success; value is the number of elements filled in + = 0 => success, but offsets is not big enough + -1 => failed to match + < -1 => some kind of unexpected problem +*/ + +EXPORT int +pcre_exec(const pcre *argument_re, const pcre_extra *extra_data, + const char *subject, int length, int start_offset, int options, int *offsets, + int offsetcount) +{ +int rc, resetcount, ocount; +int first_byte = -1; +int req_byte = -1; +int req_byte2 = -1; +unsigned long int ims = 0; +BOOL using_temporary_offsets = FALSE; +BOOL anchored; +BOOL startline; +BOOL firstline; +BOOL first_byte_caseless = FALSE; +BOOL req_byte_caseless = FALSE; +match_data match_block; +const uschar *tables; +const uschar *start_bits = NULL; +const uschar *start_match = (const uschar *)subject + start_offset; +const uschar *end_subject; +const uschar *req_byte_ptr = start_match - 1; + +pcre_study_data internal_study; +const pcre_study_data *study; + +real_pcre internal_re; +const real_pcre *external_re = (const real_pcre *)argument_re; +const real_pcre *re = external_re; + +/* Plausibility checks */ + +if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; +if (re == NULL || subject == NULL || + (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; +if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; + +/* Fish out the optional data from the extra_data structure, first setting +the default values. */ + +study = NULL; +match_block.match_limit = MATCH_LIMIT; +match_block.callout_data = NULL; + +/* The table pointer is always in native byte order. */ + +tables = external_re->tables; + +if (extra_data != NULL) + { + register unsigned int flags = extra_data->flags; + if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) + study = (const pcre_study_data *)extra_data->study_data; + if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) + match_block.match_limit = extra_data->match_limit; + if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) + match_block.callout_data = extra_data->callout_data; + if ((flags & PCRE_EXTRA_TABLES) != 0) tables = extra_data->tables; + } + +/* If the exec call supplied NULL for tables, use the inbuilt ones. This +is a feature that makes it possible to save compiled regex and re-use them +in other programs later. */ + +if (tables == NULL) tables = _pcre_default_tables; + +/* Check that the first field in the block is the magic number. If it is not, +test for a regex that was compiled on a host of opposite endianness. If this is +the case, flipped values are put in internal_re and internal_study if there was +study data too. */ + +if (re->magic_number != MAGIC_NUMBER) + { + re = _pcre_try_flipped(re, &internal_re, study, &internal_study); + if (re == NULL) return PCRE_ERROR_BADMAGIC; + if (study != NULL) study = &internal_study; + } + +/* Set up other data */ + +anchored = ((re->options | options) & PCRE_ANCHORED) != 0; +startline = (re->options & PCRE_STARTLINE) != 0; +firstline = (re->options & PCRE_FIRSTLINE) != 0; + +/* The code starts after the real_pcre block and the capture name table. */ + +match_block.start_code = (const uschar *)external_re + re->name_table_offset + + re->name_count * re->name_entry_size; + +match_block.start_subject = (const uschar *)subject; +match_block.start_offset = start_offset; +match_block.end_subject = match_block.start_subject + length; +end_subject = match_block.end_subject; + +match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; +match_block.utf8 = (re->options & PCRE_UTF8) != 0; + +match_block.notbol = (options & PCRE_NOTBOL) != 0; +match_block.noteol = (options & PCRE_NOTEOL) != 0; +match_block.notempty = (options & PCRE_NOTEMPTY) != 0; +match_block.partial = (options & PCRE_PARTIAL) != 0; +match_block.hitend = FALSE; + +match_block.recursive = NULL; /* No recursion at top level */ + +match_block.lcc = tables + lcc_offset; +match_block.ctypes = tables + ctypes_offset; + +/* Partial matching is supported only for a restricted set of regexes at the +moment. */ + +if (match_block.partial && (re->options & PCRE_NOPARTIAL) != 0) + return PCRE_ERROR_BADPARTIAL; + +/* Check a UTF-8 string if required. Unfortunately there's no way of passing +back the character offset. */ + +#ifdef SUPPORT_UTF8 +if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) + { + if (_pcre_valid_utf8((uschar *)subject, length) >= 0) + return PCRE_ERROR_BADUTF8; + if (start_offset > 0 && start_offset < length) + { + int tb = ((uschar *)subject)[start_offset]; + if (tb > 127) + { + tb &= 0xc0; + if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET; + } + } + } +#endif + +/* The ims options can vary during the matching as a result of the presence +of (?ims) items in the pattern. They are kept in a local variable so that +restoring at the exit of a group is easy. */ + +ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL); + +/* If the expression has got more back references than the offsets supplied can +hold, we get a temporary chunk of working store to use during the matching. +Otherwise, we can use the vector supplied, rounding down its size to a multiple +of 3. */ + +ocount = offsetcount - (offsetcount % 3); + +if (re->top_backref > 0 && re->top_backref >= ocount/3) + { + ocount = re->top_backref * 3 + 3; + match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); + if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; + using_temporary_offsets = TRUE; + DPRINTF(("Got memory to hold back references\n")); + } +else match_block.offset_vector = offsets; + +match_block.offset_end = ocount; +match_block.offset_max = (2*ocount)/3; +match_block.offset_overflow = FALSE; +match_block.capture_last = -1; + +/* Compute the minimum number of offsets that we need to reset each time. Doing +this makes a huge difference to execution time when there aren't many brackets +in the pattern. */ + +resetcount = 2 + re->top_bracket * 2; +if (resetcount > offsetcount) resetcount = ocount; + +/* Reset the working variable associated with each extraction. These should +never be used unless previously set, but they get saved and restored, and so we +initialize them to avoid reading uninitialized locations. */ + +if (match_block.offset_vector != NULL) + { + register int *iptr = match_block.offset_vector + ocount; + register int *iend = iptr - resetcount/2 + 1; + while (--iptr >= iend) *iptr = -1; + } + +/* Set up the first character to match, if available. The first_byte value is +never set for an anchored regular expression, but the anchoring may be forced +at run time, so we have to test for anchoring. The first char may be unset for +an unanchored pattern, of course. If there's no first char and the pattern was +studied, there may be a bitmap of possible first characters. */ + +if (!anchored) + { + if ((re->options & PCRE_FIRSTSET) != 0) + { + first_byte = re->first_byte & 255; + if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) + first_byte = match_block.lcc[first_byte]; + } + else + if (!startline && study != NULL && + (study->options & PCRE_STUDY_MAPPED) != 0) + start_bits = study->start_bits; + } + +/* For anchored or unanchored matches, there may be a "last known required +character" set. */ + +if ((re->options & PCRE_REQCHSET) != 0) + { + req_byte = re->req_byte & 255; + req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; + req_byte2 = (tables + fcc_offset)[req_byte]; /* case flipped */ + } + +/* Loop for handling unanchored repeated matching attempts; for anchored regexs +the loop runs just once. */ + +do + { + const uschar *save_end_subject = end_subject; + + /* Reset the maximum number of extractions we might see. */ + + if (match_block.offset_vector != NULL) + { + register int *iptr = match_block.offset_vector; + register int *iend = iptr + resetcount; + while (iptr < iend) *iptr++ = -1; + } + + /* Advance to a unique first char if possible. If firstline is TRUE, the + start of the match is constrained to the first line of a multiline string. + Implement this by temporarily adjusting end_subject so that we stop scanning + at a newline. If the match fails at the newline, later code breaks this loop. + */ + + if (firstline) + { + const uschar *t = start_match; + while (t < save_end_subject && *t != '\n') t++; + end_subject = t; + } + + /* Now test for a unique first byte */ + + if (first_byte >= 0) + { + if (first_byte_caseless) + while (start_match < end_subject && + match_block.lcc[*start_match] != first_byte) + start_match++; + else + while (start_match < end_subject && *start_match != first_byte) + start_match++; + } + + /* Or to just after \n for a multiline match if possible */ + + else if (startline) + { + if (start_match > match_block.start_subject + start_offset) + { + while (start_match < end_subject && start_match[-1] != NEWLINE) + start_match++; + } + } + + /* Or to a non-unique first char after study */ + + else if (start_bits != NULL) + { + while (start_match < end_subject) + { + register unsigned int c = *start_match; + if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break; + } + } + + /* Restore fudged end_subject */ + + end_subject = save_end_subject; + +#ifdef DEBUG /* Sigh. Some compilers never learn. */ + printf(">>>> Match against: "); + pchars(start_match, end_subject - start_match, TRUE, &match_block); + printf("\n"); +#endif + + /* If req_byte is set, we know that that character must appear in the subject + for the match to succeed. If the first character is set, req_byte must be + later in the subject; otherwise the test starts at the match point. This + optimization can save a huge amount of backtracking in patterns with nested + unlimited repeats that aren't going to match. Writing separate code for + cased/caseless versions makes it go faster, as does using an autoincrement + and backing off on a match. + + HOWEVER: when the subject string is very, very long, searching to its end can + take a long time, and give bad performance on quite ordinary patterns. This + showed up when somebody was matching /^C/ on a 32-megabyte string... so we + don't do this when the string is sufficiently long. + + ALSO: this processing is disabled when partial matching is requested. + */ + + if (req_byte >= 0 && + end_subject - start_match < REQ_BYTE_MAX && + !match_block.partial) + { + register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0); + + /* We don't need to repeat the search if we haven't yet reached the + place we found it at last time. */ + + if (p > req_byte_ptr) + { + if (req_byte_caseless) + { + while (p < end_subject) + { + register int pp = *p++; + if (pp == req_byte || pp == req_byte2) { p--; break; } + } + } + else + { + while (p < end_subject) + { + if (*p++ == req_byte) { p--; break; } + } + } + + /* If we can't find the required character, break the matching loop */ + + if (p >= end_subject) break; + + /* If we have found the required character, save the point where we + found it, so that we don't search again next time round the loop if + the start hasn't passed this character yet. */ + + req_byte_ptr = p; + } + } + + /* When a match occurs, substrings will be set for all internal extractions; + we just need to set up the whole thing as substring 0 before returning. If + there were too many extractions, set the return code to zero. In the case + where we had to get some local store to hold offsets for backreferences, copy + those back references that we can. In this case there need not be overflow + if certain parts of the pattern were not used. */ + + match_block.start_match = start_match; + match_block.match_call_count = 0; + + rc = match(start_match, match_block.start_code, 2, &match_block, ims, NULL, + match_isgroup); + + /* When the result is no match, if the subject's first character was a + newline and the PCRE_FIRSTLINE option is set, break (which will return + PCRE_ERROR_NOMATCH). The option requests that a match occur before the first + newline in the subject. Otherwise, advance the pointer to the next character + and continue - but the continuation will actually happen only when the + pattern is not anchored. */ + + if (rc == MATCH_NOMATCH) + { + if (firstline && *start_match == NEWLINE) break; + start_match++; +#ifdef SUPPORT_UTF8 + if (match_block.utf8) + while(start_match < end_subject && (*start_match & 0xc0) == 0x80) + start_match++; +#endif + continue; + } + + if (rc != MATCH_MATCH) + { + DPRINTF((">>>> error: returning %d\n", rc)); + return rc; + } + + /* We have a match! Copy the offset information from temporary store if + necessary */ + + if (using_temporary_offsets) + { + if (offsetcount >= 4) + { + memcpy(offsets + 2, match_block.offset_vector + 2, + (offsetcount - 2) * sizeof(int)); + DPRINTF(("Copied offsets from temporary memory\n")); + } + if (match_block.end_offset_top > offsetcount) + match_block.offset_overflow = TRUE; + + DPRINTF(("Freeing temporary memory\n")); + (pcre_free)(match_block.offset_vector); + } + + rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; + + if (offsetcount < 2) rc = 0; else + { + offsets[0] = start_match - match_block.start_subject; + offsets[1] = match_block.end_match_ptr - match_block.start_subject; + } + + DPRINTF((">>>> returning %d\n", rc)); + return rc; + } + +/* This "while" is the end of the "do" above */ + +while (!anchored && start_match <= end_subject); + +if (using_temporary_offsets) + { + DPRINTF(("Freeing temporary memory\n")); + (pcre_free)(match_block.offset_vector); + } + +if (match_block.partial && match_block.hitend) + { + DPRINTF((">>>> returning PCRE_ERROR_PARTIAL\n")); + return PCRE_ERROR_PARTIAL; + } +else + { + DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); + return PCRE_ERROR_NOMATCH; + } +} + +/* End of pcre_exec.c */ diff --git a/libpcre/pcre_fullinfo.c b/libpcre/pcre_fullinfo.c new file mode 100644 index 0000000000..ac80e65e2a --- /dev/null +++ b/libpcre/pcre_fullinfo.c @@ -0,0 +1,149 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/*PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_fullinfo(), which returns +information about a compiled pattern. */ + + +#include "pcre_internal.h" + + +/************************************************* +* Return info about compiled pattern * +*************************************************/ + +/* This is a newer "info" function which has an extensible interface so +that additional items can be added compatibly. + +Arguments: + argument_re points to compiled code + extra_data points extra data, or NULL + what what information is required + where where to put the information + +Returns: 0 if data returned, negative on error +*/ + +EXPORT int +pcre_fullinfo(const pcre *argument_re, const pcre_extra *extra_data, int what, + void *where) +{ +real_pcre internal_re; +pcre_study_data internal_study; +const real_pcre *re = (const real_pcre *)argument_re; +const pcre_study_data *study = NULL; + +if (re == NULL || where == NULL) return PCRE_ERROR_NULL; + +if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) + study = (const pcre_study_data *)extra_data->study_data; + +if (re->magic_number != MAGIC_NUMBER) + { + re = _pcre_try_flipped(re, &internal_re, study, &internal_study); + if (re == NULL) return PCRE_ERROR_BADMAGIC; + if (study != NULL) study = &internal_study; + } + +switch (what) + { + case PCRE_INFO_OPTIONS: + *((unsigned long int *)where) = re->options & PUBLIC_OPTIONS; + break; + + case PCRE_INFO_SIZE: + *((size_t *)where) = re->size; + break; + + case PCRE_INFO_STUDYSIZE: + *((size_t *)where) = (study == NULL)? 0 : study->size; + break; + + case PCRE_INFO_CAPTURECOUNT: + *((int *)where) = re->top_bracket; + break; + + case PCRE_INFO_BACKREFMAX: + *((int *)where) = re->top_backref; + break; + + case PCRE_INFO_FIRSTBYTE: + *((int *)where) = + ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte : + ((re->options & PCRE_STARTLINE) != 0)? -1 : -2; + break; + + /* Make sure we pass back the pointer to the bit vector in the external + block, not the internal copy (with flipped integer fields). */ + + case PCRE_INFO_FIRSTTABLE: + *((const uschar **)where) = + (study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)? + ((const pcre_study_data *)extra_data->study_data)->start_bits : NULL; + break; + + case PCRE_INFO_LASTLITERAL: + *((int *)where) = + ((re->options & PCRE_REQCHSET) != 0)? re->req_byte : -1; + break; + + case PCRE_INFO_NAMEENTRYSIZE: + *((int *)where) = re->name_entry_size; + break; + + case PCRE_INFO_NAMECOUNT: + *((int *)where) = re->name_count; + break; + + case PCRE_INFO_NAMETABLE: + *((const uschar **)where) = (const uschar *)re + re->name_table_offset; + break; + + case PCRE_INFO_DEFAULT_TABLES: + *((const uschar **)where) = (const uschar *)(_pcre_default_tables); + break; + + default: return PCRE_ERROR_BADOPTION; + } + +return 0; +} + +/* End of pcre_fullinfo.c */ diff --git a/libpcre/get.c b/libpcre/pcre_get.c similarity index 85% rename from libpcre/get.c rename to libpcre/pcre_get.c index a20473cf24..fc4a14ac14 100644 --- a/libpcre/get.c +++ b/libpcre/pcre_get.c @@ -2,45 +2,48 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel - - Copyright (c) 1997-2003 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ + /* This module contains some convenience functions for extracting substrings from the subject string after a regex match has succeeded. The original idea -for these functions came from Scott Wimer . */ - +for these functions came from Scott Wimer. */ -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ -#include "internal.h" +#include "pcre_internal.h" /************************************************* @@ -346,4 +349,4 @@ pcre_free_substring(const char *pointer) (pcre_free)((void *)pointer); } -/* End of get.c */ +/* End of pcre_get.c */ diff --git a/libpcre/pcre_globals.c b/libpcre/pcre_globals.c new file mode 100644 index 0000000000..1a839802ba --- /dev/null +++ b/libpcre/pcre_globals.c @@ -0,0 +1,69 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains global variables that are exported by the PCRE library. +PCRE is thread-clean and doesn't use any global variables in the normal sense. +However, it calls memory allocation and freeing functions via the four +indirections below, and it can optionally do callouts, using the fifth +indirection. These values can be changed by the caller, but are shared between +all threads. However, when compiling for Virtual Pascal, things are done +differently, and global variables are not used (see pcre.in). */ + + +#include "pcre_internal.h" + + +#ifndef VPCOMPAT +#ifdef __cplusplus +extern "C" void *(*pcre_malloc)(size_t) = malloc; +extern "C" void (*pcre_free)(void *) = free; +extern "C" void *(*pcre_stack_malloc)(size_t) = malloc; +extern "C" void (*pcre_stack_free)(void *) = free; +extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL; +#else +void *(*pcre_malloc)(size_t) = malloc; +void (*pcre_free)(void *) = free; +void *(*pcre_stack_malloc)(size_t) = malloc; +void (*pcre_stack_free)(void *) = free; +int (*pcre_callout)(pcre_callout_block *) = NULL; +#endif +#endif + +/* End of pcre_globals.c */ diff --git a/libpcre/pcre_info.c b/libpcre/pcre_info.c new file mode 100644 index 0000000000..228949d0cd --- /dev/null +++ b/libpcre/pcre_info.c @@ -0,0 +1,89 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_info(), which gives some +information about a compiled pattern. However, use of this function is now +deprecated, as it has been superseded by pcre_fullinfo(). */ + + +#include "pcre_internal.h" + + +/************************************************* +* (Obsolete) Return info about compiled pattern * +*************************************************/ + +/* This is the original "info" function. It picks potentially useful data out +of the private structure, but its interface was too rigid. It remains for +backwards compatibility. The public options are passed back in an int - though +the re->options field has been expanded to a long int, all the public options +at the low end of it, and so even on 16-bit systems this will still be OK. +Therefore, I haven't changed the API for pcre_info(). + +Arguments: + argument_re points to compiled code + optptr where to pass back the options + first_byte where to pass back the first character, + or -1 if multiline and all branches start ^, + or -2 otherwise + +Returns: number of capturing subpatterns + or negative values on error +*/ + +EXPORT int +pcre_info(const pcre *argument_re, int *optptr, int *first_byte) +{ +real_pcre internal_re; +const real_pcre *re = (const real_pcre *)argument_re; +if (re == NULL) return PCRE_ERROR_NULL; +if (re->magic_number != MAGIC_NUMBER) + { + re = _pcre_try_flipped(re, &internal_re, NULL, NULL); + if (re == NULL) return PCRE_ERROR_BADMAGIC; + } +if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS); +if (first_byte != NULL) + *first_byte = ((re->options & PCRE_FIRSTSET) != 0)? re->first_byte : + ((re->options & PCRE_STARTLINE) != 0)? -1 : -2; +return re->top_bracket; +} + +/* End of pcre_info.c */ diff --git a/libpcre/internal.h b/libpcre/pcre_internal.h similarity index 50% rename from libpcre/internal.h rename to libpcre/pcre_internal.h index 781a7991cb..e3182a4ed4 100644 --- a/libpcre/internal.h +++ b/libpcre/pcre_internal.h @@ -3,37 +3,44 @@ *************************************************/ -/* This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel - - Copyright (c) 1997-2003 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ /* This header contains definitions that are shared between the different -modules, but which are not relevant to the outside. */ +modules, but which are not relevant to the exported API. This includes some +functions whose names all begin with "_pcre_". */ /* Get the definitions provided by running "configure" */ #ifndef WIN32 @@ -42,6 +49,82 @@ modules, but which are not relevant to the outside. */ #include "pcre_winconfig.h" #endif +/* Define DEBUG to get debugging output on stdout. */ + +/**** +#define DEBUG +****/ + +/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef +inline, and there are *still* stupid compilers about that don't like indented +pre-processor statements, or at least there were when I first wrote this. After +all, it had only been about 10 years then... */ + + +/* Get the definitions provided by running "configure" */ + +#ifdef DEBUG +#define DPRINTF(p) printf p +#else +#define DPRINTF(p) /*nothing*/ +#endif + +/* Standard C headers plus the external interface definition. The only time +setjmp and stdarg are used is when NO_RECURSE is set. */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef PCRE_SPY +#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ +#endif + +/* We need to have types that specify unsigned 16-bit and 32-bit integers. We +cannot determine these outside the compilation (e.g. by running a program as +part of "configure") because PCRE is often cross-compiled for use on other +systems. Instead we make use of the maximum sizes that are available at +preprocessor time in standard C environments. */ + +#if USHRT_MAX == 65535 + typedef unsigned short pcre_uint16; +#elif UINT_MAX == 65535 + typedef unsigned int pcre_uint16; +#else + #error Cannot determine a type for 16-bit unsigned integers +#endif + +#if UINT_MAX == 4294967295 + typedef unsigned int pcre_uint32; +#elif ULONG_MAX == 4294967295 + typedef unsigned long int pcre_uint32; +#else + #error Cannot determine a type for 32-bit unsigned integers +#endif + +/* All character handling must be done as unsigned characters. Otherwise there +are problems with top-bit-set characters and functions such as isspace(). +However, we leave the interface to the outside world as char *, because that +should make things easier for callers. We define a short type for unsigned char +to save lots of typing. I tried "uchar", but it causes problems on Digital +Unix, where it is defined in sys/types, so use "uschar" instead. */ + +typedef unsigned char uschar; + +/* Include the public PCRE header */ + +#include "pcre.h" + +/* Include the (copy of) the public ucp header, changing the external name into a private one. This does no harm, even if we aren't compiling UCP support. */ + +#define ucp_findchar _pcre_ucp_findchar +#include "ucp.h" + /* When compiling for use with the Virtual Pascal compiler, these functions need to have their names changed. PCRE must be compiled with the -DVPCOMPAT option on the command line. */ @@ -79,13 +162,14 @@ for (i = 0; i < n; ++i) *(--dest) = *(--src); #endif /* not VPCOMPAT */ -/* PCRE keeps offsets in its compiled code as 2-byte quantities by default. -These are used, for example, to link from the start of a subpattern to its -alternatives and its end. The use of 2 bytes per offset limits the size of the -compiled regex to around 64K, which is big enough for almost everybody. -However, I received a request for an even bigger limit. For this reason, and -also to make the code easier to maintain, the storing and loading of offsets -from the byte string is now handled by the macros that are defined here. +/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored +in big-endian order) by default. These are used, for example, to link from the +start of a subpattern to its alternatives and its end. The use of 2 bytes per +offset limits the size of the compiled regex to around 64K, which is big enough +for almost everybody. However, I received a request for an even bigger limit. +For this reason, and also to make the code easier to maintain, the storing and +loading of offsets from the byte string is now handled by the macros that are +defined here. The macros are controlled by the value of LINK_SIZE. This defaults to 2 in the config.h file, but can be overridden by using -D on the command line. This @@ -154,24 +238,115 @@ capturing parenthesis numbers in back references. */ #define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 -/* Standard C headers plus the external interface definition */ - -#include -#include -#include -#include -#include -#include +/* When UTF-8 encoding is being used, a character is no longer just a single +byte. The macros for character handling generate simple sequences when used in +byte-mode, and more complicated ones for UTF-8 characters. */ + +#ifndef SUPPORT_UTF8 +#define GETCHAR(c, eptr) c = *eptr; +#define GETCHARTEST(c, eptr) c = *eptr; +#define GETCHARINC(c, eptr) c = *eptr++; +#define GETCHARINCTEST(c, eptr) c = *eptr++; +#define GETCHARLEN(c, eptr, len) c = *eptr; +#define BACKCHAR(eptr) + +#else /* SUPPORT_UTF8 */ + +/* Get the next UTF-8 character, not advancing the pointer. This is called when +we know we are in UTF-8 mode. */ + +#define GETCHAR(c, eptr) \ + c = *eptr; \ + if ((c & 0xc0) == 0xc0) \ + { \ + int gcii; \ + int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ + int gcss = 6*gcaa; \ + c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ + for (gcii = 1; gcii <= gcaa; gcii++) \ + { \ + gcss -= 6; \ + c |= (eptr[gcii] & 0x3f) << gcss; \ + } \ + } + +/* Get the next UTF-8 character, testing for UTF-8 mode, and not advancing the +pointer. */ + +#define GETCHARTEST(c, eptr) \ + c = *eptr; \ + if (utf8 && (c & 0xc0) == 0xc0) \ + { \ + int gcii; \ + int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ + int gcss = 6*gcaa; \ + c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ + for (gcii = 1; gcii <= gcaa; gcii++) \ + { \ + gcss -= 6; \ + c |= (eptr[gcii] & 0x3f) << gcss; \ + } \ + } + +/* Get the next UTF-8 character, advancing the pointer. This is called when we +know we are in UTF-8 mode. */ + +#define GETCHARINC(c, eptr) \ + c = *eptr++; \ + if ((c & 0xc0) == 0xc0) \ + { \ + int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ + int gcss = 6*gcaa; \ + c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ + while (gcaa-- > 0) \ + { \ + gcss -= 6; \ + c |= (*eptr++ & 0x3f) << gcss; \ + } \ + } + +/* Get the next character, testing for UTF-8 mode, and advancing the pointer */ + +#define GETCHARINCTEST(c, eptr) \ + c = *eptr++; \ + if (utf8 && (c & 0xc0) == 0xc0) \ + { \ + int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ + int gcss = 6*gcaa; \ + c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ + while (gcaa-- > 0) \ + { \ + gcss -= 6; \ + c |= (*eptr++ & 0x3f) << gcss; \ + } \ + } + +/* Get the next UTF-8 character, not advancing the pointer, incrementing length +if there are extra bytes. This is called when we know we are in UTF-8 mode. */ + +#define GETCHARLEN(c, eptr, len) \ + c = *eptr; \ + if ((c & 0xc0) == 0xc0) \ + { \ + int gcii; \ + int gcaa = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ + int gcss = 6*gcaa; \ + c = (c & _pcre_utf8_table3[gcaa]) << gcss; \ + for (gcii = 1; gcii <= gcaa; gcii++) \ + { \ + gcss -= 6; \ + c |= (eptr[gcii] & 0x3f) << gcss; \ + } \ + len += gcaa; \ + } + +/* If the pointer is not at the start of a character, move it back until +it is. Called only in UTF-8 mode. */ + +#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--; -#ifndef PCRE_SPY -#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ #endif -#ifndef WIN32 -#include "pcre.h" -#else -#include "pcre_win.h" -#endif /* In case there is no definition of offsetof() provided - though any proper Standard C system should have one. */ @@ -180,6 +355,7 @@ Standard C system should have one. */ #define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field)) #endif + /* These are the public options that can change during matching. */ #define PCRE_IMS (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL) @@ -187,32 +363,38 @@ Standard C system should have one. */ /* Private options flags start at the most significant end of the four bytes, but skip the top bit so we can use ints for convenience without getting tangled with negative values. The public options defined in pcre.h start at the least -significant end. Make sure they don't overlap, though now that we have expanded -to four bytes there is plenty of space. */ +significant end. Make sure they don't overlap! */ #define PCRE_FIRSTSET 0x40000000 /* first_byte is set */ #define PCRE_REQCHSET 0x20000000 /* req_byte is set */ #define PCRE_STARTLINE 0x10000000 /* start after \n for multiline */ #define PCRE_ICHANGED 0x08000000 /* i option changes within regex */ +#define PCRE_NOPARTIAL 0x04000000 /* can't use partial with this regex */ /* Options for the "extra" block produced by pcre_study(). */ #define PCRE_STUDY_MAPPED 0x01 /* a map of starting chars exists */ -/* Masks for identifying the public options which are permitted at compile -time, run time or study time, respectively. */ +/* Masks for identifying the public options that are permitted at compile +time, run time, or study time, respectively. */ #define PUBLIC_OPTIONS \ (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ - PCRE_NO_AUTO_CAPTURE) + PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE) #define PUBLIC_EXEC_OPTIONS \ - (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY) + (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ + PCRE_PARTIAL) + +#define PUBLIC_DFA_EXEC_OPTIONS \ + (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \ + PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART) #define PUBLIC_STUDY_OPTIONS 0 /* None defined */ -/* Magic number to provide a small check against being handed junk. */ +/* Magic number to provide a small check against being handed junk. Also used +to detect whether a pattern was compiled on a host of different endianness. */ #define MAGIC_NUMBER 0x50435245UL /* 'PCRE' */ @@ -221,6 +403,11 @@ time, run time or study time, respectively. */ #define REQ_UNSET (-2) #define REQ_NONE (-1) +/* The maximum remaining length of subject we are prepared to search for a +req_byte match. */ + +#define REQ_BYTE_MAX 1000 + /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a variable-length repeat, or a anything other than literal characters. */ @@ -268,12 +455,13 @@ definitions below, up to ESC_z. There's a dummy for OP_ANY because it corresponds to "." rather than an escape sequence. The final one must be ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two tests in the code for an escape greater than ESC_b and less than ESC_Z to -detect the types that may be repeated. These are the types that consume a -character. If any new escapes are put in between that don't consume a +detect the types that may be repeated. These are the types that consume +characters. If any new escapes are put in between that don't consume a character, that code will have to change. */ enum { ESC_A = 1, ESC_G, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, - ESC_w, ESC_dum1, ESC_C, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_REF }; + ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E, + ESC_Q, ESC_REF }; /* Flag bits and data types for the extended class (OP_XCLASS) for classes that contain UTF-8 characters with values greater than 255. */ @@ -284,6 +472,8 @@ contain UTF-8 characters with values greater than 255. */ #define XCL_END 0 /* Marks end of individual items */ #define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ #define XCL_RANGE 2 /* A range (two multibyte chars) follows */ +#define XCL_PROP 3 /* Unicode property (one property code) follows */ +#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets @@ -309,110 +499,123 @@ enum { OP_WORDCHAR, /* 10 \w */ OP_ANY, /* 11 Match any character */ OP_ANYBYTE, /* 12 Match any byte (\C); different to OP_ANY for UTF-8 */ - OP_EODN, /* 13 End of data or \n at end of data: \Z. */ - OP_EOD, /* 14 End of data: \z */ - - OP_OPT, /* 15 Set runtime options */ - OP_CIRC, /* 16 Start of line - varies with multiline switch */ - OP_DOLL, /* 17 End of line - varies with multiline switch */ - OP_CHARS, /* 18 Match string of characters */ - OP_NOT, /* 19 Match anything but the following char */ - - OP_STAR, /* 20 The maximizing and minimizing versions of */ - OP_MINSTAR, /* 21 all these opcodes must come in pairs, with */ - OP_PLUS, /* 22 the minimizing one second. */ - OP_MINPLUS, /* 23 This first set applies to single characters */ - OP_QUERY, /* 24 */ - OP_MINQUERY, /* 25 */ - OP_UPTO, /* 26 From 0 to n matches */ - OP_MINUPTO, /* 27 */ - OP_EXACT, /* 28 Exactly n matches */ - - OP_NOTSTAR, /* 29 The maximizing and minimizing versions of */ - OP_NOTMINSTAR, /* 30 all these opcodes must come in pairs, with */ - OP_NOTPLUS, /* 31 the minimizing one second. */ - OP_NOTMINPLUS, /* 32 This set applies to "not" single characters */ - OP_NOTQUERY, /* 33 */ - OP_NOTMINQUERY, /* 34 */ - OP_NOTUPTO, /* 35 From 0 to n matches */ - OP_NOTMINUPTO, /* 36 */ - OP_NOTEXACT, /* 37 Exactly n matches */ - - OP_TYPESTAR, /* 38 The maximizing and minimizing versions of */ - OP_TYPEMINSTAR, /* 39 all these opcodes must come in pairs, with */ - OP_TYPEPLUS, /* 40 the minimizing one second. These codes must */ - OP_TYPEMINPLUS, /* 41 be in exactly the same order as those above. */ - OP_TYPEQUERY, /* 42 This set applies to character types such as \d */ - OP_TYPEMINQUERY, /* 43 */ - OP_TYPEUPTO, /* 44 From 0 to n matches */ - OP_TYPEMINUPTO, /* 45 */ - OP_TYPEEXACT, /* 46 Exactly n matches */ - - OP_CRSTAR, /* 47 The maximizing and minimizing versions of */ - OP_CRMINSTAR, /* 48 all these opcodes must come in pairs, with */ - OP_CRPLUS, /* 49 the minimizing one second. These codes must */ - OP_CRMINPLUS, /* 50 be in exactly the same order as those above. */ - OP_CRQUERY, /* 51 These are for character classes and back refs */ - OP_CRMINQUERY, /* 52 */ - OP_CRRANGE, /* 53 These are different to the three seta above. */ - OP_CRMINRANGE, /* 54 */ - - OP_CLASS, /* 55 Match a character class, chars < 256 only */ - OP_NCLASS, /* 56 Same, but the bitmap was created from a negative + OP_NOTPROP, /* 13 \P (not Unicode property) */ + OP_PROP, /* 14 \p (Unicode property) */ + OP_EXTUNI, /* 15 \X (extended Unicode sequence */ + OP_EODN, /* 16 End of data or \n at end of data: \Z. */ + OP_EOD, /* 17 End of data: \z */ + + OP_OPT, /* 18 Set runtime options */ + OP_CIRC, /* 19 Start of line - varies with multiline switch */ + OP_DOLL, /* 20 End of line - varies with multiline switch */ + OP_CHAR, /* 21 Match one character, casefully */ + OP_CHARNC, /* 22 Match one character, caselessly */ + OP_NOT, /* 23 Match anything but the following char */ + + OP_STAR, /* 24 The maximizing and minimizing versions of */ + OP_MINSTAR, /* 25 all these opcodes must come in pairs, with */ + OP_PLUS, /* 26 the minimizing one second. */ + OP_MINPLUS, /* 27 This first set applies to single characters */ + OP_QUERY, /* 28 */ + OP_MINQUERY, /* 29 */ + OP_UPTO, /* 30 From 0 to n matches */ + OP_MINUPTO, /* 31 */ + OP_EXACT, /* 32 Exactly n matches */ + + OP_NOTSTAR, /* 33 The maximizing and minimizing versions of */ + OP_NOTMINSTAR, /* 34 all these opcodes must come in pairs, with */ + OP_NOTPLUS, /* 35 the minimizing one second. */ + OP_NOTMINPLUS, /* 36 This set applies to "not" single characters */ + OP_NOTQUERY, /* 37 */ + OP_NOTMINQUERY, /* 38 */ + OP_NOTUPTO, /* 39 From 0 to n matches */ + OP_NOTMINUPTO, /* 40 */ + OP_NOTEXACT, /* 41 Exactly n matches */ + + OP_TYPESTAR, /* 42 The maximizing and minimizing versions of */ + OP_TYPEMINSTAR, /* 43 all these opcodes must come in pairs, with */ + OP_TYPEPLUS, /* 44 the minimizing one second. These codes must */ + OP_TYPEMINPLUS, /* 45 be in exactly the same order as those above. */ + OP_TYPEQUERY, /* 46 This set applies to character types such as \d */ + OP_TYPEMINQUERY, /* 47 */ + OP_TYPEUPTO, /* 48 From 0 to n matches */ + OP_TYPEMINUPTO, /* 49 */ + OP_TYPEEXACT, /* 50 Exactly n matches */ + + OP_CRSTAR, /* 51 The maximizing and minimizing versions of */ + OP_CRMINSTAR, /* 52 all these opcodes must come in pairs, with */ + OP_CRPLUS, /* 53 the minimizing one second. These codes must */ + OP_CRMINPLUS, /* 54 be in exactly the same order as those above. */ + OP_CRQUERY, /* 55 These are for character classes and back refs */ + OP_CRMINQUERY, /* 56 */ + OP_CRRANGE, /* 57 These are different to the three sets above. */ + OP_CRMINRANGE, /* 58 */ + + OP_CLASS, /* 59 Match a character class, chars < 256 only */ + OP_NCLASS, /* 60 Same, but the bitmap was created from a negative class - the difference is relevant only when a UTF-8 character > 255 is encountered. */ - OP_XCLASS, /* 56 Extended class for handling UTF-8 chars within the + OP_XCLASS, /* 61 Extended class for handling UTF-8 chars within the class. This does both positive and negative. */ - OP_REF, /* 57 Match a back reference */ - OP_RECURSE, /* 58 Match a numbered subpattern (possibly recursive) */ - OP_CALLOUT, /* 59 Call out to external function if provided */ + OP_REF, /* 62 Match a back reference */ + OP_RECURSE, /* 63 Match a numbered subpattern (possibly recursive) */ + OP_CALLOUT, /* 64 Call out to external function if provided */ - OP_ALT, /* 60 Start of alternation */ - OP_KET, /* 61 End of group that doesn't have an unbounded repeat */ - OP_KETRMAX, /* 62 These two must remain together and in this */ - OP_KETRMIN, /* 63 order. They are for groups the repeat for ever. */ + OP_ALT, /* 65 Start of alternation */ + OP_KET, /* 66 End of group that doesn't have an unbounded repeat */ + OP_KETRMAX, /* 67 These two must remain together and in this */ + OP_KETRMIN, /* 68 order. They are for groups the repeat for ever. */ /* The assertions must come before ONCE and COND */ - OP_ASSERT, /* 64 Positive lookahead */ - OP_ASSERT_NOT, /* 65 Negative lookahead */ - OP_ASSERTBACK, /* 66 Positive lookbehind */ - OP_ASSERTBACK_NOT, /* 67 Negative lookbehind */ - OP_REVERSE, /* 68 Move pointer back - used in lookbehind assertions */ + OP_ASSERT, /* 69 Positive lookahead */ + OP_ASSERT_NOT, /* 70 Negative lookahead */ + OP_ASSERTBACK, /* 71 Positive lookbehind */ + OP_ASSERTBACK_NOT, /* 72 Negative lookbehind */ + OP_REVERSE, /* 73 Move pointer back - used in lookbehind assertions */ /* ONCE and COND must come after the assertions, with ONCE first, as there's a test for >= ONCE for a subpattern that isn't an assertion. */ - OP_ONCE, /* 69 Once matched, don't back up into the subpattern */ - OP_COND, /* 70 Conditional group */ - OP_CREF, /* 71 Used to hold an extraction string number (cond ref) */ + OP_ONCE, /* 74 Once matched, don't back up into the subpattern */ + OP_COND, /* 75 Conditional group */ + OP_CREF, /* 76 Used to hold an extraction string number (cond ref) */ - OP_BRAZERO, /* 72 These two must remain together and in this */ - OP_BRAMINZERO, /* 73 order. */ + OP_BRAZERO, /* 77 These two must remain together and in this */ + OP_BRAMINZERO, /* 78 order. */ - OP_BRANUMBER, /* 74 Used for extracting brackets whose number is greater + OP_BRANUMBER, /* 79 Used for extracting brackets whose number is greater than can fit into an opcode. */ - OP_BRA /* 75 This and greater values are used for brackets that - extract substrings up to a basic limit. After that, - use is made of OP_BRANUMBER. */ + OP_BRA /* 80 This and greater values are used for brackets that + extract substrings up to EXTRACT_BASIC_MAX. After + that, use is made of OP_BRANUMBER. */ }; -/* WARNING: There is an implicit assumption in study.c that all opcodes are -less than 128 in value. This makes handling UTF-8 character sequences easier. -*/ +/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and +study.c that all opcodes are less than 128 in value. This makes handling UTF-8 +character sequences easier. */ +/* The highest extraction number before we have to start using additional +bytes. (Originally PCRE didn't have support for extraction counts highter than +this number.) The value is limited by the number of opcodes left after OP_BRA, +i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional +opcodes. */ + +#define EXTRACT_BASIC_MAX 100 -/* This macro defines textual names for all the opcodes. There are used only -for debugging, in pcre.c when DEBUG is defined, and also in pcretest.c. The -macro is referenced only in printint.c. */ + +/* This macro defines textual names for all the opcodes. These are used only +for debugging. The macro is referenced only in pcre_printint.c. */ #define OP_NAME_LIST \ "End", "\\A", "\\G", "\\B", "\\b", "\\D", "\\d", \ - "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", "\\Z", "\\z", \ - "Opt", "^", "$", "chars", "not", \ + "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte", \ + "notprop", "prop", "extuni", \ + "\\Z", "\\z", \ + "Opt", "^", "$", "char", "charnc", "not", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ "*", "*?", "+", "+?", "?", "??", "{", "{", "{", \ @@ -426,7 +629,7 @@ macro is referenced only in printint.c. */ /* This macro defines the length of fixed length operations in the compiled regex. The lengths are used when searching for specific things, and also in the debugging printing of a compiled regex. We use a macro so that it can be -incorporated both into pcre.c and pcretest.c without being publicly exposed. +defined close to the definitions of the opcodes themselves. As things have been extended, some of these are no longer fixed lenths, but are minima instead. For example, the length of a single-character repeat may vary @@ -435,13 +638,16 @@ in UTF-8 mode. The code that uses this table must know about such things. */ #define OP_LENGTHS \ 1, /* End */ \ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \ - 1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \ - 2, /* Chars - the minimum length */ \ + 1, 1, /* Any, Anybyte */ \ + 2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \ + 1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ + 2, /* Char - the minimum length */ \ + 2, /* Charnc - the minimum length */ \ 2, /* not */ \ - /* Positive single-char repeats */ \ - 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** These are */ \ - 4, 4, 4, /* upto, minupto, exact ** minima */ \ - /* Negative single-char repeats */ \ + /* Positive single-char repeats ** These are */ \ + 2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ + 4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \ + /* Negative single-char repeats - only for chars < 256 */ \ 2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ 4, 4, 4, /* NOT upto, minupto, exact */ \ /* Positive type repeats */ \ @@ -455,7 +661,7 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 0, /* XCLASS - variable length */ \ 3, /* REF */ \ 1+LINK_SIZE, /* RECURSE */ \ - 2, /* CALLOUT */ \ + 2+2*LINK_SIZE, /* CALLOUT */ \ 1+LINK_SIZE, /* Alt */ \ 1+LINK_SIZE, /* Ket */ \ 1+LINK_SIZE, /* KetRmax */ \ @@ -473,97 +679,61 @@ in UTF-8 mode. The code that uses this table must know about such things. */ 1+LINK_SIZE /* BRA */ \ -/* The highest extraction number before we have to start using additional -bytes. (Originally PCRE didn't have support for extraction counts highter than -this number.) The value is limited by the number of opcodes left after OP_BRA, -i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional -opcodes. */ - -#define EXTRACT_BASIC_MAX 150 - /* A magic value for OP_CREF to indicate the "in recursion" condition. */ #define CREF_RECURSE 0xffff -/* The texts of compile-time error messages are defined as macros here so that -they can be accessed by the POSIX wrapper and converted into error codes. Yes, -I could have used error codes in the first place, but didn't feel like changing -just to accommodate the POSIX wrapper. */ - -#define ERR1 "\\ at end of pattern" -#define ERR2 "\\c at end of pattern" -#define ERR3 "unrecognized character follows \\" -#define ERR4 "numbers out of order in {} quantifier" -#define ERR5 "number too big in {} quantifier" -#define ERR6 "missing terminating ] for character class" -#define ERR7 "invalid escape sequence in character class" -#define ERR8 "range out of order in character class" -#define ERR9 "nothing to repeat" -#define ERR10 "operand of unlimited repeat could match the empty string" -#define ERR11 "internal error: unexpected repeat" -#define ERR12 "unrecognized character after (?" -#define ERR13 "POSIX named classes are supported only within a class" -#define ERR14 "missing )" -#define ERR15 "reference to non-existent subpattern" -#define ERR16 "erroffset passed as NULL" -#define ERR17 "unknown option bit(s) set" -#define ERR18 "missing ) after comment" -#define ERR19 "parentheses nested too deeply" -#define ERR20 "regular expression too large" -#define ERR21 "failed to get memory" -#define ERR22 "unmatched parentheses" -#define ERR23 "internal error: code overflow" -#define ERR24 "unrecognized character after (?<" -#define ERR25 "lookbehind assertion is not fixed length" -#define ERR26 "malformed number after (?(" -#define ERR27 "conditional group contains more than two branches" -#define ERR28 "assertion expected after (?(" -#define ERR29 "(?R or (?digits must be followed by )" -#define ERR30 "unknown POSIX class name" -#define ERR31 "POSIX collating elements are not supported" -#define ERR32 "this version of PCRE is not compiled with PCRE_UTF8 support" -#define ERR33 "spare error" -#define ERR34 "character value in \\x{...} sequence is too large" -#define ERR35 "invalid condition (?(0)" -#define ERR36 "\\C not allowed in lookbehind assertion" -#define ERR37 "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" -#define ERR38 "number after (?C is > 255" -#define ERR39 "closing ) for (?C expected" -#define ERR40 "recursive call could loop indefinitely" -#define ERR41 "unrecognized character after (?P" -#define ERR42 "syntax error after (?P" -#define ERR43 "two named groups have the same name" +/* Error code numbers. They are given names so that they can more easily be +tracked. */ -/* All character handling must be done as unsigned characters. Otherwise there -are problems with top-bit-set characters and functions such as isspace(). -However, we leave the interface to the outside world as char *, because that -should make things easier for callers. We define a short type for unsigned char -to save lots of typing. I tried "uchar", but it causes problems on Digital -Unix, where it is defined in sys/types, so use "uschar" instead. */ - -typedef unsigned char uschar; +enum { ERR0, ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, + ERR10, ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, + ERR20, ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, + ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, + ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47 }; /* The real format of the start of the pcre block; the index of names and the -code vector run on as long as necessary after the end. */ +code vector run on as long as necessary after the end. We store an explicit +offset to the name table so that if a regex is compiled on one host, saved, and +then run on another where the size of pointers is different, all might still +be well. For the case of compiled-on-4 and run-on-8, we include an extra +pointer that is always NULL. For future-proofing, a few dummy fields were +originally included - even though you can never get this planning right - but +there is only one left now. + +NOTE NOTE NOTE: +Because people can now save and re-use compiled patterns, any additions to this +structure should be made at the end, and something earlier (e.g. a new +flag in the options or one of the dummy fields) should indicate that the new +fields are present. Currently PCRE always sets the dummy fields to zero. +NOTE NOTE NOTE: +*/ typedef struct real_pcre { - unsigned long int magic_number; - size_t size; /* Total that was malloced */ - const unsigned char *tables; /* Pointer to tables */ - unsigned long int options; - unsigned short int top_bracket; - unsigned short int top_backref; - unsigned short int first_byte; - unsigned short int req_byte; - unsigned short int name_entry_size; /* Size of any name items; 0 => none */ - unsigned short int name_count; /* Number of name items */ + pcre_uint32 magic_number; + pcre_uint32 size; /* Total that was malloced */ + pcre_uint32 options; + pcre_uint32 dummy1; /* For future use, maybe */ + + pcre_uint16 top_bracket; + pcre_uint16 top_backref; + pcre_uint16 first_byte; + pcre_uint16 req_byte; + pcre_uint16 name_table_offset; /* Offset to name table that follows */ + pcre_uint16 name_entry_size; /* Size of any name items */ + pcre_uint16 name_count; /* Number of name items */ + pcre_uint16 ref_count; /* Reference count */ + + const unsigned char *tables; /* Pointer to tables or NULL for std */ + const unsigned char *nullpad; /* NULL padding */ } real_pcre; -/* The format of the block used to store data from pcre_study(). */ +/* The format of the block used to store data from pcre_study(). The same +remark (see NOTE above) about extending this structure applies. */ typedef struct pcre_study_data { - size_t size; /* Total that was malloced */ - uschar options; + pcre_uint32 size; /* Total that was malloced */ + pcre_uint32 options; uschar start_bits[32]; } pcre_study_data; @@ -576,12 +746,14 @@ typedef struct compile_data { const uschar *cbits; /* Points to character type table */ const uschar *ctypes; /* Points to table of type maps */ const uschar *start_code; /* The start of the compiled code */ + const uschar *start_pattern; /* The start of the pattern */ uschar *name_table; /* The name/number table */ int names_found; /* Number of entries so far */ int name_entry_size; /* Size of each entry */ int top_backref; /* Maximum back reference */ unsigned int backref_map; /* Bitmap of low back refs */ int req_varyopt; /* "After variable item" flag for reqbyte */ + BOOL nopartial; /* Set TRUE if partial won't work */ } compile_data; /* Structure for maintaining a chain of pointers to the currently incomplete @@ -596,7 +768,7 @@ typedef struct branch_chain { call within the pattern. */ typedef struct recursion_info { - struct recursion_info *prev; /* Previous recursion record (or NULL) */ + struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ int group_num; /* Number of group that was called */ const uschar *after_call; /* "Return value": points after the call in the expr */ const uschar *save_start; /* Old value of md->start_match */ @@ -604,8 +776,18 @@ typedef struct recursion_info { int saved_max; /* Number of saved offsets */ } recursion_info; +/* When compiling in a mode that doesn't use recursive calls to match(), +a structure is used to remember local variables on the heap. It is defined in +pcre.c, close to the match() function, so that it is easy to keep it in step +with any changes of local variable. However, the pointer to the current frame +must be saved in some "static" place over a longjmp(). We declare the +structure here so that we can put a pointer in the match_data structure. +NOTE: This isn't used for a "normal" compilation of pcre. */ + +struct heapframe; + /* Structure for passing "static" information around between the functions -doing the matching, so that they are thread-safe. */ +doing traditional NFA matching, so that they are thread-safe. */ typedef struct match_data { unsigned long int match_call_count; /* As it says */ @@ -621,6 +803,8 @@ typedef struct match_data { BOOL utf8; /* UTF8 flag */ BOOL endonly; /* Dollar not before final \n */ BOOL notempty; /* Empty string match not wanted */ + BOOL partial; /* PARTIAL flag */ + BOOL hitend; /* Hit the end of the subject at some point */ const uschar *start_code; /* For use when recursing */ const uschar *start_subject; /* Start of the subject string */ const uschar *end_subject; /* End of the subject string */ @@ -631,8 +815,22 @@ typedef struct match_data { int start_offset; /* The start offset value */ recursion_info *recursive; /* Linked list of recursion data */ void *callout_data; /* To pass back to callouts */ + struct heapframe *thisframe; /* Used only when compiling for no recursion */ } match_data; +/* A similar structure is used for the same purpose by the DFA matching +functions. */ + +typedef struct dfa_match_data { + const uschar *start_code; /* Start of the compiled pattern */ + const uschar *start_subject; /* Start of the subject string */ + const uschar *end_subject; /* End of subject string */ + const uschar *tables; /* Character tables */ + int moptions; /* Match options */ + int poptions; /* Pattern options */ + void *callout_data; /* To pass back to callouts */ +} dfa_match_data; + /* Bit definitions for entries in the pcre_ctypes table. */ #define ctype_space 0x01 @@ -666,4 +864,45 @@ total length. */ #define ctypes_offset (cbits_offset + cbit_length) #define tables_length (ctypes_offset + 256) -/* End of internal.h */ +/* Layout of the UCP type table that translates property names into codes for +ucp_findchar(). */ + +typedef struct { + const char *name; + int value; +} ucp_type_table; + + +/* Internal shared data tables. These are tables that are used by more than one +of the exported public functions. They have to be "external" in the C sense, +but are not part of the PCRE public API. The data for these tables is in the +pcre_tables.c module. */ + +extern const int _pcre_utf8_table1[]; +extern const int _pcre_utf8_table2[]; +extern const int _pcre_utf8_table3[]; +extern const uschar _pcre_utf8_table4[]; + +extern const int _pcre_utf8_table1_size; + +extern const ucp_type_table _pcre_utt[]; +extern const int _pcre_utt_size; + +extern const uschar _pcre_default_tables[]; + +extern const uschar _pcre_OP_lengths[]; + + +/* Internal shared functions. These are functions that are used by more than +one of the exported public functions. They have to be "external" in the C +sense, but are not part of the PCRE public API. */ + +extern int _pcre_ord2utf8(int, uschar *); +extern void _pcre_printint(pcre *, FILE *); +extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, + const pcre_study_data *, pcre_study_data *); +extern int _pcre_ucp_findchar(const int, int *, int *); +extern int _pcre_valid_utf8(const uschar *, int); +extern BOOL _pcre_xclass(int, const uschar *); + +/* End of pcre_internal.h */ diff --git a/libpcre/maketables.c b/libpcre/pcre_maketables.c similarity index 56% rename from libpcre/maketables.c rename to libpcre/pcre_maketables.c index 257fe89cec..c4954b2648 100644 --- a/libpcre/maketables.c +++ b/libpcre/pcre_maketables.c @@ -2,48 +2,53 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* -PCRE is a library of functions to support regular expressions whose syntax +/* PCRE is a library of functions to support regular expressions whose syntax and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel - - Copyright (c) 1997-2003 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- - -See the file Tech.Notes for some information on the internals. */ -/* This file is compiled on its own as part of the PCRE library. However, -it is also included in the compilation of dftables.c, in which case the macro -DFTABLES is defined. */ +/* This module contains the external function pcre_maketables(), which builds +character tables for PCRE in the current locale. The file is compiled on its +own as part of the PCRE library. However, it is also included in the +compilation of dftables.c, in which case the macro DFTABLES is defined. */ + #ifndef DFTABLES -#include "internal.h" +#include "pcre_internal.h" #endif - /************************************************* * Create PCRE character tables * *************************************************/ @@ -126,11 +131,15 @@ for (i = 0; i < 256; i++) if (isdigit(i)) x += ctype_digit; if (isxdigit(i)) x += ctype_xdigit; if (isalnum(i) || i == '_') x += ctype_word; - if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; - *p++ = x; - } + + /* Note: strchr includes the terminating zero in the characters it considers. + In this instance, that is ok because we want binary zero to be flagged as a + meta-character, which in this sense is any character that terminates a run + of data characters. */ + + if (strchr("*+?{^.$|()[", i) != 0) x += ctype_meta; *p++ = x; } return yield; } -/* End of maketables.c */ +/* End of pcre_maketables.c */ diff --git a/libpcre/printint.c b/libpcre/pcre_printint.c similarity index 54% rename from libpcre/printint.c rename to libpcre/pcre_printint.c index cd01f0577d..d18f399252 100644 --- a/libpcre/printint.c +++ b/libpcre/pcre_printint.c @@ -2,41 +2,48 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel - - Copyright (c) 1997-2003 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -/* This module contains a debugging function for printing out the internal form -of a compiled regular expression. It is kept in a separate file so that it can -be #included both in the pcretest program, and in the library itself when -compiled with the debugging switch. */ +/* This module contains an PCRE private debugging function for printing out the +internal form of a compiled regular expression, along with some supporting +local functions. */ + + +#include "pcre_internal.h" static const char *OP_names[] = { OP_NAME_LIST }; @@ -46,18 +53,6 @@ static const char *OP_names[] = { OP_NAME_LIST }; * Print single- or multi-byte character * *************************************************/ -/* These tables are actually copies of ones in pcre.c. If we compile the -library with debugging, they are included twice, but that isn't really a -problem - compiling with debugging is pretty rare and these are very small. */ - -static int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; - -static uschar utf8_t4[] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; - static int print_char(FILE *f, uschar *ptr, BOOL utf8) { @@ -71,11 +66,23 @@ if (!utf8 || (c & 0xc0) != 0xc0) else { int i; - int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */ + int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */ int s = 6*a; - c = (c & utf8_t3[a]) << s; + c = (c & _pcre_utf8_table3[a]) << s; for (i = 1; i <= a; i++) { + /* This is a check for malformed UTF-8; it should only occur if the sanity + check has been turned off. Rather than swallow random bytes, just stop if + we hit a bad one. Print it with \X instead of \x as an indication. */ + + if ((ptr[i] & 0xc0) != 0x80) + { + fprintf(f, "\\X{%x}", c); + return i - 1; + } + + /* The byte is OK */ + s -= 6; c |= (ptr[i] & 0x3f) << s; } @@ -86,19 +93,59 @@ else +/************************************************* +* Find Unicode property name * +*************************************************/ + +static const char * +get_ucpname(int property) +{ +#ifdef SUPPORT_UCP +int i; +for (i = _pcre_utt_size; i >= 0; i--) + { + if (property == _pcre_utt[i].value) break; + } +return (i >= 0)? _pcre_utt[i].name : "??"; +#else +return "??"; +#endif +} + + /************************************************* * Print compiled regex * *************************************************/ -static void -print_internals(pcre *external_re, FILE *f) +/* Make this function work for a regex with integers either byte order. +However, we assume that what we are passed is a compiled regex. */ + +EXPORT void +_pcre_printint(pcre *external_re, FILE *f) { real_pcre *re = (real_pcre *)external_re; -uschar *codestart = - (uschar *)re + sizeof(real_pcre) + re->name_count * re->name_entry_size; -uschar *code = codestart; -BOOL utf8 = (re->options & PCRE_UTF8) != 0; +uschar *codestart, *code; +BOOL utf8; + +unsigned int options = re->options; +int offset = re->name_table_offset; +int count = re->name_count; +int size = re->name_entry_size; + +if (re->magic_number != MAGIC_NUMBER) + { + offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff); + count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff); + size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff); + options = ((options << 24) & 0xff000000) | + ((options << 8) & 0x00ff0000) | + ((options >> 8) & 0x0000ff00) | + ((options >> 24) & 0x000000ff); + } + +code = codestart = (uschar *)re + offset + count * size; +utf8 = (options & PCRE_UTF8) != 0; for(;;) { @@ -106,7 +153,7 @@ for(;;) int c; int extra = 0; - fprintf(f, "%3d ", code - codestart); + fprintf(f, "%3d ", (int)(code - codestart)); if (*code >= OP_BRA) { @@ -114,7 +161,7 @@ for(;;) fprintf(f, "%3d Bra extra\n", GET(code, 1)); else fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA); - code += OP_lengths[OP_BRA]; + code += _pcre_OP_lengths[OP_BRA]; continue; } @@ -129,18 +176,31 @@ for(;;) fprintf(f, " %.2x %s", code[1], OP_names[*code]); break; - case OP_CHARS: + case OP_CHAR: { - int charlength = code[1]; - ccode = code + 2; - extra = charlength; - fprintf(f, "%3d ", charlength); - while (charlength > 0) + fprintf(f, " "); + do { - int extrabytes = print_char(f, ccode, utf8); - ccode += 1 + extrabytes; - charlength -= 1 + extrabytes; + code++; + code += 1 + print_char(f, code, utf8); } + while (*code == OP_CHAR); + fprintf(f, "\n"); + continue; + } + break; + + case OP_CHARNC: + { + fprintf(f, " NC "); + do + { + code++; + code += 1 + print_char(f, code, utf8); + } + while (*code == OP_CHARNC); + fprintf(f, "\n"); + continue; } break; @@ -182,8 +242,16 @@ for(;;) case OP_TYPEQUERY: case OP_TYPEMINQUERY: fprintf(f, " "); - if (*code >= OP_TYPESTAR) fprintf(f, "%s", OP_names[code[1]]); - else extra = print_char(f, code+1, utf8); + if (*code >= OP_TYPESTAR) + { + fprintf(f, "%s", OP_names[code[1]]); + if (code[1] == OP_PROP || code[1] == OP_NOTPROP) + { + fprintf(f, " %s ", get_ucpname(code[2])); + extra = 1; + } + } + else extra = print_char(f, code+1, utf8); fprintf(f, "%s", OP_names[*code]); break; @@ -201,7 +269,13 @@ for(;;) case OP_TYPEEXACT: case OP_TYPEUPTO: case OP_TYPEMINUPTO: - fprintf(f, " %s{", OP_names[code[3]]); + fprintf(f, " %s", OP_names[code[3]]); + if (code[3] == OP_PROP || code[3] == OP_NOTPROP) + { + fprintf(f, " %s ", get_ucpname(code[4])); + extra = 1; + } + fprintf(f, "{"); if (*code != OP_TYPEEXACT) fprintf(f, "0,"); fprintf(f, "%d}", GET2(code,1)); if (*code == OP_TYPEMINUPTO) fprintf(f, "?"); @@ -228,7 +302,7 @@ for(;;) case OP_NOTMINUPTO: if (isprint(c = code[3])) fprintf(f, " [^%c]{", c); else fprintf(f, " [^\\x%02x]{", c); - if (*code != OP_NOTEXACT) fprintf(f, ","); + if (*code != OP_NOTEXACT) fprintf(f, "0,"); fprintf(f, "%d}", GET2(code,1)); if (*code == OP_NOTMINUPTO) fprintf(f, "?"); break; @@ -239,11 +313,17 @@ for(;;) case OP_REF: fprintf(f, " \\%d", GET2(code,1)); - ccode = code + OP_lengths[*code]; + ccode = code + _pcre_OP_lengths[*code]; goto CLASS_REF_REPEAT; case OP_CALLOUT: - fprintf(f, " %s %d", OP_names[*code], code[1]); + fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2), + GET(code, 2 + LINK_SIZE)); + break; + + case OP_PROP: + case OP_NOTPROP: + fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1])); break; /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in @@ -287,7 +367,7 @@ for(;;) if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i); if (--j > i) { - fprintf(f, "-"); + if (j != i + 1) fprintf(f, "-"); if (j == '-' || j == ']') fprintf(f, "\\"); if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j); } @@ -304,11 +384,22 @@ for(;;) int ch; while ((ch = *ccode++) != XCL_END) { - ccode += 1 + print_char(f, ccode, TRUE); - if (ch == XCL_RANGE) + if (ch == XCL_PROP) + { + fprintf(f, "\\p{%s}", get_ucpname(*ccode++)); + } + else if (ch == XCL_NOTPROP) + { + fprintf(f, "\\P{%s}", get_ucpname(*ccode++)); + } + else { - fprintf(f, "-"); ccode += 1 + print_char(f, ccode, TRUE); + if (ch == XCL_RANGE) + { + fprintf(f, "-"); + ccode += 1 + print_char(f, ccode, TRUE); + } } } } @@ -329,7 +420,7 @@ for(;;) case OP_CRQUERY: case OP_CRMINQUERY: fprintf(f, "%s", OP_names[*ccode]); - extra = OP_lengths[*ccode]; + extra += _pcre_OP_lengths[*ccode]; break; case OP_CRRANGE: @@ -339,7 +430,7 @@ for(;;) if (max == 0) fprintf(f, "{%d,}", min); else fprintf(f, "{%d,%d}", min, max); if (*ccode == OP_CRMINRANGE) fprintf(f, "?"); - extra = OP_lengths[*ccode]; + extra += _pcre_OP_lengths[*ccode]; break; } } @@ -352,9 +443,9 @@ for(;;) break; } - code += OP_lengths[*code] + extra; + code += _pcre_OP_lengths[*code] + extra; fprintf(f, "\n"); } } -/* End of printint.c */ +/* End of pcre_printint.c */ diff --git a/libpcre/pcre_refcount.c b/libpcre/pcre_refcount.c new file mode 100644 index 0000000000..35a7ee8691 --- /dev/null +++ b/libpcre/pcre_refcount.c @@ -0,0 +1,77 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_refcount(), which is an +auxiliary function that can be used to maintain a reference count in a compiled +pattern data block. This might be helpful in applications where the block is +shared by different users. */ + +#include "pcre_internal.h" + + +/************************************************* +* Maintain reference count * +*************************************************/ + +/* The reference count is a 16-bit field, initialized to zero. It is not +possible to transfer a non-zero count from one host to a different host that +has a different byte order - though I can't see why anyone in their right mind +would ever want to do that! + +Arguments: + argument_re points to compiled code + adjust value to add to the count + +Returns: the (possibly updated) count value (a non-negative number), or + a negative error number +*/ + +EXPORT int +pcre_refcount(pcre *argument_re, int adjust) +{ +real_pcre *re = (real_pcre *)argument_re; +if (re == NULL) return PCRE_ERROR_NULL; +re->ref_count = (-adjust > re->ref_count)? 0 : + (adjust + re->ref_count > 65535)? 65535 : + re->ref_count + adjust; +return re->ref_count; +} + +/* End of pcre_refcount.c */ diff --git a/libpcre/study.c b/libpcre/pcre_study.c similarity index 73% rename from libpcre/study.c rename to libpcre/pcre_study.c index 4320bd23d0..7c10c04917 100644 --- a/libpcre/study.c +++ b/libpcre/pcre_study.c @@ -2,42 +2,47 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. -Written by: Philip Hazel - - Copyright (c) 1997-2002 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. - -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. - -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. - -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. ----------------------------------------------------------------------------- */ -/* Include the internals header, which itself includes Standard C headers plus -the external pcre header. */ +/* This module contains the external function pcre_study(), along with local +supporting functions. */ -#include "internal.h" +#include "pcre_internal.h" /************************************************* @@ -57,7 +62,7 @@ Returns: nothing */ static void -set_bit(uschar *start_bits, int c, BOOL caseless, compile_data *cd) +set_bit(uschar *start_bits, unsigned int c, BOOL caseless, compile_data *cd) { start_bits[c/8] |= (1 << (c&7)); if (caseless && (cd->ctypes[c] & ctype_letter) != 0) @@ -123,7 +128,7 @@ do /* Skip over callout */ case OP_CALLOUT: - tcode += 2; + tcode += 2 + 2*LINK_SIZE; break; /* Skip over extended extraction bracket number */ @@ -186,11 +191,10 @@ do /* At least one single char sets the bit and stops */ case OP_EXACT: /* Fall through */ - tcode++; - - case OP_CHARS: /* Fall through */ - tcode++; + tcode += 2; + case OP_CHAR: + case OP_CHARNC: case OP_PLUS: case OP_MINPLUS: set_bit(start_bits, tcode[1], caseless, cd); @@ -260,6 +264,9 @@ do case OP_TYPEMINQUERY: switch(tcode[1]) { + case OP_ANY: + return FALSE; + case OP_NOT_DIGIT: for (c = 0; c < 32; c++) start_bits[c] |= ~cd->cbits[c+cbit_digit]; @@ -297,19 +304,50 @@ do /* Character class where all the information is in a bit map: set the bits and either carry on or not, according to the repeat count. If it was a negative class, and we are operating with UTF-8 characters, any byte - with the top-bit set is a potentially valid starter because it may start - a character with a value > 255. (This is sub-optimal in that the - character may be in the range 128-255, and those characters might be - unwanted, but that's as far as we go for the moment.) */ + with a value >= 0xc4 is a potentially valid starter because it starts a + character with a value > 255. */ case OP_NCLASS: - if (utf8) memset(start_bits+16, 0xff, 16); + if (utf8) + { + start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc8 */ + memset(start_bits+25, 0xff, 7); /* Bits for 0xc9 - 0xff */ + } /* Fall through */ case OP_CLASS: { tcode++; - for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; + + /* In UTF-8 mode, the bits in a bit map correspond to character + values, not to byte values. However, the bit map we are constructing is + for byte values. So we have to do a conversion for characters whose + value is > 127. In fact, there are only two possible starting bytes for + characters in the range 128 - 255. */ + + if (utf8) + { + for (c = 0; c < 16; c++) start_bits[c] |= tcode[c]; + for (c = 128; c < 256; c++) + { + if ((tcode[c/8] && (1 << (c&7))) != 0) + { + int d = (c >> 6) | 0xc0; /* Set bit for this starter */ + start_bits[d/8] |= (1 << (d&7)); /* and then skip on to the */ + c = (c & 0xc0) + 0x40 - 1; /* next relevant character. */ + } + } + } + + /* In non-UTF-8 mode, the two bit maps are completely compatible. */ + + else + { + for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; + } + + /* Advance past the bit map, and act on what follows */ + tcode += 32; switch (*tcode) { @@ -363,14 +401,15 @@ Returns: pointer to a pcre_extra block, with study_data filled in and the NULL on error or if no optimization possible */ -pcre_extra * +EXPORT pcre_extra * pcre_study(const pcre *external_re, int options, const char **errorptr) { uschar start_bits[32]; pcre_extra *extra; pcre_study_data *study; +const uschar *tables; const real_pcre *re = (const real_pcre *)external_re; -uschar *code = (uschar *)re + sizeof(real_pcre) + +uschar *code = (uschar *)re + re->name_table_offset + (re->name_count * re->name_entry_size); compile_data compile_block; @@ -395,12 +434,17 @@ at present. */ if ((re->options & (PCRE_ANCHORED|PCRE_FIRSTSET|PCRE_STARTLINE)) != 0) return NULL; -/* Set the character tables in the block which is passed around */ +/* Set the character tables in the block that is passed around */ + +tables = re->tables; +if (tables == NULL) + (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, + (void *)(&tables)); -compile_block.lcc = re->tables + lcc_offset; -compile_block.fcc = re->tables + fcc_offset; -compile_block.cbits = re->tables + cbits_offset; -compile_block.ctypes = re->tables + ctypes_offset; +compile_block.lcc = tables + lcc_offset; +compile_block.fcc = tables + fcc_offset; +compile_block.cbits = tables + cbits_offset; +compile_block.ctypes = tables + ctypes_offset; /* See if we can find a fixed set of initial characters for the pattern. */ @@ -435,4 +479,4 @@ memcpy(study->start_bits, start_bits, sizeof(start_bits)); return extra; } -/* End of study.c */ +/* End of pcre_study.c */ diff --git a/libpcre/pcre_tables.c b/libpcre/pcre_tables.c new file mode 100644 index 0000000000..4f442eae03 --- /dev/null +++ b/libpcre/pcre_tables.c @@ -0,0 +1,129 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains some fixed tables that are used by more than one of the +PCRE code modules. */ + + +#include "pcre_internal.h" + + +/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that +the definition is next to the definition of the opcodes in internal.h. */ + +const uschar _pcre_OP_lengths[] = { OP_LENGTHS }; + + + +/************************************************* +* Tables for UTF-8 support * +*************************************************/ + +/* These are the breakpoints for different numbers of bytes in a UTF-8 +character. */ + +const int _pcre_utf8_table1[] = + { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; + +const int _pcre_utf8_table1_size = sizeof(_pcre_utf8_table1)/sizeof(int); + +/* These are the indicator bits and the mask for the data bits to set in the +first byte of a character, indexed by the number of additional bytes. */ + +const int _pcre_utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; +const int _pcre_utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; + +/* Table of the number of extra characters, indexed by the first character +masked with 0x3f. The highest number for a valid UTF-8 character is in fact +0x3d. */ + +const uschar _pcre_utf8_table4[] = { + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; + +/* This table translates Unicode property names into code values for the +ucp_findchar() function. It is used by pcretest as well as by the library +functions. */ + +const ucp_type_table _pcre_utt[] = { + { "C", 128 + ucp_C }, + { "Cc", ucp_Cc }, + { "Cf", ucp_Cf }, + { "Cn", ucp_Cn }, + { "Co", ucp_Co }, + { "Cs", ucp_Cs }, + { "L", 128 + ucp_L }, + { "Ll", ucp_Ll }, + { "Lm", ucp_Lm }, + { "Lo", ucp_Lo }, + { "Lt", ucp_Lt }, + { "Lu", ucp_Lu }, + { "M", 128 + ucp_M }, + { "Mc", ucp_Mc }, + { "Me", ucp_Me }, + { "Mn", ucp_Mn }, + { "N", 128 + ucp_N }, + { "Nd", ucp_Nd }, + { "Nl", ucp_Nl }, + { "No", ucp_No }, + { "P", 128 + ucp_P }, + { "Pc", ucp_Pc }, + { "Pd", ucp_Pd }, + { "Pe", ucp_Pe }, + { "Pf", ucp_Pf }, + { "Pi", ucp_Pi }, + { "Po", ucp_Po }, + { "Ps", ucp_Ps }, + { "S", 128 + ucp_S }, + { "Sc", ucp_Sc }, + { "Sk", ucp_Sk }, + { "Sm", ucp_Sm }, + { "So", ucp_So }, + { "Z", 128 + ucp_Z }, + { "Zl", ucp_Zl }, + { "Zp", ucp_Zp }, + { "Zs", ucp_Zs } +}; + +const int _pcre_utt_size = sizeof(_pcre_utt)/sizeof(ucp_type_table); + +/* End of pcre_tables.c */ diff --git a/libpcre/pcre_try_flipped.c b/libpcre/pcre_try_flipped.c new file mode 100644 index 0000000000..a07bb23db0 --- /dev/null +++ b/libpcre/pcre_try_flipped.c @@ -0,0 +1,132 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains an internal function that tests a compiled pattern to +see if it was compiled with the opposite endianness. If so, it uses an +auxiliary local function to flip the appropriate bytes. */ + + +#include "pcre_internal.h" + + +/************************************************* +* Flip bytes in an integer * +*************************************************/ + +/* This function is called when the magic number in a regex doesn't match, in +order to flip its bytes to see if we are dealing with a pattern that was +compiled on a host of different endianness. If so, this function is used to +flip other byte values. + +Arguments: + value the number to flip + n the number of bytes to flip (assumed to be 2 or 4) + +Returns: the flipped value +*/ + +static long int +byteflip(long int value, int n) +{ +if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8); +return ((value & 0x000000ff) << 24) | + ((value & 0x0000ff00) << 8) | + ((value & 0x00ff0000) >> 8) | + ((value & 0xff000000) >> 24); +} + + + +/************************************************* +* Test for a byte-flipped compiled regex * +*************************************************/ + +/* This function is called from pcre_exec(), pcre_dfa_exec(), and also from +pcre_fullinfo(). Its job is to test whether the regex is byte-flipped - that +is, it was compiled on a system of opposite endianness. The function is called +only when the native MAGIC_NUMBER test fails. If the regex is indeed flipped, +we flip all the relevant values into a different data block, and return it. + +Arguments: + re points to the regex + study points to study data, or NULL + internal_re points to a new regex block + internal_study points to a new study block + +Returns: the new block if is is indeed a byte-flipped regex + NULL if it is not +*/ + +EXPORT real_pcre * +_pcre_try_flipped(const real_pcre *re, real_pcre *internal_re, + const pcre_study_data *study, pcre_study_data *internal_study) +{ +if (byteflip(re->magic_number, sizeof(re->magic_number)) != MAGIC_NUMBER) + return NULL; + +*internal_re = *re; /* To copy other fields */ +internal_re->size = byteflip(re->size, sizeof(re->size)); +internal_re->options = byteflip(re->options, sizeof(re->options)); +internal_re->top_bracket = + (pcre_uint16)byteflip(re->top_bracket, sizeof(re->top_bracket)); +internal_re->top_backref = + (pcre_uint16)byteflip(re->top_backref, sizeof(re->top_backref)); +internal_re->first_byte = + (pcre_uint16)byteflip(re->first_byte, sizeof(re->first_byte)); +internal_re->req_byte = + (pcre_uint16)byteflip(re->req_byte, sizeof(re->req_byte)); +internal_re->name_table_offset = + (pcre_uint16)byteflip(re->name_table_offset, sizeof(re->name_table_offset)); +internal_re->name_entry_size = + (pcre_uint16)byteflip(re->name_entry_size, sizeof(re->name_entry_size)); +internal_re->name_count = + (pcre_uint16)byteflip(re->name_count, sizeof(re->name_count)); + +if (study != NULL) + { + *internal_study = *study; /* To copy other fields */ + internal_study->size = byteflip(study->size, sizeof(study->size)); + internal_study->options = byteflip(study->options, sizeof(study->options)); + } + +return internal_re; +} + +/* End of pcre_tryflipped.c */ diff --git a/libpcre/pcre_version.c b/libpcre/pcre_version.c new file mode 100644 index 0000000000..d296eea809 --- /dev/null +++ b/libpcre/pcre_version.c @@ -0,0 +1,61 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains the external function pcre_version(), which returns a +string that identifies the PCRE version that is in use. */ + + +#include "pcre_internal.h" + + +/************************************************* +* Return version string * +*************************************************/ + +#define STRING(a) # a +#define XSTRING(s) STRING(s) + +EXPORT const char * +pcre_version(void) +{ +return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); +} + +/* End of pcre_version.c */ diff --git a/libpcre/pcre_win.h b/libpcre/pcre_win.h deleted file mode 100644 index 505def3f71..0000000000 --- a/libpcre/pcre_win.h +++ /dev/null @@ -1,184 +0,0 @@ -/************************************************* -* Perl-Compatible Regular Expressions * -*************************************************/ - -/* Copyright (c) 1997-2003 University of Cambridge */ - -#ifndef _PCRE_H -#define _PCRE_H - -/* The file pcre.h is build by "configure". Do not edit it; instead -make changes to pcre.in. */ - -#define PCRE_MAJOR 4 -#define PCRE_MINOR 3 -#define PCRE_DATE "21-May-2003" - -/* Win32 uses DLL by default */ - -#ifdef _WIN32 -# ifdef PCRE_DEFINITION -# ifdef DLL_EXPORT -# define PCRE_DATA_SCOPE __declspec(dllexport) -# endif -# else -# ifndef PCRE_STATIC -# define PCRE_DATA_SCOPE __declspec(dllimport) -# endif -# endif -#endif -#ifndef PCRE_DATA_SCOPE -# define PCRE_DATA_SCOPE extern -#endif - -/* Have to include stdlib.h in order to ensure that size_t is defined; -it is needed here for malloc. */ - -#include - -/* Allow for C++ users */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* Options */ - -#define PCRE_CASELESS 0x0001 -#define PCRE_MULTILINE 0x0002 -#define PCRE_DOTALL 0x0004 -#define PCRE_EXTENDED 0x0008 -#define PCRE_ANCHORED 0x0010 -#define PCRE_DOLLAR_ENDONLY 0x0020 -#define PCRE_EXTRA 0x0040 -#define PCRE_NOTBOL 0x0080 -#define PCRE_NOTEOL 0x0100 -#define PCRE_UNGREEDY 0x0200 -#define PCRE_NOTEMPTY 0x0400 -#define PCRE_UTF8 0x0800 -#define PCRE_NO_AUTO_CAPTURE 0x1000 - -/* Exec-time and get/set-time error codes */ - -#define PCRE_ERROR_NOMATCH (-1) -#define PCRE_ERROR_NULL (-2) -#define PCRE_ERROR_BADOPTION (-3) -#define PCRE_ERROR_BADMAGIC (-4) -#define PCRE_ERROR_UNKNOWN_NODE (-5) -#define PCRE_ERROR_NOMEMORY (-6) -#define PCRE_ERROR_NOSUBSTRING (-7) -#define PCRE_ERROR_MATCHLIMIT (-8) -#define PCRE_ERROR_CALLOUT (-9) /* Never used by PCRE itself */ - -/* Request types for pcre_fullinfo() */ - -#define PCRE_INFO_OPTIONS 0 -#define PCRE_INFO_SIZE 1 -#define PCRE_INFO_CAPTURECOUNT 2 -#define PCRE_INFO_BACKREFMAX 3 -#define PCRE_INFO_FIRSTBYTE 4 -#define PCRE_INFO_FIRSTCHAR 4 /* For backwards compatibility */ -#define PCRE_INFO_FIRSTTABLE 5 -#define PCRE_INFO_LASTLITERAL 6 -#define PCRE_INFO_NAMEENTRYSIZE 7 -#define PCRE_INFO_NAMECOUNT 8 -#define PCRE_INFO_NAMETABLE 9 -#define PCRE_INFO_STUDYSIZE 10 - -/* Request types for pcre_config() */ - -#define PCRE_CONFIG_UTF8 0 -#define PCRE_CONFIG_NEWLINE 1 -#define PCRE_CONFIG_LINK_SIZE 2 -#define PCRE_CONFIG_POSIX_MALLOC_THRESHOLD 3 -#define PCRE_CONFIG_MATCH_LIMIT 4 - -/* Bit flags for the pcre_extra structure */ - -#define PCRE_EXTRA_STUDY_DATA 0x0001 -#define PCRE_EXTRA_MATCH_LIMIT 0x0002 -#define PCRE_EXTRA_CALLOUT_DATA 0x0004 - -/* Types */ - -struct real_pcre; /* declaration; the definition is private */ -typedef struct real_pcre pcre; - -/* The structure for passing additional data to pcre_exec(). This is defined in -such as way as to be extensible. */ - -typedef struct pcre_extra { - unsigned long int flags; /* Bits for which fields are set */ - void *study_data; /* Opaque data from pcre_study() */ - unsigned long int match_limit; /* Maximum number of calls to match() */ - void *callout_data; /* Data passed back in callouts */ -} pcre_extra; - -/* The structure for passing out data via the pcre_callout_function. We use a -structure so that new fields can be added on the end in future versions, -without changing the API of the function, thereby allowing old clients to work -without modification. */ - -typedef struct pcre_callout_block { - int version; /* Identifies version of block */ - /* ------------------------ Version 0 ------------------------------- */ - int callout_number; /* Number compiled into pattern */ - int *offset_vector; /* The offset vector */ - const char *subject; /* The subject being matched */ - int subject_length; /* The length of the subject */ - int start_match; /* Offset to start of this match attempt */ - int current_position; /* Where we currently are */ - int capture_top; /* Max current capture */ - int capture_last; /* Most recently closed capture */ - void *callout_data; /* Data passed in with the call */ - /* ------------------------------------------------------------------ */ -} pcre_callout_block; - -/* Indirection for store get and free functions. These can be set to -alternative malloc/free functions if required. There is also an optional -callout function that is triggered by the (?) regex item. Some magic is -required for Win32 DLL; it is null on other OS. For Virtual Pascal, these -have to be different again. */ - -#ifndef VPCOMPAT -PCRE_DATA_SCOPE void *(*pcre_malloc)(size_t); -PCRE_DATA_SCOPE void (*pcre_free)(void *); -PCRE_DATA_SCOPE int (*pcre_callout)(pcre_callout_block *); -#else /* VPCOMPAT */ -extern void *pcre_malloc(size_t); -extern void pcre_free(void *); -extern int pcre_callout(pcre_callout_block *); -#endif /* VPCOMPAT */ - -/* Exported PCRE functions */ - -extern pcre *pcre_compile(const char *, int, const char **, - int *, const unsigned char *); -extern int pcre_config(int, void *); -extern int pcre_copy_named_substring(const pcre *, const char *, - int *, int, const char *, char *, int); -extern int pcre_copy_substring(const char *, int *, int, int, - char *, int); -extern int pcre_exec(const pcre *, const pcre_extra *, - const char *, int, int, int, int *, int); -extern void pcre_free_substring(const char *); -extern void pcre_free_substring_list(const char **); -extern int pcre_fullinfo(const pcre *, const pcre_extra *, int, - void *); -extern int pcre_get_named_substring(const pcre *, const char *, - int *, int, const char *, const char **); -extern int pcre_get_stringnumber(const pcre *, const char *); -extern int pcre_get_substring(const char *, int *, int, int, - const char **); -extern int pcre_get_substring_list(const char *, int *, int, - const char ***); -extern int pcre_info(const pcre *, int *, int *); -extern const unsigned char *pcre_maketables(void); -extern pcre_extra *pcre_study(const pcre *, int, const char **); -extern const char *pcre_version(void); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif /* End of pcre.h */ diff --git a/libpcre/pcre_xclass.c b/libpcre/pcre_xclass.c new file mode 100644 index 0000000000..40d2654da0 --- /dev/null +++ b/libpcre/pcre_xclass.c @@ -0,0 +1,121 @@ +/************************************************* +* Perl-Compatible Regular Expressions * +*************************************************/ + +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. + + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ + + +/* This module contains an internal function that is used to match an extended +class (one that contains characters whose values are > 255). It is used by both +pcre_exec() and pcre_def_exec(). */ + + +#include "pcre_internal.h" + + +/************************************************* +* Match character against an XCLASS * +*************************************************/ + +/* This function is called to match a character against an extended class that +might contain values > 255. + +Arguments: + c the character + data points to the flag byte of the XCLASS data + +Returns: TRUE if character matches, else FALSE +*/ + +EXPORT BOOL +_pcre_xclass(int c, const uschar *data) +{ +int t; +BOOL negated = (*data & XCL_NOT) != 0; + +/* Character values < 256 are matched against a bitmap, if one is present. If +not, we still carry on, because there may be ranges that start below 256 in the +additional data. */ + +if (c < 256) + { + if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0) + return !negated; /* char found */ + } + +/* First skip the bit map if present. Then match against the list of Unicode +properties or large chars or ranges that end with a large char. We won't ever +encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */ + +if ((*data++ & XCL_MAP) != 0) data += 32; + +while ((t = *data++) != XCL_END) + { + int x, y; + if (t == XCL_SINGLE) + { + GETCHARINC(x, data); + if (c == x) return !negated; + } + else if (t == XCL_RANGE) + { + GETCHARINC(x, data); + GETCHARINC(y, data); + if (c >= x && c <= y) return !negated; + } + +#ifdef SUPPORT_UCP + else /* XCL_PROP & XCL_NOTPROP */ + { + int chartype, othercase; + int rqdtype = *data++; + int category = ucp_findchar(c, &chartype, &othercase); + if (rqdtype >= 128) + { + if ((rqdtype - 128 == category) == (t == XCL_PROP)) return !negated; + } + else + { + if ((rqdtype == chartype) == (t == XCL_PROP)) return !negated; + } + } +#endif /* SUPPORT_UCP */ + } + +return negated; /* char did not match */ +} + +/* End of pcre_xclass.c */ diff --git a/libpcre/pcreposix.c b/libpcre/pcreposix.c index 49094f280d..06fd58bcbf 100644 --- a/libpcre/pcreposix.c +++ b/libpcre/pcreposix.c @@ -2,103 +2,108 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* -This is a library of functions to support regular expressions whose syntax -and semantics are as close as possible to those of the Perl 5 language. See -the file Tech.Notes for some information on the internals. - -This module is a wrapper that provides a POSIX API to the underlying PCRE -functions. - -Written by: Philip Hazel +/* PCRE is a library of functions to support regular expressions whose syntax +and semantics are as close as possible to those of the Perl 5 language. - Copyright (c) 1997-2003 University of Cambridge + Written by Philip Hazel + Copyright (c) 1997-2005 University of Cambridge ----------------------------------------------------------------------------- -Permission is granted to anyone to use this software for any purpose on any -computer system, and to redistribute it freely, subject to the following -restrictions: - -1. This software is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ -2. The origin of this software must not be misrepresented, either by - explicit claim or by omission. -3. Altered versions must be plainly marked as such, and must not be - misrepresented as being the original software. +/* This module is a wrapper that provides a POSIX API to the underlying PCRE +functions. */ -4. If PCRE is embedded in any software that is released under the GNU - General Purpose Licence (GPL), then the terms of that licence shall - supersede any condition above with which it is incompatible. ------------------------------------------------------------------------------ -*/ -#include "internal.h" +#include "pcre_internal.h" #include "pcreposix.h" #include "stdlib.h" -/* Corresponding tables of PCRE error messages and POSIX error codes. */ - -static const char *estring[] = { - ERR1, ERR2, ERR3, ERR4, ERR5, ERR6, ERR7, ERR8, ERR9, ERR10, - ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20, - ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR29, ERR29, ERR30, - ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, - ERR41, ERR42, ERR43 }; - -static int eint[] = { - REG_EESCAPE, /* "\\ at end of pattern" */ - REG_EESCAPE, /* "\\c at end of pattern" */ - REG_EESCAPE, /* "unrecognized character follows \\" */ - REG_BADBR, /* "numbers out of order in {} quantifier" */ - REG_BADBR, /* "number too big in {} quantifier" */ - REG_EBRACK, /* "missing terminating ] for character class" */ - REG_ECTYPE, /* "invalid escape sequence in character class" */ - REG_ERANGE, /* "range out of order in character class" */ - REG_BADRPT, /* "nothing to repeat" */ - REG_BADRPT, /* "operand of unlimited repeat could match the empty string" */ - REG_ASSERT, /* "internal error: unexpected repeat" */ - REG_BADPAT, /* "unrecognized character after (?" */ - REG_BADPAT, /* "POSIX named classes are supported only within a class" */ - REG_EPAREN, /* "missing )" */ - REG_ESUBREG, /* "reference to non-existent subpattern" */ - REG_INVARG, /* "erroffset passed as NULL" */ - REG_INVARG, /* "unknown option bit(s) set" */ - REG_EPAREN, /* "missing ) after comment" */ - REG_ESIZE, /* "parentheses nested too deeply" */ - REG_ESIZE, /* "regular expression too large" */ - REG_ESPACE, /* "failed to get memory" */ - REG_EPAREN, /* "unmatched brackets" */ - REG_ASSERT, /* "internal error: code overflow" */ - REG_BADPAT, /* "unrecognized character after (?<" */ - REG_BADPAT, /* "lookbehind assertion is not fixed length" */ - REG_BADPAT, /* "malformed number after (?(" */ - REG_BADPAT, /* "conditional group containe more than two branches" */ - REG_BADPAT, /* "assertion expected after (?(" */ - REG_BADPAT, /* "(?R or (?digits must be followed by )" */ - REG_ECTYPE, /* "unknown POSIX class name" */ - REG_BADPAT, /* "POSIX collating elements are not supported" */ - REG_INVARG, /* "this version of PCRE is not compiled with PCRE_UTF8 support" */ - REG_BADPAT, /* "spare error" */ - REG_BADPAT, /* "character value in \x{...} sequence is too large" */ - REG_BADPAT, /* "invalid condition (?(0)" */ - REG_BADPAT, /* "\\C not allowed in lookbehind assertion" */ - REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */ - REG_BADPAT, /* "number after (?C is > 255" */ - REG_BADPAT, /* "closing ) for (?C expected" */ - REG_BADPAT, /* "recursive call could loop indefinitely" */ - REG_BADPAT, /* "unrecognized character after (?P" */ - REG_BADPAT, /* "syntax error after (?P" */ - REG_BADPAT /* "two named groups have the same name" */ +/* Table to translate PCRE compile time error codes into POSIX error codes. */ + +static const int eint[] = { + 0, /* no error */ + REG_EESCAPE, /* \ at end of pattern */ + REG_EESCAPE, /* \c at end of pattern */ + REG_EESCAPE, /* unrecognized character follows \ */ + REG_BADBR, /* numbers out of order in {} quantifier */ + REG_BADBR, /* number too big in {} quantifier */ + REG_EBRACK, /* missing terminating ] for character class */ + REG_ECTYPE, /* invalid escape sequence in character class */ + REG_ERANGE, /* range out of order in character class */ + REG_BADRPT, /* nothing to repeat */ + REG_BADRPT, /* operand of unlimited repeat could match the empty string */ + REG_ASSERT, /* internal error: unexpected repeat */ + REG_BADPAT, /* unrecognized character after (? */ + REG_BADPAT, /* POSIX named classes are supported only within a class */ + REG_EPAREN, /* missing ) */ + REG_ESUBREG, /* reference to non-existent subpattern */ + REG_INVARG, /* erroffset passed as NULL */ + REG_INVARG, /* unknown option bit(s) set */ + REG_EPAREN, /* missing ) after comment */ + REG_ESIZE, /* parentheses nested too deeply */ + REG_ESIZE, /* regular expression too large */ + REG_ESPACE, /* failed to get memory */ + REG_EPAREN, /* unmatched brackets */ + REG_ASSERT, /* internal error: code overflow */ + REG_BADPAT, /* unrecognized character after (?< */ + REG_BADPAT, /* lookbehind assertion is not fixed length */ + REG_BADPAT, /* malformed number after (?( */ + REG_BADPAT, /* conditional group containe more than two branches */ + REG_BADPAT, /* assertion expected after (?( */ + REG_BADPAT, /* (?R or (?digits must be followed by ) */ + REG_ECTYPE, /* unknown POSIX class name */ + REG_BADPAT, /* POSIX collating elements are not supported */ + REG_INVARG, /* this version of PCRE is not compiled with PCRE_UTF8 support */ + REG_BADPAT, /* spare error */ + REG_BADPAT, /* character value in \x{...} sequence is too large */ + REG_BADPAT, /* invalid condition (?(0) */ + REG_BADPAT, /* \C not allowed in lookbehind assertion */ + REG_EESCAPE, /* PCRE does not support \L, \l, \N, \U, or \u */ + REG_BADPAT, /* number after (?C is > 255 */ + REG_BADPAT, /* closing ) for (?C expected */ + REG_BADPAT, /* recursive call could loop indefinitely */ + REG_BADPAT, /* unrecognized character after (?P */ + REG_BADPAT, /* syntax error after (?P */ + REG_BADPAT, /* two named groups have the same name */ + REG_BADPAT, /* invalid UTF-8 string */ + REG_BADPAT, /* support for \P, \p, and \X has not been compiled */ + REG_BADPAT, /* malformed \P or \p sequence */ + REG_BADPAT /* unknown property name after \P or \p */ }; /* Table of texts corresponding to POSIX error codes */ -static const char *pstring[] = { +static const char *const pstring[] = { "", /* Dummy for value 0 */ "internal error", /* REG_ASSERT */ "invalid repeat counts in {}", /* BADBR */ @@ -122,29 +127,11 @@ static const char *pstring[] = { -/************************************************* -* Translate PCRE text code to int * -*************************************************/ - -/* PCRE compile-time errors are given as strings defined as macros. We can just -look them up in a table to turn them into POSIX-style error codes. */ - -static int -pcre_posix_error_code(const char *s) -{ -size_t i; -for (i = 0; i < sizeof(estring)/sizeof(char *); i++) - if (strcmp(s, estring[i]) == 0) return eint[i]; -return REG_ASSERT; -} - - - /************************************************* * Translate error code to string * *************************************************/ -size_t +EXPORT size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) { const char *message, *addmessage; @@ -179,7 +166,7 @@ return length + addlength; * Free store held by a regex * *************************************************/ -void +EXPORT void regfree(regex_t *preg) { (pcre_free)(preg->re_pcre); @@ -202,22 +189,25 @@ Returns: 0 on success various non-zero codes on failure */ -int +EXPORT int regcomp(regex_t *preg, const char *pattern, int cflags) { const char *errorptr; int erroffset; +int errorcode; int options = 0; if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS; if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE; +if ((cflags & REG_DOTALL) != 0) options |= PCRE_DOTALL; -preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL); +preg->re_pcre = pcre_compile2(pattern, options, &errorcode, &errorptr, + &erroffset, NULL); preg->re_erroffset = erroffset; -if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr); +if (preg->re_pcre == NULL) return eint[errorcode]; -preg->re_nsub = pcre_info(preg->re_pcre, NULL, NULL); +preg->re_nsub = pcre_info((const pcre *)preg->re_pcre, NULL, NULL); return 0; } @@ -235,7 +225,7 @@ ints. However, if the number of possible capturing brackets is small, use a block of store on the stack, to reduce the use of malloc/free. The threshold is in a macro that can be changed at configure time. */ -int +EXPORT int regexec(const regex_t *preg, const char *string, size_t nmatch, regmatch_t pmatch[], int eflags) { @@ -264,8 +254,8 @@ if (nmatch > 0) } } -rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options, - ovector, nmatch * 3); +rc = pcre_exec((const pcre *)preg->re_pcre, NULL, string, (int)strlen(string), + 0, options, ovector, nmatch * 3); if (rc == 0) rc = nmatch; /* All captured slots were filled in */ @@ -293,6 +283,9 @@ else case PCRE_ERROR_BADMAGIC: return REG_INVARG; case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT; case PCRE_ERROR_NOMEMORY: return REG_ESPACE; + case PCRE_ERROR_MATCHLIMIT: return REG_ESPACE; + case PCRE_ERROR_BADUTF8: return REG_INVARG; + case PCRE_ERROR_BADUTF8_OFFSET: return REG_INVARG; default: return REG_ASSERT; } } diff --git a/libpcre/pcreposix.h b/libpcre/pcreposix.h index 2b97bf44f5..4f1b1abd45 100644 --- a/libpcre/pcreposix.h +++ b/libpcre/pcreposix.h @@ -2,14 +2,43 @@ * Perl-Compatible Regular Expressions * *************************************************/ -/* Copyright (c) 1997-2003 University of Cambridge */ - #ifndef _PCREPOSIX_H #define _PCREPOSIX_H /* This is the header for the POSIX wrapper interface to the PCRE Perl- Compatible Regular Expression library. It defines the things POSIX says should -be there. I hope. */ +be there. I hope. + + Copyright (c) 1997-2005 University of Cambridge + +----------------------------------------------------------------------------- +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. +----------------------------------------------------------------------------- +*/ /* Have to include stdlib.h in order to ensure that size_t is defined. */ @@ -28,6 +57,10 @@ extern "C" { #define REG_NOTBOL 0x04 #define REG_NOTEOL 0x08 +/* Additional options, not defined by POSIX, but somebody wanted them. */ + +#define REG_DOTALL 0x10 + /* These are not used by PCRE, but by defining them we make it easier to slot PCRE into existing programs that make POSIX calls. */ diff --git a/libpcre/perltest b/libpcre/perltest deleted file mode 100755 index bb34cc8398..0000000000 --- a/libpcre/perltest +++ /dev/null @@ -1,211 +0,0 @@ -#! /usr/bin/perl - -# Program for testing regular expressions with perl to check that PCRE handles -# them the same. This is the version that supports /8 for UTF-8 testing. As it -# stands, it requires at least Perl 5.8 for UTF-8 support. For Perl 5.6, it -# can be used as is for non-UTF-8 testing, but you have to uncomment the -# "use utf8" lines in order to to UTF-8 stuff (and you mustn't uncomment them -# for non-UTF-8 use). - - -# Function for turning a string into a string of printing chars. There are -# currently problems with UTF-8 strings; this fudges round them. - -sub pchars { -my($t) = ""; - -if ($utf8) - { -# use utf8; <=============== For UTF-8 in Perl 5.6 - @p = unpack('U*', $_[0]); - foreach $c (@p) - { - if ($c >= 32 && $c < 127) { $t .= chr $c; } - else { $t .= sprintf("\\x{%02x}", $c); } - } - } - -else - { - foreach $c (split(//, $_[0])) - { - if (ord $c >= 32 && ord $c < 127) { $t .= $c; } - else { $t .= sprintf("\\x%02x", ord $c); } - } - } - -$t; -} - - - -# Read lines from named file or stdin and write to named file or stdout; lines -# consist of a regular expression, in delimiters and optionally followed by -# options, followed by a set of test data, terminated by an empty line. - -# Sort out the input and output files - -if (@ARGV > 0) - { - open(INFILE, "<$ARGV[0]") || die "Failed to open $ARGV[0]\n"; - $infile = "INFILE"; - } -else { $infile = "STDIN"; } - -if (@ARGV > 1) - { - open(OUTFILE, ">$ARGV[1]") || die "Failed to open $ARGV[1]\n"; - $outfile = "OUTFILE"; - } -else { $outfile = "STDOUT"; } - -printf($outfile "Perl $] Regular Expressions\n\n"); - -# Main loop - -NEXT_RE: -for (;;) - { - printf " re> " if $infile eq "STDIN"; - last if ! ($_ = <$infile>); - printf $outfile "$_" if $infile ne "STDIN"; - next if ($_ eq ""); - - $pattern = $_; - - while ($pattern !~ /^\s*(.).*\1/s) - { - printf " > " if $infile eq "STDIN"; - last if ! ($_ = <$infile>); - printf $outfile "$_" if $infile ne "STDIN"; - $pattern .= $_; - } - - chomp($pattern); - $pattern =~ s/\s+$//; - - # The private /+ modifier means "print $' afterwards". - - $showrest = ($pattern =~ s/\+(?=[a-z]*$)//); - - # The private /8 modifier means "operate in UTF-8". Currently, Perl - # has bugs that we try to work around using this flag. - - $utf8 = ($pattern =~ s/8(?=[a-z]*$)//); - - # Check that the pattern is valid - - if ($utf8) - { -# use utf8; <=============== For UTF-8 in Perl 5.6 - eval "\$_ =~ ${pattern}"; - } - else - { - eval "\$_ =~ ${pattern}"; - } - - if ($@) - { - printf $outfile "Error: $@"; - next NEXT_RE; - } - - # If the /g modifier is present, we want to put a loop round the matching; - # otherwise just a single "if". - - $cmd = ($pattern =~ /g[a-z]*$/)? "while" : "if"; - - # If the pattern is actually the null string, Perl uses the most recently - # executed (and successfully compiled) regex is used instead. This is a - # nasty trap for the unwary! The PCRE test suite does contain null strings - # in places - if they are allowed through here all sorts of weird and - # unexpected effects happen. To avoid this, we replace such patterns with - # a non-null pattern that has the same effect. - - $pattern = "/(?#)/$2" if ($pattern =~ /^(.)\1(.*)$/); - - # Read data lines and test them - - for (;;) - { - printf "data> " if $infile eq "STDIN"; - last NEXT_RE if ! ($_ = <$infile>); - chomp; - printf $outfile "$_\n" if $infile ne "STDIN"; - - s/\s+$//; - s/^\s+//; - - last if ($_ eq ""); - - $x = eval "\"$_\""; # To get escapes processed - - # Empty array for holding results, then do the matching. - - @subs = (); - - $pushes = "push \@subs,\$&;" . - "push \@subs,\$1;" . - "push \@subs,\$2;" . - "push \@subs,\$3;" . - "push \@subs,\$4;" . - "push \@subs,\$5;" . - "push \@subs,\$6;" . - "push \@subs,\$7;" . - "push \@subs,\$8;" . - "push \@subs,\$9;" . - "push \@subs,\$10;" . - "push \@subs,\$11;" . - "push \@subs,\$12;" . - "push \@subs,\$13;" . - "push \@subs,\$14;" . - "push \@subs,\$15;" . - "push \@subs,\$16;" . - "push \@subs,\$'; }"; - - if ($utf8) - { -# use utf8; <=============== For UTF-8 in Perl 5.6 - eval "${cmd} (\$x =~ ${pattern}) {" . $pushes; - } - else - { - eval "${cmd} (\$x =~ ${pattern}) {" . $pushes; - } - - if ($@) - { - printf $outfile "Error: $@\n"; - next NEXT_RE; - } - elsif (scalar(@subs) == 0) - { - printf $outfile "No match\n"; - } - else - { - while (scalar(@subs) != 0) - { - printf $outfile (" 0: %s\n", &pchars($subs[0])); - printf $outfile (" 0+ %s\n", &pchars($subs[17])) if $showrest; - $last_printed = 0; - for ($i = 1; $i <= 16; $i++) - { - if (defined $subs[$i]) - { - while ($last_printed++ < $i-1) - { printf $outfile ("%2d: \n", $last_printed); } - printf $outfile ("%2d: %s\n", $i, &pchars($subs[$i])); - $last_printed = $i; - } - } - splice(@subs, 0, 18); - } - } - } - } - -printf $outfile "\n"; - -# End diff --git a/libpcre/ucp.h b/libpcre/ucp.h new file mode 100644 index 0000000000..955e104a6a --- /dev/null +++ b/libpcre/ucp.h @@ -0,0 +1,60 @@ +/************************************************* +* libucp - Unicode Property Table handler * +*************************************************/ + + +#ifndef _UCP_H +#define _UCP_H + +/* These are the character categories that are returned by ucp_findchar */ + +enum { + ucp_C, /* Other */ + ucp_L, /* Letter */ + ucp_M, /* Mark */ + ucp_N, /* Number */ + ucp_P, /* Punctuation */ + ucp_S, /* Symbol */ + ucp_Z /* Separator */ +}; + +/* These are the detailed character types that are returned by ucp_findchar */ + +enum { + ucp_Cc, /* Control */ + ucp_Cf, /* Format */ + ucp_Cn, /* Unassigned */ + ucp_Co, /* Private use */ + ucp_Cs, /* Surrogate */ + ucp_Ll, /* Lower case letter */ + ucp_Lm, /* Modifier letter */ + ucp_Lo, /* Other letter */ + ucp_Lt, /* Title case letter */ + ucp_Lu, /* Upper case letter */ + ucp_Mc, /* Spacing mark */ + ucp_Me, /* Enclosing mark */ + ucp_Mn, /* Non-spacing mark */ + ucp_Nd, /* Decimal number */ + ucp_Nl, /* Letter number */ + ucp_No, /* Other number */ + ucp_Pc, /* Connector punctuation */ + ucp_Pd, /* Dash punctuation */ + ucp_Pe, /* Close punctuation */ + ucp_Pf, /* Final punctuation */ + ucp_Pi, /* Initial punctuation */ + ucp_Po, /* Other punctuation */ + ucp_Ps, /* Open punctuation */ + ucp_Sc, /* Currency symbol */ + ucp_Sk, /* Modifier symbol */ + ucp_Sm, /* Mathematical symbol */ + ucp_So, /* Other symbol */ + ucp_Zl, /* Line separator */ + ucp_Zp, /* Paragraph separator */ + ucp_Zs /* Space separator */ +}; + +extern int ucp_findchar(const int, int *, int *); + +#endif + +/* End of ucp.h */