From 090089c4f7e3a6f2f6ea63a79139bbf79871f4e6 Mon Sep 17 00:00:00 2001 From: wessels <> Date: Thu, 22 Feb 1996 13:23:53 +0000 Subject: [PATCH] Initial revision --- INSTALL | 40 + README | 25 + configure | 2188 +++++++++++++++ configure.in | 214 ++ include/GNUregex.h | 490 ++++ include/autoconf.h.in | 97 + include/util.h | 187 ++ include/version.h | 7 + lib/GNUregex.c | 4949 ++++++++++++++++++++++++++++++++++ lib/Makefile.in | 46 + lib/getfullhostname.c | 112 + lib/rfc1738.c | 182 ++ lib/util.c | 235 ++ scripts/AnnounceCache.pl | 33 + scripts/Makefile.in | 29 + scripts/RunAccel.in | 131 + scripts/RunCache.in | 167 ++ scripts/access-log-matrix.pl | 112 + scripts/cache-compare.pl | 154 ++ scripts/check_cache.pl | 55 + scripts/flag_truncs.pl | 68 + scripts/icpserver.pl | 112 + scripts/tcp-banger.pl | 47 + scripts/udp-banger.pl | 46 + src/Makefile.in | 108 + src/cache_cf.cc | 1750 ++++++++++++ src/cachemgr.cc | 692 +++++ src/client.cc | 221 ++ src/comm.cc | 1257 +++++++++ src/debug.cc | 266 ++ src/disk.cc | 736 +++++ src/dnsserver.cc | 299 ++ src/filemap.cc | 226 ++ src/ftp.cc | 746 +++++ src/gopher.cc | 1191 ++++++++ src/http.cc | 853 ++++++ src/ipcache.cc | 1596 +++++++++++ src/main.cc | 522 ++++ src/mime.cc | 167 ++ src/neighbors.cc | 849 ++++++ src/recv-announce.cc | 91 + src/send-announce.cc | 182 ++ src/stat.cc | 1343 +++++++++ src/stmem.cc | 488 ++++ src/store.cc | 2662 ++++++++++++++++++ src/tools.cc | 392 +++ src/url.cc | 223 ++ src/wais.cc | 515 ++++ 48 files changed, 27101 insertions(+) create mode 100644 INSTALL create mode 100644 README create mode 100755 configure create mode 100644 configure.in create mode 100644 include/GNUregex.h create mode 100644 include/autoconf.h.in create mode 100644 include/util.h create mode 100644 include/version.h create mode 100644 lib/GNUregex.c create mode 100644 lib/Makefile.in create mode 100644 lib/getfullhostname.c create mode 100644 lib/rfc1738.c create mode 100644 lib/util.c create mode 100755 scripts/AnnounceCache.pl 
create mode 100644 scripts/Makefile.in create mode 100644 scripts/RunAccel.in create mode 100644 scripts/RunCache.in create mode 100755 scripts/access-log-matrix.pl create mode 100755 scripts/cache-compare.pl create mode 100755 scripts/check_cache.pl create mode 100755 scripts/flag_truncs.pl create mode 100755 scripts/icpserver.pl create mode 100755 scripts/tcp-banger.pl create mode 100755 scripts/udp-banger.pl create mode 100644 src/Makefile.in create mode 100644 src/cache_cf.cc create mode 100644 src/cachemgr.cc create mode 100644 src/client.cc create mode 100644 src/comm.cc create mode 100644 src/debug.cc create mode 100644 src/disk.cc create mode 100644 src/dnsserver.cc create mode 100644 src/filemap.cc create mode 100644 src/ftp.cc create mode 100644 src/gopher.cc create mode 100644 src/http.cc create mode 100644 src/ipcache.cc create mode 100644 src/main.cc create mode 100644 src/mime.cc create mode 100644 src/neighbors.cc create mode 100644 src/recv-announce.cc create mode 100644 src/send-announce.cc create mode 100644 src/stat.cc create mode 100644 src/stmem.cc create mode 100644 src/store.cc create mode 100644 src/tools.cc create mode 100644 src/url.cc create mode 100644 src/wais.cc diff --git a/INSTALL b/INSTALL new file mode 100644 index 00000000000..76b369ecd64 --- /dev/null +++ b/INSTALL @@ -0,0 +1,40 @@ +Change the prefix variable in ./Makefile to be the directory in which +you would like to install the Harvest Cache. The default is +/usr/local/harvest. + +To build and install the Harvest Cache, type: + + % make all + % make install + +To run a Cache, you will need to: + + 1. set the HARVEST_HOME environment variable, as appropriate: + % setenv HARVEST_HOME /usr/local/harvest + + 2. customize the cached.conf configuration file: + % vi $HARVEST_HOME/lib/cached.conf + + 3. 
start the cache: + % $HARVEST_HOME/bin/RunCache + +If you want to use the WWW interface to the Cache Manager, copy +the cachemgr.cgi program into your httpd server's cgi-bin +directory. + +Alternatively, you can configure your HTTP server to recognize CGI +scripts in $HARVEST_HOME/cgi-bin. For example, with NCSA httpd, add +this line to the srm.conf file: + + ScriptAlias /Harvest/cgi-bin/ Your-HARVEST_HOME/cgi-bin/ + +Then, use http://your-server/Harvest/cgi-bin/cachemgr.cgi to access the +WWW interface to the Cache Manager. + +For further or more detailed instructions on how to run the Cache, +refer to the Harvest User's Manual: + http://harvest.cs.colorado.edu/harvest/doc.html + +To run the Cache as an httpd accelerator, follow the instructions here: + http://harvest.cs.colorado.edu/harvest/httpd_accel.html + diff --git a/README b/README new file mode 100644 index 00000000000..2abf6fdb8be --- /dev/null +++ b/README @@ -0,0 +1,25 @@ +This is a special distribution that contains only the Harvest object +cache. The cache is an element of the Harvest Information Discovery and +Access System, offering high performance proxy caching for Mosaic, +Netscape, Lynx, and other World Wide Web clients. It supports URLs via +HTTP, FTP, and Gopher. + +The Harvest cache outperforms other popular Internet caches (such as the +CERN cache) by a factor of 10-100, because the Harvest cache never +forks, is implemented with non-blocking I/O, keeps meta data and +especially hot objects cached in RAM, caches DNS lookups, supports +non-blocking DNS lookups, and implements negative caching both of +objects and of DNS lookups. 
A technical paper discussing the Harvest +cache and providing performance comparisons is available from +ftp://ftp.cs.colorado.edu/pub/cs/techreports/schwartz/HarvestCache.ps.Z + +The Harvest cache may also be run as an ``accelerator'' for HTTP servers, as +described in http://harvest.cs.colorado.edu/harvest/httpd_accel.html + +You can learn more about the overall Harvest system starting from the +Harvest Home Page at http://harvest.cs.colorado.edu/ + +Harvest documentation is on-line at +http://harvest.cs.colorado.edu/harvest/doc.html + +For further assistance, see http://harvest.cs.colorado.edu/support.html. diff --git a/configure b/configure new file mode 100755 index 00000000000..dbcc7c4e026 --- /dev/null +++ b/configure @@ -0,0 +1,2188 @@ +#! /bin/sh + +# Guess values for system-dependent variables and create Makefiles. +# Generated automatically using autoconf version 2.7 +# Copyright (C) 1992, 1993, 1994 Free Software Foundation, Inc. +# +# This configure script is free software; the Free Software Foundation +# gives unlimited permission to copy, distribute and modify it. + +# Defaults: +ac_help= +ac_default_prefix=/usr/local +# Any additions from configure.in: +ac_default_prefix=/usr/local/harvest + +# Initialize some variables set by options. +# The variables have the same names as the options, with +# dashes changed to underlines. 
+build=NONE +cache_file=./config.cache +exec_prefix=NONE +host=NONE +no_create= +nonopt=NONE +no_recursion= +prefix=NONE +program_prefix=NONE +program_suffix=NONE +program_transform_name=s,x,x, +silent= +site= +srcdir= +target=NONE +verbose= +x_includes=NONE +x_libraries=NONE +bindir='${exec_prefix}/bin' +sbindir='${exec_prefix}/sbin' +libexecdir='${exec_prefix}/libexec' +datadir='${prefix}/share' +sysconfdir='${prefix}/etc' +sharedstatedir='${prefix}/com' +localstatedir='${prefix}/var' +libdir='${exec_prefix}/lib' +includedir='${prefix}/include' +oldincludedir='/usr/include' +infodir='${prefix}/info' +mandir='${prefix}/man' + +# Initialize some other variables. +subdirs= +MFLAGS= MAKEFLAGS= + +ac_prev= +for ac_option +do + + # If the previous option needs an argument, assign it. + if test -n "$ac_prev"; then + eval "$ac_prev=\$ac_option" + ac_prev= + continue + fi + + case "$ac_option" in + -*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;; + *) ac_optarg= ;; + esac + + # Accept the important Cygnus configure options, so we can diagnose typos. 
+ + case "$ac_option" in + + -bindir | --bindir | --bindi | --bind | --bin | --bi) + ac_prev=bindir ;; + -bindir=* | --bindir=* | --bindi=* | --bind=* | --bin=* | --bi=*) + bindir="$ac_optarg" ;; + + -build | --build | --buil | --bui | --bu) + ac_prev=build ;; + -build=* | --build=* | --buil=* | --bui=* | --bu=*) + build="$ac_optarg" ;; + + -cache-file | --cache-file | --cache-fil | --cache-fi \ + | --cache-f | --cache- | --cache | --cach | --cac | --ca | --c) + ac_prev=cache_file ;; + -cache-file=* | --cache-file=* | --cache-fil=* | --cache-fi=* \ + | --cache-f=* | --cache-=* | --cache=* | --cach=* | --cac=* | --ca=* | --c=*) + cache_file="$ac_optarg" ;; + + -datadir | --datadir | --datadi | --datad | --data | --dat | --da) + ac_prev=datadir ;; + -datadir=* | --datadir=* | --datadi=* | --datad=* | --data=* | --dat=* \ + | --da=*) + datadir="$ac_optarg" ;; + + -disable-* | --disable-*) + ac_feature=`echo $ac_option|sed -e 's/-*disable-//'` + # Reject names that are not valid shell variable names. + if test -n "`echo $ac_feature| sed 's/[-a-zA-Z0-9_]//g'`"; then + { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } + fi + ac_feature=`echo $ac_feature| sed 's/-/_/g'` + eval "enable_${ac_feature}=no" ;; + + -enable-* | --enable-*) + ac_feature=`echo $ac_option|sed -e 's/-*enable-//' -e 's/=.*//'` + # Reject names that are not valid shell variable names. 
+ if test -n "`echo $ac_feature| sed 's/[-_a-zA-Z0-9]//g'`"; then + { echo "configure: error: $ac_feature: invalid feature name" 1>&2; exit 1; } + fi + ac_feature=`echo $ac_feature| sed 's/-/_/g'` + case "$ac_option" in + *=*) ;; + *) ac_optarg=yes ;; + esac + eval "enable_${ac_feature}='$ac_optarg'" ;; + + -exec-prefix | --exec_prefix | --exec-prefix | --exec-prefi \ + | --exec-pref | --exec-pre | --exec-pr | --exec-p | --exec- \ + | --exec | --exe | --ex) + ac_prev=exec_prefix ;; + -exec-prefix=* | --exec_prefix=* | --exec-prefix=* | --exec-prefi=* \ + | --exec-pref=* | --exec-pre=* | --exec-pr=* | --exec-p=* | --exec-=* \ + | --exec=* | --exe=* | --ex=*) + exec_prefix="$ac_optarg" ;; + + -gas | --gas | --ga | --g) + # Obsolete; use --with-gas. + with_gas=yes ;; + + -help | --help | --hel | --he) + # Omit some internal or obsolete options to make the list less imposing. + # This message is too long to be a string in the A/UX 3.1 sh. + cat << EOF +Usage: configure [options] [host] +Options: [defaults in brackets after descriptions] +Configuration: + --cache-file=FILE cache test results in FILE + --help print this message + --no-create do not create output files + --quiet, --silent do not print \`checking...' 
messages + --version print the version of autoconf that created configure +Directory and file names: + --prefix=PREFIX install architecture-independent files in PREFIX + [$ac_default_prefix] + --exec-prefix=EPREFIX install architecture-dependent files in EPREFIX + [same as prefix] + --bindir=DIR user executables in DIR [EPREFIX/bin] + --sbindir=DIR system admin executables in DIR [EPREFIX/sbin] + --libexecdir=DIR program executables in DIR [EPREFIX/libexec] + --datadir=DIR read-only architecture-independent data in DIR + [PREFIX/share] + --sysconfdir=DIR read-only single-machine data in DIR [PREFIX/etc] + --sharedstatedir=DIR modifiable architecture-independent data in DIR + [PREFIX/com] + --localstatedir=DIR modifiable single-machine data in DIR [PREFIX/var] + --libdir=DIR object code libraries in DIR [EPREFIX/lib] + --includedir=DIR C header files in DIR [PREFIX/include] + --oldincludedir=DIR C header files for non-gcc in DIR [/usr/include] + --infodir=DIR info documentation in DIR [PREFIX/info] + --mandir=DIR man documentation in DIR [PREFIX/man] + --srcdir=DIR find the sources in DIR [configure dir or ..] 
+ --program-prefix=PREFIX prepend PREFIX to installed program names + --program-suffix=SUFFIX append SUFFIX to installed program names + --program-transform-name=PROGRAM + run sed PROGRAM on installed program names +EOF + cat << EOF +Host type: + --build=BUILD configure for building on BUILD [BUILD=HOST] + --host=HOST configure for HOST [guessed] + --target=TARGET configure for TARGET [TARGET=HOST] +Features and packages: + --disable-FEATURE do not include FEATURE (same as --enable-FEATURE=no) + --enable-FEATURE[=ARG] include FEATURE [ARG=yes] + --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] + --without-PACKAGE do not use PACKAGE (same as --with-PACKAGE=no) + --x-includes=DIR X include files are in DIR + --x-libraries=DIR X library files are in DIR +EOF + if test -n "$ac_help"; then + echo "--enable and --with options recognized:$ac_help" + fi + exit 0 ;; + + -host | --host | --hos | --ho) + ac_prev=host ;; + -host=* | --host=* | --hos=* | --ho=*) + host="$ac_optarg" ;; + + -includedir | --includedir | --includedi | --included | --include \ + | --includ | --inclu | --incl | --inc) + ac_prev=includedir ;; + -includedir=* | --includedir=* | --includedi=* | --included=* | --include=* \ + | --includ=* | --inclu=* | --incl=* | --inc=*) + includedir="$ac_optarg" ;; + + -infodir | --infodir | --infodi | --infod | --info | --inf) + ac_prev=infodir ;; + -infodir=* | --infodir=* | --infodi=* | --infod=* | --info=* | --inf=*) + infodir="$ac_optarg" ;; + + -libdir | --libdir | --libdi | --libd) + ac_prev=libdir ;; + -libdir=* | --libdir=* | --libdi=* | --libd=*) + libdir="$ac_optarg" ;; + + -libexecdir | --libexecdir | --libexecdi | --libexecd | --libexec \ + | --libexe | --libex | --libe) + ac_prev=libexecdir ;; + -libexecdir=* | --libexecdir=* | --libexecdi=* | --libexecd=* | --libexec=* \ + | --libexe=* | --libex=* | --libe=*) + libexecdir="$ac_optarg" ;; + + -localstatedir | --localstatedir | --localstatedi | --localstated \ + | --localstate | --localstat | --localsta | 
--localst \ + | --locals | --local | --loca | --loc | --lo) + ac_prev=localstatedir ;; + -localstatedir=* | --localstatedir=* | --localstatedi=* | --localstated=* \ + | --localstate=* | --localstat=* | --localsta=* | --localst=* \ + | --locals=* | --local=* | --loca=* | --loc=* | --lo=*) + localstatedir="$ac_optarg" ;; + + -mandir | --mandir | --mandi | --mand | --man | --ma | --m) + ac_prev=mandir ;; + -mandir=* | --mandir=* | --mandi=* | --mand=* | --man=* | --ma=* | --m=*) + mandir="$ac_optarg" ;; + + -nfp | --nfp | --nf) + # Obsolete; use --without-fp. + with_fp=no ;; + + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c) + no_create=yes ;; + + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) + no_recursion=yes ;; + + -oldincludedir | --oldincludedir | --oldincludedi | --oldincluded \ + | --oldinclude | --oldinclud | --oldinclu | --oldincl | --oldinc \ + | --oldin | --oldi | --old | --ol | --o) + ac_prev=oldincludedir ;; + -oldincludedir=* | --oldincludedir=* | --oldincludedi=* | --oldincluded=* \ + | --oldinclude=* | --oldinclud=* | --oldinclu=* | --oldincl=* | --oldinc=* \ + | --oldin=* | --oldi=* | --old=* | --ol=* | --o=*) + oldincludedir="$ac_optarg" ;; + + -prefix | --prefix | --prefi | --pref | --pre | --pr | --p) + ac_prev=prefix ;; + -prefix=* | --prefix=* | --prefi=* | --pref=* | --pre=* | --pr=* | --p=*) + prefix="$ac_optarg" ;; + + -program-prefix | --program-prefix | --program-prefi | --program-pref \ + | --program-pre | --program-pr | --program-p) + ac_prev=program_prefix ;; + -program-prefix=* | --program-prefix=* | --program-prefi=* \ + | --program-pref=* | --program-pre=* | --program-pr=* | --program-p=*) + program_prefix="$ac_optarg" ;; + + -program-suffix | --program-suffix | --program-suffi | --program-suff \ + | --program-suf | --program-su | --program-s) + ac_prev=program_suffix ;; + -program-suffix=* | 
--program-suffix=* | --program-suffi=* \ + | --program-suff=* | --program-suf=* | --program-su=* | --program-s=*) + program_suffix="$ac_optarg" ;; + + -program-transform-name | --program-transform-name \ + | --program-transform-nam | --program-transform-na \ + | --program-transform-n | --program-transform- \ + | --program-transform | --program-transfor \ + | --program-transfo | --program-transf \ + | --program-trans | --program-tran \ + | --progr-tra | --program-tr | --program-t) + ac_prev=program_transform_name ;; + -program-transform-name=* | --program-transform-name=* \ + | --program-transform-nam=* | --program-transform-na=* \ + | --program-transform-n=* | --program-transform-=* \ + | --program-transform=* | --program-transfor=* \ + | --program-transfo=* | --program-transf=* \ + | --program-trans=* | --program-tran=* \ + | --progr-tra=* | --program-tr=* | --program-t=*) + program_transform_name="$ac_optarg" ;; + + -q | -quiet | --quiet | --quie | --qui | --qu | --q \ + | -silent | --silent | --silen | --sile | --sil) + silent=yes ;; + + -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb) + ac_prev=sbindir ;; + -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \ + | --sbi=* | --sb=*) + sbindir="$ac_optarg" ;; + + -sharedstatedir | --sharedstatedir | --sharedstatedi \ + | --sharedstated | --sharedstate | --sharedstat | --sharedsta \ + | --sharedst | --shareds | --shared | --share | --shar \ + | --sha | --sh) + ac_prev=sharedstatedir ;; + -sharedstatedir=* | --sharedstatedir=* | --sharedstatedi=* \ + | --sharedstated=* | --sharedstate=* | --sharedstat=* | --sharedsta=* \ + | --sharedst=* | --shareds=* | --shared=* | --share=* | --shar=* \ + | --sha=* | --sh=*) + sharedstatedir="$ac_optarg" ;; + + -site | --site | --sit) + ac_prev=site ;; + -site=* | --site=* | --sit=*) + site="$ac_optarg" ;; + + -srcdir | --srcdir | --srcdi | --srcd | --src | --sr) + ac_prev=srcdir ;; + -srcdir=* | --srcdir=* | --srcdi=* | --srcd=* | --src=* | --sr=*) + 
srcdir="$ac_optarg" ;; + + -sysconfdir | --sysconfdir | --sysconfdi | --sysconfd | --sysconf \ + | --syscon | --sysco | --sysc | --sys | --sy) + ac_prev=sysconfdir ;; + -sysconfdir=* | --sysconfdir=* | --sysconfdi=* | --sysconfd=* | --sysconf=* \ + | --syscon=* | --sysco=* | --sysc=* | --sys=* | --sy=*) + sysconfdir="$ac_optarg" ;; + + -target | --target | --targe | --targ | --tar | --ta | --t) + ac_prev=target ;; + -target=* | --target=* | --targe=* | --targ=* | --tar=* | --ta=* | --t=*) + target="$ac_optarg" ;; + + -v | -verbose | --verbose | --verbos | --verbo | --verb) + verbose=yes ;; + + -version | --version | --versio | --versi | --vers) + echo "configure generated by autoconf version 2.7" + exit 0 ;; + + -with-* | --with-*) + ac_package=`echo $ac_option|sed -e 's/-*with-//' -e 's/=.*//'` + # Reject names that are not valid shell variable names. + if test -n "`echo $ac_package| sed 's/[-_a-zA-Z0-9]//g'`"; then + { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } + fi + ac_package=`echo $ac_package| sed 's/-/_/g'` + case "$ac_option" in + *=*) ;; + *) ac_optarg=yes ;; + esac + eval "with_${ac_package}='$ac_optarg'" ;; + + -without-* | --without-*) + ac_package=`echo $ac_option|sed -e 's/-*without-//'` + # Reject names that are not valid shell variable names. + if test -n "`echo $ac_package| sed 's/[-a-zA-Z0-9_]//g'`"; then + { echo "configure: error: $ac_package: invalid package name" 1>&2; exit 1; } + fi + ac_package=`echo $ac_package| sed 's/-/_/g'` + eval "with_${ac_package}=no" ;; + + --x) + # Obsolete; use --with-x. 
+ with_x=yes ;; + + -x-includes | --x-includes | --x-include | --x-includ | --x-inclu \ + | --x-incl | --x-inc | --x-in | --x-i) + ac_prev=x_includes ;; + -x-includes=* | --x-includes=* | --x-include=* | --x-includ=* | --x-inclu=* \ + | --x-incl=* | --x-inc=* | --x-in=* | --x-i=*) + x_includes="$ac_optarg" ;; + + -x-libraries | --x-libraries | --x-librarie | --x-librari \ + | --x-librar | --x-libra | --x-libr | --x-lib | --x-li | --x-l) + ac_prev=x_libraries ;; + -x-libraries=* | --x-libraries=* | --x-librarie=* | --x-librari=* \ + | --x-librar=* | --x-libra=* | --x-libr=* | --x-lib=* | --x-li=* | --x-l=*) + x_libraries="$ac_optarg" ;; + + -*) { echo "configure: error: $ac_option: invalid option; use --help to show usage" 1>&2; exit 1; } + ;; + + *) + if test -n "`echo $ac_option| sed 's/[-a-z0-9.]//g'`"; then + echo "configure: warning: $ac_option: invalid host type" 1>&2 + fi + if test "x$nonopt" != xNONE; then + { echo "configure: error: can only configure for one host and one target at a time" 1>&2; exit 1; } + fi + nonopt="$ac_option" + ;; + + esac +done + +if test -n "$ac_prev"; then + { echo "configure: error: missing argument to --`echo $ac_prev | sed 's/_/-/g'`" 1>&2; exit 1; } +fi + +trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 + +# File descriptor usage: +# 0 standard input +# 1 file creation +# 2 errors and warnings +# 3 some systems may open it to /dev/tty +# 4 used on the Kubota Titan +# 6 checking for... messages and results +# 5 compiler messages saved in config.log +if test "$silent" = yes; then + exec 6>/dev/null +else + exec 6>&1 +fi +exec 5>./config.log + +echo "\ +This file contains any messages produced by compilers while +running configure, to aid debugging if configure makes a mistake. +" 1>&5 + +# Strip out --no-create and --no-recursion so they do not pile up. +# Also quote any args containing shell metacharacters. 
+ac_configure_args= +for ac_arg +do + case "$ac_arg" in + -no-create | --no-create | --no-creat | --no-crea | --no-cre \ + | --no-cr | --no-c) ;; + -no-recursion | --no-recursion | --no-recursio | --no-recursi \ + | --no-recurs | --no-recur | --no-recu | --no-rec | --no-re | --no-r) ;; + *" "*|*" "*|*[\[\]\~\#\$\^\&\*\(\)\{\}\\\|\;\<\>\?]*) + ac_configure_args="$ac_configure_args '$ac_arg'" ;; + *) ac_configure_args="$ac_configure_args $ac_arg" ;; + esac +done + +# NLS nuisances. +# Only set LANG and LC_ALL to C if already set. +# These must not be set unconditionally because not all systems understand +# e.g. LANG=C (notably SCO). +if test "${LC_ALL+set}" = set; then LC_ALL=C; export LC_ALL; fi +if test "${LANG+set}" = set; then LANG=C; export LANG; fi + +# confdefs.h avoids OS command line length limits that DEFS can exceed. +rm -rf conftest* confdefs.h +# AIX cpp loses on an empty file, so make sure it contains at least a newline. +echo > confdefs.h + +# A filename unique to this package, relative to the directory that +# configure is in, which we can look for to find out if srcdir is correct. +ac_unique_file=include/config.h + +# Find the source files, if location was not specified. +if test -z "$srcdir"; then + ac_srcdir_defaulted=yes + # Try the directory containing this script, then its parent. + ac_prog=$0 + ac_confdir=`echo $ac_prog|sed 's%/[^/][^/]*$%%'` + test "x$ac_confdir" = "x$ac_prog" && ac_confdir=. + srcdir=$ac_confdir + if test ! -r $srcdir/$ac_unique_file; then + srcdir=.. + fi +else + ac_srcdir_defaulted=no +fi +if test ! -r $srcdir/$ac_unique_file; then + if test "$ac_srcdir_defaulted" = yes; then + { echo "configure: error: can not find sources in $ac_confdir or .." 1>&2; exit 1; } + else + { echo "configure: error: can not find sources in $srcdir" 1>&2; exit 1; } + fi +fi +srcdir=`echo "${srcdir}" | sed 's%\([^/]\)/*$%\1%'` + +# Prefer explicitly selected file to automatically selected ones. 
+if test -z "$CONFIG_SITE"; then + if test "x$prefix" != xNONE; then + CONFIG_SITE="$prefix/share/config.site $prefix/etc/config.site" + else + CONFIG_SITE="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" + fi +fi +for ac_site_file in $CONFIG_SITE; do + if test -r "$ac_site_file"; then + echo "loading site script $ac_site_file" + . "$ac_site_file" + fi +done + +if test -r "$cache_file"; then + echo "loading cache $cache_file" + . $cache_file +else + echo "creating cache $cache_file" + > $cache_file +fi + +ac_ext=c +# CFLAGS is not in ac_cpp because -g, -O, etc. are not valid cpp options. +ac_cpp='echo $CPP $CPPFLAGS 1>&5; +$CPP $CPPFLAGS' +ac_compile='echo ${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5; +${CC-cc} -c $CFLAGS $CPPFLAGS conftest.$ac_ext 1>&5 2>&5' +ac_link='echo ${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5; +${CC-cc} -o conftest $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS 1>&5 2>&5' + +if (echo "testing\c"; echo 1,2,3) | grep c >/dev/null; then + # Stardent Vistra SVR4 grep lacks -e, says ghazi@caip.rutgers.edu. + if (echo -n testing; echo 1,2,3) | sed s/-n/xn/ | grep xn >/dev/null; then + ac_n= ac_c=' +' ac_t=' ' + else + ac_n=-n ac_c= ac_t= + fi +else + ac_n= ac_c='\c' ac_t= +fi + + + +# From configure.in Revision: 1.10.4.7 + + +if test -n "$prefix"; then + myprefix="$prefix"; +else + myprefix='/usr/local/harvest'; +fi + +INSTALL_TOPDIR="$myprefix"; +INSTALL_LIBDIR="$myprefix/lib"; +INSTALL_BINDIR="$myprefix/bin"; +INSTALL_MANDIR="$myprefix/man"; + +THIS_HOST="`hostname`" +XTRA_CFLAGS='' +XTRA_LIBS='' +CRYPT_LIB='' + +SOCKET_PH_TYPE='default' + +echo '-----------------------' +echo "Installing Harvest in $myprefix" +echo '-----------------------' + + + + + + + + + + +# Extract the first word of "gcc", so it can be a program name with args. +set dummy gcc; ac_word=$2 +echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_prog_CC'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$CC"; then + ac_cv_prog_CC="$CC" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_CC="gcc" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_CC" && ac_cv_prog_CC="cc" +fi +fi +CC="$ac_cv_prog_CC" +if test -n "$CC"; then + echo "$ac_t""$CC" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +echo $ac_n "checking whether we are using GNU C""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_prog_gcc'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.c <&5 | egrep yes >/dev/null 2>&1; then + ac_cv_prog_gcc=yes +else + ac_cv_prog_gcc=no +fi +fi + +echo "$ac_t""$ac_cv_prog_gcc" 1>&6 +if test $ac_cv_prog_gcc = yes; then + GCC=yes + if test "${CFLAGS+set}" != set; then + echo $ac_n "checking whether ${CC-cc} accepts -g""... 
$ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_prog_gcc_g'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + echo 'void f(){}' > conftest.c +if test -z "`${CC-cc} -g -c conftest.c 2>&1`"; then + ac_cv_prog_gcc_g=yes +else + ac_cv_prog_gcc_g=no +fi +rm -f conftest* + +fi + +echo "$ac_t""$ac_cv_prog_gcc_g" 1>&6 + if test $ac_cv_prog_gcc_g = yes; then + CFLAGS="-g -O" + else + CFLAGS="-O" + fi + fi +else + GCC= + test "${CFLAGS+set}" = set || CFLAGS="-g" +fi + + +XXXCC=`echo $CC | awk '{print $1}'` +if test "`basename $XXXCC`" != "gcc" +then + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**'; + echo '** WARNING: You should compile Harvest using GNU cc.'; + echo "** Currently, you're using $CC"; + echo '** '; + echo '** For more information about the platforms on which'; + echo '** Harvest works, see: '; + echo '** http://harvest.cs.colorado.edu/harvest/FAQ.html#platforms'; + echo '** '; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + sleep 5 +fi + +echo $ac_n "checking how to run the C preprocessor""... $ac_c" 1>&6 +# On Suns, sometimes $CPP names a directory. 
+if test -n "$CPP" && test -d "$CPP"; then + CPP= +fi +if test -z "$CPP"; then +if eval "test \"`echo '$''{'ac_cv_prog_CPP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + # This must be in double quotes, not single quotes, because CPP may get + # substituted into the Makefile and "${CC-cc}" will confuse make. + CPP="${CC-cc} -E" + # On the NeXT, cc -E runs the code through the compiler's parser, + # not just through cpp. + cat > conftest.$ac_ext < +Syntax Error +EOF +eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +ac_err=`grep -v '^ *+' conftest.out` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + rm -rf conftest* + CPP="${CC-cc} -E -traditional-cpp" + cat > conftest.$ac_ext < +Syntax Error +EOF +eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +ac_err=`grep -v '^ *+' conftest.out` +if test -z "$ac_err"; then + : +else + echo "$ac_err" >&5 + rm -rf conftest* + CPP=/lib/cpp +fi +rm -f conftest* +fi +rm -f conftest* + ac_cv_prog_CPP="$CPP" +fi + CPP="$ac_cv_prog_CPP" +else + ac_cv_prog_CPP="$CPP" +fi +echo "$ac_t""$CPP" 1>&6 + +ac_aux_dir= +for ac_dir in $srcdir $srcdir/.. $srcdir/../..; do + if test -f $ac_dir/install-sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install-sh -c" + break + elif test -f $ac_dir/install.sh; then + ac_aux_dir=$ac_dir + ac_install_sh="$ac_aux_dir/install.sh -c" + break + fi +done +if test -z "$ac_aux_dir"; then + { echo "configure: error: can not find install-sh or install.sh in $srcdir $srcdir/.. $srcdir/../.." 1>&2; exit 1; } +fi +ac_config_guess=$ac_aux_dir/config.guess +ac_config_sub=$ac_aux_dir/config.sub +ac_configure=$ac_aux_dir/configure # This should be Cygnus configure. + +# Find a good install program. We prefer a C program (faster), +# so one script is as good as another. 
But avoid the broken or +# incompatible versions: +# SysV /etc/install, /usr/sbin/install +# SunOS /usr/etc/install +# IRIX /sbin/install +# AIX /bin/install +# AFS /usr/afsws/bin/install, which mishandles nonexistent args +# SVR4 /usr/ucb/install, which tries to use the nonexistent group "staff" +# ./install, which can be erroneously created by make from ./install.sh. +echo $ac_n "checking for a BSD compatible install""... $ac_c" 1>&6 +if test -z "$INSTALL"; then +if eval "test \"`echo '$''{'ac_cv_path_install'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + # Account for people who put trailing slashes in PATH elements. + case "$ac_dir/" in + /|./|.//|/etc/*|/usr/sbin/*|/usr/etc/*|/sbin/*|/usr/afsws/bin/*|/usr/ucb/*) ;; + *) + # OSF1 and SCO ODT 3.0 have their own names for install. + for ac_prog in ginstall installbsd scoinst install; do + if test -f $ac_dir/$ac_prog; then + if test $ac_prog = install && + grep dspmsg $ac_dir/$ac_prog >/dev/null 2>&1; then + # AIX install. It has an incompatible calling convention. + # OSF/1 installbsd also uses dspmsg, but is usable. + : + else + ac_cv_path_install="$ac_dir/$ac_prog -c" + break 2 + fi + fi + done + ;; + esac + done + IFS="$ac_save_ifs" + +fi + if test "${ac_cv_path_install+set}" = set; then + INSTALL="$ac_cv_path_install" + else + # As a last resort, use the slow shell script. We don't cache a + # path for INSTALL within a source directory, because that will + # break other packages using the cache if that directory is + # removed, or if the path is relative. + INSTALL="$ac_install_sh" + fi +fi +echo "$ac_t""$INSTALL" 1>&6 + +# Use test -z because SunOS4 sh mishandles braces in ${var-val}. +# It thinks the first close brace ends the variable substitution. 
+test -z "$INSTALL_PROGRAM" && INSTALL_PROGRAM='${INSTALL}' + +test -z "$INSTALL_DATA" && INSTALL_DATA='${INSTALL} -m 644' + +# Extract the first word of "ranlib", so it can be a program name with args. +set dummy ranlib; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_prog_RANLIB'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test -n "$RANLIB"; then + ac_cv_prog_RANLIB="$RANLIB" # Let the user override the test. +else + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_prog_RANLIB="ranlib" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_prog_RANLIB" && ac_cv_prog_RANLIB=":" +fi +fi +RANLIB="$ac_cv_prog_RANLIB" +if test -n "$RANLIB"; then + echo "$ac_t""$RANLIB" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +echo $ac_n "checking whether ln -s works""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_prog_LN_S'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + rm -f conftestdata +if ln -s X conftestdata 2>/dev/null +then + rm -f conftestdata + ac_cv_prog_LN_S="ln -s" +else + ac_cv_prog_LN_S=ln +fi +fi +LN_S="$ac_cv_prog_LN_S" +if test "$ac_cv_prog_LN_S" = "ln -s"; then + echo "$ac_t""yes" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "rm", so it can be a program name with args. +set dummy rm; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_RM'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$RM" in + /*) + ac_cv_path_RM="$RM" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + ac_cv_path_RM="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_RM" && ac_cv_path_RM="/bin/false" + ;; +esac +fi +RM="$ac_cv_path_RM" +if test -n "$RM"; then + echo "$ac_t""$RM" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "mv", so it can be a program name with args. +set dummy mv; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_MV'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$MV" in + /*) + ac_cv_path_MV="$MV" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_path_MV="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_MV" && ac_cv_path_MV="/bin/false" + ;; +esac +fi +MV="$ac_cv_path_MV" +if test -n "$MV"; then + echo "$ac_t""$MV" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "mkdir", so it can be a program name with args. +set dummy mkdir; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_MKDIR'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$MKDIR" in + /*) + ac_cv_path_MKDIR="$MKDIR" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_path_MKDIR="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_MKDIR" && ac_cv_path_MKDIR="/bin/false" + ;; +esac +fi +MKDIR="$ac_cv_path_MKDIR" +if test -n "$MKDIR"; then + echo "$ac_t""$MKDIR" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "ln", so it can be a program name with args. +set dummy ln; ac_word=$2 +echo $ac_n "checking for $ac_word""... 
$ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_LN'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$LN" in + /*) + ac_cv_path_LN="$LN" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_path_LN="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_LN" && ac_cv_path_LN="cp" + ;; +esac +fi +LN="$ac_cv_path_LN" +if test -n "$LN"; then + echo "$ac_t""$LN" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "gzip", so it can be a program name with args. +set dummy gzip; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_CMD_GZIP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$CMD_GZIP" in + /*) + ac_cv_path_CMD_GZIP="$CMD_GZIP" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_path_CMD_GZIP="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_CMD_GZIP" && ac_cv_path_CMD_GZIP="/bin/false" + ;; +esac +fi +CMD_GZIP="$ac_cv_path_CMD_GZIP" +if test -n "$CMD_GZIP"; then + echo "$ac_t""$CMD_GZIP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "gunzip", so it can be a program name with args. +set dummy gunzip; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_CMD_GUNZIP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$CMD_GUNZIP" in + /*) + ac_cv_path_CMD_GUNZIP="$CMD_GUNZIP" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + ac_cv_path_CMD_GUNZIP="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_CMD_GUNZIP" && ac_cv_path_CMD_GUNZIP="/bin/false" + ;; +esac +fi +CMD_GUNZIP="$ac_cv_path_CMD_GUNZIP" +if test -n "$CMD_GUNZIP"; then + echo "$ac_t""$CMD_GUNZIP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "unzip", so it can be a program name with args. +set dummy unzip; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_CMD_UNZIP'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$CMD_UNZIP" in + /*) + ac_cv_path_CMD_UNZIP="$CMD_UNZIP" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_path_CMD_UNZIP="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_CMD_UNZIP" && ac_cv_path_CMD_UNZIP="/bin/false" + ;; +esac +fi +CMD_UNZIP="$ac_cv_path_CMD_UNZIP" +if test -n "$CMD_UNZIP"; then + echo "$ac_t""$CMD_UNZIP" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "uncompress", so it can be a program name with args. +set dummy uncompress; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_CMD_UNCOMPRESS'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$CMD_UNCOMPRESS" in + /*) + ac_cv_path_CMD_UNCOMPRESS="$CMD_UNCOMPRESS" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. 
+ if test -f $ac_dir/$ac_word; then + ac_cv_path_CMD_UNCOMPRESS="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_CMD_UNCOMPRESS" && ac_cv_path_CMD_UNCOMPRESS="/bin/false" + ;; +esac +fi +CMD_UNCOMPRESS="$ac_cv_path_CMD_UNCOMPRESS" +if test -n "$CMD_UNCOMPRESS"; then + echo "$ac_t""$CMD_UNCOMPRESS" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + +# Extract the first word of "perl", so it can be a program name with args. +set dummy perl; ac_word=$2 +echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_path_CMD_PERL'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + case "$CMD_PERL" in + /*) + ac_cv_path_CMD_PERL="$CMD_PERL" # Let the user override the test with a path. + ;; + *) + IFS="${IFS= }"; ac_save_ifs="$IFS"; IFS="${IFS}:" + for ac_dir in $PATH; do + test -z "$ac_dir" && ac_dir=. + if test -f $ac_dir/$ac_word; then + ac_cv_path_CMD_PERL="$ac_dir/$ac_word" + break + fi + done + IFS="$ac_save_ifs" + test -z "$ac_cv_path_CMD_PERL" && ac_cv_path_CMD_PERL="/bin/false" + ;; +esac +fi +CMD_PERL="$ac_cv_path_CMD_PERL" +if test -n "$CMD_PERL"; then + echo "$ac_t""$CMD_PERL" 1>&6 +else + echo "$ac_t""no" 1>&6 +fi + + +PERL="$CMD_PERL" + + +ac_header_dirent=no +for ac_hdr in dirent.h sys/ndir.h sys/dir.h ndir.h +do +ac_safe=`echo "$ac_hdr" | tr './\055' '___'` +echo $ac_n "checking for $ac_hdr that defines DIR""... 
$ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_header_dirent_$ac_safe'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#include <$ac_hdr> +int main() { return 0; } +int t() { +DIR *dirp = 0; +; return 0; } +EOF +if eval $ac_compile; then + rm -rf conftest* + eval "ac_cv_header_dirent_$ac_safe=yes" +else + rm -rf conftest* + eval "ac_cv_header_dirent_$ac_safe=no" +fi +rm -f conftest* + +fi +if eval "test \"`echo '$ac_cv_header_dirent_'$ac_safe`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_hdr=HAVE_`echo $ac_hdr | tr 'abcdedfghijklmnopqrstuvwxyz./\055' 'ABCDEDFGHIJKLMNOPQRSTUVWXYZ___'` + cat >> confdefs.h <&6 +fi +done +# Two versions of opendir et al. are in -ldir and -lx on SCO Xenix. +if test $ac_header_dirent = dirent.h; then +echo $ac_n "checking for -ldir""... $ac_c" 1>&6 +ac_lib_var=`echo dir | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-ldir $LIBS" +cat > conftest.$ac_ext <&6 + LIBS="$LIBS -ldir" +else + echo "$ac_t""no" 1>&6 +fi + +else +echo $ac_n "checking for -lx""... $ac_c" 1>&6 +ac_lib_var=`echo x | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lx $LIBS" +cat > conftest.$ac_ext <&6 + LIBS="$LIBS -lx" +else + echo "$ac_t""no" 1>&6 +fi + +fi + +# If we cannot run a trivial program, we must be cross compiling. +echo $ac_n "checking whether cross-compiling""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_c_cross'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + ac_cv_c_cross=yes +else +cat > conftest.$ac_ext </dev/null; then + ac_cv_c_cross=no +else + ac_cv_c_cross=yes +fi +fi +rm -fr conftest* +fi + +echo "$ac_t""$ac_cv_c_cross" 1>&6 +cross_compiling=$ac_cv_c_cross + +echo $ac_n "checking for ANSI C header files""... 
$ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_header_stdc'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +#include +#include +#include +EOF +eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +ac_err=`grep -v '^ *+' conftest.out` +if test -z "$ac_err"; then + rm -rf conftest* + ac_cv_header_stdc=yes +else + echo "$ac_err" >&5 + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +if test $ac_cv_header_stdc = yes; then + # SunOS 4.x string.h does not declare mem*, contrary to ANSI. +cat > conftest.$ac_ext < +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "memchr" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # ISC 2.0.2 stdlib.h does not declare free, contrary to ANSI. +cat > conftest.$ac_ext < +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "free" >/dev/null 2>&1; then + : +else + rm -rf conftest* + ac_cv_header_stdc=no +fi +rm -f conftest* + +fi + +if test $ac_cv_header_stdc = yes; then + # /bin/cc in Irix-4.0.5 gets non-ANSI ctype macros unless using -ansi. +if test "$cross_compiling" = yes; then + : +else +cat > conftest.$ac_ext < +#define ISLOWER(c) ('a' <= (c) && (c) <= 'z') +#define TOUPPER(c) (ISLOWER(c) ? 
'A' + ((c) - 'a') : (c)) +#define XOR(e, f) (((e) && !(f)) || (!(e) && (f))) +int main () { int i; for (i = 0; i < 256; i++) +if (XOR (islower (i), ISLOWER (i)) || toupper (i) != TOUPPER (i)) exit(2); +exit (0); } + +EOF +eval $ac_link +if test -s conftest && (./conftest; exit) 2>/dev/null; then + : +else + ac_cv_header_stdc=no +fi +fi +rm -fr conftest* +fi +fi + +echo "$ac_t""$ac_cv_header_stdc" 1>&6 +if test $ac_cv_header_stdc = yes; then + cat >> confdefs.h <<\EOF +#define STDC_HEADERS 1 +EOF + +fi + +for ac_hdr in config.h fcntl.h memory.h stdlib.h string.h unistd.h \ + syslog.h sys/file.h sys/types.h sys/syslog.h sys/time.h \ + regex.h netinet/in.h arpa/inet.h crypt.h alloca.h +do +ac_safe=`echo "$ac_hdr" | tr './\055' '___'` +echo $ac_n "checking for $ac_hdr""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_header_$ac_safe'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +EOF +eval "$ac_cpp conftest.$ac_ext >/dev/null 2>conftest.out" +ac_err=`grep -v '^ *+' conftest.out` +if test -z "$ac_err"; then + rm -rf conftest* + eval "ac_cv_header_$ac_safe=yes" +else + echo "$ac_err" >&5 + rm -rf conftest* + eval "ac_cv_header_$ac_safe=no" +fi +rm -f conftest* +fi +if eval "test \"`echo '$ac_cv_header_'$ac_safe`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_hdr=HAVE_`echo $ac_hdr | tr 'abcdefghijklmnopqrstuvwxyz./\055' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ___'` + cat >> confdefs.h <&6 +fi +done + +cat > conftest.$ac_ext < +EOF +if (eval "$ac_cpp conftest.$ac_ext") 2>&5 | + egrep "mxfast" >/dev/null 2>&1; then + rm -rf conftest* + XTRA_CFLAGS="$XTRA_CFLAGS -DLNG_MALLINFO" +else + rm -rf conftest* + XTRA_CFLAGS="$XTRA_CFLAGS" +fi +rm -f conftest* + + +echo $ac_n "checking size of int""... 
$ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_sizeof_int'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } +else +cat > conftest.$ac_ext < +main() +{ + FILE *f=fopen("conftestval", "w"); + if (!f) exit(1); + fprintf(f, "%d\n", sizeof(int)); + exit(0); +} +EOF +eval $ac_link +if test -s conftest && (./conftest; exit) 2>/dev/null; then + ac_cv_sizeof_int=`cat conftestval` +else + ac_cv_sizeof_int=0 +fi +fi +rm -fr conftest* +fi +echo "$ac_t""$ac_cv_sizeof_int" 1>&6 +cat >> confdefs.h <&6 +if eval "test \"`echo '$''{'ac_cv_sizeof_long'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + if test "$cross_compiling" = yes; then + { echo "configure: error: can not run test program while cross compiling" 1>&2; exit 1; } +else +cat > conftest.$ac_ext < +main() +{ + FILE *f=fopen("conftestval", "w"); + if (!f) exit(1); + fprintf(f, "%d\n", sizeof(long)); + exit(0); +} +EOF +eval $ac_link +if test -s conftest && (./conftest; exit) 2>/dev/null; then + ac_cv_sizeof_long=`cat conftestval` +else + ac_cv_sizeof_long=0 +fi +fi +rm -fr conftest* +fi +echo "$ac_t""$ac_cv_sizeof_long" 1>&6 +cat >> confdefs.h </dev/null 2>&1; then + NO_LIBMALLOC=x + XTRA_CFLAGS="-DUSE_MALLINFO=0 -DUSE_MALLOPT=0 $XTRA_CFLAGS" + fi +fi + + +for ac_func in getdtablesize sysconf strerror setrlimit strdup \ + regcomp regexec regfree timegm +do +echo $ac_n "checking for $ac_func""... $ac_c" 1>&6 +if eval "test \"`echo '$''{'ac_cv_func_$ac_func'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + cat > conftest.$ac_ext < +/* Override any gcc2 internal prototype to avoid an error. */ +char $ac_func(); + +int main() { return 0; } +int t() { + +/* The GNU C library defines this for functions which it implements + to always fail with ENOSYS. Some functions are actually named + something starting with __ and the normal name is an alias. 
*/ +#if defined (__stub_$ac_func) || defined (__stub___$ac_func) +choke me +#else +$ac_func(); +#endif + +; return 0; } +EOF +if eval $ac_link; then + rm -rf conftest* + eval "ac_cv_func_$ac_func=yes" +else + rm -rf conftest* + eval "ac_cv_func_$ac_func=no" +fi +rm -f conftest* + +fi +if eval "test \"`echo '$ac_cv_func_'$ac_func`\" = yes"; then + echo "$ac_t""yes" 1>&6 + ac_tr_func=HAVE_`echo $ac_func | tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'` + cat >> confdefs.h <&6 +fi +done + + +echo $ac_n "checking for -lsocket""... $ac_c" 1>&6 +ac_lib_var=`echo socket | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lsocket $LIBS" +cat > conftest.$ac_ext <&6 + XTRA_LIBS="$XTRA_LIBS -lsocket" +else + echo "$ac_t""no" 1>&6 +fi + +if test x$NO_LIBMALLOC = x ; then + echo $ac_n "checking for -lmalloc""... $ac_c" 1>&6 +ac_lib_var=`echo malloc | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lmalloc $LIBS" +cat > conftest.$ac_ext <&6 + XTRA_LIBS="$XTRA_LIBS -lmalloc" +else + echo "$ac_t""no" 1>&6 +fi + +fi +echo $ac_n "checking for -lnsl""... $ac_c" 1>&6 +ac_lib_var=`echo nsl | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lnsl $LIBS" +cat > conftest.$ac_ext <&6 + XTRA_LIBS="$XTRA_LIBS -lnsl" +else + echo "$ac_t""no" 1>&6 +fi + +echo $ac_n "checking for -lresolv""... $ac_c" 1>&6 +ac_lib_var=`echo resolv | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lresolv $LIBS" +cat > conftest.$ac_ext <&6 + XTRA_LIBS="$XTRA_LIBS -lresolv" +else + echo "$ac_t""no" 1>&6 +fi + +echo $ac_n "checking for -lbsd""... 
$ac_c" 1>&6 +ac_lib_var=`echo bsd | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lbsd $LIBS" +cat > conftest.$ac_ext <&6 + XTRA_LIBS="$XTRA_LIBS -lbsd" +else + echo "$ac_t""no" 1>&6 +fi + + +echo $ac_n "checking for -l44bsd""... $ac_c" 1>&6 +ac_lib_var=`echo 44bsd | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-l44bsd $LIBS" +cat > conftest.$ac_ext <&6 + HAVE_44BSD="yes" +else + echo "$ac_t""no" 1>&6 +fi + +echo $ac_n "checking for -lresolv""... $ac_c" 1>&6 +ac_lib_var=`echo resolv | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lresolv $LIBS" +cat > conftest.$ac_ext <&6 + if test "$HAVE_44BSD" = "yes"; then XTRA_LIBS="$XTRA_LIBS -l44bsd"; fi +else + echo "$ac_t""no" 1>&6 +fi + + +echo $ac_n "checking for -lcrypt""... 
$ac_c" 1>&6 +ac_lib_var=`echo crypt | tr '.-/+' '___p'` +if eval "test \"`echo '$''{'ac_cv_lib_$ac_lib_var'+set}'`\" = set"; then + echo $ac_n "(cached) $ac_c" 1>&6 +else + ac_save_LIBS="$LIBS" +LIBS="-lcrypt $LIBS" +cat > conftest.$ac_ext <&6 + CRYPT_LIB="-lcrypt" +else + echo "$ac_t""no" 1>&6 +fi + + +XTRA_OBJS='' +if test x$NO_LIBMALLOC != x ; then + if test -r /usr/lib/debug/malloc.o ; then + XTRA_OBJS="$XTRA_OBJS /usr/lib/debug/malloc.o" + fi + if test -r /usr/lib/debug/mallocmap.o ; then + XTRA_OBJS="$XTRA_OBJS /usr/lib/debug/mallocmap.o" + fi +fi + + + + +if test -n "$XTRA_LIBS_OVERRIDE"; then + XTRA_LIBS="$XTRA_LIBS_OVERRIDE"; +fi + + +if test -x "/bin/uname"; then + XXMyOs=`/bin/uname`; +fi + +if test -z "$DEBUG_TOP"; then + if test "HP-UX" = "$XXMyOs"; then + DEBUG_TOP='#-g #-O' + else + DEBUG_TOP='-g #-O' + fi +fi + + +if test "HP-UX" = "$XXMyOs"; then + RANLIB=":"; +fi + +if test "IRIX" = "$XXMyOs"; then + XXXCC=`echo $CC | awk '{print $1}'` + if test "`basename $XXXCC`" = "gcc"; then + XTRA_CFLAGS="$XTRA_CFLAGS -ansi" + else + XTRA_CFLAGS="$XTRA_CFLAGS -D__STRICT_ANSI__" + fi +fi + + +rm -f core + +trap '' 1 2 15 +cat > confcache <<\EOF +# This file is a shell script that caches the results of configure +# tests run on this system so they can be shared between configure +# scripts and configure runs. It is not useful on other systems. +# If it contains results you don't want to keep, you may remove or edit it. +# +# By default, configure uses ./config.cache as the cache file, +# creating it if it does not exist already. You can give configure +# the --cache-file=FILE option to use a different cache file; that is +# what configure does when it calls configure scripts in +# subdirectories, so they share the cache. +# Giving --cache-file=/dev/null disables caching, for debugging configure. +# config.status only pays attention to the cache file if you give it the +# --recheck option to rerun configure. 
+# +EOF +# Ultrix sh set writes to stderr and can't be redirected directly, +# and sets the high bit in the cache file unless we assign to the vars. +(set) 2>&1 | + sed -n "s/^\([a-zA-Z0-9_]*_cv_[a-zA-Z0-9_]*\)=\(.*\)/\1=\${\1='\2'}/p" \ + >> confcache +if cmp -s $cache_file confcache; then + : +else + if test -w $cache_file; then + echo "updating cache $cache_file" + cat confcache > $cache_file + else + echo "not updating unwritable cache $cache_file" + fi +fi +rm -f confcache + +trap 'rm -fr conftest* confdefs* core core.* *.core $ac_clean_files; exit 1' 1 2 15 + +test "x$prefix" = xNONE && prefix=$ac_default_prefix +# Let make expand exec_prefix. +test "x$exec_prefix" = xNONE && exec_prefix='${prefix}' + +# Any assignment to VPATH causes Sun make to only execute +# the first set of double-colon rules, so remove it if not needed. +# If there is a colon in the path, we need to keep it. +if test "x$srcdir" = x.; then + ac_vpsub='/^[ ]*VPATH[ ]*=[^:]*$/d' +fi + +trap 'rm -f $CONFIG_STATUS conftest*; exit 1' 1 2 15 + +DEFS=-DHAVE_CONFIG_H + +# Without the "./", some shells look in PATH for config.status. +: ${CONFIG_STATUS=./config.status} + +echo creating $CONFIG_STATUS +rm -f $CONFIG_STATUS +cat > $CONFIG_STATUS </dev/null | sed 1q`: +# +# $0 $ac_configure_args +# +# Compiler output produced by configure, useful for debugging +# configure, is in ./config.log if it exists. 
+ +ac_cs_usage="Usage: $CONFIG_STATUS [--recheck] [--version] [--help]" +for ac_option +do + case "\$ac_option" in + -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) + echo "running \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion" + exec \${CONFIG_SHELL-/bin/sh} $0 $ac_configure_args --no-create --no-recursion ;; + -version | --version | --versio | --versi | --vers | --ver | --ve | --v) + echo "$CONFIG_STATUS generated by autoconf version 2.7" + exit 0 ;; + -help | --help | --hel | --he | --h) + echo "\$ac_cs_usage"; exit 0 ;; + *) echo "\$ac_cs_usage"; exit 1 ;; + esac +done + +ac_given_srcdir=$srcdir +ac_given_INSTALL="$INSTALL" + +trap 'rm -fr `echo "\ + ./lib/Makefile \ + ./server/Makefile \ + ./include/paths.h \ + ./regex/Makefile \ + ./url/Makefile \ + ./scripts/Makefile \ + ./scripts/RunCache \ + ./scripts/RunAccel \ + ./announce/Makefile \ + include/autoconf.h" | sed "s/:[^ ]*//g"` conftest*; exit 1' 1 2 15 +EOF +cat >> $CONFIG_STATUS < conftest.subs <<\\CEOF +$ac_vpsub +$extrasub +s%@CFLAGS@%$CFLAGS%g +s%@CPPFLAGS@%$CPPFLAGS%g +s%@CXXFLAGS@%$CXXFLAGS%g +s%@DEFS@%$DEFS%g +s%@LDFLAGS@%$LDFLAGS%g +s%@LIBS@%$LIBS%g +s%@exec_prefix@%$exec_prefix%g +s%@prefix@%$prefix%g +s%@program_transform_name@%$program_transform_name%g +s%@bindir@%$bindir%g +s%@sbindir@%$sbindir%g +s%@libexecdir@%$libexecdir%g +s%@datadir@%$datadir%g +s%@sysconfdir@%$sysconfdir%g +s%@sharedstatedir@%$sharedstatedir%g +s%@localstatedir@%$localstatedir%g +s%@libdir@%$libdir%g +s%@includedir@%$includedir%g +s%@oldincludedir@%$oldincludedir%g +s%@infodir@%$infodir%g +s%@mandir@%$mandir%g +s%@INSTALL_TOPDIR@%$INSTALL_TOPDIR%g +s%@INSTALL_LIBDIR@%$INSTALL_LIBDIR%g +s%@INSTALL_BINDIR@%$INSTALL_BINDIR%g +s%@INSTALL_MANDIR@%$INSTALL_MANDIR%g +s%@XTRA_CFLAGS@%$XTRA_CFLAGS%g +s%@THIS_HOST@%$THIS_HOST%g +s%@SOCKET_PH_TYPE@%$SOCKET_PH_TYPE%g +s%@CC@%$CC%g +s%@CPP@%$CPP%g +s%@INSTALL_PROGRAM@%$INSTALL_PROGRAM%g +s%@INSTALL_DATA@%$INSTALL_DATA%g 
+s%@RANLIB@%$RANLIB%g +s%@LN_S@%$LN_S%g +s%@RM@%$RM%g +s%@MV@%$MV%g +s%@MKDIR@%$MKDIR%g +s%@LN@%$LN%g +s%@CMD_GZIP@%$CMD_GZIP%g +s%@CMD_GUNZIP@%$CMD_GUNZIP%g +s%@CMD_UNZIP@%$CMD_UNZIP%g +s%@CMD_UNCOMPRESS@%$CMD_UNCOMPRESS%g +s%@CMD_PERL@%$CMD_PERL%g +s%@PERL@%$PERL%g +s%@XTRA_OBJS@%$XTRA_OBJS%g +s%@CRYPT_LIB@%$CRYPT_LIB%g +s%@XTRA_LIBS@%$XTRA_LIBS%g +s%@DEBUG_TOP@%$DEBUG_TOP%g + +CEOF +EOF +cat >> $CONFIG_STATUS <> $CONFIG_STATUS <<\EOF +for ac_file in .. $CONFIG_FILES; do if test "x$ac_file" != x..; then + # Support "outfile[:infile]", defaulting infile="outfile.in". + case "$ac_file" in + *:*) ac_file_in=`echo "$ac_file"|sed 's%.*:%%'` + ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; + *) ac_file_in="${ac_file}.in" ;; + esac + + # Adjust relative srcdir, etc. for subdirectories. + + # Remove last slash and all that follows it. Not all systems have dirname. + ac_dir=`echo $ac_file|sed 's%/[^/][^/]*$%%'` + if test "$ac_dir" != "$ac_file" && test "$ac_dir" != .; then + # The file is in a subdirectory. + test ! -d "$ac_dir" && mkdir "$ac_dir" + ac_dir_suffix="/`echo $ac_dir|sed 's%^\./%%'`" + # A "../" for each directory in $ac_dir_suffix. + ac_dots=`echo $ac_dir_suffix|sed 's%/[^/]*%../%g'` + else + ac_dir_suffix= ac_dots= + fi + + case "$ac_given_srcdir" in + .) srcdir=. + if test -z "$ac_dots"; then top_srcdir=. + else top_srcdir=`echo $ac_dots|sed 's%/$%%'`; fi ;; + /*) srcdir="$ac_given_srcdir$ac_dir_suffix"; top_srcdir="$ac_given_srcdir" ;; + *) # Relative path. + srcdir="$ac_dots$ac_given_srcdir$ac_dir_suffix" + top_srcdir="$ac_dots$ac_given_srcdir" ;; + esac + + case "$ac_given_INSTALL" in + [/$]*) INSTALL="$ac_given_INSTALL" ;; + *) INSTALL="$ac_dots$ac_given_INSTALL" ;; + esac + echo creating "$ac_file" + rm -f "$ac_file" + configure_input="Generated automatically from `echo $ac_file_in|sed 's%.*/%%'` by configure." 
+ case "$ac_file" in + *Makefile*) ac_comsub="1i\\ +# $configure_input" ;; + *) ac_comsub= ;; + esac + sed -e "$ac_comsub +s%@configure_input@%$configure_input%g +s%@srcdir@%$srcdir%g +s%@top_srcdir@%$top_srcdir%g +s%@INSTALL@%$INSTALL%g +" -f conftest.subs $ac_given_srcdir/$ac_file_in > $ac_file +fi; done +rm -f conftest.subs + +# These sed commands are passed to sed as "A NAME B NAME C VALUE D", where +# NAME is the cpp macro being defined and VALUE is the value it is being given. +# +# ac_d sets the value in "#define NAME VALUE" lines. +ac_dA='s%^\([ ]*\)#\([ ]*define[ ][ ]*\)' +ac_dB='\([ ][ ]*\)[^ ]*%\1#\2' +ac_dC='\3' +ac_dD='%g' +# ac_u turns "#undef NAME" with trailing blanks into "#define NAME VALUE". +ac_uA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_uB='\([ ]\)%\1#\2define\3' +ac_uC=' ' +ac_uD='\4%g' +# ac_e turns "#undef NAME" without trailing blanks into "#define NAME VALUE". +ac_eA='s%^\([ ]*\)#\([ ]*\)undef\([ ][ ]*\)' +ac_eB='$%\1#\2define\3' +ac_eC=' ' +ac_eD='%g' + +CONFIG_HEADERS=${CONFIG_HEADERS-"include/autoconf.h"} +for ac_file in .. $CONFIG_HEADERS; do if test "x$ac_file" != x..; then + # Support "outfile[:infile]", defaulting infile="outfile.in". + case "$ac_file" in + *:*) ac_file_in=`echo "$ac_file"|sed 's%.*:%%'` + ac_file=`echo "$ac_file"|sed 's%:.*%%'` ;; + *) ac_file_in="${ac_file}.in" ;; + esac + + echo creating $ac_file + + rm -f conftest.frag conftest.in conftest.out + cp $ac_given_srcdir/$ac_file_in conftest.in + +EOF + +# Transform confdefs.h into a sed script conftest.vals that substitutes +# the proper values into config.h.in to produce config.h. And first: +# Protect against being on the right side of a sed subst in config.status. +# Protect against being in an unquoted here document in config.status. 
+rm -f conftest.vals +cat > conftest.hdr <<\EOF +s/[\\&%]/\\&/g +s%[\\$`]%\\&%g +s%#define \([A-Za-z_][A-Za-z0-9_]*\) \(.*\)%${ac_dA}\1${ac_dB}\1${ac_dC}\2${ac_dD}%gp +s%ac_d%ac_u%gp +s%ac_u%ac_e%gp +EOF +sed -n -f conftest.hdr confdefs.h > conftest.vals +rm -f conftest.hdr + +# This sed command replaces #undef with comments. This is necessary, for +# example, in the case of _POSIX_SOURCE, which is predefined and required +# on some systems where configure will not decide to define it. +cat >> conftest.vals <<\EOF +s%^[ ]*#[ ]*undef[ ][ ]*[a-zA-Z_][a-zA-Z_0-9]*%/* & */% +EOF + +# Break up conftest.vals because some shells have a limit on +# the size of here documents, and old seds have small limits too. +# Maximum number of lines to put in a single here document. +ac_max_here_lines=12 + +rm -f conftest.tail +while : +do + ac_lines=`grep -c . conftest.vals` + # grep -c gives empty output for an empty file on some AIX systems. + if test -z "$ac_lines" || test "$ac_lines" -eq 0; then break; fi + # Write a limited-size here document to conftest.frag. + echo ' cat > conftest.frag <> $CONFIG_STATUS + sed ${ac_max_here_lines}q conftest.vals >> $CONFIG_STATUS + echo 'CEOF + sed -f conftest.frag conftest.in > conftest.out + rm -f conftest.in + mv conftest.out conftest.in +' >> $CONFIG_STATUS + sed 1,${ac_max_here_lines}d conftest.vals > conftest.tail + rm -f conftest.vals + mv conftest.tail conftest.vals +done +rm -f conftest.vals + +cat >> $CONFIG_STATUS <<\EOF + rm -f conftest.frag conftest.h + echo "/* $ac_file. Generated automatically by configure. 
*/" > conftest.h + cat conftest.in >> conftest.h + rm -f conftest.in + if cmp -s $ac_file conftest.h 2>/dev/null; then + echo "$ac_file is unchanged" + rm -f conftest.h + else + rm -f $ac_file + mv conftest.h $ac_file + fi +fi; done + + + +exit 0 +EOF +chmod +x $CONFIG_STATUS +rm -fr confdefs* $ac_clean_files +test "$no_create" = yes || ${CONFIG_SHELL-/bin/sh} $CONFIG_STATUS || exit 1 + diff --git a/configure.in b/configure.in new file mode 100644 index 00000000000..e81ee3c5409 --- /dev/null +++ b/configure.in @@ -0,0 +1,214 @@ +dnl +dnl Configuration input file for Harvest +dnl +dnl Darren Hardy, hardy@cs.colorado.edu, July 1994 (autoconf v1.11) +dnl Darren Hardy, hardy@cs.colorado.edu, July 1995 (autoconf v2.4) +dnl +dnl $Id: configure.in,v 1.1 1996/02/22 06:23:53 wessels Exp $ +dnl +dnl +dnl TODO: +dnl +dnl - use -lgen instead of -lregex on Solaris if available. +dnl +AC_INIT(include/config.h) +AC_CONFIG_HEADER(include/autoconf.h) +AC_REVISION($Revision: 1.1 $)dnl +AC_PREFIX_DEFAULT(/usr/local/harvest) + +dnl Build a copy of prefix + +if test -n "$prefix"; then + myprefix="$prefix"; +else + myprefix='/usr/local/harvest'; +fi + +INSTALL_TOPDIR="$myprefix"; +INSTALL_LIBDIR="$myprefix/lib"; +INSTALL_BINDIR="$myprefix/bin"; +INSTALL_MANDIR="$myprefix/man"; + +THIS_HOST="`hostname`" +XTRA_CFLAGS='' +XTRA_LIBS='' +CRYPT_LIB='' + +SOCKET_PH_TYPE='default' + +echo '-----------------------' +echo "Installing Harvest in $myprefix" +echo '-----------------------' + +dnl Substitutions +AC_SUBST(INSTALL_TOPDIR) +AC_SUBST(INSTALL_LIBDIR) +AC_SUBST(INSTALL_BINDIR) +AC_SUBST(INSTALL_MANDIR) +AC_SUBST(DEFS) +AC_SUBST(XTRA_CFLAGS) +AC_SUBST(THIS_HOST) +AC_SUBST(SOCKET_PH_TYPE) + +dnl Check for GNU cc +AC_PROG_CC + +dnl Warn if not using GNU cc +XXXCC=`echo $CC | awk '{print $1}'` +if test "`basename $XXXCC`" != "gcc" +then + echo '**************************************************************'; + echo '**************************************************************'; + echo 
'**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**'; + echo '** WARNING: You should compile Harvest using GNU cc.'; + echo "** Currently, you're using $CC"; + echo '** '; + echo '** For more information about the platforms on which'; + echo '** Harvest works, see: '; + echo '** http://harvest.cs.colorado.edu/harvest/FAQ.html#platforms'; + echo '** '; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + echo '**************************************************************'; + sleep 5 +fi + +dnl Check for programs +AC_PROG_CPP +AC_PROG_INSTALL +AC_PROG_RANLIB +AC_PROG_LN_S +AC_PATH_PROG(RM, rm, /bin/false) +AC_PATH_PROG(MV, mv, /bin/false) +AC_PATH_PROG(MKDIR, mkdir, /bin/false) +AC_PATH_PROG(LN, ln, cp) +AC_PATH_PROG(CMD_GZIP, gzip, /bin/false) +AC_PATH_PROG(CMD_GUNZIP, gunzip, /bin/false) +AC_PATH_PROG(CMD_UNZIP, unzip, /bin/false) +AC_PATH_PROG(CMD_UNCOMPRESS, uncompress, /bin/false) +AC_PATH_PROG(CMD_PERL, perl, /bin/false) + +dnl Set PERL too +PERL="$CMD_PERL" +AC_SUBST(PERL) + +dnl Check for headers +AC_HEADER_DIRENT +AC_HEADER_STDC +AC_CHECK_HEADERS(config.h fcntl.h memory.h stdlib.h string.h unistd.h \ + syslog.h sys/file.h sys/types.h sys/syslog.h sys/time.h \ + regex.h netinet/in.h arpa/inet.h crypt.h alloca.h) +AC_EGREP_HEADER(mxfast, malloc.h, [XTRA_CFLAGS="$XTRA_CFLAGS -DLNG_MALLINFO"], [XTRA_CFLAGS="$XTRA_CFLAGS"]) + +dnl Check for typedefs +AC_CHECK_SIZEOF(int) +AC_CHECK_SIZEOF(long) + + +dnl Decide if we should really use -lmalloc. It is known to have +dnl problems on SunOS 5.4 at least. 
+dnl +if test -x "/bin/uname"; then + if /bin/uname -sr | grep ['SunOS 5.[0-9]'] >/dev/null 2>&1; then + NO_LIBMALLOC=x + XTRA_CFLAGS="-DUSE_MALLINFO=0 -DUSE_MALLOPT=0 $XTRA_CFLAGS" + fi +fi + + +dnl Check for library functions +AC_CHECK_FUNCS(getdtablesize sysconf strerror setrlimit strdup \ + regcomp regexec regfree timegm) + +dnl Check for needed libraries +AC_CHECK_LIB(socket, main, [XTRA_LIBS="$XTRA_LIBS -lsocket"]) +if test x$NO_LIBMALLOC = x ; then + AC_CHECK_LIB(malloc, main, [XTRA_LIBS="$XTRA_LIBS -lmalloc"]) +fi +AC_CHECK_LIB(nsl, main, [XTRA_LIBS="$XTRA_LIBS -lnsl"]) +AC_CHECK_LIB(resolv, main, [XTRA_LIBS="$XTRA_LIBS -lresolv"]) +AC_CHECK_LIB(bsd, main, [XTRA_LIBS="$XTRA_LIBS -lbsd"]) + +dnl We found a version of libresolv on SunOS 4.x which requires -l44bsd +dnl We have to first check to see if -l44bsd is present and has the +dnl function inet_aton +AC_CHECK_LIB(44bsd, inet_aton, [HAVE_44BSD="yes"]) +AC_CHECK_LIB(resolv, inet_aton, [if test "$HAVE_44BSD" = "yes"; then XTRA_LIBS="$XTRA_LIBS -l44bsd"; fi]) + +dnl Check for libcrypt +dnl cached uses crypt(3) which may be in libc, or in libcrypt (eg FreeBSD) +AC_CHECK_LIB(crypt, crypt, [CRYPT_LIB="-lcrypt"]) + +dnl Need the debugging version of malloc if available +XTRA_OBJS='' +if test x$NO_LIBMALLOC != x ; then + if test -r /usr/lib/debug/malloc.o ; then + XTRA_OBJS="$XTRA_OBJS /usr/lib/debug/malloc.o" + fi + if test -r /usr/lib/debug/mallocmap.o ; then + XTRA_OBJS="$XTRA_OBJS /usr/lib/debug/mallocmap.o" + fi +fi +AC_SUBST(XTRA_OBJS) + +AC_SUBST(CRYPT_LIB) + +dnl Override XTRA_LIBS +if test -n "$XTRA_LIBS_OVERRIDE"; then + XTRA_LIBS="$XTRA_LIBS_OVERRIDE"; +fi +AC_SUBST(XTRA_LIBS) + +dnl Set the XXMyOs variable +dnl +if test -x "/bin/uname"; then + XXMyOs=`/bin/uname`; +fi + +dnl Set DEBUG_TOP +if test -z "$DEBUG_TOP"; then + if test "HP-UX" = "$XXMyOs"; then + DEBUG_TOP='#-g #-O' + else + DEBUG_TOP='-g #-O' + fi +fi +AC_SUBST(DEBUG_TOP) + +dnl Disable ranlib if on HP-UX +dnl +if test "HP-UX" = "$XXMyOs"; 
then + RANLIB=":"; +fi + +dnl Additions for IRIX +dnl - add ANSI compile flag +if test "IRIX" = "$XXMyOs"; then + XXXCC=`echo $CC | awk '{print $1}'` + if test "`basename $XXXCC`" = "gcc"; then + [XTRA_CFLAGS="$XTRA_CFLAGS -ansi"] + else + [XTRA_CFLAGS="$XTRA_CFLAGS -D__STRICT_ANSI__"] + fi +fi + + +dnl Clean up after OSF/1 core dump bug +rm -f core + +AC_OUTPUT(\ + ./lib/Makefile \ + ./server/Makefile \ + ./include/paths.h \ + ./regex/Makefile \ + ./url/Makefile \ + ./scripts/Makefile \ + ./scripts/RunCache \ + ./scripts/RunAccel \ + ./announce/Makefile \ +) diff --git a/include/GNUregex.h b/include/GNUregex.h new file mode 100644 index 00000000000..408dd210348 --- /dev/null +++ b/include/GNUregex.h @@ -0,0 +1,490 @@ +/* Definitions for data structures and routines for the regular + expression library, version 0.12. + + Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +#ifndef __REGEXP_LIBRARY_H__ +#define __REGEXP_LIBRARY_H__ + +/* POSIX says that <sys/types.h> must be included (by the caller) before + <regex.h>. */ + +#ifdef VMS +/* VMS doesn't have `size_t' in <sys/types.h>, even though POSIX says it + should be there. */ +#include <stddef.h> +#endif + + +/* The following bits are used to determine the regexp syntax we + recognize. The set/not-set meanings are chosen so that Emacs syntax + remains the value 0.
The bits are given in alphabetical order, and + the definitions shifted by one from the previous bit; thus, when we + add or remove a bit, only one other definition need change. */ +typedef unsigned reg_syntax_t; + +/* If this bit is not set, then \ inside a bracket expression is literal. + If set, then such a \ quotes the following character. */ +#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) + +/* If this bit is not set, then + and ? are operators, and \+ and \? are + literals. + If set, then \+ and \? are operators and + and ? are literals. */ +#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) + +/* If this bit is set, then character classes are supported. They are: + [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], + [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. + If not set, then character classes are not supported. */ +#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) + +/* If this bit is set, then ^ and $ are always anchors (outside bracket + expressions, of course). + If this bit is not set, then it depends: + ^ is an anchor if it is at the beginning of a regular + expression or after an open-group or an alternation operator; + $ is an anchor if it is at the end of a regular expression, or + before a close-group or an alternation operator. + + This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because + POSIX draft 11.2 says that * etc. in leading positions is undefined. + We already implemented a previous draft which made those constructs + invalid, though, so we haven't changed the code back. */ +#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) + +/* If this bit is set, then special characters are always special + regardless of where they are in the pattern. + If this bit is not set, then special characters are special only in + some contexts; otherwise they are ordinary. Specifically, + * + ? and intervals are only special when not after the beginning, + open-group, or alternation operator. 
*/ +#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) + +/* If this bit is set, then *, +, ?, and { cannot be first in an re or + immediately after an alternation or begin-group operator. */ +#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) + +/* If this bit is set, then . matches newline. + If not set, then it doesn't. */ +#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) + +/* If this bit is set, then . doesn't match NUL. + If not set, then it does. */ +#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) + +/* If this bit is set, nonmatching lists [^...] do not match newline. + If not set, they do. */ +#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) + +/* If this bit is set, either \{...\} or {...} defines an + interval, depending on RE_NO_BK_BRACES. + If not set, \{, \}, {, and } are literals. */ +#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) + +/* If this bit is set, +, ? and | aren't recognized as operators. + If not set, they are. */ +#define RE_LIMITED_OPS (RE_INTERVALS << 1) + +/* If this bit is set, newline is an alternation operator. + If not set, newline is literal. */ +#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) + +/* If this bit is set, then `{...}' defines an interval, and \{ and \} + are literals. + If not set, then `\{...\}' defines an interval. */ +#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) + +/* If this bit is set, (...) defines a group, and \( and \) are literals. + If not set, \(...\) defines a group, and ( and ) are literals. */ +#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) + +/* If this bit is set, then \<digit> matches <digit>. + If not set, then \<digit> is a back-reference. */ +#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) + +/* If this bit is set, then | is an alternation operator, and \| is literal. + If not set, then \| is an alternation operator, and | is literal.
*/ +#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) + +/* If this bit is set, then an ending range point collating higher + than the starting range point, as in [z-a], is invalid. + If not set, then when ending range point collates higher than the + starting range point, the range is ignored. */ +#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) + +/* If this bit is set, then an unmatched ) is ordinary. + If not set, then an unmatched ) is invalid. */ +#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) + +/* This global variable defines the particular regexp syntax to use (for + some interfaces). When a regexp is compiled, the syntax used is + stored in the pattern buffer, so changing this does not affect + already-compiled regexps. */ +extern reg_syntax_t re_syntax_options; + +/* Define combinations of the above bits for the standard possibilities. + (The [[[ comments delimit what gets put into the Texinfo file, so + don't delete them!) */ +/* [[[begin syntaxes]]] */ +#define RE_SYNTAX_EMACS 0 + +#define RE_SYNTAX_AWK \ + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +#define RE_SYNTAX_POSIX_AWK \ + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + +#define RE_SYNTAX_GREP \ + (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ + | RE_HAT_LISTS_NOT_NEWLINE | RE_INTERVALS \ + | RE_NEWLINE_ALT) + +#define RE_SYNTAX_EGREP \ + (RE_CHAR_CLASSES | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_HAT_LISTS_NOT_NEWLINE \ + | RE_NEWLINE_ALT | RE_NO_BK_PARENS \ + | RE_NO_BK_VBAR) + +#define RE_SYNTAX_POSIX_EGREP \ + (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES) + +/* P1003.2/D11.2, section 4.20.7.1, lines 5078ff. */ +#define RE_SYNTAX_ED RE_SYNTAX_POSIX_BASIC + +#define RE_SYNTAX_SED RE_SYNTAX_POSIX_BASIC + +/* Syntax bits common to both basic and extended POSIX regex syntax. 
*/ +#define _RE_SYNTAX_POSIX_COMMON \ + (RE_CHAR_CLASSES | RE_DOT_NEWLINE | RE_DOT_NOT_NULL \ + | RE_INTERVALS | RE_NO_EMPTY_RANGES) + +#define RE_SYNTAX_POSIX_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_BK_PLUS_QM) + +/* Differs from ..._POSIX_BASIC only in that RE_BK_PLUS_QM becomes + RE_LIMITED_OPS, i.e., \? \+ \| are not recognized. Actually, this + isn't minimal, since other operators, such as \`, aren't disabled. */ +#define RE_SYNTAX_POSIX_MINIMAL_BASIC \ + (_RE_SYNTAX_POSIX_COMMON | RE_LIMITED_OPS) + +#define RE_SYNTAX_POSIX_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INDEP_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_VBAR \ + | RE_UNMATCHED_RIGHT_PAREN_ORD) + +/* Differs from ..._POSIX_EXTENDED in that RE_CONTEXT_INVALID_OPS + replaces RE_CONTEXT_INDEP_OPS and RE_NO_BK_REFS is added. */ +#define RE_SYNTAX_POSIX_MINIMAL_EXTENDED \ + (_RE_SYNTAX_POSIX_COMMON | RE_CONTEXT_INDEP_ANCHORS \ + | RE_CONTEXT_INVALID_OPS | RE_NO_BK_BRACES \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_UNMATCHED_RIGHT_PAREN_ORD) +/* [[[end syntaxes]]] */ + +/* Maximum number of duplicates an interval can allow. Some systems + (erroneously) define this in other header files, but we want our + value, so remove any previous define. */ +#ifdef RE_DUP_MAX +#undef RE_DUP_MAX +#endif +#define RE_DUP_MAX ((1 << 15) - 1) + + +/* POSIX `cflags' bits (i.e., information for `regcomp'). */ + +/* If this bit is set, then use extended regular expression syntax. + If not set, then use basic regular expression syntax. */ +#define REG_EXTENDED 1 + +/* If this bit is set, then ignore case when matching. + If not set, then case is significant. */ +#define REG_ICASE (REG_EXTENDED << 1) + +/* If this bit is set, then anchors do not match at newline + characters in the string. + If not set, then anchors do match at newlines. */ +#define REG_NEWLINE (REG_ICASE << 1) + +/* If this bit is set, then report only success or fail in regexec. 
+ If not set, then returns differ between not matching and errors. */ +#define REG_NOSUB (REG_NEWLINE << 1) + + +/* POSIX `eflags' bits (i.e., information for regexec). */ + +/* If this bit is set, then the beginning-of-line operator doesn't match + the beginning of the string (presumably because it's not the + beginning of a line). + If not set, then the beginning-of-line operator does match the + beginning of the string. */ +#define REG_NOTBOL 1 + +/* Like REG_NOTBOL, except for the end-of-line. */ +#define REG_NOTEOL (1 << 1) + + +/* If any error codes are removed, changed, or added, update the + `re_error_msg' table in regex.c. */ +typedef enum +{ + REG_NOERROR = 0, /* Success. */ + REG_NOMATCH, /* Didn't find a match (for regexec). */ + + /* POSIX regcomp return error codes. (In the order listed in the + standard.) */ + REG_BADPAT, /* Invalid pattern. */ + REG_ECOLLATE, /* Not implemented. */ + REG_ECTYPE, /* Invalid character class name. */ + REG_EESCAPE, /* Trailing backslash. */ + REG_ESUBREG, /* Invalid back reference. */ + REG_EBRACK, /* Unmatched left bracket. */ + REG_EPAREN, /* Parenthesis imbalance. */ + REG_EBRACE, /* Unmatched \{. */ + REG_BADBR, /* Invalid contents of \{\}. */ + REG_ERANGE, /* Invalid range end. */ + REG_ESPACE, /* Ran out of memory. */ + REG_BADRPT, /* No preceding re for repetition op. */ + + /* Error codes we've added. */ + REG_EEND, /* Premature end. */ + REG_ESIZE, /* Compiled pattern bigger than 2^16 bytes. */ + REG_ERPAREN /* Unmatched ) or \); not returned from regcomp. */ +} reg_errcode_t; + +/* This data structure represents a compiled pattern. Before calling + the pattern compiler, the fields `buffer', `allocated', `fastmap', + `translate', and `no_sub' can be set. After the pattern has been + compiled, the `re_nsub' field is available. All other fields are + private to the regex routines. */ + +struct re_pattern_buffer +{ +/* [[[begin pattern_buffer]]] */ + /* Space that holds the compiled pattern. 
It is declared as + `unsigned char *' because its elements are + sometimes used as array indexes. */ + unsigned char *buffer; + + /* Number of bytes to which `buffer' points. */ + unsigned long allocated; + + /* Number of bytes actually used in `buffer'. */ + unsigned long used; + + /* Syntax setting with which the pattern was compiled. */ + reg_syntax_t syntax; + + /* Pointer to a fastmap, if any, otherwise zero. re_search uses + the fastmap, if there is one, to skip over impossible + starting points for matches. */ + char *fastmap; + + /* Either a translate table to apply to all characters before + comparing them, or zero for no translation. The translation + is applied to a pattern when it is compiled and to a string + when it is matched. */ + char *translate; + + /* Number of subexpressions found by the compiler. */ + size_t re_nsub; + + /* Zero if this pattern cannot match the empty string, one else. + Well, in truth it's used only in `re_search_2', to see + whether or not we should use the fastmap, so we don't set + this absolutely perfectly; see `re_compile_fastmap' (the + `duplicate' case). */ + unsigned can_be_null : 1; + + /* If REGS_UNALLOCATED, allocate space in the `regs' structure + for `max (RE_NREGS, re_nsub + 1)' groups. + If REGS_REALLOCATE, reallocate space if necessary. + If REGS_FIXED, use what's there. */ +#define REGS_UNALLOCATED 0 +#define REGS_REALLOCATE 1 +#define REGS_FIXED 2 + unsigned regs_allocated : 2; + + /* Set to zero when `regex_compile' compiles a pattern; set to one + by `re_compile_fastmap' if it updates the fastmap. */ + unsigned fastmap_accurate : 1; + + /* If set, `re_match_2' does not return information about + subexpressions. */ + unsigned no_sub : 1; + + /* If set, a beginning-of-line anchor doesn't match at the + beginning of the string. */ + unsigned not_bol : 1; + + /* Similarly for an end-of-line anchor. */ + unsigned not_eol : 1; + + /* If true, an anchor at a newline matches. 
*/ + unsigned newline_anchor : 1; + +/* [[[end pattern_buffer]]] */ +}; + +typedef struct re_pattern_buffer regex_t; + + +/* search.c (search_buffer) in Emacs needs this one opcode value. It is + defined both in `regex.c' and here. */ +#define RE_EXACTN_VALUE 1 + +/* Type for byte offsets within the string. POSIX mandates this. */ +typedef int regoff_t; + + +/* This is the structure we store register match data in. See + regex.texinfo for a full description of what registers match. */ +struct re_registers +{ + unsigned num_regs; + regoff_t *start; + regoff_t *end; +}; + + +/* If `regs_allocated' is REGS_UNALLOCATED in the pattern buffer, + `re_match_2' returns information about at least this many registers + the first time a `regs' structure is passed. */ +#ifndef RE_NREGS +#define RE_NREGS 30 +#endif + + +/* POSIX specification for registers. Aside from the different names than + `re_registers', POSIX uses an array of structures, instead of a + structure of arrays. */ +typedef struct +{ + regoff_t rm_so; /* Byte offset from string's start to substring's start. */ + regoff_t rm_eo; /* Byte offset from string's start to substring's end. */ +} regmatch_t; + +/* Declarations for routines. */ + +/* To avoid duplicating every routine declaration -- once with a + prototype (if we are ANSI), and once without (if we aren't) -- we + use the following macro to declare argument types. This + unfortunately clutters up the declarations a bit, but I think it's + worth it. */ + +#if __STDC__ + +#define _RE_ARGS(args) args + +#else /* not __STDC__ */ + +#define _RE_ARGS(args) () + +#endif /* not __STDC__ */ + +/* Sets the current default syntax to SYNTAX, and return the old syntax. + You can also simply assign to the `re_syntax_options' variable. */ +extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); + +/* Compile the regular expression PATTERN, with length LENGTH + and syntax given by the global `re_syntax_options', into the buffer + BUFFER. 
Return NULL if successful, and an error string if not. */ +extern const char *re_compile_pattern + _RE_ARGS ((const char *pattern, int length, + struct re_pattern_buffer *buffer)); + + +/* Compile a fastmap for the compiled pattern in BUFFER; used to + accelerate searches. Return 0 if successful and -2 if was an + internal error. */ +extern int re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); + + +/* Search in the string STRING (with length LENGTH) for the pattern + compiled into BUFFER. Start searching at position START, for RANGE + characters. Return the starting position of the match, -1 for no + match, or -2 for an internal error. Also return register + information in REGS (if REGS and BUFFER->no_sub are nonzero). */ +extern int re_search + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, struct re_registers *regs)); + + +/* Like `re_search', but search in the concatenation of STRING1 and + STRING2. Also, stop searching at index START + STOP. */ +extern int re_search_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop)); + + +/* Like `re_search', but return how many characters in STRING the regexp + in BUFFER matched, starting at position START. */ +extern int re_match + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs)); + + +/* Relates to `re_match' as `re_search_2' relates to `re_search'. */ +extern int re_match_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop)); + + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using BUFFER and REGS will use this memory + for recording register information. 
STARTS and ENDS must be + allocated with malloc, and must each be at least `NUM_REGS * sizeof + (regoff_t)' bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ +extern void re_set_registers + _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends)); + +/* 4.2 bsd compatibility. */ +extern char *re_comp _RE_ARGS ((const char *)); +extern int re_exec _RE_ARGS ((const char *)); + +/* POSIX compatibility. */ +extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); +extern int regexec + _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, + regmatch_t pmatch[], int eflags)); +extern size_t regerror + _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, + size_t errbuf_size)); +extern void regfree _RE_ARGS ((regex_t *preg)); + +#endif /* not __REGEXP_LIBRARY_H__ */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/include/autoconf.h.in b/include/autoconf.h.in new file mode 100644 index 00000000000..56994ec167d --- /dev/null +++ b/include/autoconf.h.in @@ -0,0 +1,97 @@ +/* include/autoconf.h.in. Generated automatically from configure.in by autoheader. */ + +/* Define if you have the ANSI C header files. */ +#undef STDC_HEADERS + +/* The number of bytes in a int. */ +#undef SIZEOF_INT + +/* The number of bytes in a long. */ +#undef SIZEOF_LONG + +/* Define if you have the getdtablesize function. */ +#undef HAVE_GETDTABLESIZE + +/* Define if you have the regcomp function. */ +#undef HAVE_REGCOMP + +/* Define if you have the regexec function. */ +#undef HAVE_REGEXEC + +/* Define if you have the regfree function. */ +#undef HAVE_REGFREE + +/* Define if you have the setrlimit function. 
*/ +#undef HAVE_SETRLIMIT + +/* Define if you have the strdup function. */ +#undef HAVE_STRDUP + +/* Define if you have the strerror function. */ +#undef HAVE_STRERROR + +/* Define if you have the sysconf function. */ +#undef HAVE_SYSCONF + +/* Define if you have the timegm function. */ +#undef HAVE_TIMEGM + +/* Define if you have the <alloca.h> header file. */ +#undef HAVE_ALLOCA_H + +/* Define if you have the <arpa/inet.h> header file. */ +#undef HAVE_ARPA_INET_H + +/* Define if you have the <config.h> header file. */ +#undef HAVE_CONFIG_H + +/* Define if you have the <crypt.h> header file. */ +#undef HAVE_CRYPT_H + +/* Define if you have the <dirent.h> header file. */ +#undef HAVE_DIRENT_H + +/* Define if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define if you have the <memory.h> header file. */ +#undef HAVE_MEMORY_H + +/* Define if you have the <ndir.h> header file. */ +#undef HAVE_NDIR_H + +/* Define if you have the <netinet/in.h> header file. */ +#undef HAVE_NETINET_IN_H + +/* Define if you have the <regex.h> header file. */ +#undef HAVE_REGEX_H + +/* Define if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define if you have the <string.h> header file. */ +#undef HAVE_STRING_H + +/* Define if you have the <sys/dir.h> header file. */ +#undef HAVE_SYS_DIR_H + +/* Define if you have the <sys/file.h> header file. */ +#undef HAVE_SYS_FILE_H + +/* Define if you have the <sys/ndir.h> header file. */ +#undef HAVE_SYS_NDIR_H + +/* Define if you have the <sys/syslog.h> header file. */ +#undef HAVE_SYS_SYSLOG_H + +/* Define if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + +/* Define if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define if you have the <syslog.h> header file. */ +#undef HAVE_SYSLOG_H + +/* Define if you have the <unistd.h> header file.
*/ +#undef HAVE_UNISTD_H diff --git a/include/util.h b/include/util.h new file mode 100644 index 00000000000..fa23c7c0170 --- /dev/null +++ b/include/util.h @@ -0,0 +1,187 @@ +/* + * Darren Hardy, hardy@cs.colorado.edu, April 1994 + * + * $Id: util.h,v 1.1 1996/02/22 06:23:56 wessels Exp $ + * + * ---------------------------------------------------------------------- + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. 
We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. 
+ * + */ +#ifndef _UTIL_H_ +#define _UTIL_H_ + +#include "config.h" +#include <stdio.h> +#include <time.h> + +#if !defined(SQUIDHOSTNAMELEN) +#include <sys/param.h> +#include <netdb.h> +#if !defined(MAXHOSTNAMELEN) || (MAXHOSTNAMELEN < 128) +#define SQUIDHOSTNAMELEN 128 +#else +#define SQUIDHOSTNAMELEN MAXHOSTNAMELEN +#endif +#endif + +#ifndef _PARAMS +#if defined(__STDC__) || defined(__cplusplus) || defined(__STRICT_ANSI__) +#define _PARAMS(ARGS) ARGS +#else /* Traditional C */ +#define _PARAMS(ARGS) () +#endif /* __STDC__ */ +#endif /* _PARAMS */ + +#ifdef NO_STRDUP +char *strdup _PARAMS((char *)); /* Duplicate a string */ +#endif +char *xstrdup _PARAMS((char *)); /* Duplicate a string */ + +/* from xmalloc.c */ +void *xmalloc _PARAMS((size_t)); /* Wrapper for malloc(3) */ +void *xrealloc _PARAMS((void *, size_t)); /* Wrapper for realloc(3) */ +void *xcalloc _PARAMS((int, size_t)); /* Wrapper for calloc(3) */ +void xfree _PARAMS((void *)); /* Wrapper for free(3) */ +char *xstrdup _PARAMS ((char *)); +char *xstrerror _PARAMS(()); + +char *getfullhostname _PARAMS(()); + +/* from debug.c */ +#ifndef MAX_DEBUG_LEVELS +#define MAX_DEBUG_LEVELS 256 +#endif /* MAX_DEBUG_LEVELS */ + +#ifndef MAIN +extern int Harvest_do_debug; +extern int Harvest_debug_levels[]; +#endif /* MAIN */ + +#undef debug_ok_fast +#if USE_NO_DEBUGGING +#define debug_ok_fast(S,L) 0 +#else +#define debug_ok_fast(S,L) \ + ( \ + (Harvest_do_debug) && \ + ((Harvest_debug_levels[S] == -2) || \ + ((Harvest_debug_levels[S] != -1) && \ + ((L) <= Harvest_debug_levels[S]))) \ + ) +#endif /* USE_NO_DEBUGGING */ + +#undef Debug +#if USE_NO_DEBUGGING +#define Debug(section, level, X) /* empty */; +#else +#define Debug(section, level, X) \ + {if (debug_ok_fast((section),(level))) {Log X;}} +#endif + +void debug_reset _PARAMS((void)); +void debug_enable _PARAMS((int, int)); +void debug_disable _PARAMS((int)); +void debug_flag _PARAMS((char *)); +int debug_ok _PARAMS((int, int)); + +#define HOST_CACHE_TTL 3600 + +typedef struct _host { + char
key[SQUIDHOSTNAMELEN]; /* www.bar.com */ + char fqdn[SQUIDHOSTNAMELEN]; /* real.bar.com */ + char dotaddr[16]; /* 128.138.213.10 */ + char ipaddr[4]; + time_t last_t; /* last access of this info */ + int n; /* # of requests for this host */ + int addrlen; /* length of 'ipaddr', always 4 */ + struct _host *next; +} Host; + +extern Host *thisHost; + +void host_cache_init _PARAMS((void)); +Host *get_host _PARAMS((char *hostname)); +int delete_host _PARAMS((Host *h)); +int expire_host_cache _PARAMS((time_t timeout)); +void dump_host_cache _PARAMS((int, int)); + + + +char *mkhttpdlogtime _PARAMS((time_t *)); +extern char *mkrfc850 _PARAMS((time_t *)); +extern void init_log3 _PARAMS((char *pn, FILE *a, FILE *b)); +extern void debug_init(); +extern void log_errno2 _PARAMS((char *, int, char *)); + +#endif /* ndef _UTIL_H_ */ diff --git a/include/version.h b/include/version.h new file mode 100644 index 00000000000..d6e80f7e556 --- /dev/null +++ b/include/version.h @@ -0,0 +1,7 @@ +/* $Id: version.h,v 1.1 1996/02/22 06:23:56 wessels Exp $ + * + * SQUID_VERSION - String for version id of this distribution + */ +#ifndef SQUID_VERSION +#define SQUID_VERSION "1.0.beta0" +#endif diff --git a/lib/GNUregex.c b/lib/GNUregex.c new file mode 100644 index 00000000000..91d8d1019e7 --- /dev/null +++ b/lib/GNUregex.c @@ -0,0 +1,4949 @@ +#include "autoconf.h" /* get the #defines from GNU autoconf */ +/* Extended regular expression matching and search library, + version 0.12. + (Implements POSIX draft P1003.2/D11.2, except for + internationalization features.) + + Copyright (C) 1993 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version.
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + +/* AIX requires this to be the first thing in the file. */ +#if defined (_AIX) && !defined (REGEX_MALLOC) + #pragma alloca +#endif + +#define _GNU_SOURCE + +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +#include <sys/types.h> + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. */ +#ifdef emacs + +#include "lisp.h" +#include "buffer.h" +#include "syntax.h" + +/* Emacs uses `NULL' as a predicate. */ +#undef NULL + +#else /* not emacs */ + +/* We used to test for `BSTRING' here, but only GCC and Emacs define + `BSTRING', as far as I know, and neither of them use this code. */ +#if HAVE_STRING_H || STDC_HEADERS +#include <string.h> +#ifndef bcmp +#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) +#endif +#ifndef bcopy +#define bcopy(s, d, n) memcpy ((d), (s), (n)) +#endif +#ifndef bzero +#define bzero(s, n) memset ((s), 0, (n)) +#endif +#else +#include <strings.h> +#endif + +#ifdef STDC_HEADERS +#include <stdlib.h> +#else +char *malloc (); +char *realloc (); +#endif + + +/* Define the syntax stuff for \<, \>, etc. */ + +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +#ifndef Sword +#define Sword 1 +#endif + +#ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +#else /* not SYNTAX_TABLE */ + +/* How many characters in the character set.
*/ +#define CHAR_SET_SIZE 256 + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 'a'; c <= 'z'; c++) + re_syntax_table[c] = Sword; + + for (c = 'A'; c <= 'Z'; c++) + re_syntax_table[c] = Sword; + + for (c = '0'; c <= '9'; c++) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +#endif /* not SYNTAX_TABLE */ + +#define SYNTAX(c) re_syntax_table[c] + +#endif /* not emacs */ + +/* Get the interface, including the syntax bits. */ +#include "GNUregex.h" + +/* isalpha etc. are used for the character classes. */ +#include <ctype.h> + +#ifndef isascii +#define isascii(c) 1 +#endif + +#ifdef isblank +#define ISBLANK(c) (isascii (c) && isblank (c)) +#else +#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +#endif +#ifdef isgraph +#define ISGRAPH(c) (isascii (c) && isgraph (c)) +#else +#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) +#endif + +#define ISPRINT(c) (isascii (c) && isprint (c)) +#define ISDIGIT(c) (isascii (c) && isdigit (c)) +#define ISALNUM(c) (isascii (c) && isalnum (c)) +#define ISALPHA(c) (isascii (c) && isalpha (c)) +#define ISCNTRL(c) (isascii (c) && iscntrl (c)) +#define ISLOWER(c) (isascii (c) && islower (c)) +#define ISPUNCT(c) (isascii (c) && ispunct (c)) +#define ISSPACE(c) (isascii (c) && isspace (c)) +#define ISUPPER(c) (isascii (c) && isupper (c)) +#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) + +#ifndef NULL +#define NULL 0 +#endif + +/* We remove any previous definition of `SIGN_EXTEND_CHAR', + since ours (we hope) works properly with all combinations of + machines, compilers, `char' and `unsigned char' argument types. + (Per Bothner suggested the basic approach.) */ +#undef SIGN_EXTEND_CHAR +#if __STDC__ +#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +#else /* not __STDC__ */ +/* As in Harbison and Steele.
*/ +#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +#endif + +/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we + use `alloca' instead of `malloc'. This is because using malloc in + re_search* or re_match* could cause memory leaks when C-g is used in + Emacs; also, malloc is slower and causes storage fragmentation. On + the other hand, malloc is more portable, and easier to debug. + + Because we sometimes use alloca, some routines have to be macros, + not functions -- `alloca'-allocated space disappears at the end of the + function it is called in. */ + +#ifdef REGEX_MALLOC + +#define REGEX_ALLOCATE malloc +#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) + +#else /* not REGEX_MALLOC */ + +/* Emacs already defines alloca, sometimes. */ +#ifndef alloca + +/* Make alloca work the best possible way. */ +#ifdef __GNUC__ +#define alloca __builtin_alloca +#else /* not __GNUC__ */ +#if HAVE_ALLOCA_H +#include +#else /* not __GNUC__ or HAVE_ALLOCA_H */ +#ifndef _AIX /* Already did AIX, up at the top. */ +char *alloca (); +#endif /* not _AIX */ +#endif /* not HAVE_ALLOCA_H */ +#endif /* not __GNUC__ */ + +#endif /* not alloca */ + +#define REGEX_ALLOCATE alloca + +/* Assumes a `char *destination' variable. */ +#define REGEX_REALLOCATE(source, osize, nsize) \ + (destination = (char *) alloca (nsize), \ + bcopy (source, destination, osize), \ + destination) + +#endif /* not REGEX_MALLOC */ + + +/* True if `size1' is non-NULL and PTR is pointing anywhere inside + `string1' or just past its end. This works if PTR is NULL, which is + a good thing. */ +#define FIRST_STRING_P(ptr) \ + (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) + +/* (Re)Allocate N items of type T using malloc, or fail. 
*/
+#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
+#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) /* NOTE(review): ADDR is assigned the realloc result directly, so when
+   realloc fails ADDR becomes NULL and the old block can no longer be
+   reached through it.  */
+#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
+
+#define BYTEWIDTH 8 /* In bits.  */
+
+#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
+
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+
+typedef char boolean;
+#define false 0
+#define true 1
+
+/* These are the command codes that appear in compiled regular
+   expressions.  Some opcodes are followed by argument bytes.  A
+   command code can specify any interpretation whatsoever for its
+   arguments.  Zero bytes may appear in the compiled regular expression.
+
+   Two-byte arguments are written by STORE_NUMBER: low-order byte
+   first, then the high-order byte.
+
+   The value of `exactn' is needed in search.c (search_buffer) in Emacs.
+   So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
+   `exactn' we use here must also be 1.  */
+
+typedef enum
+{
+  no_op = 0,
+
+        /* Followed by one byte giving n, then by n literal bytes.  */
+  exactn = 1,
+
+        /* Matches any (more or less) character.  */
+  anychar,
+
+        /* Matches any one char belonging to specified set.  First
+           following byte is number of bitmap bytes.  Then come bytes
+           for a bitmap saying which chars are in.  Bits in each byte
+           are ordered low-bit-first.  A character is in the set if its
+           bit is 1.  A character too large to have a bit in the map is
+           automatically not in the set.  */
+  charset,
+
+        /* Same parameters as charset, but match any character that is
+           not one of those specified.  */
+  charset_not,
+
+        /* Start remembering the text that is matched, for storing in a
+           register.  Followed by one byte with the register number, in
+           the range 0 to one less than the pattern buffer's re_nsub
+           field.  Then followed by one byte with the number of groups
+           inner to this one.  (This last has to be part of the
+           start_memory only because we need it in the on_failure_jump
+           of re_match_2.)  */
+  start_memory,
+
+        /* Stop remembering the text that is matched and store it in a
+           memory register.  Followed by one byte with the register
+           number, in the range 0 to one less than `re_nsub' in the
+           pattern buffer, and one byte with the number of inner groups,
+           just like `start_memory'.  (We need the number of inner
+           groups here because we don't have any easy way of finding the
+           corresponding start_memory when we're at a stop_memory.)  */
+  stop_memory,
+
+        /* Match a duplicate of something remembered.  Followed by one
+           byte containing the register number.  */
+  duplicate,
+
+        /* Fail unless at beginning of line.  */
+  begline,
+
+        /* Fail unless at end of line.  */
+  endline,
+
+        /* Succeeds if at beginning of buffer (if emacs) or at beginning
+           of string to be matched (if not).  */
+  begbuf,
+
+        /* Analogously, for end of buffer/string.  */
+  endbuf,
+
+        /* Followed by two byte relative address to which to jump.  */
+  jump,
+
+	/* Same as jump, but marks the end of an alternative.  */
+  jump_past_alt,
+
+        /* Followed by two-byte relative address of place to resume at
+           in case of failure.  */
+  on_failure_jump,
+
+        /* Like on_failure_jump, but pushes a placeholder instead of the
+           current string position when executed.  */
+  on_failure_keep_string_jump,
+
+        /* Throw away latest failure point and then jump to following
+           two-byte relative address.  */
+  pop_failure_jump,
+
+        /* Change to pop_failure_jump if we know we won't have to
+           backtrack to match; otherwise change to jump.  This is used
+           to jump back to the beginning of a repeat.  If what follows
+           this jump clearly won't match what the repeat does, such that
+           we can be sure that there is no use backtracking out of
+           repetitions already matched, then we change it to a
+           pop_failure_jump.  Followed by two-byte address.  */
+  maybe_pop_jump,
+
+        /* Jump to following two-byte address, and push a dummy failure
+           point. This failure point will be thrown away if an attempt
+           is made to use it for a failure.  A `+' construct makes this
+           before the first repeat.  Also used as an intermediary kind
+           of jump when compiling an alternative.  */
+  dummy_failure_jump,
+
+	/* Push a dummy failure point and continue.  Used at the end of
+	   alternatives.  */
+  push_dummy_failure,
+
+        /* Followed by two-byte relative address and two-byte number n.
+           After matching N times, jump to the address upon failure.  */
+  succeed_n,
+
+        /* Followed by two-byte relative address, and two-byte number n.
+           Jump to the address N times, then fail.  */
+  jump_n,
+
+        /* Set the following two-byte relative address to the
+           subsequent two-byte number.  The address *includes* the two
+           bytes of number.  */
+  set_number_at,
+
+  wordchar,	/* Matches any word-constituent character.  */
+  notwordchar,	/* Matches any char that is not a word-constituent.  */
+
+  wordbeg,	/* Succeeds if at word beginning.  */
+  wordend,	/* Succeeds if at word end.  */
+
+  wordbound,	/* Succeeds if at a word boundary.  */
+  notwordbound	/* Succeeds if not at a word boundary.  */
+
+#ifdef emacs
+  ,before_dot,	/* Succeeds if before point.  */
+  at_dot,	/* Succeeds if at point.  */
+  after_dot,	/* Succeeds if after point.  */
+
+	/* Matches any character whose syntax is specified.  Followed by
+           a byte which contains a syntax code, e.g., Sword.  */
+  syntaxspec,
+
+	/* Matches any character whose syntax is not that specified.  */
+  notsyntaxspec
+#endif /* emacs */
+} re_opcode_t;
+
+/* Common operations on the compiled pattern.  */
+
+/* Store NUMBER in two contiguous bytes starting at DESTINATION: the
+   low-order eight bits go in DESTINATION[0] and the remaining bits,
+   shifted down by eight, in DESTINATION[1].  */
+
+#define STORE_NUMBER(destination, number)				\
+  do {									\
+    (destination)[0] = (number) & 0377;					\
+    (destination)[1] = (number) >> 8;					\
+  } while (0)
+
+/* Same as STORE_NUMBER, except increment DESTINATION to
+   the byte after where the number is stored.  Therefore, DESTINATION
+   must be an lvalue.  
*/ + +#define STORE_NUMBER_AND_INCR(destination, number) \ + do { \ + STORE_NUMBER (destination, number); \ + (destination) += 2; \ + } while (0) + +/* Put into DESTINATION a number stored in two contiguous bytes starting + at SOURCE. */ + +#define EXTRACT_NUMBER(destination, source) \ + do { \ + (destination) = *(source) & 0377; \ + (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \ + } while (0) + +#ifdef DEBUG +static void +extract_number (dest, source) + int *dest; + unsigned char *source; +{ + int temp = SIGN_EXTEND_CHAR (*(source + 1)); + *dest = *source & 0377; + *dest += temp << 8; +} + +#ifndef EXTRACT_MACROS /* To debug the macros. */ +#undef EXTRACT_NUMBER +#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* Same as EXTRACT_NUMBER, except increment SOURCE to after the number. + SOURCE must be an lvalue. */ + +#define EXTRACT_NUMBER_AND_INCR(destination, source) \ + do { \ + EXTRACT_NUMBER (destination, source); \ + (source) += 2; \ + } while (0) + +#ifdef DEBUG +static void +extract_number_and_incr (destination, source) + int *destination; + unsigned char **source; +{ + extract_number (destination, *source); + *source += 2; +} + +#ifndef EXTRACT_MACROS +#undef EXTRACT_NUMBER_AND_INCR +#define EXTRACT_NUMBER_AND_INCR(dest, src) \ + extract_number_and_incr (&dest, &src) +#endif /* not EXTRACT_MACROS */ + +#endif /* DEBUG */ + +/* If DEBUG is defined, Regex prints many voluminous messages about what + it is doing (if the variable `debug' is nonzero). If linked with the + main program in `iregex.c', you can enter patterns and strings + interactively. And if linked with the main program in `main.c' and + the other test files, you can run the already-written tests. */ + +#ifdef DEBUG + +/* We use standard I/O for debugging. */ +#include + +/* It is useful to test things that ``must'' be true when debugging. 
*/ +#include + +static int debug = 0; + +#define DEBUG_STATEMENT(e) e +#define DEBUG_PRINT1(x) if (debug) printf (x) +#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ + if (debug) print_partial_compiled_pattern (s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ + if (debug) print_double_string (w, s1, sz1, s2, sz2) + + +extern void printchar (); + +/* Print the fastmap in human-readable form. */ + +void +print_fastmap (fastmap) + char *fastmap; +{ + unsigned was_a_range = 0; + unsigned i = 0; + + while (i < (1 << BYTEWIDTH)) + { + if (fastmap[i++]) + { + was_a_range = 0; + printchar (i - 1); + while (i < (1 << BYTEWIDTH) && fastmap[i]) + { + was_a_range = 1; + i++; + } + if (was_a_range) + { + printf ("-"); + printchar (i - 1); + } + } + } + putchar ('\n'); +} + + +/* Print a compiled pattern string in human-readable form, starting at + the START pointer into it and ending just before the pointer END. */ + +void +print_partial_compiled_pattern (start, end) + unsigned char *start; + unsigned char *end; +{ + int mcnt, mcnt2; + unsigned char *p = start; + unsigned char *pend = end; + + if (start == NULL) + { + printf ("(null)\n"); + return; + } + + /* Loop over pattern commands. 
*/ + while (p < pend) + { + switch ((re_opcode_t) *p++) + { + case no_op: + printf ("/no_op"); + break; + + case exactn: + mcnt = *p++; + printf ("/exactn/%d", mcnt); + do + { + putchar ('/'); + printchar (*p++); + } + while (--mcnt); + break; + + case start_memory: + mcnt = *p++; + printf ("/start_memory/%d/%d", mcnt, *p++); + break; + + case stop_memory: + mcnt = *p++; + printf ("/stop_memory/%d/%d", mcnt, *p++); + break; + + case duplicate: + printf ("/duplicate/%d", *p++); + break; + + case anychar: + printf ("/anychar"); + break; + + case charset: + case charset_not: + { + register int c; + + printf ("/charset%s", + (re_opcode_t) *(p - 1) == charset_not ? "_not" : ""); + + assert (p + *p < pend); + + for (c = 0; c < *p; c++) + { + unsigned bit; + unsigned char map_byte = p[1 + c]; + + putchar ('/'); + + for (bit = 0; bit < BYTEWIDTH; bit++) + if (map_byte & (1 << bit)) + printchar (c * BYTEWIDTH + bit); + } + p += 1 + *p; + break; + } + + case begline: + printf ("/begline"); + break; + + case endline: + printf ("/endline"); + break; + + case on_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_jump/0/%d", mcnt); + break; + + case on_failure_keep_string_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/on_failure_keep_string_jump/0/%d", mcnt); + break; + + case dummy_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/dummy_failure_jump/0/%d", mcnt); + break; + + case push_dummy_failure: + printf ("/push_dummy_failure"); + break; + + case maybe_pop_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/maybe_pop_jump/0/%d", mcnt); + break; + + case pop_failure_jump: + extract_number_and_incr (&mcnt, &p); + printf ("/pop_failure_jump/0/%d", mcnt); + break; + + case jump_past_alt: + extract_number_and_incr (&mcnt, &p); + printf ("/jump_past_alt/0/%d", mcnt); + break; + + case jump: + extract_number_and_incr (&mcnt, &p); + printf ("/jump/0/%d", mcnt); + break; + + case succeed_n: + extract_number_and_incr (&mcnt, 
&p); + extract_number_and_incr (&mcnt2, &p); + printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2); + break; + + case jump_n: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2); + break; + + case set_number_at: + extract_number_and_incr (&mcnt, &p); + extract_number_and_incr (&mcnt2, &p); + printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2); + break; + + case wordbound: + printf ("/wordbound"); + break; + + case notwordbound: + printf ("/notwordbound"); + break; + + case wordbeg: + printf ("/wordbeg"); + break; + + case wordend: + printf ("/wordend"); + +#ifdef emacs + case before_dot: + printf ("/before_dot"); + break; + + case at_dot: + printf ("/at_dot"); + break; + + case after_dot: + printf ("/after_dot"); + break; + + case syntaxspec: + printf ("/syntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; + + case notsyntaxspec: + printf ("/notsyntaxspec"); + mcnt = *p++; + printf ("/%d", mcnt); + break; +#endif /* emacs */ + + case wordchar: + printf ("/wordchar"); + break; + + case notwordchar: + printf ("/notwordchar"); + break; + + case begbuf: + printf ("/begbuf"); + break; + + case endbuf: + printf ("/endbuf"); + break; + + default: + printf ("?%d", *(p-1)); + } + } + printf ("/\n"); +} + + +void +print_compiled_pattern (bufp) + struct re_pattern_buffer *bufp; +{ + unsigned char *buffer = bufp->buffer; + + print_partial_compiled_pattern (buffer, buffer + bufp->used); + printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); + + if (bufp->fastmap_accurate && bufp->fastmap) + { + printf ("fastmap: "); + print_fastmap (bufp->fastmap); + } + + printf ("re_nsub: %d\t", bufp->re_nsub); + printf ("regs_alloc: %d\t", bufp->regs_allocated); + printf ("can_be_null: %d\t", bufp->can_be_null); + printf ("newline_anchor: %d\n", bufp->newline_anchor); + printf ("no_sub: %d\t", bufp->no_sub); + printf ("not_bol: %d\t", bufp->not_bol); + printf ("not_eol: %d\t", bufp->not_eol); + 
printf ("syntax: %d\n", bufp->syntax); + /* Perhaps we should print the translate table? */ +} + + +void +print_double_string (where, string1, size1, string2, size2) + const char *where; + const char *string1; + const char *string2; + int size1; + int size2; +{ + unsigned this_char; + + if (where == NULL) + printf ("(null)"); + else + { + if (FIRST_STRING_P (where)) + { + for (this_char = where - string1; this_char < size1; this_char++) + printchar (string1[this_char]); + + where = string2; + } + + for (this_char = where - string2; this_char < size2; this_char++) + printchar (string2[this_char]); + } +} + +#else /* not DEBUG */ + +#undef assert +#define assert(e) + +#define DEBUG_STATEMENT(e) +#define DEBUG_PRINT1(x) +#define DEBUG_PRINT2(x1, x2) +#define DEBUG_PRINT3(x1, x2, x3) +#define DEBUG_PRINT4(x1, x2, x3, x4) +#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) + +#endif /* not DEBUG */ + +/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can + also be assigned to arbitrarily: each pattern buffer stores its own + syntax, so it can be changed between regex compilations. */ +reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; + + +/* Specify the precise syntax of regexps for compilation. This provides + for compatibility for various utilities which historically have + different, incompatible syntaxes. + + The argument SYNTAX is a bit mask comprised of the various bits + defined in regex.h. We return the old syntax. */ + +reg_syntax_t +re_set_syntax (syntax) + reg_syntax_t syntax; +{ + reg_syntax_t ret = re_syntax_options; + + re_syntax_options = syntax; + return ret; +} + +/* This table gives an error message for each of the error codes listed + in regex.h. Obviously the order here has to be same as there. 
*/
+
+static const char *re_error_msg[] =
+  { NULL,					/* REG_NOERROR */
+    "No match",					/* REG_NOMATCH */
+    "Invalid regular expression",		/* REG_BADPAT */
+    "Invalid collation character",		/* REG_ECOLLATE */
+    "Invalid character class name",		/* REG_ECTYPE */
+    "Trailing backslash",			/* REG_EESCAPE */
+    "Invalid back reference",			/* REG_ESUBREG */
+    "Unmatched [ or [^",			/* REG_EBRACK */
+    "Unmatched ( or \\(",			/* REG_EPAREN */
+    "Unmatched \\{",				/* REG_EBRACE */
+    "Invalid content of \\{\\}",		/* REG_BADBR */
+    "Invalid range end",			/* REG_ERANGE */
+    "Memory exhausted",				/* REG_ESPACE */
+    "Invalid preceding regular expression",	/* REG_BADRPT */
+    "Premature end of regular expression",	/* REG_EEND */
+    "Regular expression too big",		/* REG_ESIZE */
+    "Unmatched ) or \\)",			/* REG_ERPAREN */
+  };
+
+/* Subroutine declarations and macros for regex_compile.  The macros
+   below are not self-contained: they read and update the locals `p',
+   `pend' and `translate' of the function they are expanded in.  */
+
+static void store_op1 (), store_op2 ();
+static void insert_op1 (), insert_op2 ();
+static boolean at_begline_loc_p (), at_endline_loc_p ();
+static boolean group_in_compile_stack ();
+static reg_errcode_t compile_range ();
+
+/* Fetch the next character in the uncompiled pattern---translating it
+   if necessary.  Also cast from a signed character in the constant
+   string passed to us by the user to an unsigned char that we can use
+   as an array index (in, e.g., `translate').  If the pattern is already
+   exhausted (`p == pend') this makes the enclosing function return
+   REG_EEND.  */
+#define PATFETCH(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++;						\
+    if (translate) c = translate[c];					\
+  } while (0)
+
+/* Fetch the next character in the uncompiled pattern, with no
+   translation.  Like PATFETCH, returns REG_EEND from the enclosing
+   function when no character is left.  */
+#define PATFETCH_RAW(c)							\
+  do {if (p == pend) return REG_EEND;					\
+    c = (unsigned char) *p++;						\
+  } while (0)
+
+/* Go backwards one character in the pattern.  */
+#define PATUNFETCH p--
+
+
+/* If `translate' is non-null, return translate[D], else just D.  We
+   cast the subscript to translate because some data is declared as
+   `char *', to avoid warnings when a string constant is passed.  
But
+   when we use a character as a subscript we must make it unsigned.  */
+#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
+
+
+/* Macros for outputting the compiled pattern into `buffer'.  They all
+   use `b' (the current end of the compiled pattern) and `bufp' from
+   the function they are expanded in.  */
+
+/* If the buffer isn't allocated when it comes in, use this.  */
+#define INIT_BUF_SIZE  32
+
+/* Make sure we have at least N more bytes of space in buffer.  Keeps
+   invoking EXTEND_BUFFER until the space is there; EXTEND_BUFFER can
+   return an error code from the enclosing function.  */
+#define GET_BUFFER_SPACE(n)						\
+    while (b - bufp->buffer + (n) > bufp->allocated)			\
+      EXTEND_BUFFER ()
+
+/* Make sure we have one more byte of buffer space and then add C to it.  */
+#define BUF_PUSH(c)							\
+  do {									\
+    GET_BUFFER_SPACE (1);						\
+    *b++ = (unsigned char) (c);						\
+  } while (0)
+
+
+/* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
+#define BUF_PUSH_2(c1, c2)						\
+  do {									\
+    GET_BUFFER_SPACE (2);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+  } while (0)
+
+
+/* As with BUF_PUSH_2, except for three bytes.  */
+#define BUF_PUSH_3(c1, c2, c3)						\
+  do {									\
+    GET_BUFFER_SPACE (3);						\
+    *b++ = (unsigned char) (c1);					\
+    *b++ = (unsigned char) (c2);					\
+    *b++ = (unsigned char) (c3);					\
+  } while (0)
+
+
+/* Store a jump with opcode OP at LOC to location TO.  We store a
+   relative address offset by the three bytes the jump itself occupies
+   (one opcode byte plus the two-byte offset).  */
+#define STORE_JUMP(op, loc, to)						\
+  store_op1 (op, loc, (to) - (loc) - 3)
+
+/* Likewise, for a two-argument jump.  */
+#define STORE_JUMP2(op, loc, to, arg)					\
+  store_op2 (op, loc, (to) - (loc) - 3, arg)
+
+/* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP(op, loc, to)					\
+  insert_op1 (op, loc, (to) - (loc) - 3, b)
+
+/* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
+#define INSERT_JUMP2(op, loc, to, arg)					\
+  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
+
+
+/* This is not an arbitrary limit: the arguments which represent offsets
+   into the pattern are two bytes long.  
So if 2^16 bytes turns out to
+   be too small, many things would have to change.  */
+#define MAX_BUF_SIZE (1L << 16)
+
+
+/* Double the size of the buffer via realloc (never going beyond
+   MAX_BUF_SIZE) and reset the pointers that pointed into the old block
+   to point to the correct places in the new one.  If the buffer is
+   already MAX_BUF_SIZE bytes, the enclosing function returns REG_ESIZE;
+   if realloc fails, it returns REG_ESPACE.
+
+   Uses `bufp', `b', `begalt', `fixup_alt_jump', `laststart' and
+   `pending_exact' from the function it is expanded in.
+
+   NOTE(review): the realloc result is stored straight into
+   `bufp->buffer', so on failure the field is NULL and the old block is
+   no longer reachable through it, while `bufp->allocated' already
+   holds the larger size.  */
+#define EXTEND_BUFFER()							\
+  do { 									\
+    unsigned char *old_buffer = bufp->buffer;				\
+    if (bufp->allocated == MAX_BUF_SIZE) 				\
+      return REG_ESIZE;							\
+    bufp->allocated <<= 1;						\
+    if (bufp->allocated > MAX_BUF_SIZE)					\
+      bufp->allocated = MAX_BUF_SIZE; 					\
+    bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
+    if (bufp->buffer == NULL)						\
+      return REG_ESPACE;						\
+    /* If the buffer moved, move all the pointers into it.  */		\
+    if (old_buffer != bufp->buffer)					\
+      {									\
+        b = (b - old_buffer) + bufp->buffer;				\
+        begalt = (begalt - old_buffer) + bufp->buffer;			\
+        if (fixup_alt_jump)						\
+          fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
+        if (laststart)							\
+          laststart = (laststart - old_buffer) + bufp->buffer;		\
+        if (pending_exact)						\
+          pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
+      }									\
+  } while (0)
+
+
+/* Since we have one byte reserved for the register number argument to
+   {start,stop}_memory, the maximum number of groups we can report
+   things about is what fits in that byte.  */
+#define MAX_REGNUM 255
+
+/* But patterns can have more than `MAX_REGNUM' registers.  We just
+   ignore the excess.  */
+typedef unsigned regnum_t;
+
+
+/* Macros for the compile stack.  */
+
+/* Since offsets can go either forwards or backwards, this type needs to
+   be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  
*/ +typedef int pattern_offset_t; + +typedef struct +{ + pattern_offset_t begalt_offset; + pattern_offset_t fixup_alt_jump; + pattern_offset_t inner_group_offset; + pattern_offset_t laststart_offset; + regnum_t regnum; +} compile_stack_elt_t; + + +typedef struct +{ + compile_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} compile_stack_type; + + +#define INIT_COMPILE_STACK_SIZE 32 + +#define COMPILE_STACK_EMPTY (compile_stack.avail == 0) +#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) + +/* The next available element. */ +#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) + + +/* Set the bit for character C in a list. */ +#define SET_LIST_BIT(c) \ + (b[((unsigned char) (c)) / BYTEWIDTH] \ + |= 1 << (((unsigned char) c) % BYTEWIDTH)) + + +/* Get the next unsigned number in the uncompiled pattern. */ +#define GET_UNSIGNED_NUMBER(num) \ + { if (p != pend) \ + { \ + PATFETCH (c); \ + while (ISDIGIT (c)) \ + { \ + if (num < 0) \ + num = 0; \ + num = num * 10 + c - '0'; \ + if (p == pend) \ + break; \ + PATFETCH (c); \ + } \ + } \ + } + +#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ + +#define IS_CHAR_CLASS(string) \ + (STREQ (string, "alpha") || STREQ (string, "upper") \ + || STREQ (string, "lower") || STREQ (string, "digit") \ + || STREQ (string, "alnum") || STREQ (string, "xdigit") \ + || STREQ (string, "space") || STREQ (string, "print") \ + || STREQ (string, "punct") || STREQ (string, "graph") \ + || STREQ (string, "cntrl") || STREQ (string, "blank")) + +/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. + Returns one of error codes defined in `regex.h', or zero for success. + + Assumes the `allocated' (and perhaps `buffer') and `translate' + fields are set in BUFP on entry. 
+ + If it succeeds, results are put in BUFP (if it returns an error, the + contents of BUFP are undefined): + `buffer' is the compiled pattern; + `syntax' is set to SYNTAX; + `used' is set to the length of the compiled pattern; + `fastmap_accurate' is zero; + `re_nsub' is the number of subexpressions in PATTERN; + `not_bol' and `not_eol' are zero; + + The `fastmap' and `newline_anchor' fields are neither + examined nor set. */ + +static reg_errcode_t +regex_compile (pattern, size, syntax, bufp) + const char *pattern; + int size; + reg_syntax_t syntax; + struct re_pattern_buffer *bufp; +{ + /* We fetch characters from PATTERN here. Even though PATTERN is + `char *' (i.e., signed), we declare these variables as unsigned, so + they can be reliably used as array indices. */ + register unsigned char c, c1; + + /* A random tempory spot in PATTERN. */ + const char *p1; + + /* Points to the end of the buffer, where we should append. */ + register unsigned char *b; + + /* Keeps track of unclosed groups. */ + compile_stack_type compile_stack; + + /* Points to the current (ending) position in the pattern. */ + const char *p = pattern; + const char *pend = pattern + size; + + /* How to translate the characters in the pattern. */ + char *translate = bufp->translate; + + /* Address of the count-byte of the most recently inserted `exactn' + command. This makes it possible to tell if a new exact-match + character can be added to that command or if the character requires + a new `exactn' command. */ + unsigned char *pending_exact = 0; + + /* Address of start of the most recently finished expression. + This tells, e.g., postfix * where to find the start of its + operand. Reset at the beginning of groups and alternatives. */ + unsigned char *laststart = 0; + + /* Address of beginning of regexp, or inside of last group. */ + unsigned char *begalt; + + /* Place in the uncompiled pattern (i.e., the {) to + which to go back if the interval is invalid. 
*/ + const char *beg_interval; + + /* Address of the place where a forward jump should go to the end of + the containing expression. Each alternative of an `or' -- except the + last -- ends with a forward jump of this sort. */ + unsigned char *fixup_alt_jump = 0; + + /* Counts open-groups as they are encountered. Remembered for the + matching close-group on the compile stack, so the same register + number is put in the stop_memory as the start_memory. */ + regnum_t regnum = 0; + +#ifdef DEBUG + DEBUG_PRINT1 ("\nCompiling pattern: "); + if (debug) + { + unsigned debug_count; + + for (debug_count = 0; debug_count < size; debug_count++) + printchar (pattern[debug_count]); + putchar ('\n'); + } +#endif /* DEBUG */ + + /* Initialize the compile stack. */ + compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t); + if (compile_stack.stack == NULL) + return REG_ESPACE; + + compile_stack.size = INIT_COMPILE_STACK_SIZE; + compile_stack.avail = 0; + + /* Initialize the pattern buffer. */ + bufp->syntax = syntax; + bufp->fastmap_accurate = 0; + bufp->not_bol = bufp->not_eol = 0; + + /* Set `used' to zero, so that if we return an error, the pattern + printer (for debugging) will think there's no pattern. We reset it + at the end. */ + bufp->used = 0; + + /* Always count groups, whether or not bufp->no_sub is set. */ + bufp->re_nsub = 0; + +#if !defined (emacs) && !defined (SYNTAX_TABLE) + /* Initialize the syntax table. */ + init_syntax_once (); +#endif + + if (bufp->allocated == 0) + { + if (bufp->buffer) + { /* If zero allocated, but buffer is non-null, try to realloc + enough space. This loses if buffer's address is bogus, but + that is the user's responsibility. */ + RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); + } + else + { /* Caller did not allocate a buffer. Do it for them. 
*/ + bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); + } + if (!bufp->buffer) return REG_ESPACE; + + bufp->allocated = INIT_BUF_SIZE; + } + + begalt = b = bufp->buffer; + + /* Loop through the uncompiled pattern until we're at the end. */ + while (p != pend) + { + PATFETCH (c); + + switch (c) + { + case '^': + { + if ( /* If at start of pattern, it's an operator. */ + p == pattern + 1 + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's come before. */ + || at_begline_loc_p (pattern, p, syntax)) + BUF_PUSH (begline); + else + goto normal_char; + } + break; + + + case '$': + { + if ( /* If at end of pattern, it's an operator. */ + p == pend + /* If context independent, it's an operator. */ + || syntax & RE_CONTEXT_INDEP_ANCHORS + /* Otherwise, depends on what's next. */ + || at_endline_loc_p (p, pend, syntax)) + BUF_PUSH (endline); + else + goto normal_char; + } + break; + + + case '+': + case '?': + if ((syntax & RE_BK_PLUS_QM) + || (syntax & RE_LIMITED_OPS)) + goto normal_char; + handle_plus: + case '*': + /* If there is no previous pattern... */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + return REG_BADRPT; + else if (!(syntax & RE_CONTEXT_INDEP_OPS)) + goto normal_char; + } + + { + /* Are we optimizing this jump? */ + boolean keep_string_p = false; + + /* 1 means zero (many) matches is allowed. */ + char zero_times_ok = 0, many_times_ok = 0; + + /* If there is a sequence of repetition chars, collapse it + down to just one (the right one). We can't combine + interval operators with these because of, e.g., `a{2}*', + which should only match an even number of `a's. 
*/ + + for (;;) + { + zero_times_ok |= c != '+'; + many_times_ok |= c != '?'; + + if (p == pend) + break; + + PATFETCH (c); + + if (c == '*' + || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) + ; + + else if (syntax & RE_BK_PLUS_QM && c == '\\') + { + if (p == pend) return REG_EESCAPE; + + PATFETCH (c1); + if (!(c1 == '+' || c1 == '?')) + { + PATUNFETCH; + PATUNFETCH; + break; + } + + c = c1; + } + else + { + PATUNFETCH; + break; + } + + /* If we get here, we found another repeat character. */ + } + + /* Star, etc. applied to an empty pattern is equivalent + to an empty pattern. */ + if (!laststart) + break; + + /* Now we know whether or not zero matches is allowed + and also whether or not two or more matches is allowed. */ + if (many_times_ok) + { /* More than one repetition is allowed, so put in at the + end a backward relative jump from `b' to before the next + jump we're going to put in below (which jumps from + laststart to after this jump). + + But if we are at the `*' in the exact sequence `.*\n', + insert an unconditional jump backwards to the ., + instead of the beginning of the loop. This way we only + push a failure point once, instead of every time + through the loop. */ + assert (p - 1 > pattern); + + /* Allocate the space for the jump. */ + GET_BUFFER_SPACE (3); + + /* We know we are not at the first character of the pattern, + because laststart was nonzero. And we've already + incremented `p', by the way, to be the character after + the `*'. Do we have to do something analogous here + for null bytes, because of RE_DOT_NOT_NULL? */ + if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') + && zero_times_ok + && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') + && !(syntax & RE_DOT_NEWLINE)) + { /* We have .*\n. */ + STORE_JUMP (jump, b, laststart); + keep_string_p = true; + } + else + /* Anything else. */ + STORE_JUMP (maybe_pop_jump, b, laststart - 3); + + /* We've added more stuff to the buffer. 
*/ + b += 3; + } + + /* On failure, jump from laststart to b + 3, which will be the + end of the buffer after this jump is inserted. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump + : on_failure_jump, + laststart, b + 3); + pending_exact = 0; + b += 3; + + if (!zero_times_ok) + { + /* At least one repetition is required, so insert a + `dummy_failure_jump' before the initial + `on_failure_jump' instruction of the loop. This + effects a skip over that instruction the first time + we hit that loop. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); + b += 3; + } + } + break; + + + case '.': + laststart = b; + BUF_PUSH (anychar); + break; + + + case '[': + { + boolean had_char_class = false; + + if (p == pend) return REG_EBRACK; + + /* Ensure that we have enough space to push a charset: the + opcode, the length count, and the bitset; 34 bytes in all. */ + GET_BUFFER_SPACE (34); + + laststart = b; + + /* We test `*p == '^' twice, instead of using an if + statement, so we only need one BUF_PUSH. */ + BUF_PUSH (*p == '^' ? charset_not : charset); + if (*p == '^') + p++; + + /* Remember the first position in the bracket expression. */ + p1 = p; + + /* Push the number of bytes in the bitmap. */ + BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); + + /* Clear the whole map. */ + bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); + + /* charset_not matches newline according to a syntax bit. */ + if ((re_opcode_t) b[-2] == charset_not + && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) + SET_LIST_BIT ('\n'); + + /* Read in characters and ranges, setting map bits. */ + for (;;) + { + if (p == pend) return REG_EBRACK; + + PATFETCH (c); + + /* \ might escape characters inside [...] and [^...]. */ + if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') + { + if (p == pend) return REG_EESCAPE; + + PATFETCH (c1); + SET_LIST_BIT (c1); + continue; + } + + /* Could be the end of the bracket expression. 
If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p != p1 + 1) + break; + + /* Look ahead to see if it's a range when the last thing + was a character class. */ + if (had_char_class && c == '-' && *p != ']') + return REG_ERANGE; + + /* Look ahead to see if it's a range when the last thing + was a character: if this is a hyphen not at the + beginning or the end of a list, then it's the range + operator. */ + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') + && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') + && *p != ']') + { + reg_errcode_t ret + = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) return ret; + } + + else if (p[0] == '-' && p[1] != ']') + { /* This handles ranges made up of characters only. */ + reg_errcode_t ret; + + /* Move past the `-'. */ + PATFETCH (c1); + + ret = compile_range (&p, pend, translate, syntax, b); + if (ret != REG_NOERROR) return ret; + } + + /* See if we're at the beginning of a possible character + class. */ + + else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') + { /* Leave room for the null. */ + char str[CHAR_CLASS_MAX_LENGTH + 1]; + + PATFETCH (c); + c1 = 0; + + /* If pattern is `[[:'. */ + if (p == pend) return REG_EBRACK; + + for (;;) + { + PATFETCH (c); + if (c == ':' || c == ']' || p == pend + || c1 == CHAR_CLASS_MAX_LENGTH) + break; + str[c1++] = c; + } + str[c1] = '\0'; + + /* If isn't a word bracketed by `[:' and:`]': + undo the ending character, the letters, and leave + the leading `:' and `[' (but set bits for them). 
*/ + if (c == ':' && *p == ']') + { + int ch; + boolean is_alnum = STREQ (str, "alnum"); + boolean is_alpha = STREQ (str, "alpha"); + boolean is_blank = STREQ (str, "blank"); + boolean is_cntrl = STREQ (str, "cntrl"); + boolean is_digit = STREQ (str, "digit"); + boolean is_graph = STREQ (str, "graph"); + boolean is_lower = STREQ (str, "lower"); + boolean is_print = STREQ (str, "print"); + boolean is_punct = STREQ (str, "punct"); + boolean is_space = STREQ (str, "space"); + boolean is_upper = STREQ (str, "upper"); + boolean is_xdigit = STREQ (str, "xdigit"); + + if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; + + /* Throw away the ] at the end of the character + class. */ + PATFETCH (c); + + if (p == pend) return REG_EBRACK; + + for (ch = 0; ch < 1 << BYTEWIDTH; ch++) + { + if ( (is_alnum && ISALNUM (ch)) + || (is_alpha && ISALPHA (ch)) + || (is_blank && ISBLANK (ch)) + || (is_cntrl && ISCNTRL (ch)) + || (is_digit && ISDIGIT (ch)) + || (is_graph && ISGRAPH (ch)) + || (is_lower && ISLOWER (ch)) + || (is_print && ISPRINT (ch)) + || (is_punct && ISPUNCT (ch)) + || (is_space && ISSPACE (ch)) + || (is_upper && ISUPPER (ch)) + || (is_xdigit && ISXDIGIT (ch))) + SET_LIST_BIT (ch); + } + had_char_class = true; + } + else + { + c1++; + while (c1--) + PATUNFETCH; + SET_LIST_BIT ('['); + SET_LIST_BIT (':'); + had_char_class = false; + } + } + else + { + had_char_class = false; + SET_LIST_BIT (c); + } + } + + /* Discard any (non)matching list bytes that are all 0 at the + end of the map. Decrease the map-length byte too. 
*/ + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; + b += b[-1]; + } + break; + + + case '(': + if (syntax & RE_NO_BK_PARENS) + goto handle_open; + else + goto normal_char; + + + case ')': + if (syntax & RE_NO_BK_PARENS) + goto handle_close; + else + goto normal_char; + + + case '\n': + if (syntax & RE_NEWLINE_ALT) + goto handle_alt; + else + goto normal_char; + + + case '|': + if (syntax & RE_NO_BK_VBAR) + goto handle_alt; + else + goto normal_char; + + + case '{': + if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) + goto handle_interval; + else + goto normal_char; + + + case '\\': + if (p == pend) return REG_EESCAPE; + + /* Do not translate the character after the \, so that we can + distinguish, e.g., \B from \b, even if we normally would + translate, e.g., B to b. */ + PATFETCH_RAW (c); + + switch (c) + { + case '(': + if (syntax & RE_NO_BK_PARENS) + goto normal_backslash; + + handle_open: + bufp->re_nsub++; + regnum++; + + if (COMPILE_STACK_FULL) + { + RETALLOC (compile_stack.stack, compile_stack.size << 1, + compile_stack_elt_t); + if (compile_stack.stack == NULL) return REG_ESPACE; + + compile_stack.size <<= 1; + } + + /* These are the values to restore when we hit end of this + group. They are all relative offsets, so that if the + whole pattern moves because of realloc, they will still + be valid. */ + COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; + COMPILE_STACK_TOP.fixup_alt_jump + = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; + COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; + COMPILE_STACK_TOP.regnum = regnum; + + /* We will eventually replace the 0 with the number of + groups inner to this one. But do not push a + start_memory for groups beyond the last one we can + represent in the compiled pattern. 
*/ + if (regnum <= MAX_REGNUM) + { + COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; + BUF_PUSH_3 (start_memory, regnum, 0); + } + + compile_stack.avail++; + + fixup_alt_jump = 0; + laststart = 0; + begalt = b; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. */ + pending_exact = 0; + break; + + + case ')': + if (syntax & RE_NO_BK_PARENS) goto normal_backslash; + + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + else + return REG_ERPAREN; + + handle_close: + if (fixup_alt_jump) + { /* Push a dummy failure point at the end of the + alternative for a possible future + `pop_failure_jump' to pop. See comments at + `push_dummy_failure' in `re_match_2'. */ + BUF_PUSH (push_dummy_failure); + + /* We allocated space for this jump when we assigned + to `fixup_alt_jump', in the `handle_alt' case below. */ + STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); + } + + /* See similar code for backslashed left paren above. */ + if (COMPILE_STACK_EMPTY) + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + else + return REG_ERPAREN; + + /* Since we just checked for an empty stack above, this + ``can't happen''. */ + assert (compile_stack.avail != 0); + { + /* We don't just want to restore into `regnum', because + later groups should continue to be numbered higher, + as in `(ab)c(de)' -- the second group is #2. */ + regnum_t this_group_regnum; + + compile_stack.avail--; + begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; + fixup_alt_jump + = COMPILE_STACK_TOP.fixup_alt_jump + ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + : 0; + laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; + this_group_regnum = COMPILE_STACK_TOP.regnum; + /* If we've reached MAX_REGNUM groups, then this open + won't actually generate any code, so we'll have to + clear pending_exact explicitly. 
*/ + pending_exact = 0; + + /* We're at the end of the group, so now we know how many + groups were inside this one. */ + if (this_group_regnum <= MAX_REGNUM) + { + unsigned char *inner_group_loc + = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; + + *inner_group_loc = regnum - this_group_regnum; + BUF_PUSH_3 (stop_memory, this_group_regnum, + regnum - this_group_regnum); + } + } + break; + + + case '|': /* `\|'. */ + if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) + goto normal_backslash; + handle_alt: + if (syntax & RE_LIMITED_OPS) + goto normal_char; + + /* Insert before the previous alternative a jump which + jumps to this alternative if the former fails. */ + GET_BUFFER_SPACE (3); + INSERT_JUMP (on_failure_jump, begalt, b + 6); + pending_exact = 0; + b += 3; + + /* The alternative before this one has a jump after it + which gets executed if it gets matched. Adjust that + jump so it will jump to this alternative's analogous + jump (put in below, which in turn will jump to the next + (if any) alternative's such jump, etc.). The last such + jump jumps to the correct final destination. A picture: + _____ _____ + | | | | + | v | v + a | b | c + + If we are at `b', then fixup_alt_jump right now points to a + three-byte space after `a'. We'll put in the jump, set + fixup_alt_jump to right after `b', and leave behind three + bytes which we'll fill in when we get to after `c'. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + /* Mark and leave space for a jump after this alternative, + to be filled in later either by next alternative or + when know we're at the end of a series of alternatives. */ + fixup_alt_jump = b; + GET_BUFFER_SPACE (3); + b += 3; + + laststart = 0; + begalt = b; + break; + + + case '{': + /* If \{ is a literal. */ + if (!(syntax & RE_INTERVALS) + /* If we're at `\{' and it's not the open-interval + operator. 
*/ + || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) + || (p - 2 == pattern && p == pend)) + goto normal_backslash; + + handle_interval: + { + /* If got here, then the syntax allows intervals. */ + + /* At least (most) this many matches must be made. */ + int lower_bound = -1, upper_bound = -1; + + beg_interval = p - 1; + + if (p == pend) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_EBRACE; + } + + GET_UNSIGNED_NUMBER (lower_bound); + + if (c == ',') + { + GET_UNSIGNED_NUMBER (upper_bound); + if (upper_bound < 0) upper_bound = RE_DUP_MAX; + } + else + /* Interval such as `{1}' => match exactly once. */ + upper_bound = lower_bound; + + if (lower_bound < 0 || upper_bound > RE_DUP_MAX + || lower_bound > upper_bound) + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_BADBR; + } + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (c != '\\') return REG_EBRACE; + + PATFETCH (c); + } + + if (c != '}') + { + if (syntax & RE_NO_BK_BRACES) + goto unfetch_interval; + else + return REG_BADBR; + } + + /* We just parsed a valid interval. */ + + /* If it's invalid to have no preceding re. */ + if (!laststart) + { + if (syntax & RE_CONTEXT_INVALID_OPS) + return REG_BADRPT; + else if (syntax & RE_CONTEXT_INDEP_OPS) + laststart = b; + else + goto unfetch_interval; + } + + /* If the upper bound is zero, don't want to succeed at + all; jump from `laststart' to `b + 3', which will be + the end of the buffer after we insert the jump. */ + if (upper_bound == 0) + { + GET_BUFFER_SPACE (3); + INSERT_JUMP (jump, laststart, b + 3); + b += 3; + } + + /* Otherwise, we have a nontrivial interval. When + we're all done, the pattern will look like: + set_number_at + set_number_at + succeed_n + + jump_n + (The upper bound and `jump_n' are omitted if + `upper_bound' is 1, though.) */ + else + { /* If the upper bound is > 1, we need to insert + more at the end of the loop. 
*/ + unsigned nbytes = 10 + (upper_bound > 1) * 10; + + GET_BUFFER_SPACE (nbytes); + + /* Initialize lower bound of the `succeed_n', even + though it will be set during matching by its + attendant `set_number_at' (inserted next), + because `re_compile_fastmap' needs to know. + Jump to the `jump_n' we might insert below. */ + INSERT_JUMP2 (succeed_n, laststart, + b + 5 + (upper_bound > 1) * 5, + lower_bound); + b += 5; + + /* Code to initialize the lower bound. Insert + before the `succeed_n'. The `5' is the last two + bytes of this `set_number_at', plus 3 bytes of + the following `succeed_n'. */ + insert_op2 (set_number_at, laststart, 5, lower_bound, b); + b += 5; + + if (upper_bound > 1) + { /* More than one repetition is allowed, so + append a backward jump to the `succeed_n' + that starts this interval. + + When we've reached this during matching, + we'll have matched the interval once, so + jump back only `upper_bound - 1' times. */ + STORE_JUMP2 (jump_n, b, laststart + 5, + upper_bound - 1); + b += 5; + + /* The location we want to set is the second + parameter of the `jump_n'; that is `b-2' as + an absolute address. `laststart' will be + the `set_number_at' we're about to insert; + `laststart+3' the number to set, the source + for the relative address. But we are + inserting into the middle of the pattern -- + so everything is getting moved up by 5. + Conclusion: (b - 2) - (laststart + 3) + 5, + i.e., b - laststart. + + We insert this at the beginning of the loop + so that if we fail during matching, we'll + reinitialize the bounds. */ + insert_op2 (set_number_at, laststart, b - laststart, + upper_bound - 1, b); + b += 5; + } + } + pending_exact = 0; + beg_interval = NULL; + } + break; + + unfetch_interval: + /* If an invalid interval, match the characters as literals. */ + assert (beg_interval); + p = beg_interval; + beg_interval = NULL; + + /* normal_char and normal_backslash need `c'. 
*/ + PATFETCH (c); + + if (!(syntax & RE_NO_BK_BRACES)) + { + if (p > pattern && p[-1] == '\\') + goto normal_backslash; + } + goto normal_char; + +#ifdef emacs + /* There is no way to specify the before_dot and after_dot + operators. rms says this is ok. --karl */ + case '=': + BUF_PUSH (at_dot); + break; + + case 's': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); + break; + + case 'S': + laststart = b; + PATFETCH (c); + BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); + break; +#endif /* emacs */ + + + case 'w': + laststart = b; + BUF_PUSH (wordchar); + break; + + + case 'W': + laststart = b; + BUF_PUSH (notwordchar); + break; + + + case '<': + BUF_PUSH (wordbeg); + break; + + case '>': + BUF_PUSH (wordend); + break; + + case 'b': + BUF_PUSH (wordbound); + break; + + case 'B': + BUF_PUSH (notwordbound); + break; + + case '`': + BUF_PUSH (begbuf); + break; + + case '\'': + BUF_PUSH (endbuf); + break; + + case '1': case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + if (syntax & RE_NO_BK_REFS) + goto normal_char; + + c1 = c - '0'; + + if (c1 > regnum) + return REG_ESUBREG; + + /* Can't back reference to a subexpression if inside of it. */ + if (group_in_compile_stack (compile_stack, c1)) + goto normal_char; + + laststart = b; + BUF_PUSH_2 (duplicate, c1); + break; + + + case '+': + case '?': + if (syntax & RE_BK_PLUS_QM) + goto handle_plus; + else + goto normal_backslash; + + default: + normal_backslash: + /* You might think it would be useful for \ to mean + not to translate; but if we don't translate it + it will never match anything. */ + c = TRANSLATE (c); + goto normal_char; + } + break; + + + default: + /* Expects the character in `c'. */ + normal_char: + /* If no exactn currently being built. */ + if (!pending_exact + + /* If last exactn not at current position. */ + || pending_exact + *pending_exact + 1 != b + + /* We have only one byte following the exactn for the count. 
*/ + || *pending_exact == (1 << BYTEWIDTH) - 1 + + /* If followed by a repetition operator. */ + || *p == '*' || *p == '^' + || ((syntax & RE_BK_PLUS_QM) + ? *p == '\\' && (p[1] == '+' || p[1] == '?') + : (*p == '+' || *p == '?')) + || ((syntax & RE_INTERVALS) + && ((syntax & RE_NO_BK_BRACES) + ? *p == '{' + : (p[0] == '\\' && p[1] == '{')))) + { + /* Start building a new exactn. */ + + laststart = b; + + BUF_PUSH_2 (exactn, 0); + pending_exact = b - 1; + } + + BUF_PUSH (c); + (*pending_exact)++; + break; + } /* switch (c) */ + } /* while p != pend */ + + + /* Through the pattern now. */ + + if (fixup_alt_jump) + STORE_JUMP (jump_past_alt, fixup_alt_jump, b); + + if (!COMPILE_STACK_EMPTY) + return REG_EPAREN; + + free (compile_stack.stack); + + /* We have succeeded; set the length of the buffer. */ + bufp->used = b - bufp->buffer; + +#ifdef DEBUG + if (debug) + { + DEBUG_PRINT1 ("\nCompiled pattern: "); + print_compiled_pattern (bufp); + } +#endif /* DEBUG */ + + return REG_NOERROR; +} /* regex_compile */ + +/* Subroutines for `regex_compile'. */ + +/* Store OP at LOC followed by two-byte integer parameter ARG. */ + +static void +store_op1 (op, loc, arg) + re_opcode_t op; + unsigned char *loc; + int arg; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg); +} + + +/* Like `store_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +store_op2 (op, loc, arg1, arg2) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; +{ + *loc = (unsigned char) op; + STORE_NUMBER (loc + 1, arg1); + STORE_NUMBER (loc + 3, arg2); +} + + +/* Copy the bytes from LOC to END to open up three bytes of space at LOC + for OP followed by two-byte integer parameter ARG. 
*/ + +static void +insert_op1 (op, loc, arg, end) + re_opcode_t op; + unsigned char *loc; + int arg; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 3; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op1 (op, loc, arg); +} + + +/* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2. */ + +static void +insert_op2 (op, loc, arg1, arg2, end) + re_opcode_t op; + unsigned char *loc; + int arg1, arg2; + unsigned char *end; +{ + register unsigned char *pfrom = end; + register unsigned char *pto = end + 5; + + while (pfrom != loc) + *--pto = *--pfrom; + + store_op2 (op, loc, arg1, arg2); +} + + +/* P points to just after a ^ in PATTERN. Return true if that ^ comes + after an alternative or a begin-subexpression. We assume there is at + least one character before the ^. */ + +static boolean +at_begline_loc_p (pattern, p, syntax) + const char *pattern, *p; + reg_syntax_t syntax; +{ + const char *prev = p - 2; + boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; + + return + /* After a subexpression? */ + (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) + /* After an alternative? */ + || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); +} + + +/* The dual of at_begline_loc_p. This one is for $. We assume there is + at least one character after the $, i.e., `P < PEND'. */ + +static boolean +at_endline_loc_p (p, pend, syntax) + const char *p, *pend; + int syntax; +{ + const char *next = p; + boolean next_backslash = *next == '\\'; + const char *next_next = p + 1 < pend ? p + 1 : NULL; + + return + /* Before a subexpression? */ + (syntax & RE_NO_BK_PARENS ? *next == ')' + : next_backslash && next_next && *next_next == ')') + /* Before an alternative? */ + || (syntax & RE_NO_BK_VBAR ? *next == '|' + : next_backslash && next_next && *next_next == '|'); +} + + +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and + false if it's not. 
*/ + +static boolean +group_in_compile_stack (compile_stack, regnum) + compile_stack_type compile_stack; + regnum_t regnum; +{ + int this_element; + + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; + + return false; +} + + +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. + + Return an error code. + + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ + +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + char *translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; + + const char *p = *p_ptr; + int range_start, range_end; + + if (p == pend) + return REG_ERANGE; + + /* Even though the pattern is a signed `char *', we need to fetch + with unsigned char *'s; if the high bit of the pattern character + is set, the range endpoints will be negative if we fetch using a + signed char *. + + We also want to fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + range_start = ((unsigned char *) p)[-2]; + range_end = ((unsigned char *) p)[0]; + + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; + + /* If the start is after the end, the range is empty. */ + if (range_start > range_end) + return syntax & RE_NO_EMPTY_RANGES ? 
REG_ERANGE : REG_NOERROR; + + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- the range is inclusive, so if `range_end' == 0xff + (assuming 8-bit characters), we would otherwise go into an infinite + loop, since all characters <= 0xff. */ + for (this_char = range_start; this_char <= range_end; this_char++) + { + SET_LIST_BIT (TRANSLATE (this_char)); + } + + return REG_NOERROR; +} + +/* Failure stack declarations and macros; both re_compile_fastmap and + re_match_2 use a failure stack. These have to be macros because of + REGEX_ALLOCATE. */ + + +/* Number of failure points for which to initially allocate space + when matching. If this number is exceeded, we allocate more + space, so it is not a hard limit. */ +#ifndef INIT_FAILURE_ALLOC +#define INIT_FAILURE_ALLOC 5 +#endif + +/* Roughly the maximum number of failure points on the stack. Would be + exactly that if always used MAX_FAILURE_SPACE each time we failed. + This is a variable only so users of regex can assign to it; we never + change it ourselves. */ +int re_max_failures = 2000; + +typedef const unsigned char *fail_stack_elt_t; + +typedef struct +{ + fail_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} fail_stack_type; + +#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) +#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) + + +/* Initialize `fail_stack'. Do `return -2' if the alloc fails. */ + +#define INIT_FAIL_STACK() \ + do { \ + fail_stack.stack = (fail_stack_elt_t *) \ + REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + \ + if (fail_stack.stack == NULL) \ + return -2; \ + \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ + } while (0) + + +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 
+ + Return 1 if succeeds, and 0 if either ran out of memory + allocating space for it or it was already too large. + + REGEX_REALLOCATE requires `destination' be declared. */ + +#define DOUBLE_FAIL_STACK(fail_stack) \ + ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ + ? 0 \ + : ((fail_stack).stack = (fail_stack_elt_t *) \ + REGEX_REALLOCATE ((fail_stack).stack, \ + (fail_stack).size * sizeof (fail_stack_elt_t), \ + ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + \ + (fail_stack).stack == NULL \ + ? 0 \ + : ((fail_stack).size <<= 1, \ + 1))) + + +/* Push PATTERN_OP on FAIL_STACK. + + Return 1 if was able to do so and 0 if ran out of memory allocating + space to do so. */ +#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ + ((FAIL_STACK_FULL () \ + && !DOUBLE_FAIL_STACK (fail_stack)) \ + ? 0 \ + : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ + 1)) + +/* This pushes an item onto the failure stack. Must be a four-byte + value. Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ITEM(item) \ + fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item + +/* The complement operation. Assumes `fail_stack' is nonempty. */ +#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +#define DEBUG_PUSH PUSH_FAILURE_ITEM +#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () +#else +#define DEBUG_PUSH(item) +#define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be + declared. + + Does `return FAILURE_CODE' if runs out of memory. 
*/ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. */ \ + int this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers. 
*/ \ + DEBUG_PRINT1 ("\n"); \ + \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + PUSH_FAILURE_ITEM (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + PUSH_FAILURE_ITEM (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ + PUSH_FAILURE_ITEM (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ + PUSH_FAILURE_ITEM (highest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_ITEM (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_ITEM (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register. */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +#define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +#define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack. 
*/ +#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAILURE_POINT pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the lowest and highest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. */ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ + int this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string.
*/ \ + string_temp = POP_FAILURE_ITEM (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. */ \ + high_reg = (unsigned) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ + \ + low_reg = (unsigned) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ + \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ + DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ + } \ + \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + +/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in + BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible + characters can start a string that matches the pattern. This fastmap + is used by re_search to skip quickly over impossible starting points. + + The caller must supply the address of a (1 << BYTEWIDTH)-byte data + area as BUFP->fastmap. + + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in + the pattern buffer. + + Returns 0 if we succeed, -2 if an internal error. */ + +int +re_compile_fastmap (bufp) + struct re_pattern_buffer *bufp; +{ + int j, k; + fail_stack_type fail_stack; +#ifndef REGEX_MALLOC + char *destination; +#endif + /* We don't push any register information onto the failure stack. 
*/ + unsigned num_regs = 0; + + register char *fastmap = bufp->fastmap; + unsigned char *pattern = bufp->buffer; + unsigned long size = bufp->used; + const unsigned char *p = pattern; + register unsigned char *pend = pattern + size; + + /* Assume that each path through the pattern can be null until + proven otherwise. We set this false at the bottom of switch + statement, to which we get only if a particular path doesn't + match the empty string. */ + boolean path_can_be_null = true; + + /* We aren't doing a `succeed_n' to begin with. */ + boolean succeed_n_p = false; + + assert (fastmap != NULL && p != NULL); + + INIT_FAIL_STACK (); + bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ + bufp->fastmap_accurate = 1; /* It will be when we're done. */ + bufp->can_be_null = 0; + + while (p != pend || !FAIL_STACK_EMPTY ()) + { + if (p == pend) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail]; + } + + /* We should never be about to go beyond the end of the pattern. */ + assert (p < pend); + +#ifdef SWITCH_ENUM_BUG + switch ((int) ((re_opcode_t) *p++)) +#else + switch ((re_opcode_t) *p++) +#endif + { + + /* I guess the idea here is to simply not bother with a fastmap + if a backreference is used, since it's too hard to figure out + the fastmap for the corresponding group. Setting + `can_be_null' stops `re_search_2' from using the fastmap, so + that is all we do. */ + case duplicate: + bufp->can_be_null = 1; + return 0; + + + /* Following are the cases which match a character. These end + with `break'. */ + + case exactn: + fastmap[p[1]] = 1; + break; + + + case charset: + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) + fastmap[j] = 1; + break; + + + case charset_not: + /* Chars beyond end of map must be allowed. 
*/ + for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) + if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) + fastmap[j] = 1; + break; + + + case wordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == Sword) + fastmap[j] = 1; + break; + + + case notwordchar: + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != Sword) + fastmap[j] = 1; + break; + + + case anychar: + /* `.' matches anything ... */ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; + + /* ... except perhaps newline. */ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = 0; + + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + return 0; + + /* Otherwise, have to check alternative paths. */ + break; + + +#ifdef emacs + case syntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) == (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + case notsyntaxspec: + k = *p++; + for (j = 0; j < (1 << BYTEWIDTH); j++) + if (SYNTAX (j) != (enum syntaxcode) k) + fastmap[j] = 1; + break; + + + /* All cases after this match the empty string. These end with + `continue'. */ + + + case before_dot: + case at_dot: + case after_dot: + continue; +#endif /* not emacs */ + + + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbound: + case notwordbound: + case wordbeg: + case wordend: + case push_dummy_failure: + continue; + + + case jump_n: + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case jump_past_alt: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + if (j > 0) + continue; + + /* Jump backward implies we just went through the body of a + loop and matched nothing. Opcode jumped to should be + `on_failure_jump' or `succeed_n'. Just treat it like an + ordinary jump. 
For a * loop, it has pushed its failure + point already; if so, discard that as redundant. */ + if ((re_opcode_t) *p != on_failure_jump + && (re_opcode_t) *p != succeed_n) + continue; + + p++; + EXTRACT_NUMBER_AND_INCR (j, p); + p += j; + + /* If what's on the stack is where we are now, pop it. */ + if (!FAIL_STACK_EMPTY () + && fail_stack.stack[fail_stack.avail - 1] == p) + fail_stack.avail--; + + continue; + + + case on_failure_jump: + case on_failure_keep_string_jump: + handle_on_failure_jump: + EXTRACT_NUMBER_AND_INCR (j, p); + + /* For some patterns, e.g., `(a?)?', `p+j' here points to the + end of the pattern. We don't want to push such a point, + since when we restore it above, entering the switch will + increment `p' past the end of the pattern. We don't need + to push such a point since we obviously won't find any more + fastmap entries beyond `pend'. Such a pattern can match + the null string, though. */ + if (p + j < pend) + { + if (!PUSH_PATTERN_OP (p + j, fail_stack)) + return -2; + } + else + bufp->can_be_null = 1; + + if (succeed_n_p) + { + EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */ + succeed_n_p = false; + } + + continue; + + + case succeed_n: + /* Get to the number of times to succeed. */ + p += 2; + + /* Increment p past the n for when k != 0. */ + EXTRACT_NUMBER_AND_INCR (k, p); + if (k == 0) + { + p -= 4; + succeed_n_p = true; /* Spaghetti code alert. */ + goto handle_on_failure_jump; + } + continue; + + + case set_number_at: + p += 4; + continue; + + + case start_memory: + case stop_memory: + p += 2; + continue; + + + default: + abort (); /* We have listed all the cases. */ + } /* switch *p++ */ + + /* Getting here means we have found the possible starting + characters for one path of the pattern -- and that the empty + string does not match. We need not follow this path further. + Instead, look at the next alternative (remembered on the + stack), or quit if no more. The test at the top of the loop + does these things. 
*/ + path_can_be_null = false; + p = pend; + } /* while p */ + + /* Set `can_be_null' for the last path (also the first path, if the + pattern is empty). */ + bufp->can_be_null |= path_can_be_null; + return 0; +} /* re_compile_fastmap */ + +/* Set REGS to hold NUM_REGS registers, storing them in STARTS and + ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use + this memory for recording register information. STARTS and ENDS + must be allocated using the malloc library routine, and must each + be at least NUM_REGS * sizeof (regoff_t) bytes long. + + If NUM_REGS == 0, then subsequent matches should allocate their own + register data. + + Unless this function is called, the first search or match using + PATTERN_BUFFER will allocate its own register data, without + freeing the old data. */ + +void +re_set_registers (bufp, regs, num_regs, starts, ends) + struct re_pattern_buffer *bufp; + struct re_registers *regs; + unsigned num_regs; + regoff_t *starts, *ends; +{ + if (num_regs) + { + bufp->regs_allocated = REGS_REALLOCATE; + regs->num_regs = num_regs; + regs->start = starts; + regs->end = ends; + } + else + { + bufp->regs_allocated = REGS_UNALLOCATED; + regs->num_regs = 0; + regs->start = regs->end = (regoff_t) 0; + } +} + +/* Searching routines. */ + +/* Like re_search_2, below, but only one string is specified, and + doesn't let you say where to stop matching. */ + +int +re_search (bufp, string, size, startpos, range, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, startpos, range; + struct re_registers *regs; +{ + return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + regs, size); +} + + +/* Using the compiled pattern in BUFP->buffer, first tries to match the + virtual concatenation of STRING1 and STRING2, starting first at index + STARTPOS, then at STARTPOS + 1, and so on. + + STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. + + RANGE is how far to scan while trying to match. 
RANGE = 0 means try + only at STARTPOS; in general, the last start tried is STARTPOS + + RANGE. A negative RANGE scans backwards: STARTPOS, then + STARTPOS - 1, and so on. + + In REGS, return the indices of the virtual concatenation of STRING1 + and STRING2 that matched the entire BUFP->buffer and its contained + subexpressions. + + Do not consider matching one past the index STOP in the virtual + concatenation of STRING1 and STRING2. + + We return either the position in the strings at which the match was + found, -1 if no match (or if STARTPOS is out of range), or -2 if + error (such as failure stack overflow, or a failure while + compiling the fastmap). */ + +int +re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int startpos; + int range; + struct re_registers *regs; + int stop; +{ + int val; + register char *fastmap = bufp->fastmap; + register char *translate = bufp->translate; + int total_size = size1 + size2; + int endpos = startpos + range; + + /* Check for out-of-range STARTPOS. */ + if (startpos < 0 || startpos > total_size) + return -1; + + /* Fix up RANGE if it might eventually take us outside + the virtual concatenation of STRING1 and STRING2. */ + if (endpos < -1) + range = -1 - startpos; + else if (endpos > total_size) + range = total_size - startpos; + + /* If the search isn't to be a backwards one, don't waste time in a + search for a pattern that must be anchored. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + { + if (startpos > 0) + return -1; + else + range = 1; + } + + /* Update the fastmap now if not correct already. */ + if (fastmap && !bufp->fastmap_accurate) + if (re_compile_fastmap (bufp) == -2) + return -2; + + /* Loop through the string, looking for a place to start matching. */ + for (;;) + { + /* If a fastmap is supplied, skip quickly over characters that + cannot be the start of a match. If the pattern can match the + null string, however, we don't need to skip characters; we want + the first null string. 
*/ + if (fastmap && startpos < total_size && !bufp->can_be_null) + { + if (range > 0) /* Searching forwards. */ + { + register const char *d; + register int lim = 0; + int irange = range; + /* If the scan would cross from string1 into string2, `lim' + is the amount of RANGE left once the end of string1 is + reached, so the skip loop below stops at that boundary. */ + if (startpos < size1 && startpos + range >= size1) + lim = range - (size1 - startpos); + + d = (startpos >= size1 ? string2 - size1 : string1) + startpos; + + /* Written out as an if-else to avoid testing `translate' + inside the loop. */ + if (translate) + while (range > lim + && !fastmap[(unsigned char) + translate[(unsigned char) *d++]]) + range--; + else + while (range > lim && !fastmap[(unsigned char) *d++]) + range--; + /* Advance STARTPOS by the number of characters skipped. */ + startpos += irange - range; + } + else /* Searching backwards. */ + { + register char c = (size1 == 0 || startpos >= size1 + ? string2[startpos - size1] + : string1[startpos]); + /* No match can start here; step to the next position + without calling the matcher. */ + if (!fastmap[(unsigned char) TRANSLATE (c)]) + goto advance; + } + } + + /* If can't match the null string, and that's all we have left, fail. */ + if (range >= 0 && startpos == total_size && fastmap + && !bufp->can_be_null) + return -1; + + val = re_match_2 (bufp, string1, size1, string2, size2, + startpos, regs, stop); + /* A non-negative result is a match; report where it starts. */ + if (val >= 0) + return startpos; + + if (val == -2) + return -2; + + advance: + /* RANGE exhausted means every start position has been tried; + otherwise step one position in the direction of the search. */ + if (!range) + break; + else if (range > 0) + { + range--; + startpos++; + } + else + { + range++; + startpos--; + } + } + return -1; +} /* re_search_2 */ + +/* Declarations and macros for re_match_2. */ + +static int bcmp_translate (); +static boolean alt_match_null_string_p (), + common_op_match_null_string_p (), + group_match_null_string_p (); + +/* Structure for per-register (a.k.a. per-group) information. + This must not be longer than one word, because we push this value + onto the failure stack. Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables. 
+ + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + unsigned r; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + while (0) + + +/* This converts PTR, a pointer into one of the search strings `string1' + and `string2' into an offset from the beginning of the virtual + concatenation of the two (a pointer into `string2' is biased by + `size1'). */ +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) + +/* Registers are set to a sentinel when they haven't yet matched. */ +#define REG_UNSET_VALUE ((char *) -1) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + + +/* Macros for dealing with the split strings in re_match_2. */ + +#define MATCHING_IN_FIRST_STRING (dend == end_match_1) + +/* Call before fetching a character with *d. This switches over to + string2 if necessary. 
*/ +#define PREFETCH() \ + while (d == dend) \ + { \ + /* End of string2 => fail. */ \ + if (dend == end_match_2) \ + goto fail; \ + /* End of string1 => advance to string2. */ \ + d = string2; \ + dend = end_match_2; \ + } + + +/* Test if at very beginning or at very end of the virtual concatenation + of `string1' and `string2'. If only one string, it's `string2'. */ +#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) +#define AT_STRINGS_END(d) ((d) == end2) + + +/* Test if D points to a character which is word-constituent. We have + two special cases to check for: if past the end of string1, look at + the first character in string2; and if before the beginning of + string2, look at the last character in string1. */ +#define WORDCHAR_P(d) \ + (SYNTAX ((d) == end1 ? *string2 \ + : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ + == Sword) + +/* Test if the character before D and the one at D differ with respect + to being word-constituent. The argument is parenthesized before + the subtraction so any pointer expression may be passed. */ +#define AT_WORD_BOUNDARY(d) \ + (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ + || WORDCHAR_P ((d) - 1) != WORDCHAR_P (d)) + + +/* Free everything we malloc. */ +#ifdef REGEX_MALLOC +/* Wrapped in do-while so that `FREE_VAR (x);' is a single statement, + even as the body of an unbraced if or else. */ +#define FREE_VAR(var) do { if (var) free (var); var = NULL; } while (0) +#define FREE_VARIABLES() \ + do { \ + FREE_VAR (fail_stack.stack); \ + FREE_VAR (regstart); \ + FREE_VAR (regend); \ + FREE_VAR (old_regstart); \ + FREE_VAR (old_regend); \ + FREE_VAR (best_regstart); \ + FREE_VAR (best_regend); \ + FREE_VAR (reg_info); \ + FREE_VAR (reg_dummy); \ + FREE_VAR (reg_info_dummy); \ + } while (0) +#else /* not REGEX_MALLOC */ +/* Some MIPS systems (at least) want this to free alloca'd storage. */ +#define FREE_VARIABLES() alloca (0) +#endif /* not REGEX_MALLOC */ + + +/* These values must meet several constraints. They must not be valid + register values; since we have a limit of 255 registers (because + we use only one byte in the pattern for the register number), we can + use numbers larger than 255. 
They must differ by 1, because of + NUM_FAILURE_ITEMS above. And the value for the lowest register must + be larger than the value for the highest register, so we do not try + to actually save any registers when none are active. */ +#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) +#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) + +/* Matching routines. */ + +#ifndef emacs /* Emacs never uses this. */ +/* re_match is like re_match_2 except it takes only a single string; + SIZE is used as the stop index. */ + +int +re_match (bufp, string, size, pos, regs) + struct re_pattern_buffer *bufp; + const char *string; + int size, pos; + struct re_registers *regs; + { + return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); +} +#endif /* not emacs */ + + +/* re_match_2 matches the compiled pattern in BUFP against + the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 + and SIZE2, respectively). We start matching at POS, and stop + matching at STOP. + + If REGS is non-null and the `no_sub' field of BUFP is zero, we + store offsets for the substring each group matched in REGS. See the + documentation for exactly how many groups we fill. + + We return -1 if no match, -2 if an internal error (such as the + failure stack overflowing). Otherwise, we return the length of the + matched substring. */ + +int +re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; +{ + /* General temporaries. */ + int mcnt; + unsigned char *p1; + + /* Just past the end of the corresponding string. */ + const char *end1, *end2; + + /* Pointers into string1 and string2, just past the last characters in + each to consider matching. */ + const char *end_match_1, *end_match_2; + + /* Where we are in the data, and the end of the current string. */ + const char *d, *dend; + + /* Where we are in the pattern, and the end of the pattern. 
*/ + unsigned char *p = bufp->buffer; + register unsigned char *pend = p + bufp->used; + + /* We use this to map every character in the string. */ + char *translate = bufp->translate; + + /* Failure point stack. Each place that can handle a failure further + down the line pushes a failure point on this stack. It consists of + restart, regend, and reg_info for all registers corresponding to + the subexpressions we're currently inside, plus the number of such + registers, and, finally, two char *'s. The first char * is where + to resume scanning the pattern; the second one is where to resume + scanning the strings. If the latter is zero, the failure point is + a ``dummy''; if a failure happens and the failure point is a dummy, + it gets discarded and the next next one is tried. */ + fail_stack_type fail_stack; +#ifdef DEBUG + static unsigned failure_id = 0; + unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; +#endif + + /* We fill all the registers internally, independent of what we + return, for use in backreferences. The number here includes + an element for register zero. */ + unsigned num_regs = bufp->re_nsub + 1; + + /* The currently active registers. */ + unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; + unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; + + /* Information on the contents of registers. These are pointers into + the input strings; they record just what was matched (on this + attempt) by a subexpression part of the pattern, that is, the + regnum-th regstart pointer points to where in the pattern we began + matching and the regnum-th regend points to right after where we + stopped matching the regnum-th subexpression. (The zeroth register + keeps track of what the whole pattern matches.) 
*/ + const char **regstart, **regend; + + /* If a group that's operated upon by a repetition operator fails to + match anything, then the register for its start will need to be + restored because it will have been set to wherever in the string we + are when we last see its open-group operator. Similarly for a + register's end. */ + const char **old_regstart, **old_regend; + + /* The is_active field of reg_info helps us keep track of which (possibly + nested) subexpressions we are currently in. The matched_something + field of reg_info[reg_num] helps us tell whether or not we have + matched any of the pattern so far this time through the reg_num-th + subexpression. These two fields get reset each time through any + loop their register is in. */ + register_info_type *reg_info; + + /* The following record the register info as found in the above + variables when we find a match better than any we've seen before. + This happens as we backtrack through the failure points, which in + turn happens only if we have not yet matched the entire string. */ + unsigned best_regs_set = false; + const char **best_regstart, **best_regend; + + /* Logically, this is `best_regend[0]'. But we don't want to have to + allocate space for that if we're not allocating space for anything + else (see below). Also, we never need info about register 0 for + any of the other register vectors, and it seems rather a kludge to + treat `best_regend' differently than the rest. So we keep track of + the end of the best match so far in a separate variable. We + initialize this to NULL so that when we backtrack the first time + and need to test it, it's not garbage. */ + const char *match_end = NULL; + + /* Used when we pop values we don't care about. */ + const char **reg_dummy; + register_info_type *reg_info_dummy; + +#ifdef DEBUG + /* Counts the total number of registers pushed. 
*/ + unsigned num_regs_pushed = 0; +#endif + + DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); + + INIT_FAIL_STACK (); + + /* Do not bother to initialize all the register variables if there are + no groups in the pattern, as it takes a fair amount of time. If + there are groups, we include space for register 0 (the whole + pattern), even though we never use it, since it simplifies the + array indexing. We should fix this. */ + if (bufp->re_nsub) + { + regstart = REGEX_TALLOC (num_regs, const char *); + regend = REGEX_TALLOC (num_regs, const char *); + old_regstart = REGEX_TALLOC (num_regs, const char *); + old_regend = REGEX_TALLOC (num_regs, const char *); + best_regstart = REGEX_TALLOC (num_regs, const char *); + best_regend = REGEX_TALLOC (num_regs, const char *); + reg_info = REGEX_TALLOC (num_regs, register_info_type); + reg_dummy = REGEX_TALLOC (num_regs, const char *); + reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); + + if (!(regstart && regend && old_regstart && old_regend && reg_info + && best_regstart && best_regend && reg_dummy && reg_info_dummy)) + { + FREE_VARIABLES (); + return -2; + } + } +#ifdef REGEX_MALLOC + else + { + /* We must initialize all our variables to NULL, so that + `FREE_VARIABLES' doesn't try to free them. */ + regstart = regend = old_regstart = old_regend = best_regstart + = best_regend = reg_dummy = NULL; + reg_info = reg_info_dummy = (register_info_type *) NULL; + } +#endif /* REGEX_MALLOC */ + + /* The starting position is bogus. */ + if (pos < 0 || pos > size1 + size2) + { + FREE_VARIABLES (); + return -1; + } + + /* Initialize subexpression text positions to -1 to mark ones that no + start_memory/stop_memory has been seen for. Also initialize the + register information struct. 
*/ + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + regstart[mcnt] = regend[mcnt] + = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; + + REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; + IS_ACTIVE (reg_info[mcnt]) = 0; + MATCHED_SOMETHING (reg_info[mcnt]) = 0; + EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; + } + + /* We move `string1' into `string2' if the latter's empty -- but not if + `string1' is null. */ + if (size2 == 0 && string1 != NULL) + { + string2 = string1; + size2 = size1; + string1 = 0; + size1 = 0; + } + end1 = string1 + size1; + end2 = string2 + size2; + + /* Compute where to stop matching, within the two strings. */ + if (stop <= size1) + { + end_match_1 = string1 + stop; + end_match_2 = string2; + } + else + { + end_match_1 = end1; + end_match_2 = string2 + stop - size1; + } + + /* `p' scans through the pattern as `d' scans through the data. + `dend' is the end of the input string that `d' points within. `d' + is advanced into the following input string whenever necessary, but + this happens before fetching; therefore, at the beginning of the + loop, `d' can be pointing at the end of a string, but it cannot + equal `string2'. */ + if (size1 > 0 && pos <= size1) + { + d = string1 + pos; + dend = end_match_1; + } + else + { + d = string2 + pos - size1; + dend = end_match_2; + } + + DEBUG_PRINT1 ("The compiled pattern is: "); + DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); + DEBUG_PRINT1 ("The string to match is: `"); + DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); + DEBUG_PRINT1 ("'\n"); + + /* This loops over pattern commands. It exits by returning from the + function if the match is complete, or it drops through if the match + fails at this starting point in the input data. */ + for (;;) + { + DEBUG_PRINT2 ("\n0x%x: ", p); + + if (p == pend) + { /* End of pattern means we might have succeeded. */ + DEBUG_PRINT1 ("end of pattern ... 
"); + + /* If we haven't matched the entire string, and we want the + longest match, try backtracking. */ + if (d != end_match_2) + { + DEBUG_PRINT1 ("backtracking.\n"); + + if (!FAIL_STACK_EMPTY ()) + { /* More failure points to try. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + + /* If exceeds best match so far, save it. */ + if (!best_regs_set + || (same_str_p && d > match_end) + || (!same_str_p && !MATCHING_IN_FIRST_STRING)) + { + best_regs_set = true; + match_end = d; + + DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); + + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + best_regstart[mcnt] = regstart[mcnt]; + best_regend[mcnt] = regend[mcnt]; + } + } + goto fail; + } + + /* If no failure points, don't restore garbage. */ + else if (best_regs_set) + { + restore_best_regs: + /* Restore best match. It may happen that `dend == + end_match_1' while the restored d is in string2. + For example, the pattern `x.*y.*z' against the + strings `x-' and `y-z-', if the two strings are + not consecutive in memory. */ + DEBUG_PRINT1 ("Restoring best registers.\n"); + + d = match_end; + dend = ((d >= string1 && d <= end1) + ? end_match_1 : end_match_2); + + for (mcnt = 1; mcnt < num_regs; mcnt++) + { + regstart[mcnt] = best_regstart[mcnt]; + regend[mcnt] = best_regend[mcnt]; + } + } + } /* d != end_match_2 */ + + DEBUG_PRINT1 ("Accepting match.\n"); + + /* If caller wants register contents data back, do it. */ + if (regs && !bufp->no_sub) + { + /* Have the register data arrays been allocated? */ + if (bufp->regs_allocated == REGS_UNALLOCATED) + { /* No. So allocate them with malloc. We need one + extra element beyond `num_regs' for the `-1' marker + GNU code uses. 
*/ + regs->num_regs = MAX (RE_NREGS, num_regs + 1); + regs->start = TALLOC (regs->num_regs, regoff_t); + regs->end = TALLOC (regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + return -2; + bufp->regs_allocated = REGS_REALLOCATE; + } + else if (bufp->regs_allocated == REGS_REALLOCATE) + { /* Yes. If we need more elements than were already + allocated, reallocate them. If we need fewer, just + leave it alone. */ + if (regs->num_regs < num_regs + 1) + { + regs->num_regs = num_regs + 1; + RETALLOC (regs->start, regs->num_regs, regoff_t); + RETALLOC (regs->end, regs->num_regs, regoff_t); + if (regs->start == NULL || regs->end == NULL) + return -2; + } + } + else + assert (bufp->regs_allocated == REGS_FIXED); + + /* Convert the pointer data in `regstart' and `regend' to + indices. Register zero has to be set differently, + since we haven't kept track of any info for it. */ + if (regs->num_regs > 0) + { + regs->start[0] = pos; + regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 + : d - string2 + size1); + } + + /* Go through the first `min (num_regs, regs->num_regs)' + registers, since that is all we initialized. */ + for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) + { + if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) + regs->start[mcnt] = regs->end[mcnt] = -1; + else + { + regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); + } + } + + /* If the regs structure we return has more elements than + were in the pattern, set the extra elements to -1. If + we (re)allocated the registers, this is the case, + because we always allocate enough to have at least one + -1 at the end. 
*/ + for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) + regs->start[mcnt] = regs->end[mcnt] = -1; + } /* regs && !bufp->no_sub */ + + FREE_VARIABLES (); + DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", + nfailure_points_pushed, nfailure_points_popped, + nfailure_points_pushed - nfailure_points_popped); + DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); + + mcnt = d - pos - (MATCHING_IN_FIRST_STRING + ? string1 + : string2 - size1); + + DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + + return mcnt; + } + + /* Otherwise match next pattern command. */ +#ifdef SWITCH_ENUM_BUG + switch ((int) ((re_opcode_t) *p++)) +#else + switch ((re_opcode_t) *p++) +#endif + { + /* Ignore these. Used to ignore the n of succeed_n's which + currently have n == 0. */ + case no_op: + DEBUG_PRINT1 ("EXECUTING no_op.\n"); + break; + + + /* Match the next n pattern characters exactly. The following + byte in the pattern defines n, and the n bytes after that + are the characters to match. */ + case exactn: + mcnt = *p++; + DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); + + /* This is written out as an if-else so we don't waste time + testing `translate' inside the loop. */ + if (translate) + { + do + { + PREFETCH (); + if (translate[(unsigned char) *d++] != (char) *p++) + goto fail; + } + while (--mcnt); + } + else + { + do + { + PREFETCH (); + if (*d++ != (char) *p++) goto fail; + } + while (--mcnt); + } + SET_REGS_MATCHED (); + break; + + + /* Match any character except possibly a newline or a null. 
*/ + case anychar: + DEBUG_PRINT1 ("EXECUTING anychar.\n"); + + PREFETCH (); + + if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') + || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) + goto fail; + + SET_REGS_MATCHED (); + DEBUG_PRINT2 (" Matched `%d'.\n", *d); + d++; + break; + + + case charset: + case charset_not: + { + register unsigned char c; + boolean not = (re_opcode_t) *(p - 1) == charset_not; + + DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); + + PREFETCH (); + c = TRANSLATE (*d); /* The character to match. */ + + /* Cast to `unsigned' instead of `unsigned char' in case the + bit list is a full 32 bytes long. */ + if (c < (unsigned) (*p * BYTEWIDTH) + && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + p += 1 + *p; + + if (!not) goto fail; + + SET_REGS_MATCHED (); + d++; + break; + } + + + /* The beginning of a group is represented by start_memory. + The arguments are the register number in the next byte, and the + number of groups inner to this one in the next. The text + matched within the group is recorded (in the internal + registers data structure) under the register number. */ + case start_memory: + DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); + + /* Find out if this group can match the empty string. */ + p1 = p; /* To send to group_match_null_string_p. */ + + if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[*p]) + = group_match_null_string_p (&p1, pend, reg_info); + + /* Save the position in the string where we were the last time + we were at this open-group operator in case the group is + operated upon by a repetition operator, e.g., with `(a*)*b' + against `ab'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regstart[*p]) ? 
d : regstart[*p] + : regstart[*p]; + DEBUG_PRINT2 (" old_regstart: %d\n", + POINTER_TO_OFFSET (old_regstart[*p])); + + regstart[*p] = d; + DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); + + IS_ACTIVE (reg_info[*p]) = 1; + MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* This is the new highest active register. */ + highest_active_reg = *p; + + /* If nothing was active before, this is the new lowest active + register. */ + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *p; + + /* Move past the register number and inner group count. */ + p += 2; + break; + + + /* The stop_memory opcode represents the end of a group. Its + arguments are the same as start_memory's: the register + number, and the number of inner groups. */ + case stop_memory: + DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); + + /* We need to save the string position the last time we were at + this close-group operator in case the group is operated + upon by a repetition operator, e.g., with `((a*)*(b*)*)*' + against `aba'; then we want to ignore where we are now in + the string in case this attempt to match fails. */ + old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) + ? REG_UNSET (regend[*p]) ? d : regend[*p] + : regend[*p]; + DEBUG_PRINT2 (" old_regend: %d\n", + POINTER_TO_OFFSET (old_regend[*p])); + + regend[*p] = d; + DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); + + /* This register isn't active anymore. */ + IS_ACTIVE (reg_info[*p]) = 0; + + /* If this was the only register active, nothing is active + anymore. */ + if (lowest_active_reg == highest_active_reg) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + { /* We must scan for the new highest active register, since + it isn't necessarily one less than now: consider + (a(b)c(d(e)f)g). When group 3 ends, after the f), the + new highest active register is 1. 
*/ + unsigned char r = *p - 1; + while (r > 0 && !IS_ACTIVE (reg_info[r])) + r--; + + /* If we end up at register zero, that means that we saved + the registers as the result of an `on_failure_jump', not + a `start_memory', and we jumped to past the innermost + `stop_memory'. For example, in ((.)*) we save + registers 1 and 2 as a result of the *, but when we pop + back to the second ), we are at the stop_memory 1. + Thus, nothing is active. */ + if (r == 0) + { + lowest_active_reg = NO_LOWEST_ACTIVE_REG; + highest_active_reg = NO_HIGHEST_ACTIVE_REG; + } + else + highest_active_reg = r; + } + + /* If just failed to match something this time around with a + group that's operated on by a repetition operator, try to + force exit from the ``loop'', and restore the register + information for this group that we had before trying this + last match. */ + if ((!MATCHED_SOMETHING (reg_info[*p]) + || (re_opcode_t) p[-3] == start_memory) + && (p + 2) < pend) + { + boolean is_a_jump_n = false; + + p1 = p + 2; + mcnt = 0; + switch ((re_opcode_t) *p1++) + { + case jump_n: + is_a_jump_n = true; + case pop_failure_jump: + case maybe_pop_jump: + case jump: + case dummy_failure_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (is_a_jump_n) + p1 += 2; + break; + + default: + /* do nothing */ ; + } + p1 += mcnt; + + /* If the next operation is a jump backwards in the pattern + to an on_failure_jump right before the start_memory + corresponding to this stop_memory, exit from the loop + by forcing a failure after pushing on the stack the + on_failure_jump's jump in the pattern, and d. */ + if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump + && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) + { + /* If this group ever matched anything, then restore + what its registers were before trying this last + failed match, e.g., with `(a*)*b' against `ab' for + regstart[1], and, e.g., with `((a*)*(b*)*)*' + against `aba' for regend[3]. 
+ + Also restore the registers for inner groups for, + e.g., `((a*)(b*))*' against `aba' (register 3 would + otherwise get trashed). */ + + if (EVER_MATCHED_SOMETHING (reg_info[*p])) + { + unsigned r; + + EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; + + /* Restore this and inner groups' (if any) registers. */ + for (r = *p; r < *p + *(p + 1); r++) + { + regstart[r] = old_regstart[r]; + + /* xx why this test? */ + if ((int) old_regend[r] >= (int) regstart[r]) + regend[r] = old_regend[r]; + } + } + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + PUSH_FAILURE_POINT (p1 + mcnt, d, -2); + + goto fail; + } + } + + /* Move past the register number and the inner group count. */ + p += 2; + break; + + + /* \ has been turned into a `duplicate' command which is + followed by the numeric value of as the register number. */ + case duplicate: + { + register const char *d2, *dend2; + int regno = *p++; /* Get which register to match against. */ + DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); + + /* Can't back reference a group which we've never matched. */ + if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) + goto fail; + + /* Where in input to try to start matching. */ + d2 = regstart[regno]; + + /* Where to stop matching; if both the place to start and + the place to stop matching are in the same string, then + set to the place to stop, otherwise, for now have to use + the end of the first string. */ + + dend2 = ((FIRST_STRING_P (regstart[regno]) + == FIRST_STRING_P (regend[regno])) + ? regend[regno] : end_match_1); + for (;;) + { + /* If necessary, advance to next segment in register + contents. */ + while (d2 == dend2) + { + if (dend2 == end_match_2) break; + if (dend2 == regend[regno]) break; + + /* End of string1 => advance to string2. */ + d2 = string2; + dend2 = regend[regno]; + } + /* At end of register contents => success */ + if (d2 == dend2) break; + + /* If necessary, advance to next segment in data. 
*/ + PREFETCH (); + + /* How many characters left in this segment to match. */ + mcnt = dend - d; + + /* Want how many consecutive characters we can match in + one shot, so, if necessary, adjust the count. */ + if (mcnt > dend2 - d2) + mcnt = dend2 - d2; + + /* Compare that many; failure if mismatch, else move + past them. */ + if (translate + ? bcmp_translate (d, d2, mcnt, translate) + : bcmp (d, d2, mcnt)) + goto fail; + d += mcnt, d2 += mcnt; + } + } + break; + + + /* begline matches the empty string at the beginning of the string + (unless `not_bol' is set in `bufp'), and, if + `newline_anchor' is set, after newlines. */ + case begline: + DEBUG_PRINT1 ("EXECUTING begline.\n"); + + if (AT_STRINGS_BEG (d)) + { + if (!bufp->not_bol) break; + } + else if (d[-1] == '\n' && bufp->newline_anchor) + { + break; + } + /* In all other cases, we fail. */ + goto fail; + + + /* endline is the dual of begline. */ + case endline: + DEBUG_PRINT1 ("EXECUTING endline.\n"); + + if (AT_STRINGS_END (d)) + { + if (!bufp->not_eol) break; + } + + /* We have to ``prefetch'' the next character. */ + else if ((d == end1 ? *string2 : *d) == '\n' + && bufp->newline_anchor) + { + break; + } + goto fail; + + + /* Match at the very beginning of the data. */ + case begbuf: + DEBUG_PRINT1 ("EXECUTING begbuf.\n"); + if (AT_STRINGS_BEG (d)) + break; + goto fail; + + + /* Match at the very end of the data. */ + case endbuf: + DEBUG_PRINT1 ("EXECUTING endbuf.\n"); + if (AT_STRINGS_END (d)) + break; + goto fail; + + + /* on_failure_keep_string_jump is used to optimize `.*\n'. It + pushes NULL as the value for the string on the stack. Then + `pop_failure_point' will keep the current value for the + string, instead of restoring it. To see why, consider + matching `foo\nbar' against `.*\n'. The .* matches the foo; + then the . fails against the \n. But the next thing we want + to do is match the \n against the \n; if we restored the + string value, we would be back at the foo. 
+ + Because this is used only in specific cases, we don't need to + check all the things that `on_failure_jump' does, to make + sure the right things get saved on the stack. Hence we don't + share its code. The only reason to push anything on the + stack at all is that otherwise we would have to change + `anychar's code to do something besides goto fail in this + case; that seems worse than this. */ + case on_failure_keep_string_jump: + DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); + + PUSH_FAILURE_POINT (p + mcnt, NULL, -2); + break; + + + /* Uses of on_failure_jump: + + Each alternative starts with an on_failure_jump that points + to the beginning of the next alternative. Each alternative + except the last ends with a jump that in effect jumps past + the rest of the alternatives. (They really jump to the + ending jump of the following alternative, because tensioning + these jumps is a hassle.) + + Repeats start with an on_failure_jump that points past both + the repetition text and either the following jump or + pop_failure_jump back to this on_failure_jump. */ + case on_failure_jump: + on_failure: + DEBUG_PRINT1 ("EXECUTING on_failure_jump"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); + + /* If this on_failure_jump comes right before a group (i.e., + the original * applied to a group), save the information + for that group and all inner ones, so that if we fail back + to this point, the group's information will be correct. + For example, in \(a*\)*\1, we need the preceding group, + and in \(\(a*\)b*\)\2, we need the inner group. */ + + /* We can't use `p' to check ahead because we push + a failure point to `p + mcnt' after we do this. 
*/ + p1 = p; + + /* We need to skip no_op's before we look for the + start_memory in case this on_failure_jump is happening as + the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 + against aba. */ + while (p1 < pend && (re_opcode_t) *p1 == no_op) + p1++; + + if (p1 < pend && (re_opcode_t) *p1 == start_memory) + { + /* We have a new highest active register now. This will + get reset at the start_memory we are about to get to, + but we will have saved all the registers relevant to + this repetition op, as described above. */ + highest_active_reg = *(p1 + 1) + *(p1 + 2); + if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) + lowest_active_reg = *(p1 + 1); + } + + DEBUG_PRINT1 (":\n"); + PUSH_FAILURE_POINT (p + mcnt, d, -2); + break; + + + /* A smart repeat ends with `maybe_pop_jump'. + We change it to either `pop_failure_jump' or `jump'. */ + case maybe_pop_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); + { + register unsigned char *p2 = p; + + /* Compare the beginning of the repeat with what in the + pattern follows its end. If we can establish that there + is nothing that they would both match, i.e., that we + would have to backtrack because of (as in, e.g., `a*a') + then we can change to pop_failure_jump, because we'll + never have to backtrack. + + This is not true in the case of alternatives: in + `(a|ab)*' we do need to backtrack to the `ab' alternative + (e.g., if the string was `ab'). But instead of trying to + detect that here, the alternative has put on a dummy + failure point which is what we will end up popping. */ + + /* Skip over open/close-group commands. */ + while (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; /* Skip over args, too. */ + + /* If we're at the end of the pattern, we can change. */ + if (p2 == pend) + { + /* Consider what happens when matching ":\(.*\)" + against ":/". I don't really understand this code + yet. 
*/ + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 + (" End of pattern: change to `pop_failure_jump'.\n"); + } + + else if ((re_opcode_t) *p2 == exactn + || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) + { + register unsigned char c + = *p2 == (unsigned char) endline ? '\n' : p2[2]; + p1 = p + mcnt; + + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ + if ((re_opcode_t) p1[3] == exactn && p1[5] != c) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", + c, p1[5]); + } + + else if ((re_opcode_t) p1[3] == charset + || (re_opcode_t) p1[3] == charset_not) + { + int not = (re_opcode_t) p1[3] == charset_not; + + if (c < (unsigned char) (p1[4] * BYTEWIDTH) + && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) + not = !not; + + /* `not' is equal to 1 if c would match, which means + that we can't change to pop_failure_jump. */ + if (!not) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } + } + p -= 2; /* Point at relative address again. */ + if ((re_opcode_t) p[-1] != pop_failure_jump) + { + p[-1] = (unsigned char) jump; + DEBUG_PRINT1 (" Match => jump.\n"); + goto unconditional_jump; + } + /* Note fall through. */ + + + /* The end of a simple repeat has a pop_failure_jump back to + its matching on_failure_jump, where the latter will push a + failure point. The pop_failure_jump takes off failure + points put on by this pop_failure_jump's matching + on_failure_jump; we got through the pattern to here from the + matching on_failure_jump, so didn't fail. */ + case pop_failure_jump: + { + /* We need to pass separate storage for the lowest and + highest registers, even though we don't care about the + actual values. Otherwise, we will restore only one + register from the stack, since lowest will == highest in + `pop_failure_point'. 
*/ + unsigned dummy_low_reg, dummy_high_reg; + unsigned char *pdummy; + const char *sdummy; + + DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); + POP_FAILURE_POINT (sdummy, pdummy, + dummy_low_reg, dummy_high_reg, + reg_dummy, reg_dummy, reg_info_dummy); + } + /* Note fall through. */ + + + /* Unconditionally jump (without popping any failure points). */ + case jump: + unconditional_jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ + DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); + p += mcnt; /* Do the jump. */ + DEBUG_PRINT2 ("(to 0x%x).\n", p); + break; + + + /* We need this opcode so we can detect where alternatives end + in `group_match_null_string_p' et al. */ + case jump_past_alt: + DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); + goto unconditional_jump; + + + /* Normally, the on_failure_jump pushes a failure point, which + then gets popped at pop_failure_jump. We will end up at + pop_failure_jump, also, and with a pattern of, say, `a+', we + are skipping over the on_failure_jump, so we have to push + something meaningless for pop_failure_jump to pop. */ + case dummy_failure_jump: + DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); + /* It doesn't matter what we push for the string here. What + the code at `fail' tests is the value for the pattern. */ + PUSH_FAILURE_POINT (0, 0, -2); + goto unconditional_jump; + + + /* At the end of an alternative, we need to push a dummy failure + point in case we are followed by a `pop_failure_jump', because + we don't want the failure point for the alternative to be + popped. For example, matching `(a|ab)*' against `aab' + requires that we match the `ab' alternative. */ + case push_dummy_failure: + DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); + /* See comments just above at `dummy_failure_jump' about the + two zeroes. */ + PUSH_FAILURE_POINT (0, 0, -2); + break; + + /* Have to succeed matching what follows at least n times. + After that, handle like `on_failure_jump'. 
*/ + case succeed_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); + + assert (mcnt >= 0); + /* Originally, this is how many times we HAVE to succeed. */ + if (mcnt > 0) + { + mcnt--; + p += 2; + STORE_NUMBER_AND_INCR (p, mcnt); + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); + } + else if (mcnt == 0) + { + DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); + p[2] = (unsigned char) no_op; + p[3] = (unsigned char) no_op; + goto on_failure; + } + break; + + case jump_n: + EXTRACT_NUMBER (mcnt, p + 2); + DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); + + /* Originally, this is how many times we CAN jump. */ + if (mcnt) + { + mcnt--; + STORE_NUMBER (p + 2, mcnt); + goto unconditional_jump; + } + /* If don't have to jump any more, skip over the rest of command. */ + else + p += 4; + break; + + case set_number_at: + { + DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); + + EXTRACT_NUMBER_AND_INCR (mcnt, p); + p1 = p + mcnt; + EXTRACT_NUMBER_AND_INCR (mcnt, p); + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); + STORE_NUMBER (p1, mcnt); + break; + } + + case wordbound: + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + break; + goto fail; + + case notwordbound: + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_WORD_BOUNDARY (d)) + goto fail; + break; + + case wordbeg: + DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); + if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1))) + break; + goto fail; + + case wordend: + DEBUG_PRINT1 ("EXECUTING wordend.\n"); + if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1) + && (!WORDCHAR_P (d) || AT_STRINGS_END (d))) + break; + goto fail; + +#ifdef emacs +#ifdef emacs19 + case before_dot: + DEBUG_PRINT1 ("EXECUTING before_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) >= point) + goto fail; + break; + + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) != point) + goto fail; + break; + + case after_dot: + 
DEBUG_PRINT1 ("EXECUTING after_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) <= point) + goto fail; + break; +#else /* not emacs19 */ + case at_dot: + DEBUG_PRINT1 ("EXECUTING at_dot.\n"); + if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) + goto fail; + break; +#endif /* not emacs19 */ + + case syntaxspec: + DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchsyntax; + + case wordchar: + DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); + mcnt = (int) Sword; + matchsyntax: + PREFETCH (); + if (SYNTAX (*d++) != (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + + case notsyntaxspec: + DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); + mcnt = *p++; + goto matchnotsyntax; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); + mcnt = (int) Sword; + matchnotsyntax: + PREFETCH (); + if (SYNTAX (*d++) == (enum syntaxcode) mcnt) + goto fail; + SET_REGS_MATCHED (); + break; + +#else /* not emacs */ + case wordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n"); + PREFETCH (); + if (!WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; + + case notwordchar: + DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); + PREFETCH (); + if (WORDCHAR_P (d)) + goto fail; + SET_REGS_MATCHED (); + d++; + break; +#endif /* not emacs */ + + default: + abort (); + } + continue; /* Successfully executed one pattern command; keep going. */ + + + /* We goto here if a matching operation fails. */ + fail: + if (!FAIL_STACK_EMPTY ()) + { /* A restart point is known. Restore to that state. */ + DEBUG_PRINT1 ("\nFAIL:\n"); + POP_FAILURE_POINT (d, p, + lowest_active_reg, highest_active_reg, + regstart, regend, reg_info); + + /* If this failure point is a dummy, try the next one. */ + if (!p) + goto fail; + + /* If we failed to the end of the pattern, don't examine *p. 
*/ + assert (p <= pend); + if (p < pend) + { + boolean is_a_jump_n = false; + + /* If failed to a backwards jump that's part of a repetition + loop, need to pop this failure point and use the next one. */ + switch ((re_opcode_t) *p) + { + case jump_n: + is_a_jump_n = true; + case maybe_pop_jump: + case pop_failure_jump: + case jump: + p1 = p + 1; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + + if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) + || (!is_a_jump_n + && (re_opcode_t) *p1 == on_failure_jump)) + goto fail; + break; + default: + /* do nothing */ ; + } + } + + if (d >= string1 && d <= end1) + dend = end_match_1; + } + else + break; /* Matching at this starting point really fails. */ + } /* for (;;) */ + + if (best_regs_set) + goto restore_best_regs; + + FREE_VARIABLES (); + + return -1; /* Failure to match. */ +} /* re_match_2 */ + +/* Subroutine definitions for re_match_2. */ + + +/* We are passed P pointing to a register number after a start_memory. + + Return true if the pattern up to the corresponding stop_memory can + match the empty string, and false otherwise. + + If we find the matching stop_memory, sets P to point to one past its number. + Otherwise, sets P to an undefined byte less than or equal to END. + + We don't handle duplicates properly (yet). */ + +static boolean +group_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + /* Point to after the args to the start_memory. */ + unsigned char *p1 = *p + 2; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and return true or + false, as appropriate, when we get to one that can't, or to the + matching stop_memory. */ + + switch ((re_opcode_t) *p1) + { + /* Could be either a loop or a series of alternatives. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + /* If the next operation is not a jump backwards in the + pattern. 
*/ + + if (mcnt >= 0) + { + /* Go through the on_failure_jumps of the alternatives, + seeing if any of the alternatives cannot match nothing. + The last alternative starts with only a jump, + whereas the rest start with on_failure_jump and end + with a jump, e.g., here is the pattern for `a|b|c': + + /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 + /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 + /exactn/1/c + + So, we have to first go through the first (n-1) + alternatives and then deal with the last one separately. */ + + + /* Deal with the first (n-1) alternatives, which start + with an on_failure_jump (see above) that jumps to right + past a jump_past_alt. */ + + while ((re_opcode_t) p1[mcnt-3] == jump_past_alt) + { + /* `mcnt' holds how many bytes long the alternative + is, including the ending `jump_past_alt' and + its number. */ + + if (!alt_match_null_string_p (p1, p1 + mcnt - 3, + reg_info)) + return false; + + /* Move to right after this alternative, including the + jump_past_alt. */ + p1 += mcnt; + + /* Break if it's the beginning of an n-th alternative + that doesn't begin with an on_failure_jump. */ + if ((re_opcode_t) *p1 != on_failure_jump) + break; + + /* Still have to check that it's not an n-th + alternative that starts with an on_failure_jump. */ + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if ((re_opcode_t) p1[mcnt-3] != jump_past_alt) + { + /* Get to the beginning of the n-th alternative. */ + p1 -= 3; + break; + } + } + + /* Deal with the last alternative: go back and get number + of the `jump_past_alt' just before it. `mcnt' contains + the length of the alternative. */ + EXTRACT_NUMBER (mcnt, p1 - 2); + + if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info)) + return false; + + p1 += mcnt; /* Get past the n-th alternative. 
*/ + } /* if mcnt >= 0 */ + break; + + + case stop_memory: + assert (p1[1] == **p); + *p = p1 + 2; + return true; + + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return false; +} /* group_match_null_string_p */ + + +/* Similar to group_match_null_string_p, but doesn't deal with alternatives: + It expects P to be the first byte of a single alternative and END one + byte past the last. The alternative can contain groups. + + Returns true if the scan reaches END without meeting an opcode that + cannot match the empty string, and false otherwise. */ + +static boolean +alt_match_null_string_p (p, end, reg_info) + unsigned char *p, *end; + register_info_type *reg_info; +{ + int mcnt; + unsigned char *p1 = p; + + while (p1 < end) + { + /* Skip over opcodes that can match nothing, and return false when + we get to one that can't. */ + + switch ((re_opcode_t) *p1) + { + /* It's a loop: step past the opcode and its offset argument, + then past the MCNT bytes that offset covers. */ + case on_failure_jump: + p1++; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + break; + + default: + if (!common_op_match_null_string_p (&p1, end, reg_info)) + return false; + } + } /* while p1 < end */ + + return true; +} /* alt_match_null_string_p */ + + +/* Deals with the ops common to group_match_null_string_p and + alt_match_null_string_p. + + Sets P to one after the op and its arguments, if any. + + Returns true if the op can match the empty string; P is only updated + in that case. */ + +static boolean +common_op_match_null_string_p (p, end, reg_info) + unsigned char **p, *end; + register_info_type *reg_info; +{ + int mcnt; + boolean ret; + int reg_no; + unsigned char *p1 = *p; + + switch ((re_opcode_t) *p1++) + { + case no_op: + case begline: + case endline: + case begbuf: + case endbuf: + case wordbeg: + case wordend: + case wordbound: + case notwordbound: +#ifdef emacs + case before_dot: + case at_dot: + case after_dot: +#endif + break; + + case start_memory: + reg_no = *p1; + assert (reg_no > 0 && reg_no <= MAX_REGNUM); + ret = group_match_null_string_p (&p1, end, reg_info); + + /* Have to set this here in case we're checking a group which + contains a group and a back reference to it.
*/ + + if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE) + REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret; + + if (!ret) + return false; + break; + + /* If this is an optimized succeed_n for zero times, make the jump. */ + case jump: + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + if (mcnt >= 0) + p1 += mcnt; + else + return false; + break; + + case succeed_n: + /* Get to the number of times to succeed. */ + p1 += 2; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + + if (mcnt == 0) + { + p1 -= 4; + EXTRACT_NUMBER_AND_INCR (mcnt, p1); + p1 += mcnt; + } + else + return false; + break; + + case duplicate: /* NOTE(review): on the success path p1 is left pointing at the register number, not past it -- confirm. */ + if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) + return false; + break; + + case set_number_at: + p1 += 4; /* NOTE(review): there is no break here, so control falls into the default case and returns false; confirm that is intended. */ + + default: + /* All other opcodes mean we cannot match the empty string. */ + return false; + } + + *p = p1; + return true; +} /* common_op_match_null_string_p */ + + +/* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN + bytes; nonzero otherwise. Each byte of S1 and S2 is mapped through + TRANSLATE before being compared, and the scan stops at the first + difference. */ + +static int +bcmp_translate (s1, s2, len, translate) + unsigned char *s1, *s2; + register int len; + char *translate; +{ + register unsigned char *p1 = s1, *p2 = s2; + while (len) + { + if (translate[*p1++] != translate[*p2++]) return 1; + len--; + } + return 0; +} + +/* Entry points for GNU code. */ + +/* re_compile_pattern is the GNU regular expression compiler: it + compiles PATTERN (of length LENGTH) and puts the result in BUFP. + Returns 0 if the pattern was valid, otherwise an error string. + + Assumes the `allocated' (and perhaps `buffer') and `translate' fields + are set in BUFP on entry. + + We call regex_compile to do the actual compilation. */ + +const char * +re_compile_pattern (pattern, length, bufp) + const char *pattern; + int length; + struct re_pattern_buffer *bufp; +{ + reg_errcode_t ret; + + /* GNU code is written to assume at least RE_NREGS registers will be set + (and at least one extra will be -1).
*/ + bufp->regs_allocated = REGS_UNALLOCATED; + + /* And GNU code determines whether or not to get register information + by passing null for the REGS argument to re_match, etc., not by + setting no_sub. */ + bufp->no_sub = 0; + + /* Match anchors at newline. */ + bufp->newline_anchor = 1; + + ret = regex_compile (pattern, length, re_syntax_options, bufp); + + return re_error_msg[(int) ret]; +} + +/* Entry points compatible with 4.2 BSD regex library. We don't define + them if this is an Emacs or POSIX compilation. */ + +#if !defined (emacs) && !defined (_POSIX_SOURCE) + +/* BSD has one and only one pattern buffer: re_comp compiles into it + and re_exec searches with it. */ +static struct re_pattern_buffer re_comp_buf; + +char * +re_comp (s) + const char *s; +{ + reg_errcode_t ret; + + if (!s) /* Null S: compile nothing; it is an error only if the pattern buffer was never allocated. */ + { + if (!re_comp_buf.buffer) + return "No previous regular expression"; + return 0; + } + + if (!re_comp_buf.buffer) /* First use: allocate the pattern buffer and the fastmap. */ + { + re_comp_buf.buffer = (unsigned char *) malloc (200); + if (re_comp_buf.buffer == NULL) + return "Memory exhausted"; + re_comp_buf.allocated = 200; + + re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); + if (re_comp_buf.fastmap == NULL) + return "Memory exhausted"; + } + + /* Since `re_exec' always passes NULL for the `regs' argument, we + don't need to initialize the pattern buffer fields which affect it. */ + + /* Match anchors at newlines. */ + re_comp_buf.newline_anchor = 1; + + ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); + + /* Yes, we're discarding `const' here. */ + return (char *) re_error_msg[(int) ret]; +} + + +int +re_exec (s) + const char *s; +{ + const int len = strlen (s); + return + 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); /* 1 if re_search returns a nonnegative value, else 0. */ +} +#endif /* not emacs and not _POSIX_SOURCE */ + +/* POSIX.2 functions. Don't define these for Emacs. */ + +#ifndef emacs + +/* regcomp takes a regular expression as a string and compiles it. + + PREG is a regex_t *. We do not expect any fields to be initialized, + since POSIX says we shouldn't.
Thus, we set + + `buffer' to the compiled pattern; + `used' to the length of the compiled pattern; + `syntax' to RE_SYNTAX_POSIX_EXTENDED if the + REG_EXTENDED bit in CFLAGS is set; otherwise, to + RE_SYNTAX_POSIX_BASIC; + `newline_anchor' to REG_NEWLINE being set in CFLAGS; + `fastmap' and `fastmap_accurate' to zero; + `re_nsub' to the number of subexpressions in PATTERN. + + PATTERN is the address of the pattern string. + + CFLAGS is a series of bits which affect compilation. + + If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we + use POSIX basic syntax. + + If REG_NEWLINE is set, then . and [^...] don't match newline. + Also, regexec will try a match beginning after every newline. + + If REG_ICASE is set, then we consider upper- and lowercase + versions of letters to be equivalent when matching. + + If REG_NOSUB is set, then when PREG is passed to regexec, that + routine will report only success or failure, and nothing about the + registers. + + It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for + the return codes and their meanings.) When compilation fails, the + translate table allocated for REG_ICASE, if any, has already been + freed and `translate' reset to NULL. */ + +int +regcomp (preg, pattern, cflags) + regex_t *preg; + const char *pattern; + int cflags; +{ + reg_errcode_t ret; + unsigned syntax + = (cflags & REG_EXTENDED) ? + RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; + + /* regex_compile will allocate the space for the compiled pattern. */ + preg->buffer = 0; + preg->allocated = 0; + + /* Don't bother to use a fastmap when searching. This simplifies the + REG_NEWLINE case: if we used a fastmap, we'd have to put all the + characters after newlines into the fastmap. This way, we just try + every character. */ + preg->fastmap = 0; + + if (cflags & REG_ICASE) + { + unsigned i; + + preg->translate = (char *) malloc (CHAR_SET_SIZE); + if (preg->translate == NULL) + return (int) REG_ESPACE; + + /* Map uppercase characters to corresponding lowercase ones. */ + for (i = 0; i < CHAR_SET_SIZE; i++) + preg->translate[i] = ISUPPER (i) ?
tolower (i) : i; + } + else + preg->translate = NULL; + + /* If REG_NEWLINE is set, newlines are treated differently. */ + if (cflags & REG_NEWLINE) + { /* REG_NEWLINE implies neither . nor [^...] match newline. */ + syntax &= ~RE_DOT_NEWLINE; + syntax |= RE_HAT_LISTS_NOT_NEWLINE; + /* It also changes the matching behavior. */ + preg->newline_anchor = 1; + } + else + preg->newline_anchor = 0; + + preg->no_sub = !!(cflags & REG_NOSUB); + + /* POSIX says a null character in the pattern terminates it, so we + can use strlen here in compiling the pattern. */ + ret = regex_compile (pattern, strlen (pattern), syntax, preg); + + /* POSIX doesn't distinguish between an unmatched open-group and an + unmatched close-group: both are REG_EPAREN. */ + if (ret == REG_ERPAREN) ret = REG_EPAREN; + + /* POSIX leaves PREG undefined after a failed regcomp, so the caller + cannot be relied on to regfree it; release the REG_ICASE table + here instead of leaking it. Resetting the pointer keeps a later + regfree on the same PREG harmless. */ + if (ret != REG_NOERROR && preg->translate != NULL) + { + free (preg->translate); + preg->translate = NULL; + } + + return (int) ret; +} + + +/* regexec searches for a given pattern, specified by PREG, in the + string STRING. + + If NMATCH is zero or REG_NOSUB was set in the cflags argument to + `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + least NMATCH elements, and we set them to the offsets of the + corresponding matched substrings. + + EFLAGS specifies `execution flags' which affect matching: if + REG_NOTBOL is set, then ^ does not match at the beginning of the + string; if REG_NOTEOL is set, then $ does not match at the end. + + We return 0 if we find a match and REG_NOMATCH if not. */ + +int +regexec (preg, string, nmatch, pmatch, eflags) + const regex_t *preg; + const char *string; + size_t nmatch; + regmatch_t pmatch[]; + int eflags; +{ + int ret; + struct re_registers regs; + regex_t private_preg; + int len = strlen (string); + boolean want_reg_info = !preg->no_sub && nmatch > 0; + + private_preg = *preg; + + private_preg.not_bol = !!(eflags & REG_NOTBOL); + private_preg.not_eol = !!(eflags & REG_NOTEOL); + + /* The user has told us exactly how many registers to return + information about, via `nmatch'. We have to pass that on to the + matching routines.
*/ + private_preg.regs_allocated = REGS_FIXED; + + if (want_reg_info) + { + regs.num_regs = nmatch; + regs.start = TALLOC (nmatch, regoff_t); + regs.end = TALLOC (nmatch, regoff_t); + if (regs.start == NULL || regs.end == NULL) + return (int) REG_NOMATCH; + } + + /* Perform the searching operation. */ + ret = re_search (&private_preg, string, len, + /* start: */ 0, /* range: */ len, + want_reg_info ? ®s : (struct re_registers *) 0); + + /* Copy the register information to the POSIX structure. */ + if (want_reg_info) + { + if (ret >= 0) + { + unsigned r; + + for (r = 0; r < nmatch; r++) + { + pmatch[r].rm_so = regs.start[r]; + pmatch[r].rm_eo = regs.end[r]; + } + } + + /* If we needed the temporary register info, free the space now. */ + free (regs.start); + free (regs.end); + } + + /* We want zero return to mean success, unlike `re_search'. */ + return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; +} + + +/* Returns a message corresponding to an error code, ERRCODE, returned + from either regcomp or regexec. We don't use PREG here. */ + +size_t +regerror (errcode, preg, errbuf, errbuf_size) + int errcode; + const regex_t *preg; + char *errbuf; + size_t errbuf_size; +{ + const char *msg; + size_t msg_size; + + if (errcode < 0 + || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) + /* Only error codes returned by the rest of the code should be passed + to this routine. If we are given anything else, or if other regex + code generates an invalid error code, then the program has a bug. + Dump core so we can fix it. */ + abort (); + + msg = re_error_msg[errcode]; + + /* POSIX doesn't require that we do anything in this case, but why + not be nice. */ + if (! msg) + msg = "Success"; + + msg_size = strlen (msg) + 1; /* Includes the null. 
*/ + + if (errbuf_size != 0) + { + if (msg_size > errbuf_size) /* Message does not fit: copy what does and terminate it by hand. */ + { + strncpy (errbuf, msg, errbuf_size - 1); + errbuf[errbuf_size - 1] = 0; + } + else + strcpy (errbuf, msg); + } + + return msg_size; /* Full size needed, even when the copy was truncated. */ +} + + +/* Free dynamically allocated space used by PREG: the compiled buffer, + the fastmap and the translate table, each only if non-null. Every + freed pointer is reset to NULL, so a second call frees nothing. */ + +void +regfree (preg) + regex_t *preg; +{ + if (preg->buffer != NULL) + free (preg->buffer); + preg->buffer = NULL; + + preg->allocated = 0; + preg->used = 0; + + if (preg->fastmap != NULL) + free (preg->fastmap); + preg->fastmap = NULL; + preg->fastmap_accurate = 0; + + if (preg->translate != NULL) + free (preg->translate); + preg->translate = NULL; +} + +#endif /* not emacs */ + +/* +Local variables: +make-backup-files: t +version-control: t +trim-versions-without-asking: nil +End: +*/ diff --git a/lib/Makefile.in b/lib/Makefile.in new file mode 100644 index 00000000000..2a9a7ce3437 --- /dev/null +++ b/lib/Makefile.in @@ -0,0 +1,46 @@ +# +# Makefile for simple liburl +# +# Darren Hardy, hardy@cs.colorado.edu, April 1994 +# +# $Id: Makefile.in,v 1.1 1996/02/22 06:23:57 wessels Exp $ +# +prefix = @prefix@ +INSTALL_BINDIR = $(prefix)/bin +INSTALL_LIBDIR = $(prefix)/lib +INSTALL_MANDIR = $(prefix)/man + +CC = @CC@ +INSTALL = @INSTALL@ +INSTALL_BIN = @INSTALL_PROGRAM@ +INSTALL_FILE = @INSTALL_DATA@ +RANLIB = @RANLIB@ +XTRA_LIBS = @XTRA_LIBS@ +XTRA_CFLAGS = @XTRA_CFLAGS@ + + +DEBUG = $(DEBUG_TOP) #-O #-g #-DDEBUG +DEBUG_LIBS = + +CFLAGS = $(DEBUG) $(CACHE_FLAGS) -I../include $(XTRA_CFLAGS) + +LIBDIR = .
+UTILOBJS = rfc850.o rfc1738.o util.o host_cache.o getfullhostname.o \ + debug.o log.o +LIBS = libutil.a + + +all: $(LIBS) + +libutil.a: $(UTILOBJS) + ar r $@ $(UTILOBJS) + $(RANLIB) $@ + +clean: + -rm -f $(UTILOBJS) $(LIBS) core + +realclean: clean + -rm -f libregex.a + -rm -f Makefile + +install: all diff --git a/lib/getfullhostname.c b/lib/getfullhostname.c new file mode 100644 index 00000000000..30e76a8baad --- /dev/null +++ b/lib/getfullhostname.c @@ -0,0 +1,112 @@ +static char rcsid[] = "$Id: getfullhostname.c,v 1.1 1996/02/22 06:23:57 wessels Exp $"; +/* + * getfullhostname.c - Retrieves full DNS name of the current host + * + * DEBUG: section 84, level 1 Common utilities hostname processing + * + * Darren Hardy, hardy@cs.colorado.edu, April 1994 + * + * ---------------------------------------------------------------------- + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. 
Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. 
We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "util.h" + + +/* + * getfullhostname() - Returns the fully qualified name of the current + * host, or NULL on error. Pointer is only valid until the next call + * to the gethost*() functions. + */ +char *getfullhostname() +{ + struct hostent *hp = NULL; + static char buf[HARVESTHOSTNAMELEN + 1]; + extern int gethostname(); /* UNIX system call */ + + if (gethostname(buf, HARVESTHOSTNAMELEN) < 0) + return (NULL); + if ((hp = gethostbyname(buf)) == NULL) + return (buf); + return (hp->h_name); +} diff --git a/lib/rfc1738.c b/lib/rfc1738.c new file mode 100644 index 00000000000..ba7c12c34d0 --- /dev/null +++ b/lib/rfc1738.c @@ -0,0 +1,182 @@ +static char rcsid[] = "$Id: rfc1738.c,v 1.1 1996/02/22 06:23:57 wessels Exp $"; +/* + * rfc1738.c - code to comply with RFC 1738 + * + * Darren Hardy, hardy@cs.colorado.edu, March 1995 + * + * ---------------------------------------------------------------------- + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. 
Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. 
/*
 *  Size of the static result buffer used by rfc1738_escape().
 */
#define BIG_BUFSIZ (BUFSIZ * 4)

/*
 *  RFC 1738 defines that these characters should be escaped, as well as
 *  any non-US-ASCII character or anything between 0x00 - 0x1F.
 */
char rfc1738_unsafe_chars[] =
{
    (char) 0x3C,		/* < */
    (char) 0x3E,		/* > */
    (char) 0x22,		/* " */
    (char) 0x23,		/* # */
    (char) 0x25,		/* % */
    (char) 0x7B,		/* { */
    (char) 0x7D,		/* } */
    (char) 0x7C,		/* | */
    (char) 0x5C,		/* \ */
    (char) 0x5E,		/* ^ */
    (char) 0x7E,		/* ~ */
    (char) 0x5B,		/* [ */
    (char) 0x5D,		/* ] */
    (char) 0x60,		/* ` */
    (char) 0x27,		/* ' */
    (char) 0x20			/* space */
};

/*
 *  rfc1738_hex_value() - Value (0-15) of one hexadecimal digit, or -1 when
 *  the character is not a hexadecimal digit (this includes '\0').
 */
static int rfc1738_hex_value(int c)
{
    if (c >= '0' && c <= '9')
	return c - '0';
    if (c >= 'a' && c <= 'f')
	return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
	return c - 'A' + 10;
    return -1;
}

/*
 *  rfc1738_escape - Returns a static buffer containing the RFC 1738
 *  compliant, escaped version of the given url.
 *
 *  The buffer is overwritten by every call.  Each unsafe character, control
 *  character (0x00-0x1F), 0x7F and non-US-ASCII byte (0x80-0xFF) becomes a
 *  "%xx" triplet with lowercase hex digits.  Output that would not fit in
 *  BIG_BUFSIZ - 1 characters is truncated on a whole-character boundary
 *  (a triplet is never split) instead of overrunning the buffer.
 */
char *rfc1738_escape(char *url)
{
    static const char hexdigits[] = "0123456789abcdef";
    static char buf[BIG_BUFSIZ];
    char *const limit = buf + sizeof(buf) - 1;	/* reserved for the NUL */
    char *q = buf;
    const char *p;

    for (p = url; *p != '\0'; p++) {
	/* Work on the unsigned value so the range tests do not depend on
	 * whether plain char is signed on this platform. */
	unsigned char c = (unsigned char) *p;
	int do_escape = 0;
	size_t i;

	if (c <= 0x1F || c >= 0x7F) {
	    /* control characters, DEL and every non-US-ASCII byte */
	    do_escape = 1;
	} else {
	    /* RFC 1738 defines these chars as unsafe */
	    for (i = 0; i < sizeof(rfc1738_unsafe_chars); i++) {
		if (*p == rfc1738_unsafe_chars[i]) {
		    do_escape = 1;
		    break;
		}
	    }
	}

	/* Do the triplet encoding, or just copy the char */
	if (do_escape) {
	    if (limit - q < 3)
		break;		/* no room for a complete triplet */
	    *q++ = '%';
	    *q++ = hexdigits[c >> 4];
	    *q++ = hexdigits[c & 0x0F];
	} else {
	    if (limit - q < 1)
		break;		/* buffer full */
	    *q++ = *p;
	}
    }
    *q = '\0';
    return (buf);
}

/*
 *  rfc1738_unescape() - Converts escaped characters (%xy numbers) in
 *  the given string, in place.  %% is a %.  %ab is the 8-bit hexadecimal
 *  number "ab".
 *
 *  A '%' that is not followed by '%' or by two hexadecimal digits
 *  (including a '%' at, or one character before, the end of the string) is
 *  copied through unchanged, so the terminating NUL is never read past.
 */
void rfc1738_unescape(char *s)
{
    int i, j;			/* i is write, j is read */
    int hi, lo;

    for (i = j = 0; s[j] != '\0'; i++, j++) {
	s[i] = s[j];
	if (s[j] != '%')
	    continue;
	if (s[j + 1] == '%') {
	    j++;		/* "%%" collapses to the '%' already copied */
	    continue;
	}
	/* s[j + 2] is only examined once s[j + 1] is known to be a hex
	 * digit, i.e. not the terminator. */
	hi = rfc1738_hex_value((unsigned char) s[j + 1]);
	if (hi < 0)
	    continue;
	lo = rfc1738_hex_value((unsigned char) s[j + 2]);
	if (lo < 0)
	    continue;
	s[i] = (char) ((hi << 4) | lo);
	j += 2;
    }
    s[i] = '\0';
}
+ * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. 
/*
 *  failure_notify - optional hook called with a formatted message just
 *  before the allocation wrappers in this file exit on failure.  When it is
 *  NULL the wrappers report through perror()/stderr instead.
 */
void (*failure_notify) () = NULL;

/* Scratch buffer for the messages handed to failure_notify. */
static char msg[128];

/*
 *  xmalloc() - same as malloc(3).  Used for portability.
 *  Never returns NULL; fatal on error.
 *
 *  A request for zero bytes is rounded up to one byte.  On failure the
 *  error is reported through failure_notify when it is set, otherwise
 *  through perror(), and the process exits with status 1.
 */
void *xmalloc(size_t sz)
{
    void *p;

    if (sz == 0)
	sz = 1;
    p = malloc(sz);
    if (p == NULL) {
	if (failure_notify) {
	    /* Print the size as unsigned long: squeezing a size_t through
	     * (int) misreports requests of 2GB and more. */
	    sprintf(msg, "xmalloc: Unable to allocate %lu bytes!\n",
		(unsigned long) sz);
	    (*failure_notify) (msg);
	} else {
	    perror("malloc");
	}
	exit(1);
    }
    return (p);
}

/*
 *  xfree() - same as free(3).  Will not call free(3) if s == NULL.
 */
void xfree(void *s)
{
    if (s != NULL) {
	free(s);
    }
}
+ */ +void *xrealloc(s, sz) + void *s; + size_t sz; +{ + static void *p; + + if (sz < 1) + sz = 1; + if ((p = realloc(s, sz)) == NULL) { + if (failure_notify) { + sprintf(msg, "xrealloc: Unable to reallocate %d bytes!\n", + (int) sz); + (*failure_notify) (msg); + } else { + perror("realloc"); + } + exit(1); + } + return (p); +} + +/* + * xcalloc() - same as calloc(3). Used for portability. + * Never returns NULL; fatal on error. + */ +void *xcalloc(n, sz) + int n; + size_t sz; +{ + static void *p; + + if (n < 1) + n = 1; + if (sz < 1) + sz = 1; + if ((p = calloc(n, sz)) == NULL) { + if (failure_notify) { + sprintf(msg, "xcalloc: Unable to allocate %d blocks of %d bytes!\n", + (int) n, (int) sz); + (*failure_notify) (msg); + } else { + perror("xcalloc"); + } + exit(1); + } + return (p); +} + +/* + * xstrdup() - same as strdup(3). Used for portability. + * Never returns NULL; fatal on error. + */ +char *xstrdup(s) + char *s; +{ + static char *p = NULL; + int sz; + + if (s == NULL) { + if (failure_notify) { + (*failure_notify) ("xstrdup: tried to dup a NULL pointer!\n"); + } else { + fprintf(stderr, "xstrdup: tried to dup a NULL pointer!\n"); + } + exit(1); + } + sz = strlen(s); + p = (char *) xmalloc((size_t) sz + 1); + memcpy(p, s, sz); /* copy string */ + p[sz] = '\0'; /* terminate string */ + return (p); +} + +#ifndef HAVE_STRDUP +/* define for systems that don't have strdup */ +char *strdup(s) + char *s; +{ + return (xstrdup(s)); +} +#endif + +/* + * xstrerror() - return sys_errlist[errno]; + */ +char *xstrerror() +{ + extern int sys_nerr; +#if !defined(__FreeBSD__) && !defined(__NetBSD__) + extern char *sys_errlist[]; +#endif + int n; + + n = errno; + if (n < 0 || n >= sys_nerr) + return ("Unknown"); + return (sys_errlist[n]); +} + +#ifndef HAVE_STRERROR +char *strerror(n) + int n; +{ + return (xstrerror(n)); +} +#endif diff --git a/scripts/AnnounceCache.pl b/scripts/AnnounceCache.pl new file mode 100755 index 00000000000..9df357bb32a --- /dev/null +++ 
b/scripts/AnnounceCache.pl @@ -0,0 +1,33 @@ +#!/usr/local/bin/perl + +$|=1; + +$host = (shift || 'sd.cache.nlanr.net'); +$port = (shift || '3131'); + +require "$ENV{'HARVEST_HOME'}/lib/socket.ph"; + +$sockaddr = 'S n a4 x8'; +($name, $aliases, $proto) = getprotobyname("udp"); +($fqdn, $aliases, $type, $len, $themaddr) = gethostbyname($host); +$thissock = pack($sockaddr, &AF_INET, 0, "\0\0\0\0"); +$them = pack($sockaddr, &AF_INET, $port, $themaddr); + +chop($me=`uname -a|cut -f2 -d' '`); +$myip=(gethostbyname($me))[4]; + +die "socket: $!\n" unless + socket (SOCK, &AF_INET, &SOCK_DGRAM, $proto); + +while (<>) { + chop; + $request_template = 'CCnx4x8x4a4a' . length; + $request = pack($request_template, 1, 1, 24 + length, $myip, $_); + die "send: $!\n" unless + send(SOCK, $request, 0, $them); + die "recv: $!\n" unless + recv(SOCK, $reply, 1024, 0); + ($type,$ver,$len,$payload) = unpack('CCnx4x8x4A', $reply); + print $CODES[$type] . " $_\n"; +} + diff --git a/scripts/Makefile.in b/scripts/Makefile.in new file mode 100644 index 00000000000..cb089a39592 --- /dev/null +++ b/scripts/Makefile.in @@ -0,0 +1,29 @@ +# +# This file is a Makefile for compiling and installing Cache Manager. +# Cache Manager is a manager program for Internet Object Cache. 
+# + +prefix = @prefix@ +INSTALL_BINDIR = $(prefix)/bin +INSTALL_LIBDIR = $(prefix)/lib +INSTALL_MANDIR = $(prefix)/man + +CC = @CC@ +INSTALL = @INSTALL@ +INSTALL_BIN = @INSTALL_PROGRAM@ +INSTALL_FILE = @INSTALL_DATA@ +RANLIB = @RANLIB@ +LN_S = @LN_S@ + +all: + +install: + $(INSTALL_BIN) RunCache $(INSTALL_BINDIR) + $(INSTALL_BIN) RunAccel $(INSTALL_BINDIR) + $(INSTALL_BIN) CachedMosaic $(INSTALL_BINDIR) + $(INSTALL_BIN) CachedLynx $(INSTALL_BINDIR) + +clean: + +realclean: + -rm -f RunCache RunAccel diff --git a/scripts/RunAccel.in b/scripts/RunAccel.in new file mode 100644 index 00000000000..5744f8df2e9 --- /dev/null +++ b/scripts/RunAccel.in @@ -0,0 +1,131 @@ +#!/bin/csh -f + +# Set this to be the port of your HTTPD accelerator +set HTTPD_ACCEL_PORT = 80 +# +# RunAccel - runs cached as an HTTPD accelerator. whenever it exits, +# it restarts. Must compile cached *without* the -DDAEMON flag. +# +# Usage: RunAccel [cachedir] +# +# Darren Hardy, University of Colorado - Boulder, August 1994 +# +# $Id: RunAccel.in,v 1.1 1996/02/22 06:23:57 wessels Exp $ +# +####################################################################### +# +# Copyright (c) 1994, 1995. All rights reserved. +# +# The Harvest software was developed by the Internet Research Task +# Force Research Group on Resource Discovery (IRTF-RD): +# +# Mic Bowman of Transarc Corporation. +# Peter Danzig of the University of Southern California. +# Darren R. Hardy of the University of Colorado at Boulder. +# Udi Manber of the University of Arizona. +# Michael F. Schwartz of the University of Colorado at Boulder. +# Duane Wessels of the University of Colorado at Boulder. +# +# This copyright notice applies to software in the Harvest +# ``src/'' directory only. Users should consult the individual +# copyright notices in the ``components/'' subdirectories for +# copyright information about other software bundled with the +# Harvest source code distribution. 
+# +# TERMS OF USE +# +# The Harvest software may be used and re-distributed without +# charge, provided that the software origin and research team are +# cited in any use of the system. Most commonly this is +# accomplished by including a link to the Harvest Home Page +# (http://harvest.cs.colorado.edu/) from the query page of any +# Broker you deploy, as well as in the query result pages. These +# links are generated automatically by the standard Broker +# software distribution. +# +# The Harvest software is provided ``as is'', without express or +# implied warranty, and with no support nor obligation to assist +# in its use, correction, modification or enhancement. We assume +# no liability with respect to the infringement of copyrights, +# trade secrets, or any patents, and are not responsible for +# consequential damages. Proper use of the Harvest software is +# entirely the responsibility of the user. +# +# DERIVATIVE WORKS +# +# Users may make derivative works from the Harvest software, subject +# to the following constraints: +# +# - You must include the above copyright notice and these +# accompanying paragraphs in all forms of derivative works, +# and any documentation and other materials related to such +# distribution and use acknowledge that the software was +# developed at the above institutions. +# +# - You must notify IRTF-RD regarding your distribution of +# the derivative work. +# +# - You must clearly notify users that your are distributing +# a modified version and not the original Harvest software. +# +# - Any derivative product is also subject to these copyright +# and use restrictions. +# +# Note that the Harvest software is NOT in the public domain. We +# retain copyright, as specified above. +# +# HISTORY OF FREE SOFTWARE STATUS +# +# Originally we required sites to license the software in cases +# where they were going to build commercial products/services +# around Harvest. In June 1995 we changed this policy. 
# Locate the Harvest installation unless the caller already exported it.
if ($?HARVEST_HOME == 0) then
	setenv HARVEST_HOME @INSTALL_TOPDIR@
endif

limit coredumpsize unlimited >&/dev/null

set path = ($HARVEST_HOME/bin $HARVEST_HOME/lib $path)

# The cache directory is the single command line argument or, failing
# that, the first cache_dir entry in /etc/cached.conf.  Fall back to
# /tmp/cache only when the config file yields nothing.  (Testing
# "$?status" here would ask whether the variable 'status' exists, which is
# always true, and so would always discard the configured directory.)
if ($#argv == 1) then
	set cachedir = "$1"
else
	set cachedir = `grep ^cache_dir /etc/cached.conf | head -1 | awk '{print $2}'`
	if ("x$cachedir" == "x") then
		set cachedir = "/tmp/cache"
	endif
endif

@ failcount = 0

# Restart cached every time it exits.  A run that lasts less than five
# seconds counts as a failure; more than five such failures in a row
# stop the loop.
while (1)
	mkdir $cachedir >& /dev/null
	cd $cachedir

	set start=`date '+%d%H%M%S'`

	cached -z -a $HTTPD_ACCEL_PORT >>& cached.out

	set stop=`date '+%d%H%M%S'`
	@ t = $stop - $start
	if ( 0 <= $t && $t < 5 ) then
		@ failcount = $failcount + 1
	else
		@ failcount = 0
	endif
	if ( $failcount > 5 ) then
		echo "RunAccel: EXITING DUE TO REPEATED, FREQUENT FAILURES"
		exit 1
	endif

	sleep 10
end
+# +# The Harvest software was developed by the Internet Research Task +# Force Research Group on Resource Discovery (IRTF-RD): +# +# Mic Bowman of Transarc Corporation. +# Peter Danzig of the University of Southern California. +# Darren R. Hardy of the University of Colorado at Boulder. +# Udi Manber of the University of Arizona. +# Michael F. Schwartz of the University of Colorado at Boulder. +# Duane Wessels of the University of Colorado at Boulder. +# +# This copyright notice applies to software in the Harvest +# ``src/'' directory only. Users should consult the individual +# copyright notices in the ``components/'' subdirectories for +# copyright information about other software bundled with the +# Harvest source code distribution. +# +# TERMS OF USE +# +# The Harvest software may be used and re-distributed without +# charge, provided that the software origin and research team are +# cited in any use of the system. Most commonly this is +# accomplished by including a link to the Harvest Home Page +# (http://harvest.cs.colorado.edu/) from the query page of any +# Broker you deploy, as well as in the query result pages. These +# links are generated automatically by the standard Broker +# software distribution. +# +# The Harvest software is provided ``as is'', without express or +# implied warranty, and with no support nor obligation to assist +# in its use, correction, modification or enhancement. We assume +# no liability with respect to the infringement of copyrights, +# trade secrets, or any patents, and are not responsible for +# consequential damages. Proper use of the Harvest software is +# entirely the responsibility of the user. 
+# +# DERIVATIVE WORKS +# +# Users may make derivative works from the Harvest software, subject +# to the following constraints: +# +# - You must include the above copyright notice and these +# accompanying paragraphs in all forms of derivative works, +# and any documentation and other materials related to such +# distribution and use acknowledge that the software was +# developed at the above institutions. +# +# - You must notify IRTF-RD regarding your distribution of +# the derivative work. +# +# - You must clearly notify users that your are distributing +# a modified version and not the original Harvest software. +# +# - Any derivative product is also subject to these copyright +# and use restrictions. +# +# Note that the Harvest software is NOT in the public domain. We +# retain copyright, as specified above. +# +# HISTORY OF FREE SOFTWARE STATUS +# +# Originally we required sites to license the software in cases +# where they were going to build commercial products/services +# around Harvest. In June 1995 we changed this policy. We now +# allow people to use the core Harvest software (the code found in +# the Harvest ``src/'' directory) for free. We made this change +# in the interest of encouraging the widest possible deployment of +# the technology. The Harvest software is really a reference +# implementation of a set of protocols and formats, some of which +# we intend to standardize. We encourage commercial +# re-implementations of code complying to this set of standards. 
+# +# +if ($?HARVEST_HOME == 0) then + setenv HARVEST_HOME @INSTALL_TOPDIR@ +endif + +limit coredumpsize unlimited >&/dev/null + +set path = ($HARVEST_HOME/bin $HARVEST_HOME/lib $path) +set conf = $HARVEST_HOME/lib/cached.conf + +if ($#argv == 1) then + set cachedir = "$1" +else + set cachedir = `grep ^cache_dir $conf | head -1 | awk '{print $2}'` + if (x$cachedir == x) then + set cachedir = "/tmp/cache" + endif +endif + +echo "" +echo " HARVEST_HOME = $HARVEST_HOME" +echo "Cache Configuration File = $conf" +echo " Cache Directory = $cachedir" + +# Try to find 'dnsserver' and 'ftpget' on the PATH, or as +# absolute pathnames +# +set dnsserver = `grep ^cache_dns_program $conf | awk '{print $2}'` +if (x$dnsserver == x) then + set dnsserver = 'dnsserver' +endif + +set ftpget = `grep ^cache_ftp_program $conf | awk '{print $2}'` +if (x$ftpget == x) then + set ftpget = 'ftpget' +endif + +foreach p ( cached $dnsserver $ftpget ) + echo -n " External Program: " + if ! { $p -v } then + echo "" + echo "" + echo "Unable to locate required program '$p' on the PATH." + echo "Please edit either $conf" + echo "or $0 to remedy this situation." + exit 1 + endif +end + +echo "" + +@ failcount = 0 +while (1) + if ! ( -d $cachedir ) then + echo "mkdir $cachedir"; mkdir $cachedir + endif + echo "cd $cachedir"; cd $cachedir + +# NOTE: cached -s option disables logging important messages to syslog. 
+ + # Send a UDP announcement if enabled in the config file + # + send-announce $conf >& /dev/null + + echo "Running: cached -s -f $conf >>& cached.out" + set start=`date '+%d%H%M%S'` + cached -s -f $conf >>& cached.out + set stop=`date '+%d%H%M%S'` + @ t = $stop - $start + if ( 0 <= $t && $t < 5 ) then + @ failcount = $failcount + 1 + else + @ failcount = 0 + endif + if ( $failcount > 5 ) then + echo "RunCache: EXITING DUE TO REPEATED, FREQUENT FAILURES" + exit 1 + endif + + sleep 10 +end diff --git a/scripts/access-log-matrix.pl b/scripts/access-log-matrix.pl new file mode 100755 index 00000000000..a38229687fe --- /dev/null +++ b/scripts/access-log-matrix.pl @@ -0,0 +1,112 @@ +#!/usr/local/bin/perl + +# access-log-matrix.pl +# +# Duane Wessels, Dec 1995 +# +# Stdin is a Harvest access log (in the old, non-common logfile format!). +# The output is a matrix of hostnames and log entry types, plus totals. + +while (<>) { + chop; + @F = split; + $when = $F[0]; + $first = $when unless ($first); + $last = $when; + + $what = pop @F; + $size = pop @F; + $host = pop @F; + + $HOSTS{$host}++; + $HOSTS{'TOTAL'}++; + + if ($what eq 'TCP_DONE') { + $TCP_DONE{$host}++; + $TCP_DONE{'TOTAL'}++; + } elsif ($what eq 'TCP_HIT') { + $TCP_HIT{$host}++; + $TCP_HIT{'TOTAL'}++; + } elsif ($what eq 'TCP_MISS') { + $TCP_MISS{$host}++; + $TCP_MISS{'TOTAL'}++; + } elsif ($what eq 'TCP_MISS_TTL') { + $TCP_MISS_TTL{$host}++; + $TCP_MISS_TTL{'TOTAL'}++; + } elsif ($what eq 'UDP_HIT') { + $UDP_HIT{$host}++; + $UDP_HIT{'TOTAL'}++; + } elsif ($what eq 'UDP_MISS') { + $UDP_MISS{$host}++; + $UDP_MISS{'TOTAL'}++; + } else { + $OTHER{$host}++; + $OTHER{'TOTAL'}++; + } +} + +print ' HOSTNAME: '. 
`hostname`; +($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdat) = localtime($first); +printf "FIRST LOG ENTRY: %s/%s/%s %.2d:%.2d:%.2d\n", $year,$mon+1,$mday, $hour,$min,$sec; +($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdat) = localtime($last); +printf " LAST LOG ENTRY: %s/%s/%s %.2d:%.2d:%.2d\n", $year,$mon+1,$mday, $hour,$min,$sec; +print "\n"; + +printf ("%25.25s %5s %5s %5s %5s %5s %5s %5s %5s\n", + '', + 'TCP', 'TCP', 'TCP', 'TCP', + 'UDP', 'UDP', '', + ''); +printf ("%25.25s %5s %5s %5s %5s %5s %5s %5s %5s\n", + 'HOST', + 'HIT', 'MISS', 'TTL', 'DONE', + 'HIT', 'MISS', 'OTHER', + 'TOTAL'); + +printf ("%25.25s %5s %5s %5s %5s %5s %5s %5s %5s\n", + '-'x25, + '-'x5, '-'x5, '-'x5, '-'x5, '-'x5, '-'x5, '-'x5, '-'x5); + +foreach $h (sort totalcmp keys %HOSTS) { + next if ($h eq 'TOTAL'); + ($a1,$a2,$a3,$a4) = split('\.', $h); + ($fqdn, @F) = gethostbyaddr(pack('C4',$a1,$a2,$a3,$a4),2); + $fqdn = $h unless ($fqdn ne ''); + + printf "%25.25s %5d %5d %5d %5d %5d %5d %5d %5d\n", + $fqdn, + $TCP_HIT{$h}, + $TCP_MISS{$h}, + $TCP_MISS_TTL{$h}, + $TCP_DONE{$h}, + $UDP_HIT{$h}, + $UDP_MISS{$h}, + $OTHER{$h}, + $HOSTS{$h}; + +} + + +printf ("%25.25s %5s %5s %5s %5s %5s %5s %5s %5s\n", + '-'x25, + '-'x5, '-'x5, '-'x5, '-'x5, '-'x5, '-'x5, '-'x5, '-'x5); +printf "%25.25s %5d %5d %5d %5d %5d %5d %5d %5d\n", + 'TOTAL', + $TCP_HIT{'TOTAL'}, + $TCP_MISS{'TOTAL'}, + $TCP_MISS_TTL{'TOTAL'}, + $TCP_DONE{'TOTAL'}, + $UDP_HIT{'TOTAL'}, + $UDP_MISS{'TOTAL'}, + $OTHER{'TOTAL'}, + $HOSTS{'TOTAL'}; + +exit 0; + +sub hostcmp { + $a cmp $b +} + +sub totalcmp { + $HOSTS{$b} <=> $HOSTS{$a} +} diff --git a/scripts/cache-compare.pl b/scripts/cache-compare.pl new file mode 100755 index 00000000000..cbe54f2e27a --- /dev/null +++ b/scripts/cache-compare.pl @@ -0,0 +1,154 @@ +#!/usr/local/bin/perl + +# cache-compare.pl +# +# Duane Wessels, Dec 1995 +# +# A simple perl script to compare how long it takes to fetch an object +# from a number of different caches. +# +# stdin is a list of URLs. 
Set the @getfrom array to a list of caches
+# to fetch each URL from.  Include 'SOURCE' in @getfrom to fetch from
+# the source host also.  For each URL, print the byte count, elapsed
+# time and average data rate.  At the end print out some averages.
+#
+# NOTE: uses the Perl function syscall() to implement gettimeofday(2).
+# Assumes that gettimeofday is syscall #116 on the system
+# (see /usr/include/sys/syscall.h).
+#
+# BUGS:
+# Should probably cache the gethostbyname() calls.
+
+@getfrom = ('SOURCE', 'localhost:3128');
+
+require 'sys/socket.ph';
+$gettimeofday = 116;  # cheating, should use require syscall.ph
+
+while (<>) {
+    chop ($url = $_);
+    print "$url:\n";
+
+    foreach $k (@getfrom) {
+        printf "%30.30s:\t", $k;
+        if ($k eq 'SOURCE') {
+            ($b_sec,$b_usec) = &gettimeofday;
+            $n = &get_from_source($url);
+            ($e_sec,$e_usec) = &gettimeofday;
+        } else {
+            ($host,$port) = split (':', $k);
+            ($b_sec,$b_usec) = &gettimeofday;
+            $n = &get_from_cache($host,$port,$url);
+            ($e_sec,$e_usec) = &gettimeofday;
+        }
+        next unless ($n > 0);  # nothing fetched: leave this source out of the averages
+        $d = ($e_sec - $b_sec) * 1000000 + ($e_usec - $b_usec);  # elapsed microseconds
+        $d /= 1000000;
+        $r = $d > 0 ? $n / $d : 0;  # a zero-length interval must not abort the whole run
+        printf "%8.1f b/s (%7d bytes, %7.3f sec)\n",
+            $r, $n, $d;
+        $bps_sum{$k} += $r;
+        $bps_n{$k}++;
+        $bytes_sum{$k} += $n;
+        $sec_sum{$k} += $d;
+    }
+}
+
+print "AVERAGE b/s rates:\n";
+ foreach $k (@getfrom) {  # the "|| 1" divisors report 0.0 for a source that never answered
+    printf "%30.30s:\t%8.1f b/s (Alt: %8.1f b/s)\n",
+        $k,
+        $bps_sum{$k} / ($bps_n{$k} || 1),
+        $bytes_sum{$k} / ($sec_sum{$k} || 1);
+}
+
+exit 0;
+
+sub get_from_source {
+    local($url) = @_;
+    local($bytes) = 0;
+    unless ($url =~ m!([a-z]+)://([^/]+)(.*)$!) 
{
+        printf "get_from_source: bad URL\n";
+        return 0;
+    }
+    $proto = $1;
+    $host = $2;
+    $url_path = $3 || '/';  # "http://host" carries no path; ask for the root instead of sending "GET  HTTP/1.0"
+    unless ($proto eq 'http') {
+        printf "get_from_source: I only do HTTP\n";
+        return 0;
+    }
+    $port = 80;
+    if ($host =~ /([^:]+):(\d+)/) {
+        $host = $1;
+        $port = $2;
+    }
+    return 0 unless ($SOCK = &client_socket($host,$port));
+    print $SOCK "GET $url_path HTTP/1.0\r\nAccept: */*\r\n\r\n";
+    $bytes += $n while (($n = read(SOCK,$_,4096)) > 0);  # count the reply, discard the data
+    close $SOCK;
+    return $bytes;
+}
+
+sub get_from_cache {  # fetch $url through the proxy at $host:$port; returns bytes received (0 on failure)
+    local($host,$port,$url) = @_;
+    local($bytes) = 0;
+    return 0 unless ($SOCK = &client_socket($host,$port));
+    print $SOCK "GET $url HTTP/1.0\r\nAccept: */*\r\n\r\n";
+    $bytes += $n while (($n = read(SOCK,$_,4096)) > 0);
+    close $SOCK;
+    return $bytes;
+}
+
+sub client_socket {  # connect the global SOCK handle to $host:$port; returns the handle name, or () on failure
+    local ($host, $port) = @_;
+    local ($sockaddr) = 'S n a4 x8';
+    local ($name, $aliases, $proto) = getprotobyname('tcp');
+    local ($connected) = 0;
+
+    # Lookup addresses for remote hostname
+    #
+    local($w,$x,$y,$z,@thataddrs) = gethostbyname($host);
+    unless (@thataddrs) {
+        print "Unknown Host: $host\n";
+        return ();
+    }
+
+    # bind local socket to INADDR_ANY
+    #
+    local ($thissock) = pack($sockaddr, &AF_INET, 0, "\0\0\0\0");
+    unless (socket (SOCK, &AF_INET, &SOCK_STREAM, $proto)) {
+        print "socket: $!\n";
+        return ();
+    }
+    unless (bind (SOCK, $thissock)) {
+        print "bind: $!\n";
+        return ();
+    }
+
+    # Try all addresses
+    #
+    foreach $thataddr (@thataddrs) {
+        local ($that) = pack($sockaddr, &AF_INET, $port, $thataddr);
+        if (connect (SOCK, $that)) {
+            $connected = 1;
+            last;
+        }
+    }
+    unless ($connected) {
+        print "$host:$port: $!\n";
+        return ();
+    }
+
+    # Set socket to flush-after-write and return it
+    #
+    select (SOCK); $| = 1;
+    select (STDOUT);
+    return (SOCK);
+}
+
+sub gettimeofday {  # returns (seconds, microseconds) via the raw syscall numbered $gettimeofday
+    $tvp="\0\0\0\0\0\0\0\0";  # 8-byte result buffer; assumes struct timeval is two 32-bit longs - TODO confirm per platform
+    syscall($gettimeofday, $tvp, $tz);
+    return unpack('ll', $tvp);
+}
+
diff --git a/scripts/check_cache.pl b/scripts/check_cache.pl
new file mode 100755
index 
00000000000..1206234aba0
--- /dev/null
+++ b/scripts/check_cache.pl
@@ -0,0 +1,55 @@
+#!/usr/local/bin/perl
+
+# check_cache.pl - martin hamilton 
+#
+# Check the Harvest cache directory for stale objects - i.e. those
+# which exist on disk but aren't listed in cached's log file.
+# Version 1 did all this in memory, but the log file can be a
+# little on the large side... 8-(
+
+# $Id: check_cache.pl,v 1.1 1996/02/22 06:23:57 wessels Exp $
+
+require "getopts.pl";
+&Getopts("c:dl:rt:v");
+
+$cachedir = $opt_c || "/usr/local/harvest/cache";
+# -d -> turn on debugging output
+$logfile = $opt_l || "$cachedir/log";
+# -r -> actually remove stale files
+$tmpdir = $opt_t || $ENV{TMPDIR} || "/var/tmp";
+# -v -> list stale files
+
+chdir($tmpdir) || die "Can't chdir to $tmpdir: $!";  # the scratch files below use relative names
+
+# snarf filenames from Harvest log & sort em
+system("cut -f2 -d' ' $logfile >pl$$");
+system("sort -T $tmpdir pl$$ >spl$$; rm pl$$");
+
+# get list of files in cache & sort em (-type f must precede -print, or every entry is listed)
+system("find $cachedir -type f -print >cd$$");
+system("sort -T $tmpdir cd$$ >scd$$; rm cd$$");
+
+# get list of objects in one file but not the other
+system("comm -13 spl$$ scd$$ >comm$$; rm spl$$ scd$$");
+
+# iterate through it
+open(IN, "comm$$") || die "Can't open temporary file $tmpdir/comm$$: $!";
+while(<IN>) {
+    chop;
+    print STDERR ">> inspecting $_\n" if $opt_d;
+    next if -d "$_";  # don't want directories
+    next if /(log|cached.out)/;  # don't want to zap these!
+
+    print "$_\n" if $opt_v;  # print filename if asked
+
+    # skip if cached file appeared since script started running
+    if (-M $_ < 0) {
+        print STDERR "skipping $_\n" if $opt_d;
+        next;
+    }
+    unlink($_) if $opt_r;  # only remove if asked!
+}
+close(IN);
+
+unlink("comm$$");
+
diff --git a/scripts/flag_truncs.pl b/scripts/flag_truncs.pl
new file mode 100755
index 00000000000..024b96a8aa0
--- /dev/null
+++ b/scripts/flag_truncs.pl
@@ -0,0 +1,68 @@
+#!/usr/local/bin/perl
+
+# flag_truncs.pl - martin hamilton 
+#
+# Check the CERN/Harvest/Netscape cache for truncated objects
+# - i.e. 
those for which there is a "Content-length:" HTTP header,
+# and this does not match the size of the cached object
+
+# $Id: flag_truncs.pl,v 1.1 1996/02/22 06:23:57 wessels Exp $
+
+require "getopts.pl";
+require "stat.pl";
+&Getopts("cd");
+# -c -> just count the number of objects with a Content-length header
+# -d -> turn on debugging output
+
+# pass filenames on command line or via STDIN
+@things = $#ARGV >= 0 ? @ARGV : <STDIN>;
+
+$total_objects = 0, $content_length = 0;
+
+# iterate through them
+foreach $thing (@things) {
+    $thing =~ s/\n$//;  # strip only a newline: chop would eat the last character of names given in @ARGV
+
+    $opt_d && (print STDERR ">> inspecting: $thing\n");
+    next if -d "$thing";  # don't want directories
+
+    $size = (stat($thing))[$ST_SIZE]||next;  # skips unreadable AND zero-length files
+    $opt_d && (print STDERR ">> stat: $size\n");
+    print "$thing\n", next if ($size == 0);  # NOTE(review): cannot fire, size 0 was already skipped above
+
+    $total_objects++;
+
+    $count = 0, $expected = 0;
+    open(IN, "$thing") || die "Can't open cached object $thing: $!";
+    while(<IN>) {
+        $count += length($_);  # running size of the header section, line terminators included
+        chop;
+        print STDERR ">> inspecting $_\n" if $opt_d;
+        last if /^(\s+|)$/;  # drop out after the end of the HTTP headers
+
+        # NOTE(review): this tests a header line, not a file name - looks inherited from check_cache.pl; confirm intent
+        if (-M $_ < 0) {
+            print STDERR ">> skipping $_\n" if $opt_d;
+            next;
+        }
+
+        if (/^Content-length:\s+(\d+)/i) {
+            $expected = $1;
+            $content_length++;
+        }
+    }
+    close(IN);
+
+    next if $opt_c;
+    next if $expected == 0;  # no Content-length header
+
+    # looked at the headers now; what remains of the file is the body
+    $difference = $size - $count;
+    $opt_d && print STDERR ">> real: ", $difference, ", expected: $expected\n";
+    if ($difference != $expected) {
+        print "$thing (expected: $expected, got: $difference)\n";
+    }
+}
+
+print "$content_length out of $total_objects had Content-length: header\n"
+    if $opt_c;
diff --git a/scripts/icpserver.pl b/scripts/icpserver.pl
new file mode 100755
index 00000000000..a786d8fc2c0
--- /dev/null
+++ b/scripts/icpserver.pl
@@ -0,0 +1,112 @@
+#!/usr/local/bin/perl
+
+# parse and answer ICP type 1 requests via unicast/multicast UDP
+# cf. 
+#
+# returns ICP response code, e.g. 2 == HIT, 3 == MISS, 4 == ERROR
+# by looking at CERN or Netscape style cache directory $cachedir
+#
+# martin hamilton 
+# Id: icpserver,v 1.11 1995/11/24 16:20:13 martin Exp martin
+
+# usage: icpserver [-c cachedir] [-n] [-p port] [multicast_group]
+#
+# -c -> set cache directory
+# -n -> use Netscape cache format (default is CERN)
+# -p -> port number to listen on (default 3130)
+# -v -> verbose - writes activity log to stdout
+#
+# group -> multicast group to listen on
+
+require "getopts.pl";
+&Getopts("c:np:v");
+
+@CODES=("xxx", "QUERY", "HIT", "MISS", "ERROR");
+
+$CACHEDIR=$opt_c||"/usr/local/www/cache";
+$PORT=$opt_p||3130;
+$SERVER=$ARGV[0]||"0.0.0.0";
+$SERVERIP= ($SERVER =~ m!^\d+\.\d+\.\d+\.\d+$!) ?
+    pack("C4", split(/\./, $SERVER)) : (gethostbyname($SERVER))[4];  # lazy!
+
+$SOCKADDR = 'S n a4 x8';
+
+socket(S, 2, 2, 17) || socket(S, 2, 1, 17) || die "Couldn't get socket: $!";  # the type code for a datagram socket differs between systems, so try both
+$us1 = pack($SOCKADDR, 2, $PORT, $SERVERIP);
+$us2 = pack($SOCKADDR, 2, $PORT, pack("C4", 0,0,0,0));
+bind(S, $us1) || bind(S, $us2) || die "Couldn't bind socket: $!";  # fall back to the wildcard address
+#bind(S, $us2) || die "Couldn't bind socket: $!";
+
+if ($SERVER ne "0.0.0.0") {  # i.e. multicast
+    $whoami = (`uname -a`)[0];
+    $IP_ADD_MEMBERSHIP=5;  # default; the option number is overridden per OS below
+    $whoami =~ /SunOS [^\s]+ 5/ && ($IP_ADD_MEMBERSHIP=19);
+    $whoami =~ /IRIX [^\s]+ 5/ && ($IP_ADD_MEMBERSHIP=23);
+    $whoami =~ /OSF1/ && ($IP_ADD_MEMBERSHIP=12);
+    # any more funnies ?
+
+    setsockopt(S, 0, $IP_ADD_MEMBERSHIP, $SERVERIP."\0\0\0\0")
+        || die "Couldn't join multicast group $SERVER: $!";
+}
+
+# Common header for ICP datagrams ... (size in bytes - total 20)
+#   opcode         1  Numeric code indicating type of message
+#   version        1  Version of the protocol being used
+#   length         2  Total length of packet
+#   reqnum         4  Request number assigned by client
+#   authenticator  8  Authentication information (future)
+#   senderid       4  Identification (host id) of sender
+
+# Type 1 query ... 
+#   requester      4  Host id of original requester URL
+#   url     variable  URL whose status is to be checked
+
+# Type 2 and 3 responses just contain URL, don't return anything else
+
+# Might be fast enough to get away without forking or non-blocking I/O ... ?
+while(1) {
+    $theiraddr = recv(S, $ICP_request, 1024, 0);
+    ($junk, $junk, $sourceaddr, $junk) = unpack($SOCKADDR, $theiraddr);
+    @theirip = unpack('C4', $sourceaddr);
+
+    ($URL_length = length($ICP_request) - 24) >= 0 || next;  # ignore datagrams shorter than header + requester
+    $request_template = 'CCnx4x8x4a4a' . $URL_length;
+    ($type, $version, $length, $requester, $URL) =
+        unpack($request_template, $ICP_request);
+
+    1 while ($URL =~ s/\.\.\///g);  # repeat until stable: one pass turns "....//" back into "../"
+
+    # lookup object in cache
+    $hitmisserr = 3;
+    if ($type eq 1 && $URL =~ m!^([^:]+):/?/?([^/]+)/(.*)!) {
+        $scheme = $1; $hostport = $2; $path = $3;
+        if ($path eq "") { $path = "index.html"; }
+
+        if ($opt_n) {
+            ($host, $port) = split(/:/, $hostport);  # strip off port number
+            $port = ":$port" if ($port);
+            $match = "";
+            foreach (split(/\./, $hostport)) {  # NOTE(review): splits $hostport, so a port stays attached; $host may have been intended
+                $match = "$_/$match";  # little-endian -> big-endian conversion
+            }
+            $match = "$CACHEDIR/hosts/$match$scheme$port.urls";  # full path
+            if (-f "$match") {
+                #### optimize! ####
+                open(IN, "$match") && do {
+                    while(<IN>) { index($_, "$URL ") == 0 && ($hitmisserr = 2, last); }
+                    close(IN);
+                }
+            }
+        } else {
+            $hitmisserr = 2 if -f "$CACHEDIR/$scheme/$hostport/$path";
+        }
+    }
+
+    print "$type $hitmisserr ", join(".", @theirip), " $URL\n" if $opt_v;
+
+    $response_template = 'CCnx4x8x4A' . 
length($URL);
+    $ICP_response =
+        pack($response_template, $hitmisserr, 2, 20 + length($URL), $URL);
+    send(S, $ICP_response, 0, $theiraddr) || die "Couldn't send response: $!";
+}
+
diff --git a/scripts/tcp-banger.pl b/scripts/tcp-banger.pl
new file mode 100755
index 00000000000..3cbebf33f33
--- /dev/null
+++ b/scripts/tcp-banger.pl
@@ -0,0 +1,47 @@
+#!/usr/local/bin/perl
+
+# tcp-banger.pl
+#
+# Duane Wessels, Dec 1995
+#
+# Usage: tcp-banger.pl [host [port]] < url-list
+#
+# Sends a continuous stream of HTTP proxy requests to a cache.  Stdin is a
+# list of URLs to request.  Run N of these at the same time to simulate a
+# heavy client load.
+#
+# NOTE: does not simulate "real-world" events such as aborted requests
+# (connections) and other network problems.
+
+$|=1;
+
+$host=(shift || 'localhost') ;
+$port=(shift || '3128') ;
+
+require "$ENV{'HARVEST_HOME'}/lib/socket.ph";
+
+$sockaddr = 'S n a4 x8';
+($name, $aliases, $proto) = getprotobyname("tcp");
+(($fqdn, $aliases, $type, $len, $thataddr) = gethostbyname($host)) || die "$host: unknown host\n";
+$thissock = pack($sockaddr, &AF_INET, 0, "\0\0\0\0");
+$that = pack($sockaddr, &AF_INET, $port, $thataddr);
+
+while (<>) {  # one fresh connection per URL
+    chop ($url = $_);
+
+    die "socket: $!\n" unless
+        socket (SOCK, &AF_INET, &SOCK_STREAM, $proto);
+    die "bind: $!\n" unless
+        bind (SOCK, $thissock);
+    die "$host:$port: $!\n" unless
+        connect (SOCK, $that);
+    select (SOCK); $| = 1;  # unbuffer the socket, then restore STDOUT as default
+    select (STDOUT);
+
+    print SOCK "GET $url HTTP/1.0\r\n\r\n";
+    $_ = <SOCK>;  # first reply line; its second field is reported as the code
+    ($ver,$code,$junk) = split;
+    printf "%s %s\n", $code ? $code : 'FAIL', $url;
+    1 while (read(SOCK,$_,4096));  # drain and discard the rest of the reply
+    close SOCK;
+}
diff --git a/scripts/udp-banger.pl b/scripts/udp-banger.pl
new file mode 100755
index 00000000000..c141ed62161
--- /dev/null
+++ b/scripts/udp-banger.pl
@@ -0,0 +1,46 @@
+#!/usr/local/bin/perl
+
+# udp-banger.pl
+#
+# Duane Wessels, Dec 1995
+#
+# Usage: udp-banger.pl [host [port]] < url-list
+#
+# Sends a continuous stream of ICP queries to a cache.  Stdin is a list of
+# URLs to request. 
Run N of these at the same time to simulate a heavy +# neighbor cache load. + + +$|=1; + +$host=(shift || 'localhost') ; +$port=(shift || '3130') ; + +@CODES=('xxx', 'QUERY', 'HIT', 'MISS', 'ERROR'); + +require "$ENV{'HARVEST_HOME'}/lib/socket.ph"; + +$sockaddr = 'S n a4 x8'; +($name, $aliases, $proto) = getprotobyname("udp"); +($fqdn, $aliases, $type, $len, $themaddr) = gethostbyname($host); +$thissock = pack($sockaddr, &AF_INET, 0, "\0\0\0\0"); +$them = pack($sockaddr, &AF_INET, $port, $themaddr); + +chop($me=`uname -a|cut -f2 -d' '`); +$myip=(gethostbyname($me))[4]; + +die "socket: $!\n" unless + socket (SOCK, &AF_INET, &SOCK_DGRAM, $proto); + +while (<>) { + chop; + $request_template = 'CCnx4x8x4a4a' . length; + $request = pack($request_template, 1, 1, 24 + length, $myip, $_); + die "send: $!\n" unless + send(SOCK, $request, 0, $them); + die "recv: $!\n" unless + recv(SOCK, $reply, 1024, 0); + ($type,$ver,$len,$payload) = unpack('CCnx4x8x4A', $reply); + print $CODES[$type] . " $_\n"; +} + diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 00000000000..f9516db9972 --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,108 @@ +# +# Makefile for the Harvest Object Cache server +# +# $Id: Makefile.in,v 1.1 1996/02/22 06:23:53 wessels Exp $ +# +# Uncomment and customize the following to suit your needs: +# +WAIS_OPT = # -DUSE_WAIS_RELAY +LOG_OPT = # -DNO_LOGGGING +PORT_OPT = # -DCACHE_HTTP_PORT=3128 -DCACHE_ICP_PORT=3130 +HOST_OPT = # -DCACHEMGR_HOSTNAME="getfullhostname()" +DEFINES = $(WAIS_OPT) $(LOG_OPT) $(PORT_OPT) $(HOST_OPT) + +prefix = @prefix@ +top_srcdir = @top_srcdir@ +INSTALL_BINDIR = $(prefix)/bin +INSTALL_LIBDIR = $(prefix)/lib +INSTALL_MANDIR = $(prefix)/man +INSTALL_CGIDIR = $(prefix)/cgi-bin + +CC = @CC@ +INSTALL = @INSTALL@ +INSTALL_BIN = @INSTALL_PROGRAM@ +INSTALL_FILE = @INSTALL_DATA@ +RANLIB = @RANLIB@ +YACC = @YACC@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LN_S = @LN_S@ +PERL = @CMD_PERL@ +CRYPT_LIB = @CRYPT_LIB@ +XTRA_CFLAGS = 
@XTRA_CFLAGS@ +XTRA_LIBS = @XTRA_LIBS@ +XTRA_OBJS = @XTRA_OBJS@ +DEBUG_TOP = @DEBUG_TOP@ + + +## AIX users might need to use these values +# +#CC = cc +#XTRA_CFLAGS = -D_HARVEST_HPUX_ -d_AIX -D_ALL_SOURCE +# +# In addition, add -DBUGGY_AIX_SOCKETS if you suspect your sockets +# implementation is broken. + +## LINUX users might need to use these values +# +#XTRA_CFLAGS = -D_ALL_SOURCE + +DEBUG = $(DEBUG_TOP) #-O #-g #-DDEBUG +INCLUDE = -I. -I../include # MUST use -I. first +CFLAGS = $(CCOPTS) $(DEBUG) $(INCLUDE) $(DEFINES) $(XTRA_CFLAGS) +LDFLAGS = $(DEBUG_TOP) $(LDOPTS) -L../lib +LIBS = -lregex -lutil $(XTRA_LIBS) $(CRYPT_LIB) +CLIENT_LIBS = -lutil $(XTRA_LIBS) + +PROGS = cached +UTILS = client dnsserver +CGIPROGS = cachemgr.cgi +OBJS = comm.o cache_cf.o debug.o disk.o dynamic_array.o \ + fdstat.o filemap.o ftp.o gopher.o hash.o \ + http.o icp.o ipcache.o mime.o neighbors.o objcache.o \ + proto.o stack.o stat.o stmem.o store.o storetoString.o \ + tools.o ttl.o url.o wais.o $(XTRA_OBJS) + +MK_TABLE_H = ../url/mk-mime-table-h.pl +MIME_TABLE = ../url/mime.table + +all: $(PROGS) $(UTILS) $(CGIPROGS) + +cached: ../include/mime_table.h main.o $(OBJS) + $(CC) -o $@ $(LDFLAGS) $(OBJS) main.o $(LIBS) + +client: client.o + $(CC) -o $@ $(LDFLAGS) $@.o $(CLIENT_LIBS) + +dnsserver: dnsserver.o + $(CC) -o $@ $(LDFLAGS) $@.o $(LIBS) + +cachemgr.cgi: cachemgr.o + $(CC) -o $@ $(LDFLAGS) cachemgr.o $(CLIENT_LIBS) + +../include/mime_table.h: $(MK_TABLE_H) $(MIME_TABLE) + $(PERL) $(MK_TABLE_H) < $(MIME_TABLE) > $@ + +install: all + @for f in $(PROGS); do \ + echo $(INSTALL_BIN) $$f $(INSTALL_BINDIR); \ + $(INSTALL_BIN) $$f $(INSTALL_BINDIR); \ + done + @for f in $(UTILS); do \ + echo $(INSTALL_BIN) $$f $(INSTALL_BINDIR); \ + $(INSTALL_BIN) $$f $(INSTALL_BINDIR); \ + done + @for f in $(CGIPROGS); do \ + echo $(INSTALL_BIN) $$f $(INSTALL_CGIDIR); \ + $(INSTALL_BIN) $$f $(INSTALL_CGIDIR); \ + done + +clean: + -rm -rf *.o *pure_* core $(PROGS) $(UTILS) $(CGIPROGS) + +realclean: clean + 
-rm -f Makefile ../include/mime_table.h + +tar: + -rm -f cache.tar + tar cf cache.tar *.c *.h cached.conf Makefile* diff --git a/src/cache_cf.cc b/src/cache_cf.cc new file mode 100644 index 00000000000..b09b8ee87d4 --- /dev/null +++ b/src/cache_cf.cc @@ -0,0 +1,1750 @@ +static char rcsid[] = "$Id: cache_cf.cc,v 1.1 1996/02/22 06:23:53 wessels Exp $"; +/* + * File: cache_cf.c + * Description: cache initialization functions + * Author: Chuck Neerdaaels, USC + * Created: Mon May 23 1994 + * Language: C + * + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. 
+ * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. 
+ *
+ *
+ */
+#include "config.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "debug.h"
+#include "comm.h"
+#include "cache_cf.h"
+#include "proto.h"
+#include "store.h"
+#include "ttl.h"
+#include "util.h"
+
+static struct { /* every value read from cached.conf by the parse*() functions below */
+    struct {
+        int maxSize; /* Mem: bytes (cache_mem MB << 20); Swap: kbytes (cache_swap MB << 10) */
+        int highWatherMark; /* percent */
+        int lowWaterMark; /* percent */
+    } Mem , Swap;
+    struct {
+        int maxObjSize; /* bytes (config value is MB, shifted << 20) */
+        int defaultTtl; /* seconds (config value is minutes, * 60) */
+    } Gopher, Http, Ftp;
+#if USE_WAIS_RELAY
+    struct {
+        int maxObjSize; /* bytes (config value is MB, shifted << 20) */
+        int defaultTtl;
+        char *relayHost;
+        int relayPort;
+    } Wais;
+#endif
+    int negativeTtl; /* seconds (config value is minutes) */
+    int readTimeout; /* seconds (config value is minutes) */
+    int lifetimeDefault; /* seconds (config value is minutes) */
+    int connectTimeout; /* stored exactly as given; the default is written in seconds */
+    int ageMaxDefault; /* initial age_max for ttl_pattern entries */
+    int cleanRate; /* seconds (config value is minutes); default -1 = disabled */
+    int dnsChildren; /* clamped to 1..DefaultDnsChildrenMax by parseConfigFile() */
+    double hotVmFactor; /* negative values are rejected by the parser */
+    struct {
+        int ascii; /* set by ascii_port */
+        int udp; /* set by udp_port */
+        int binary;
+    } Port;
+    struct {
+        char *log;
+        char *access;
+        char *hierarchy;
+        int rotateNumber;
+    } Log;
+    char *adminEmail;
+    char *effectiveUser;
+    char *effectiveGroup; /* optional second token of cache_effective_user */
+    struct {
+        char *ftpget;
+        char *ftpget_opts;
+        char *dnsserver;
+    } Program;
+    int sourcePing; /* boolean */
+    int quickAbort; /* boolean */
+    int commonLogFormat; /* boolean */
+    int debugLevel; /* second token of the cache_log line */
+    int neighborTimeout; /* stored exactly as given; the default is written in seconds */
+    int singleParentBypass; /* boolean */
+    struct {
+        char *host;
+        char *prefix; /* "http://<host>:<port>", built by parseHttpdAccelLine() */
+        int port;
+        int withProxy; /* boolean */
+    } Accel;
+    char *appendDomain; /* must start with '.' (checked by parseAppendDomainLine) */
+} Config;
+
+#define DefaultMemMaxSize (16 << 20) /* 16 MB */
+#define DefaultMemHighWatherMark 90 /* 90% */
+#define DefaultMemLowWatherMark 60 /* 60% */
+#define DefaultSwapMaxSize (100 << 10) /* 100 MB (100*1024 kbytes) */
+#define DefaultSwapHighWaterMark 90 /* 90% */
+#define DefaultSwapLowWaterMark 60 /* 60% */
+
+#define DefaultFtpDefaultTtl (7 * 24 * 60 * 60) /* 1 week */
+#define DefaultFtpMaxObjSize (4 << 20) /* 4 MB */
+#define DefaultGopherDefaultTtl (7 * 24 * 60 * 60) /* 1 week */
+#define DefaultGopherMaxObjSize (4 << 20) /* 4 MB */
+#define DefaultHttpDefaultTtl (7 * 24 * 60 * 60) /* 1 week */
+#define DefaultHttpMaxObjSize (4 << 20) /* 4 MB */
+#if USE_WAIS_RELAY
+#define DefaultWaisDefaultTtl (7 * 24 * 60 * 
60) /* 1 week */
+#define DefaultWaisMaxObjSize (4 << 20) /* 4 MB */
+#define DefaultWaisRelayHost (char *)NULL
+#define DefaultWaisRelayPort -1
+#endif
+
+#define DefaultNegativeTtl (5 * 60) /* 5 min */
+#define DefaultReadTimeout (15 * 60) /* 15 min */
+#define DefaultLifetimeDefault (200 * 60) /* 3+ hours */
+#define DefaultConnectTimeout (2 * 60) /* 2 min */
+#define DefaultDefaultAgeMax (3600 * 24 * 30) /* 30 days */
+#define DefaultCleanRate -1 /* disabled */
+#define DefaultDnsChildren 5 /* 5 processes */
+#define DefaultDnsChildrenMax 32 /* 32 processes */
+#define DefaultHotVmFactor 0.0 /* disabled */
+
+#define DefaultAsciiPortNum CACHE_HTTP_PORT
+#define DefaultBinaryPortNum 3129
+#define DefaultUdpPortNum CACHE_ICP_PORT
+
+#define DefaultCacheLogFile "cache.log"
+#define DefaultAccessLogFile "cache.access.log"
+#define DefaultHierarchyLogFile "cache.hierarchy.log"
+#define DefaultLogRotateNumber 10
+#define DefaultAdminEmail "webmaster"
+#define DefaultFtpgetProgram "ftpget"
+#define DefaultFtpgetOptions ""
+#define DefaultDnsserverProgram "dnsserver"
+#define DefaultEffectiveUser (char *)NULL /* default NONE */
+#define DefaultEffectiveGroup (char *)NULL /* default NONE */
+#define DefaultAppendDomain (char *)NULL /* default NONE */
+
+#define DefaultDebugLevel 1 /* default 1 */
+#define DefaultAccelHost (char *)NULL /* default NONE */
+#define DefaultAccelPrefix (char *)NULL /* default NONE */
+#define DefaultAccelPort 0 /* default off */
+#define DefaultAccelWithProxy 0 /* default off */
+#define DefaultSourcePing 0 /* default off */
+#define DefaultCommonLogFormat 1 /* default on */
+#define DefaultQuickAbort 0 /* default off */
+#define DefaultNeighborTimeout 2 /* 2 seconds */
+#define DefaultSingleParentBypass 0 /* default off */
+
+stoplist *http_stoplist = NULL; /* filled by http_stop lines */
+stoplist *gopher_stoplist = NULL; /* filled by gopher_stop lines */
+stoplist *ftp_stoplist = NULL; /* filled by ftp_stop lines */
+stoplist *bind_addr_list = NULL; /* bind_address lines; "0.0.0.0" is always appended last */
+stoplist *local_domain_list = NULL; /* filled by local_domain lines */
+stoplist *inside_firewall_list = NULL; /* filled by inside_firewall lines */
+
+ip_acl *proxy_ip_acl = NULL; /* proxy_allow / proxy_deny entries, in file order */
+ip_acl *accel_ip_acl = NULL; /* accel_allow / accel_deny entries */
+ip_acl *manager_ip_acl = NULL; /* manager_allow / manager_deny entries */
+ip_acl *local_ip_list = NULL; /* local_ip entries (stored with IP_DENY) */
+
+int zap_disk_store = 0; /* off, try to rebuild from disk */
+int httpd_accel_mode = 0; /* for fast access */
+int emulate_httpd_log = DefaultCommonLogFormat; /* for fast access */
+time_t neighbor_timeout = DefaultNeighborTimeout; /* for fast access */
+int single_parent_bypass = 0;
+int getDnsChildren();
+
+static char w_space[] = " \t\n"; /* token delimiters for every strtok() call below */
+
+static void configSetFactoryDefaults();
+static void configDoConfigure();
+
+extern int getMaxFD();
+extern void fatal _PARAMS((char *));
+extern void neighbors_cf_add _PARAMS((char *, char *, int, int, int));
+extern int neighbors_cf_domain _PARAMS((char *, char *));
+
+void self_destruct(in_string) /* report a bad cached.conf line through fatal() */
+     char *in_string;
+{
+    char fatal_str[4096];
+
+    sprintf(fatal_str, "Bungled cached.conf: %.4000s", in_string); /* precision keeps a BUFSIZ-long line inside fatal_str */
+    fatal(fatal_str);
+}
+
+int ip_acl_match(c1, c2, c3, c4, a1, a2, a3, a4) /* 1 if client octets c1..c4 match ACL octets a1..a4 */
+     int c1;
+     int c2;
+     int c3;
+     int c4;
+     int a1;
+     int a2;
+     int a3;
+     int a4;
+{
+    if (!((a1 == 0) || (a1 == c1))) /* an ACL octet of 0 is a wildcard */
+        return 0;
+    if (!((a2 == 0) || (a2 == c2)))
+        return 0;
+    if (!((a3 == 0) || (a3 == c3)))
+        return 0;
+    if (!((a4 == 0) || (a4 == c4)))
+        return 0;
+
+    return 1;
+}
+
+
+ip_access_type
+ip_access_check(address, list) /* first matching entry decides; no list or no match means IP_ALLOW */
+     struct in_addr address;
+     ip_acl *list;
+{
+    int c1, c2, c3, c4;
+    ip_acl *p;
+    unsigned int naddr = 0; /* host byte-order IP addr */
+
+    if (!list)
+        return IP_ALLOW;
+
+    naddr = ntohl(address.s_addr); /* s_addr is network order; the shifts below need host order */
+    c1 = ((int) naddr & 0xff000000) >> 24;
+    c2 = ((int) naddr & 0x00ff0000) >> 16;
+    c3 = ((int) naddr & 0x0000ff00) >> 8;
+    c4 = ((int) naddr & 0x000000ff);
+
+    debug(10, "ip_access_check: Using %d.%d.%d.%d\n", c1, c2, c3, c4);
+
+    if ((c1 == 127) && (c2 == 0) && (c3 == 0) && (c4 == 1))
+        return IP_ALLOW; /* always allow localhost */
+
+    for (p = list; p; p = p->next) {
+        debug(10, "ip_access_check: %d.%d.%d.%d vs %d.%d.%d.%d\n",
+            c1, c2, c3, c4, p->a1, p->a2, p->a3, p->a4);
+        if (ip_acl_match(c1, c2, c3, c4, p->a1, p->a2, p->a3, p->a4))
+            return p->access;
+    }
+    return IP_ALLOW;
+}
+
+void addToIPACL(list, ip_str, access) /* append one allow/deny entry to the end of *list */
+     ip_acl **list;
+     char *ip_str;
+     ip_access_type access;
+{
+    ip_acl *p, *q;
+    int a1, a2, a3, a4;
+
+    if (!ip_str) {
+        return;
+    }
+    if (!(*list)) {
+        /* empty list */
+        *list = (ip_acl *) xcalloc(1, sizeof(ip_acl));
+        (*list)->next = NULL;
+        q = *list;
+    } else {
+        p = *list;
+        while (p->next) /* walk to the tail so entries keep file order */
+            p = p->next;
+        q = (ip_acl *) xcalloc(1, sizeof(ip_acl));
+        q->next = NULL;
+        p->next = q;
+    }
+
+    /* decode ip address */
+    if (strstr(ip_str, "all") || strstr(ip_str, "ALL") ||
+        strstr(ip_str, "All")) { /* NOTE(review): substring test, so any token containing "all" matches */
+        a1 = a2 = a3 = a4 = 0;
+    } else {
+        a1 = a2 = a3 = a4 = 0;
+        sscanf(ip_str, "%d.%d.%d.%d", &a1, &a2, &a3, &a4); /* NOTE(review): result unchecked; an unparsable token stays 0.0.0.0 = match-all */
+    }
+
+    q->access = access;
+    q->a1 = a1;
+    q->a2 = a2;
+    q->a3 = a3;
+    q->a4 = a4;
+
+}
+
+
+void addToStopList(list, key) /* append a private copy of key to the end of *list */
+     stoplist **list;
+     char *key;
+{
+    stoplist *p, *q;
+
+    if (!(*list)) {
+        /* empty list */
+        *list = (stoplist *) xcalloc(1, sizeof(stoplist));
+        (*list)->key = xstrdup(key);
+        (*list)->next = NULL;
+    } else {
+        p = *list;
+        while (p->next)
+            p = p->next;
+        q = (stoplist *) xcalloc(1, sizeof(stoplist));
+        q->key = xstrdup(key);
+        q->next = NULL;
+        p->next = q;
+    }
+}
+
+/* Use this #define in all the parse*() functions. 
Assumes
+ * ** char *token and char *line_in are defined
+ */
+
+#define GetInteger(var) \
+    token = strtok(NULL, w_space); \
+    if( token == (char *) NULL) \
+        self_destruct(line_in); \
+    if (sscanf(token, "%d", &var) != 1) \
+        self_destruct(line_in);
+
+
+void parseCacheHostLine(line_in) /* cache_host <hostname> <type> <ascii_port> <udp_port> [proxy-only] */
+     char *line_in;
+{
+    char *type = NULL;
+    char *hostname = NULL;
+    char *token = NULL;
+    int ascii_port = CACHE_HTTP_PORT;
+    int udp_port = CACHE_ICP_PORT;
+    int proxy_only = 0;
+
+    /* Parse a cache_host line */
+    if (!(hostname = strtok(NULL, w_space)))
+        self_destruct(line_in);
+    if (!(type = strtok(NULL, w_space)))
+        self_destruct(line_in);
+
+    GetInteger(ascii_port); /* mandatory: a missing or non-numeric token self-destructs */
+    GetInteger(udp_port); /* mandatory as well, so the initialisers above are never used */
+    if ((token = strtok(NULL, w_space))) {
+        if (!strcasecmp(token, "proxy-only"))
+            proxy_only = 1;
+    }
+    neighbors_cf_add(hostname, type, ascii_port, udp_port, proxy_only);
+}
+
+void parseHostDomainLine(line_in) /* cache_host_domain <host> <domain>[, <domain> ...] */
+     char *line_in;
+{
+    char *host = NULL;
+    char *domain = NULL;
+
+    if (!(host = strtok(NULL, w_space)))
+        self_destruct(line_in);
+    while ((domain = strtok(NULL, ", \t\n"))) { /* commas also separate domains */
+        if (neighbors_cf_domain(host, domain) == 0)
+            self_destruct(line_in);
+    }
+}
+
+void parseMailTraceLine(line_in) /* mail_trace: accepted but ignored, with a notice on stderr */
+     char *line_in;
+{
+    fprintf(stderr, "'mail_trace' not supported in this version; ignored.\n");
+}
+
+
+void parseSourcePingLine(line_in) /* source_ping on|off */
+     char *line_in;
+{
+    char *srcping;
+
+    srcping = strtok(NULL, w_space);
+    if (srcping == (char *) NULL)
+        self_destruct(line_in);
+
+    /* set source_ping, default is off. 
*/
+    if (!strcasecmp(srcping, "on"))
+        Config.sourcePing = 1;
+    else if (!strcasecmp(srcping, "off"))
+        Config.sourcePing = 0;
+    else
+        Config.sourcePing = 0;
+}
+
+
+void parseQuickAbortLine(line_in) /* quick_abort (or abort_mode) on|quick|off|normal */
+     char *line_in;
+{
+    char *abort;
+
+    abort = strtok(NULL, w_space);
+    if (abort == (char *) NULL)
+        self_destruct(line_in);
+
+    if (!strcasecmp(abort, "on") || !strcasecmp(abort, "quick"))
+        Config.quickAbort = 1;
+    else if (!strcasecmp(abort, "off") || !strcasecmp(abort, "normal")) /* case-insensitive like every other keyword */
+        Config.quickAbort = 0;
+    else
+        Config.quickAbort = 0; /* anything unrecognised also means off */
+
+}
+
+void parseMemLine(line_in) /* cache_mem <MB>; stored in bytes */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Mem.maxSize = i << 20;
+}
+
+void parseMemHighLine(line_in) /* cache_mem_high <percent> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Mem.highWatherMark = i;
+}
+
+void parseMemLowLine(line_in) /* cache_mem_low <percent> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Mem.lowWaterMark = i;
+}
+
+void parseHotVmFactorLine(line_in) /* cache_hot_vm_factor <real, >= 0> */
+     char *line_in;
+{
+    char *token = NULL;
+    double d;
+
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    if (sscanf(token, "%lf", &d) != 1)
+        self_destruct(line_in);
+    if (d < 0)
+        self_destruct(line_in);
+    Config.hotVmFactor = d;
+}
+
+void parseSwapLine(line_in) /* cache_swap <MB>; stored in kbytes */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Swap.maxSize = i << 10;
+}
+
+void parseSwapHighLine(line_in) /* cache_swap_high <percent> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Swap.highWatherMark = i;
+}
+
+void parseSwapLowLine(line_in) /* cache_swap_low <percent> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Swap.lowWaterMark = i;
+}
+
+void parseHttpLine(line_in) /* http <max object size, MB> <default TTL, minutes> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Http.maxObjSize = i << 20; /* MB -> bytes */
+    GetInteger(i);
+    Config.Http.defaultTtl = i * 60; /* minutes -> seconds */
+}
+
+void parseGopherLine(line_in) /* gopher <max object size, MB> <default TTL, minutes> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Gopher.maxObjSize = i << 20; /* MB -> bytes */
+    GetInteger(i);
+    Config.Gopher.defaultTtl = i * 
60;
+}
+
+void parseFtpLine(line_in) /* ftp <max object size, MB> <default TTL, minutes> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Ftp.maxObjSize = i << 20; /* MB -> bytes */
+    GetInteger(i);
+    Config.Ftp.defaultTtl = i * 60; /* minutes -> seconds */
+}
+
+void parseTTLPattern(line_in) /* ttl_pattern <regex> <abs_ttl, min> [<pct_age> [<age_max, min>]] */
+     char *line_in;
+{
+    char *token;
+    char *pattern;
+    time_t abs_ttl = 0;
+    int pct_age = 0;
+    time_t age_max = Config.ageMaxDefault;
+    int i;
+
+    token = strtok(NULL, w_space); /* token: regex pattern */
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    pattern = xstrdup(token); /* copied because later strtok() calls reuse the buffer */
+
+    GetInteger(i); /* token: abs_ttl */
+    abs_ttl = (time_t) (i * 60); /* convert minutes to seconds */
+
+    token = strtok(NULL, w_space); /* token: pct_age */
+    if (token != (char *) NULL) { /* pct_age is optional */
+        if (sscanf(token, "%d", &pct_age) != 1)
+            self_destruct(line_in);
+    }
+    token = strtok(NULL, w_space); /* token: age_max */
+    if (token != (char *) NULL) { /* age_max is optional */
+        if (sscanf(token, "%d", &i) != 1)
+            self_destruct(line_in);
+        age_max = (time_t) (i * 60); /* convert minutes to seconds */
+    }
+    ttlAddToList(pattern, abs_ttl, pct_age, age_max);
+
+    safe_free(pattern);
+}
+
+void parseNegativeLine(line_in) /* negative_ttl <minutes>; stored in seconds */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.negativeTtl = i * 60;
+}
+
+void parseReadTimeoutLine(line_in) /* read_timeout <minutes>; stored in seconds */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.readTimeout = i * 60;
+}
+
+void parseLifetimeLine(line_in) /* client_lifetime <minutes>; stored in seconds */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.lifetimeDefault = i * 60;
+}
+
+void parseConnectTimeout(line_in) /* connect_timeout <n>; stored as given, not scaled by 60 */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.connectTimeout = i;
+}
+
+void parseCleanRateLine(line_in) /* clean_rate <minutes>; stored in seconds */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.cleanRate = i * 60;
+}
+
+void parseDnsChildrenLine(line_in) /* dns_children <count>; range-checked later in parseConfigFile() */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.dnsChildren = i;
+}
+
+void parseMgrLine(line_in) /* cache_mgr <admin e-mail> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, 
w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.adminEmail); /* drop the previous value before replacing it */
+    Config.adminEmail = xstrdup(token);
+}
+
+void parseDirLine(line_in) /* cache_dir <path>; may appear more than once */
+     char *line_in;
+{
+    char *token;
+
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    storeAddSwapDisk(xstrdup(token)); /* hands a private copy of the path to the store */
+
+}
+
+void parseHttpdAccelLine(line_in) /* httpd_accel <host> <port>; also turns httpd_accel_mode on */
+     char *line_in;
+{
+    char *token;
+    char buf[1024];
+    int i;
+
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Accel.host);
+    Config.Accel.host = xstrdup(token);
+    GetInteger(i);
+    Config.Accel.port = i;
+    safe_free(Config.Accel.prefix);
+    sprintf(buf, "http://%.1000s:%d", Config.Accel.host, Config.Accel.port); /* precision bounds the host so buf[1024] cannot overflow */
+    Config.Accel.prefix = xstrdup(buf);
+    httpd_accel_mode = 1;
+}
+
+void parseHttpdAccelWithProxyLine(line_in) /* httpd_accel_with_proxy on|off */
+     char *line_in;
+{
+    char *proxy;
+
+    proxy = strtok(NULL, w_space);
+    if (proxy == (char *) NULL)
+        self_destruct(line_in);
+
+    /* set httpd_accel_with_proxy, default is off. 
*/
+    if (!strcasecmp(proxy, "on"))
+        Config.Accel.withProxy = 1;
+    else if (!strcasecmp(proxy, "off"))
+        Config.Accel.withProxy = 0;
+    else
+        Config.Accel.withProxy = 0;
+}
+
+void parseEffectiveUserLine(line_in) /* cache_effective_user <user> [<group>] */
+     char *line_in;
+{
+    char *token;
+
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.effectiveUser);
+    safe_free(Config.effectiveGroup); /* freed here so a line without a group leaves no stale value */
+    Config.effectiveUser = xstrdup(token);
+
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return; /* group is optional */
+    Config.effectiveGroup = xstrdup(token);
+}
+
+void parseLogLine(line_in) /* cache_log <file> <debug level>; both tokens are required */
+     char *line_in;
+{
+    char *token;
+    int i;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Log.log);
+    Config.Log.log = xstrdup(token);
+    GetInteger(i);
+    Config.debugLevel = i;
+}
+
+void parseAccessLogLine(line_in) /* cache_access_log <file> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Log.access);
+    Config.Log.access = xstrdup(token);
+}
+
+void parseHierachyLogLine(line_in) /* cache_hierarchy_log <file> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Log.hierarchy);
+    Config.Log.hierarchy = xstrdup(token);
+}
+
+void parseLogfileRotateLine(line_in) /* logfile_rotate <count> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Log.rotateNumber = i;
+}
+
+void parseFtpProgramLine(line_in) /* cache_ftp_program <path> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Program.ftpget);
+    Config.Program.ftpget = xstrdup(token);
+}
+
+void parseFtpOptionsLine(line_in) /* cache_ftp_options <rest of line, kept as one string> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, ""); /* Note "", don't separate these */
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Program.ftpget_opts);
+    Config.Program.ftpget_opts = xstrdup(token); 
+}
+
+void parseDnsProgramLine(line_in) /* cache_dns_program <path> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Program.dnsserver);
+    Config.Program.dnsserver = xstrdup(token);
+}
+
+void parseEmulateLine(line_in) /* emulate_httpd_log on|enable; any other word turns it off */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    if (!strcasecmp(token, "on") || !strcasecmp(token, "enable"))
+        Config.commonLogFormat = 1;
+    else
+        Config.commonLogFormat = 0;
+}
+
+#if USE_WAIS_RELAY
+void parseWAISRelayLine(line_in) /* wais_relay <host> <port> <max object size, MB> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    safe_free(Config.Wais.relayHost);
+    Config.Wais.relayHost = xstrdup(token);
+    GetInteger(i);
+    Config.Wais.relayPort = i;
+    GetInteger(i);
+    Config.Wais.maxObjSize = i << 20; /* MB -> bytes */
+}
+
+#endif
+
+void parseProxyAllowLine(line_in) /* proxy_allow <ip|all>; a line with no address is silently ignored */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToIPACL(&proxy_ip_acl, token, IP_ALLOW);
+}
+
+void parseAccelAllowLine(line_in) /* accel_allow <ip|all> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToIPACL(&accel_ip_acl, token, IP_ALLOW);
+}
+
+void parseManagerAllowLine(line_in) /* manager_allow <ip|all> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToIPACL(&manager_ip_acl, token, IP_ALLOW);
+}
+
+void parseProxyDenyLine(line_in) /* proxy_deny <ip|all> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToIPACL(&proxy_ip_acl, token, IP_DENY);
+}
+
+void parseAccelDenyLine(line_in) /* accel_deny <ip|all> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToIPACL(&accel_ip_acl, token, IP_DENY);
+}
+
+void parseManagerDenyLine(line_in) /* manager_deny <ip|all> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    
if (token == (char *) NULL)
+        return;
+    addToIPACL(&manager_ip_acl, token, IP_DENY);
+}
+
+void parseLocalIPLine(line_in) /* local_ip <ip> [<ip> ...] */
+     char *line_in;
+{
+    char *token;
+    while ((token = strtok(NULL, w_space))) {
+        addToIPACL(&local_ip_list, token, IP_DENY); /* entries are tagged IP_DENY; how callers read that tag is not visible here */
+    }
+}
+
+void parseHttpStopLine(line_in) /* http_stop <string>; only the first token is used */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToStopList(&http_stoplist, token);
+}
+
+void parseGopherStopLine(line_in) /* gopher_stop <string> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToStopList(&gopher_stoplist, token);
+}
+void parseFtpStopLine(line_in) /* ftp_stop <string> */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        return;
+    addToStopList(&ftp_stoplist, token);
+}
+
+void parseAppendDomainLine(line_in) /* append_domain .<domain>; the leading dot is required */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    if (*token != '.')
+        self_destruct(line_in);
+    safe_free(Config.appendDomain);
+    Config.appendDomain = xstrdup(token);
+}
+
+void parseBindAddressLine(line_in) /* bind_address <address>; one per line, accumulated in order */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    debug(1, "parseBindAddressLine: adding %s\n", token);
+    addToStopList(&bind_addr_list, token);
+}
+
+void parseLocalDomainLine(line_in) /* local_domain <domain> [<domain> ...] */
+     char *line_in;
+{
+    char *token;
+    while ((token = strtok(NULL, w_space))) {
+        addToStopList(&local_domain_list, token);
+    }
+}
+
+void parseInsideFirewallLine(line_in) /* inside_firewall <domain> [<domain> ...] */
+     char *line_in;
+{
+    char *token;
+    while ((token = strtok(NULL, w_space))) {
+        addToStopList(&inside_firewall_list, token);
+    }
+}
+
+void parseAsciiPortLine(line_in) /* ascii_port <port> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Port.ascii = i;
+}
+
+void parseUdpPortLine(line_in) /* udp_port <port> */
+     char *line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.Port.udp = i;
+}
+
+void parseNeighborTimeout(line_in)
+     char 
*line_in;
+{
+    char *token;
+    int i;
+    GetInteger(i);
+    Config.neighborTimeout = i;
+}
+
+void parseSingleParentBypassLine(line_in) /* single_parent_bypass on|off */
+     char *line_in;
+{
+    char *token;
+    token = strtok(NULL, w_space);
+    if (token == (char *) NULL)
+        self_destruct(line_in);
+    Config.singleParentBypass = /* anything but "on" now switches it off, like the other on/off parsers */
+        !strcasecmp(token, "on") ? 1 : 0;
+}
+
+void parseCacheNeighborObjLine(line_in) /* cache_neighbor_obj: obsolete, prints a warning only */
+     char *line_in;
+{
+    printf("WARNING: 'cache_neighbor_obj' is no longer supported. Please\n");
+    printf(" use 'proxy-only' on the 'cache_host' line instead now.\n");
+    fflush(stdout);
+}
+
+void parseBehindFirewallLine(line_in) /* behind_firewall: obsolete, prints a warning only */
+     char *line_in;
+{
+    printf("WARNING: 'behind_firewall' is no longer supported. Please\n");
+    printf(" use the 'inside_firewall' and 'local_domain' lines\n");
+    printf(" instead now.\n");
+    fflush(stdout);
+}
+
+void parseDirectFetchLine(line_in) /* direct_fetch: obsolete, prints a warning only */
+     char *line_in;
+{
+    printf("WARNING: 'direct_fetch' is no longer supported. Please\n");
+    printf(" use the 'inside_firewall' and 'local_domain' lines\n");
+    printf(" instead now.\n");
+    fflush(stdout);
+}
+
+/* Read cached.conf: reset to factory defaults, hand each directive to its
+ * parse*() function, then sanity-check the result.  Always returns 0. */
+
+int parseConfigFile(file_name)
+     char *file_name;
+{
+    FILE *fp = fopen(file_name, "r");
+    char *token, tmp_line[BUFSIZ];
+    char line_in[BUFSIZ];
+    char fatal_str[4096];
+
+    /* Initialize a few global strings, in case they aren't user defined */
+
+    configSetFactoryDefaults();
+
+    if (fp == NULL) {
+        sprintf(fatal_str, "Unable to open configuration file: %.4000s", file_name); /* precision keeps a long path inside fatal_str */
+        fatal(fatal_str);
+    }
+    memset(line_in, '\0', BUFSIZ);
+    while (fgets(line_in, BUFSIZ, fp)) {
+        if (line_in[0] == '#' || line_in[0] == '\n' || line_in[0] == '\0')
+            continue; /* skip comments */
+
+        /* Tokenize a copy: the parse*() functions continue with strtok(NULL, ...) */
+        /* on tmp_line, while line_in stays intact for error messages */
+        if (line_in[0] == '\n') /* already handled above; kept as in the original */
+            continue;
+
+        strcpy(tmp_line, line_in);
+        if ((token = strtok(tmp_line, w_space)) == NULL)
+            continue;
+
+        /* Parse a cache_host line */
+        if (!strcmp(token, "cache_host"))
+            parseCacheHostLine(line_in);
+
+        /* Parse a cache_host_domain line */
+        else if (!strcmp(token, "cache_host_domain"))
+            parseHostDomainLine(line_in);
+
+        /* Parse a neighbor_timeout line */
+        else if (!strcmp(token, "neighbor_timeout"))
+            parseNeighborTimeout(line_in);
+        else if (!strcmp(token, "neighbour_timeout")) /* alternate spelling */
+            parseNeighborTimeout(line_in);
+
+        /* XXX This has been replaced with proxy-only on cache-host line */
+        /* give warning if cache_neighbor_obj is used */
+        else if (!strcmp(token, "cache_neighbor_obj"))
+            parseCacheNeighborObjLine(line_in);
+
+        /* Parse a cache_dir line */
+        else if (!strcmp(token, "cache_dir"))
+            parseDirLine(line_in);
+
+        /* Parse a cache_log line */
+        else if (!strcmp(token, "cache_log"))
+            parseLogLine(line_in);
+
+        /* Parse a cache_access_log line */
+        else if (!strcmp(token, "cache_access_log"))
+            parseAccessLogLine(line_in);
+
+        /* Parse a cache_hierarchy_log line */
+        else if (!strcmp(token, "cache_hierarchy_log"))
+            parseHierachyLogLine(line_in);
+
+        /* Parse a logfile_rotate line */
+        else if (!strcmp(token, "logfile_rotate"))
+            parseLogfileRotateLine(line_in);
+
+        /* Parse a httpd_accel_with_proxy line */
+        else if (!strcmp(token, "httpd_accel_with_proxy"))
+            parseHttpdAccelWithProxyLine(line_in);
+
+        /* Parse a httpd_accel line */
+        else if (!strcmp(token, "httpd_accel"))
+            parseHttpdAccelLine(line_in);
+
+        /* Parse a cache_effective_user line */
+        else if (!strcmp(token, "cache_effective_user"))
+            parseEffectiveUserLine(line_in);
+
+        /* Parse a cache_swap_high line */
+        else if (!strcmp(token, "cache_swap_high"))
+            parseSwapHighLine(line_in);
+
+        /* Parse a cache_swap_low line */
+        else if (!strcmp(token, "cache_swap_low"))
+            parseSwapLowLine(line_in);
+
+        /* Parse a cache_mem_high line */
+        else if (!strcmp(token, "cache_mem_high"))
+            parseMemHighLine(line_in);
+
+        /* Parse a cache_mem_low line */
+        else if (!strcmp(token, "cache_mem_low"))
+            parseMemLowLine(line_in);
+
+        /* Parse a cache_hot_vm_factor line */
+        else if (!strcmp(token, "cache_hot_vm_factor"))
+            parseHotVmFactorLine(line_in);
+
+        /* Parse a cache_mem line */
+        /* XXX: this must be AFTER cache_mem_low, etc. */
+        else if (!strcmp(token, "cache_mem"))
+            parseMemLine(line_in);
+
+        /* Parse a cache_swap line */
+        else if (!strcmp(token, "cache_swap"))
+            parseSwapLine(line_in);
+
+        /* Parse a cache_mgr line */
+        else if (!strcmp(token, "cache_mgr"))
+            parseMgrLine(line_in);
+
+        /* Parse a proxy_allow line */
+        else if (!strcmp(token, "proxy_allow"))
+            parseProxyAllowLine(line_in);
+
+        /* Parse a proxy_deny line */
+        else if (!strcmp(token, "proxy_deny"))
+            parseProxyDenyLine(line_in);
+
+#ifndef BACKWARDS_COMPATIBLE /* NOTE(review): old names are accepted when the macro is NOT defined; confirm this is intended */
+        /* Parse a access_allow line */
+        else if (!strcmp(token, "access_allow")) /* now proxy_allow */
+            parseProxyAllowLine(line_in);
+
+        /* Parse a access_deny line */
+        else if (!strcmp(token, "access_deny")) /* now proxy_deny */
+            parseProxyDenyLine(line_in);
+#endif
+
+        /* Parse a accel_allow line */
+        else if (!strcmp(token, "accel_allow"))
+            parseAccelAllowLine(line_in);
+
+        /* Parse a accel_deny line */
+        else if (!strcmp(token, "accel_deny"))
+            parseAccelDenyLine(line_in);
+
+        /* Parse a manager_allow line */
+        else if (!strcmp(token, "manager_allow"))
+            parseManagerAllowLine(line_in);
+
+        /* Parse a manager_deny line */
+        else if (!strcmp(token, "manager_deny"))
+            parseManagerDenyLine(line_in);
+
+#ifndef BACKWARDS_COMPATIBLE
+        /* Parse a manager_access_allow line */
+        else if (!strcmp(token, "manager_access_allow")) /* now manager_allow */
+            parseManagerAllowLine(line_in);
+
+        /* Parse a manager_access_deny line */
+        else if (!strcmp(token, "manager_access_deny")) /* now manager_deny */
+            parseManagerDenyLine(line_in);
+#endif
+
+        /* Parse a http_stop line */
+        else if (!strcmp(token, "http_stop"))
+            parseHttpStopLine(line_in);
+
+        /* Parse a gopher_stop line */
+        else if (!strcmp(token, "gopher_stop"))
+            parseGopherStopLine(line_in);
+
+        /* Parse a ftp_stop line */
+        else if (!strcmp(token, "ftp_stop"))
+            parseFtpStopLine(line_in);
+
+        /* Parse a gopher protocol line */
+        /* XXX: Must go after any gopher* token */
+        else if (!strcmp(token, "gopher"))
+            parseGopherLine(line_in);
+
+        /* Parse a http protocol line */
+        /* XXX: Must go after any http* token */
+        else if (!strcmp(token, "http"))
+            parseHttpLine(line_in);
+
+        /* Parse a ftp protocol line */
+        /* XXX: Must go after any ftp* token */
+        else if (!strcmp(token, "ftp"))
+            parseFtpLine(line_in);
+
+        else if (!strcmp(token, "ttl_pattern"))
+            parseTTLPattern(line_in);
+
+        /* Parse a negative_ttl line */
+        else if (!strcmp(token, "negative_ttl"))
+            parseNegativeLine(line_in);
+
+        /* Parse a read_timeout line */
+        else if (!strcmp(token, "read_timeout"))
+            parseReadTimeoutLine(line_in);
+
+        /* Parse a clean_rate line */
+        else if (!strcmp(token, "clean_rate"))
+            parseCleanRateLine(line_in);
+
+        /* Parse a client_lifetime line */
+        else if (!strcmp(token, "client_lifetime"))
+            parseLifetimeLine(line_in);
+
+        /* Parse a connect_timeout line */
+        else if (!strcmp(token, "connect_timeout"))
+            parseConnectTimeout(line_in);
+
+        /* Parse a cache_ftp_program line */
+        else if (!strcmp(token, "cache_ftp_program"))
+            parseFtpProgramLine(line_in);
+
+        /* Parse a cache_ftp_options line */
+        else if (!strcmp(token, "cache_ftp_options"))
+            parseFtpOptionsLine(line_in);
+
+        /* Parse a cache_dns_program line */
+        else if (!strcmp(token, "cache_dns_program"))
+            parseDnsProgramLine(line_in);
+
+        /* Parse a dns_children line */
+        else if (!strcmp(token, "dns_children"))
+            parseDnsChildrenLine(line_in);
+
+        /* Parse mail trace line */
+        else if (!strcmp(token, "mail_trace"))
+            parseMailTraceLine(line_in);
+
+        /* Parse source_ping line */
+        else if (!strcmp(token, "source_ping"))
+            parseSourcePingLine(line_in);
+
+        /* Parse behind_firewall line */
+        else if (!strcmp(token, "behind_firewall"))
+            parseBehindFirewallLine(line_in);
+
+        /* Parse direct_fetch line */
+        else if (!strcmp(token, "direct_fetch"))
+            parseDirectFetchLine(line_in);
+
+        /* Parse quick_abort line */
+        else if (!strcmp(token, "quick_abort"))
+            parseQuickAbortLine(line_in);
+
+        /* Parse old abort_mode line - FOR BACKWARDS COMPATIBILITY */
+        else if (!strcmp(token, "abort_mode"))
+            parseQuickAbortLine(line_in);
+
+        /* Parse emulate_httpd_log line */
+        else if (!strcmp(token, "emulate_httpd_log"))
+            parseEmulateLine(line_in);
+
+        else if (!strcmp(token, "append_domain"))
+            parseAppendDomainLine(line_in);
+
+#if USE_WAIS_RELAY
+        else if (!strcmp(token, "wais_relay"))
+            parseWAISRelayLine(line_in);
+#endif
+
+        /* Parse a local_ip line */
+        else if (!strcmp(token, "local_ip"))
+            parseLocalIPLine(line_in);
+
+        /* Parse a local_domain line */
+        else if (!strcmp(token, "local_domain"))
+            parseLocalDomainLine(line_in);
+
+        /* Parse a bind_address line */
+        else if (!strcmp(token, "bind_address"))
+            parseBindAddressLine(line_in);
+
+        /* Parse a ascii_port line */
+        else if (!strcmp(token, "ascii_port"))
+            parseAsciiPortLine(line_in);
+
+        /* Parse a udp_port line */
+        else if (!strcmp(token, "udp_port"))
+            parseUdpPortLine(line_in);
+
+        else if (!strcmp(token, "inside_firewall"))
+            parseInsideFirewallLine(line_in);
+
+        else if (!strcmp(token, "single_parent_bypass"))
+            parseSingleParentBypassLine(line_in);
+
+        /* If unknown, treat as a comment line */
+        else {
+            /*EMPTY */ ;
+        }
+    }
+
+    /* Add INADDR_ANY to end of bind_addr_list as last chance */
+    addToStopList(&bind_addr_list, "0.0.0.0");
+
+    /* Sanity checks */
+    if (getClientLifetime() < getReadTimeout()) {
+        printf("WARNING: client_lifetime (%d seconds) is less than read_timeout (%d seconds).\n",
+            getClientLifetime(), getReadTimeout());
+        printf(" This may cause serious problems with your cache!!!\n");
+        printf(" Change your cached.conf file.\n");
+        fflush(stdout); /* print message */
+    }
+    if (getCacheSwapMax() < (getCacheMemMax() >> 10)) { /* swap is in kbytes, mem in bytes */
+        printf("WARNING: cache_swap (%d kbytes) is less than cache_mem (%d bytes).\n", getCacheSwapMax(), getCacheMemMax());
+        printf(" This will cause serious problems with your cache!!!\n");
+        printf(" Change your cached.conf file.\n");
+        Config.Swap.maxSize = getCacheMemMax() >> 10;
+        printf(" For this run, however, cached will use %d kbytes for cache_swap.\n", getCacheSwapMax());
+        fflush(stdout); /* print message */
+    }
+    if (getCleanRate() > -1 && getCleanRate() < 60) { /* -1 means disabled and is left alone */
+        Config.cleanRate = (30 * 60);
+        printf("WARNING: clean_rate is less than one minute.\n");
+        printf(" This will cause serious problems with your cache!!!\n");
+        printf(" Change your cached.conf file.\n");
+        printf(" For this run, however, cached will use %d minutes for clean_rate.\n", (int) (getCleanRate() / 60));
+        fflush(stdout); /* print message */
+    }
+    storeSanityCheck();
+
+    if (accel_ip_acl == NULL)
+        accel_ip_acl = proxy_ip_acl; /* no accel rules given: share the proxy list */
+
+    if (getDnsChildren() < 1) {
+        printf("WARNING: dns_children was set to a bad value: %d\n",
+            getDnsChildren());
+        printf("Setting it to the default (%d).\n", DefaultDnsChildren); /* was a hard-coded 3 that disagreed with the real default */
+        Config.dnsChildren = DefaultDnsChildren;
+    } else if (getDnsChildren() > DefaultDnsChildrenMax) {
+        printf("WARNING: dns_children was set to a bad value: %d\n",
+            getDnsChildren());
+        printf("Setting it to the maximum (%d).\n", DefaultDnsChildrenMax);
+        Config.dnsChildren = DefaultDnsChildrenMax;
+    }
+    fclose(fp);
+
+    configDoConfigure();
+    return 0;
+}
+
+
+/*
+ * Daemonize a process according to guidlines in "Advanced Programming
+ * For The UNIX Environment", W.R. Stevens ( Addison Wesley, 1992) - Ch. 
13 + */ +int daemonize() +{ + int n_openf, i; + pid_t pid; + + + if ((pid = fork()) < 0) + return (-1); + else if (pid != 0) + exit(0); + + /* Child continues */ + setsid(); /* Become session leader */ + + n_openf = getMaxFD(); /* Close any inherited files */ + for (i = 0; i < n_openf; i++) + close(i); + + umask(0); /* Clear file mode creation mask */ + + return (0); +} + + +int check_suid() +{ + struct passwd *pwd; + struct group *grp; + + if (geteuid() == 0) { + /* Started as a root, check suid option */ + if (Config.effectiveUser && (pwd = getpwnam(Config.effectiveUser))) { + + /* change current directory to swap space so we can get core */ + if (chdir(swappath(0))) { + debug(1, "Chdir Failed: Cached cannot write core file when it crash: %s\n", + xstrerror()); + } + if (Config.effectiveGroup && (grp = getgrnam(Config.effectiveGroup))) { + setgid(grp->gr_gid); + } else { + setgid(pwd->pw_gid); + } + setuid(pwd->pw_uid); + } + } + return 0; +} + +int getHttpMax() +{ + return Config.Http.maxObjSize; +} + +int getHttpTTL() +{ + return Config.Http.defaultTtl; +} + +int getGopherMax() +{ + return Config.Gopher.maxObjSize; +} + +int getGopherTTL() +{ + return Config.Gopher.defaultTtl; +} + +#if USE_WAIS_RELAY +int getWAISMax() +{ + return Config.Wais.maxObjSize; +} +char *getWaisRelayHost() +{ + return Config.Wais.relayHost; +} +int getWaisRelayPort() +{ + return Config.Wais.relayPort; +} +#endif + +int getFtpMax() +{ + return Config.Ftp.maxObjSize; +} + +int getFtpTTL() +{ + return Config.Ftp.defaultTtl; +} + +int getNegativeTTL() +{ + return Config.negativeTtl; +} + +int getCacheMemMax() +{ + return Config.Mem.maxSize; +} + +int getCacheMemHighWaterMark() +{ + return Config.Mem.highWatherMark; +} + +int getCacheMemLowWaterMark() +{ + return Config.Mem.lowWaterMark; +} + +double getCacheHotVmFactor() +{ + return Config.hotVmFactor; +} + +int getCacheSwapHighWaterMark() +{ + return Config.Swap.highWatherMark; +} + +int getCacheSwapLowWaterMark() +{ + return 
Config.Swap.lowWaterMark; +} + +int getCacheSwapMax() +{ + return Config.Swap.maxSize; +} + +int setCacheSwapMax(size) + int size; +{ + Config.Swap.maxSize = size; + return Config.Swap.maxSize; +} + +int getReadTimeout() +{ + return Config.readTimeout; +} + +int getClientLifetime() +{ + return Config.lifetimeDefault; +} + +int getConnectTimeout() +{ + return Config.connectTimeout; +} + +int getCleanRate() +{ + return Config.cleanRate; +} + +int getSourcePing() +{ + return Config.sourcePing; +} + +int getDnsChildren() +{ + return Config.dnsChildren; +} + +int getQuickAbort() +{ + return Config.quickAbort; +} + +char *getAccelPrefix() +{ + return Config.Accel.prefix; +} +int getAccelWithProxy() +{ + return Config.Accel.withProxy; +} +char *getAccessLogFile() +{ + return Config.Log.access; +} +char *getHierarchyLogFile() +{ + return Config.Log.hierarchy; +} +int getLogfileRotateNumber() +{ + return Config.Log.rotateNumber; +} +char *getCacheLogFile() +{ + return Config.Log.log; +} +int getAsciiPortNum() +{ + return Config.Port.ascii; +} +int getBinaryPortNum() +{ + return Config.Port.binary; +} +int getUdpPortNum() +{ + return Config.Port.udp; +} +char *getDnsProgram() +{ + return Config.Program.dnsserver; +} +char *getFtpProgram() +{ + return Config.Program.ftpget; +} +char *getFtpOptions() +{ + return Config.Program.ftpget_opts; +} +char *getAdminEmail() +{ + return Config.adminEmail; +} +int getDebugLevel() +{ + return Config.debugLevel; +} +char *getAppendDomain() +{ + return Config.appendDomain; +} +int setAsciiPortNum(p) + int p; +{ + return (Config.Port.ascii = p); +} +int setUdpPortNum(p) + int p; +{ + return (Config.Port.udp = p); +} +int setBinaryPortNum(p) + int p; +{ + return (Config.Port.binary = p); +} + + +char *safe_xstrdup(p) + char *p; +{ + return p ? xstrdup(p) : p; +} + +int safe_strlen(p) + char *p; +{ + return p ? 
strlen(p) : -1; +} + + +static void configSetFactoryDefaults() +{ + Config.Mem.maxSize = DefaultMemMaxSize; + Config.Mem.highWatherMark = DefaultMemHighWatherMark; + Config.Mem.lowWaterMark = DefaultMemLowWatherMark; + Config.Swap.maxSize = DefaultSwapMaxSize; + Config.Swap.highWatherMark = DefaultSwapHighWaterMark; + Config.Swap.lowWaterMark = DefaultSwapLowWaterMark; + + Config.Ftp.defaultTtl = DefaultFtpDefaultTtl; + Config.Ftp.maxObjSize = DefaultFtpMaxObjSize; + Config.Gopher.defaultTtl = DefaultGopherDefaultTtl; + Config.Gopher.maxObjSize = DefaultGopherMaxObjSize; + Config.Http.defaultTtl = DefaultHttpDefaultTtl; + Config.Http.maxObjSize = DefaultHttpMaxObjSize; +#if USE_WAIS_RELAY + Config.Wais.defaultTtl = DefaultWaisDefaultTtl; + Config.Wais.maxObjSize = DefaultWaisMaxObjSize; + Config.Wais.relayHost = safe_xstrdup(DefaultWaisRelayHost); + Config.Wais.relayPort = DefaultWaisRelayPort; +#endif + + Config.negativeTtl = DefaultNegativeTtl; + Config.readTimeout = DefaultReadTimeout; + Config.lifetimeDefault = DefaultLifetimeDefault; + Config.connectTimeout = DefaultConnectTimeout; + Config.ageMaxDefault = DefaultDefaultAgeMax; + Config.cleanRate = DefaultCleanRate; + Config.dnsChildren = DefaultDnsChildren; + Config.hotVmFactor = DefaultHotVmFactor; + Config.sourcePing = DefaultSourcePing; + Config.quickAbort = DefaultQuickAbort; + Config.commonLogFormat = DefaultCommonLogFormat; + Config.debugLevel = DefaultDebugLevel; + Config.neighborTimeout = DefaultNeighborTimeout; + Config.singleParentBypass = DefaultSingleParentBypass; + Config.adminEmail = safe_xstrdup(DefaultAdminEmail); + Config.effectiveUser = safe_xstrdup(DefaultEffectiveUser); + Config.effectiveGroup = safe_xstrdup(DefaultEffectiveGroup); + Config.appendDomain = safe_xstrdup(DefaultAppendDomain); + + Config.Port.ascii = DefaultAsciiPortNum; + Config.Port.binary = DefaultBinaryPortNum; + Config.Port.udp = DefaultUdpPortNum; + Config.Log.log = safe_xstrdup(DefaultCacheLogFile); + Config.Log.access 
= safe_xstrdup(DefaultAccessLogFile); + Config.Log.hierarchy = safe_xstrdup(DefaultHierarchyLogFile); + Config.Log.rotateNumber = DefaultLogRotateNumber; + Config.Program.ftpget = safe_xstrdup(DefaultFtpgetProgram); + Config.Program.ftpget_opts = safe_xstrdup(DefaultFtpgetOptions); + Config.Program.dnsserver = safe_xstrdup(DefaultDnsserverProgram); + Config.Accel.host = safe_xstrdup(DefaultAccelHost); + Config.Accel.prefix = safe_xstrdup(DefaultAccelPrefix); + Config.Accel.port = DefaultAccelPort; + Config.Accel.withProxy = DefaultAccelWithProxy; + +} + +static void configDoConfigure() +{ + httpd_accel_mode = Config.Accel.prefix ? 1 : 0; + emulate_httpd_log = Config.commonLogFormat; + neighbor_timeout = (time_t) Config.neighborTimeout; + single_parent_bypass = Config.singleParentBypass; + +#if !ALLOW_HOT_CACHE + if (!httpd_accel_mode || Config.Accel.withProxy) { + /* Not running strict httpd_accel--force hot_vm_factor to be 0 */ + if (Config.hotVmFactor != 0.0) { + printf("WARNING: Non-zero hot_vm_factor not allowed unless running only\n"); + printf(" in httpd_accel mode. Setting hot_vm_factor to 0.\n"); + Config.hotVmFactor = 0.0; + } + } +#endif /* !ALLOW_HOT_CACHE */ +} diff --git a/src/cachemgr.cc b/src/cachemgr.cc new file mode 100644 index 00000000000..7e65a7d0552 --- /dev/null +++ b/src/cachemgr.cc @@ -0,0 +1,692 @@ +static char rcsid[] = "$Id: cachemgr.cc,v 1.1 1996/02/22 06:23:53 wessels Exp $"; +/* + * cachemgr.c - CGI interface to the Cache Manager. + * + * ---------------------------------------------------------------- + * + * Copyright (c) 1994, 1995. All rights reserved. + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. 
+ * + * This copyright notice applies to all code in Harvest other than + * subsystems developed elsewhere, which contain other copyright notices + * in their source text. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD). The Harvest + * software may be used for academic, research, government, and internal + * business purposes without charge. If you wish to sell or distribute + * the Harvest software to commercial clients or partners, you must + * license the software. See + * http://harvest.cs.colorado.edu/harvest/copyright,licensing.html#licensing. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist in its + * use, correction, modification or enhancement. We assume no liability + * with respect to the infringement of copyrights, trade secrets, or any + * patents, and are not responsible for consequential damages. Proper + * use of the Harvest software is entirely the responsibility of the user. + * + * For those who are using Harvest for non-commercial purposes, you may + * make derivative works, subject to the following constraints: + * + * - You must include the above copyright notice and these accompanying + * paragraphs in all forms of derivative works, and any documentation + * and other materials related to such distribution and use acknowledge + * that the software was developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of the + * derivative work. + * + * - You must clearly notify users that your are distributing a modified + * version and not the original Harvest software. + * + * - Any derivative product is also subject to the restrictions of the + * copyright, including distribution and use limitations. 
+ */ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +static int client_comm_connect(); + +#define MAX_ENTRIES 10000 +#define INFO 0 +#define CACHED 1 +#define SERVER 2 +#define LOG 3 +#define STATS_G 4 +#define STATS_O 5 +#define STATS_U 6 +#define PARAM 7 +#define RESPT 8 +#define SHUTDOWN 9 +#define REFRESH 10 +#ifdef REMOVE_OBJECT +#define REMOVE 11 +#endif +#define FALSE 0 +#define TRUE 1 + +typedef struct { + char *name; + char *val; +} entry; + +int hasTables = FALSE; + +char *script_name = "/Harvest/cgi-bin/cachemgr.cgi"; +char *progname = NULL; + +#define LF 10 +#define CR 13 + +void print_trailer() +{ + time_t now = time(NULL); + static char tbuf[128]; + struct tm *gmt; + + gmt = gmtime(&now); + strftime(tbuf, 128, "%A, %d-%b-%y %H:%M:%S GMT", gmt); + + printf("
\n"); + printf("
\n"); + printf("Generated %s, by %s/%s@%s\n", + tbuf, progname, HARVEST_VERSION, getfullhostname()); + printf("
\n"); +} + +void noargs_html() +{ + printf("\r\n\r\n"); + printf("Harvest Cache Manager Interface\n"); + printf("

Cache Manager Interface

\n"); + printf("

\n"); + printf("This is a WWW interface to the instrumentation interface "); + printf("for the\n"); + printf("\n"); + printf("\tHarvest object cache.\n"); + printf("


\n"); + printf("

\n"); + printf("

\n", script_name); + printf("
\n");
+    printf("
Cache Host:\n", CACHEMGR_HOSTNAME); + printf("
Cache Port:\n", CACHE_HTTP_PORT); + printf("
Password :\n"); + printf("
URL :\n"); + printf("
Operation :"); + printf("\n"); + printf("
\n"); + printf("
\n"); + printf("
\n"); + printf("
\n"); + print_trailer(); +} + +/* A utility function from the NCSA httpd cgi-src utils.c */ +char *makeword(char *line, char stop) +{ + int x = 0, y; + char *word = (char *) malloc(sizeof(char) * (strlen(line) + 1)); + + for (x = 0; ((line[x]) && (line[x] != stop)); x++) + word[x] = line[x]; + + word[x] = '\0'; + if (line[x]) + ++x; + y = 0; + + while ((line[y++] = line[x++])); + return word; +} + +/* A utility function from the NCSA httpd cgi-src utils.c */ +char *fmakeword(FILE * f, char stop, int *cl) +{ + int wsize; + char *word; + int ll; + + wsize = 102400; + ll = 0; + word = (char *) malloc(sizeof(char) * (wsize + 1)); + + while (1) { + word[ll] = (char) fgetc(f); + if (ll == wsize) { + word[ll + 1] = '\0'; + wsize += 102400; + word = (char *) realloc(word, sizeof(char) * (wsize + 1)); + } + --(*cl); + if ((word[ll] == stop) || (feof(f)) || (!(*cl))) { + if (word[ll] != stop) + ll++; + word[ll] = '\0'; + return word; + } + ++ll; + } + /* NOTREACHED */ +} + +/* A utility function from the NCSA httpd cgi-src utils.c */ +char x2c(char *what) +{ + register char digit; + + digit = (what[0] >= 'A' ? ((what[0] & 0xdf) - 'A') + 10 : (what[0] - '0')); + digit *= 16; + digit += (what[1] >= 'A' ? 
((what[1] & 0xdf) - 'A') + 10 : (what[1] - '0')); + return (digit); +} + +/* A utility function from the NCSA httpd cgi-src utils.c */ +void unescape_url(char *url) +{ + register int x, y; + + for (x = 0, y = 0; url[y]; ++x, ++y) { + if ((url[x] = url[y]) == '%') { + url[x] = x2c(&url[y + 1]); + y += 2; + } + } + url[x] = '\0'; +} + +/* A utility function from the NCSA httpd cgi-src utils.c */ +void plustospace(char *str) +{ + register int x; + + for (x = 0; str[x]; x++) + if (str[x] == '+') + str[x] = ' '; +} + + +void parse_object(char *string) +{ + char *tmp_line = NULL; + char *url = NULL; + char *token = NULL; + char *store_time = NULL; + char *last_ref = NULL; + char *ttl = NULL; + char *sto = NULL; + char *status = NULL; + char *obj_status = NULL; + char *w_space = " \t\n"; + int obj_size; + int ref_cnt; + + /* Use tmp_line as a temporary pointer to the input line */ + tmp_line = string; + + /* Parse out the url */ + url = strtok(tmp_line, w_space); + +#if !ALL_OBJECTS + if (!strncmp(url, "cache_object", 12)) + return; + if (!strncmp(url, "POST", 4)) + return; +#endif + + tmp_line = NULL; + + token = strtok(tmp_line, w_space); + sscanf(token, "%d", &obj_size); + + token = strtok(tmp_line, w_space); + store_time = strdup(token); + + token = strtok(tmp_line, w_space); + obj_status = strdup(token); + + token = strtok(tmp_line, w_space); + last_ref = strdup(token); + + token = strtok(tmp_line, w_space); + ttl = strdup(token); + + token = strtok(tmp_line, w_space); + /* Active */ + + token = strtok(tmp_line, w_space); + sscanf(token, "%d", &ref_cnt); + + token = strtok(tmp_line, w_space); + sto = strdup(token); + + token = strtok(tmp_line, w_space); + status = strdup(token); + + printf("
  • Cache: %s
    ", + url, url); + printf("Size: %d bytes, TTL: %s ,
    ", + obj_size, ttl); + printf("Stored: %s, %s ago, %s %s,
    ", + sto, store_time, obj_status, status); + printf("Refs: %d, Referenced %s hh:mm:ss ago
  • \n", + ref_cnt, last_ref); + + free(ttl); + free(store_time); + free(last_ref); + free(sto); + free(status); +} + +int main(int argc, char *argv[]) +{ + static char hostname[256]; + static char operation[256]; + static char password[256]; + static char url[4096]; + static char msg[1024]; + static char buf[4096]; + static char reserve[4096]; + static char s1[255]; + static char s2[255]; + char *time_string = NULL; + char *agent = NULL; + char *s = NULL; + int got_data = 0; + int x; + int cl; + int conn; + int len; + int bytesWritten; + int portnum = CACHE_HTTP_PORT; + int op; + int p_state; + int n_loops; + int cpy_ind; + int indx; + int in_list = 0; + int in_table = 0; + int d1, d2, d3, d4, d5, d6, d7; + int single = TRUE; + float f1; + time_t time_val; + entry entries[MAX_ENTRIES]; + + if ((s = strrchr(argv[0], '/'))) + progname = strdup(s + 1); + else + progname = strdup(argv[0]); + if ((s = getenv("SCRIPT_NAME")) != NULL) { + script_name = strdup(s); + } + printf("Content-type: text/html\r\n\r\n"); + if ((agent = getenv("HTTP_USER_AGENT")) != NULL) { + if (!strncasecmp(agent, "Mozilla", 7) || + !strncasecmp(agent, "Netscape", 8)) { + hasTables = TRUE; + } + } + hostname[0] = '\0'; + if ((s = getenv("CONTENT_LENGTH")) == NULL) { + noargs_html(); + exit(0); + } + cl = atoi(s); + password[0] = url[0] = '\0'; + for (x = 0; cl && (!feof(stdin)); x++) { + got_data = 1; + entries[x].val = fmakeword(stdin, '&', &cl); + plustospace(entries[x].val); + unescape_url(entries[x].val); + entries[x].name = makeword(entries[x].val, '='); + if (!strncmp(entries[x].name, "host", 4)) + strncpy(hostname, entries[x].val, 256); + else if (!strncmp(entries[x].name, "operation", 7)) + strncpy(operation, entries[x].val, 256); + else if (!strncmp(entries[x].name, "password", 8)) + strncpy(password, entries[x].val, 256); + else if (!strncmp(entries[x].name, "url", 3)) + strncpy(url, entries[x].val, 4096); + else if (!strncmp(entries[x].name, "port", 4)) + portnum = 
atoi(entries[x].val); + else { + printf("

    Unknown CGI parameter: %s

    \n", + entries[x].name); + noargs_html(); + exit(0); + } + } + if (!got_data) { /* prints HTML form if no args */ + noargs_html(); + exit(0); + } + if (hostname[0] == '\0') { + printf("

    ERROR

    \n"); + printf("

    You must provide a hostname!\n


    "); + noargs_html(); + exit(0); + } + close(0); + + if (!strncmp(operation, "info", 4) || + !strncmp(operation, "Cache Information", 17)) { + op = INFO; + sprintf(msg, "GET cache_object://%s/info\r\n", hostname); + } else if (!strncmp(operation, "cached.conf", 10) || + !strncmp(operation, "Cache Configuration File", 24)) { + op = CACHED; + sprintf(msg, "GET cache_object://%s/cached.conf\r\n", hostname); + } else if (!strncmp(operation, "server_list", 11) || + !strncmp(operation, "Cache Server List", 17)) { + op = SERVER; + sprintf(msg, "GET cache_object://%s/server_list\r\n", hostname); +#ifdef MENU_SHOW_LOG + } else if (!strncmp(operation, "log", 3) || + !strncmp(operation, "Cache Log", 9)) { + op = LOG; + sprintf(msg, "GET cache_object://%s/log\r\n", hostname); +#endif + } else if (!strncmp(operation, "parameter", 9) || + !strncmp(operation, "Cache Parameters", 16)) { + op = PARAM; + sprintf(msg, "GET cache_object://%s/parameter\r\n", hostname); +#ifdef MENU_RESPONSETIME + } else if (!strncmp(operation, "responsetime", 11) || + !strncmp(operation, "Cache Response Time Histogram", 28)) { + op = RESPT; + sprintf(msg, "GET cache_object://%s/responsetime\r\n", hostname); +#endif + } else if (!strncmp(operation, "stats/general", 13) || + !strncmp(operation, "General Statistics", 18)) { + op = STATS_G; + sprintf(msg, "GET cache_object://%s/stats/general\r\n", hostname); + } else if (!strncmp(operation, "stats/vm_objects", 16)) { + op = STATS_O; + sprintf(msg, "GET cache_object://%s/stats/vm_objects\r\n", hostname); + } else if (!strncmp(operation, "stats/objects", 13) || + !strncmp(operation, "Objects", 7)) { + op = STATS_O; + sprintf(msg, "GET cache_object://%s/stats/objects\r\n", hostname); + } else if (!strncmp(operation, "stats/utilization", 17) || + !strncmp(operation, "Utilization", 11)) { + op = STATS_U; + sprintf(msg, "GET cache_object://%s/stats/utilization\r\n", hostname); + } else if (!strncmp(operation, "shutdown", 8)) { + op = SHUTDOWN; + sprintf(msg, 
"GET cache_object://%s/shutdown@%s\r\n", hostname, password); + } else if (!strncmp(operation, "refresh", 7)) { + op = REFRESH; + sprintf(msg, "GET %s HTTP/1.0\r\nPragma: no-cache\r\nAccept: */*\r\n\r\n", url); +#ifdef REMOVE_OBJECT + } else if (!strncmp(operation, "remove", 6)) { + op = REMOVE; + /* Peter: not sure what to do here - depends what you do at your end! */ + sprintf(msg, "REMOVE %s HTTP/1.0\r\nPragma: no-cache\r\nAccept: */*\r\n\r\n", url); +#endif + + } else { + printf("Unknown operation: %s\n", operation); + exit(0); + } + + + time_val = time(NULL); + time_string = ctime(&time_val); + + printf("Cache Manager: %s:%s:%d\n", operation, hostname, portnum); + printf("
    \n", script_name); + printf("\n"); + printf(""); + printf("\n", hostname); + printf("\n", portnum); + printf("\n"); + printf("
    "); + printf("

    Empty form

    \n", script_name); + printf("
    \n"); + + printf("

    %s: %s:%d - dated %s

    ", operation, + hostname, portnum, time_string); + printf("

    \n");
    +
    +    /* Connect to the server */
    +    if ((conn = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
    +	perror("client: socket");
    +	exit(1);
    +    }
    +    if ((conn = client_comm_connect(conn, hostname, portnum)) < 0) {
    +	printf("Error: connecting to cache mgr: %s:%d\n", hostname, portnum);
    +	printf("%s
    \n", xstrerror()); + exit(1); + } + bytesWritten = write(conn, msg, strlen(msg)); + + if (bytesWritten < 0) { + printf("Error: write failed\n"); + exit(1); + } else if (bytesWritten != (strlen(msg))) { + printf("Error: write short\n"); + exit(1); + } + /* Print header stuff for tables */ + switch (op) { + case INFO: + case CACHED: + case SERVER: + case LOG: + case STATS_G: + case STATS_O: + case SHUTDOWN: + case REFRESH: + break; + case PARAM: + if (hasTables) { + printf("\n"); + in_table = 1; + } else { + printf("\n %20s %10s %s
    \n", "Parameter", "Value", "Description"); + } + break; + case RESPT: + if (hasTables) { + printf("
    ParameterValueDescription
    \n"); + in_table = 1; + } else { + printf("\n %20s %10s
    \n", "Time (msec)", "Frequency"); + } + break; + case STATS_U: + if (hasTables) { + printf("
    Time (msec)Frequency
    \n"); + in_table = 1; + } else { + printf("Protocol | Count | Maximum | Current | Minimum | Hit | Transfer | Reference | Transfered |\n"); + printf(" | | KB | KB | KB | Ratio | Rate | Count | KB |\n"); + printf("---------|-------|----------|----------|---------|-------|----------|-----------|------------|\n"); + } + break; + default: + printf("\n\n

    \nNot currently implemented.\n"); + exit(1); + } + + p_state = 0; + cpy_ind = 0; + n_loops = 0; /* Keep track of the number of passes through while */ + while ((len = read(conn, buf, sizeof(buf))) > 0) { + n_loops++; + /* Simple state machine for parsing a {{ } { } ...} style list */ + for (indx = 0; indx < len; indx++) { + if (buf[indx] == '{') + p_state++; + else if (buf[indx] == '}') + if (p_state == 2) { /* Have an element of the list */ + single = FALSE; + p_state++; + reserve[cpy_ind] = '\0'; + cpy_ind = 0; + } else if (p_state == 1 && single) /* Check for single element list */ + p_state = 3; + else /* End of list */ + p_state = 0; + else if ((indx == 0) && (n_loops == 1)) { + if (op != REFRESH) + printf("ERROR:%s\n", buf); /* Must be an error message, pass it on */ + else + printf("Refreshed URL: %s\n", url); + } else + reserve[cpy_ind++] = buf[indx]; + + + /* Have an element of the list, so parse reserve[] accordingly */ + if (p_state == 3) { + switch (op) { + case INFO: + case CACHED: + case SERVER: + case LOG: + case STATS_G: + case SHUTDOWN: + p_state = 1; + printf("%s", reserve); + break; + case REFRESH: + /* throw object away */ + break; + case PARAM: + p_state = 1; + memset(s1, '\0', 255); + memset(s2, '\0', 255); + d1 = 0; + sscanf(reserve, "%s %d \"%[^\"]", s1, &d1, s2); + if (hasTables) + printf("

    ProtocolCountMax KBCurrent KBMin KBHit RatioTransfer RateReferencesTransfered KB
    %s%d%s\n", s1, d1, s2 + 2); + else + printf(" %20s %10d %s\n", s1, d1, s2 + 2); + break; + case RESPT: + p_state = 1; + memset(s1, '\0', 255); + d1 = 0; + sscanf(reserve, "%s %d", s1, &d1); + if (hasTables) + printf("
    %s%d\n", s1, d1); + else + printf(" %20s %10d\n", s1, d1); + break; + case STATS_U: + p_state = 1; + sscanf(reserve, "%s %d %d %d %d %f %d %d %d", + s1, &d1, &d2, &d3, &d4, &f1, &d5, &d6, &d7); + if (hasTables) + printf("
    %s%d%d%d%d%4.2f%d%d%d", + s1, d1, d2, d3, d4, f1, d5, d6, d7); + else + printf("%8s %7d %10d %10d %9d %4.2f %10d %10d %10d
    \n", + s1, d1, d2, d3, d4, f1, d5, d6, d7); + break; + case STATS_O: + if (!in_list) { + in_list = 1; + printf("
      \n"); + } + parse_object(reserve); + p_state = 1; + break; + default: + printf("%s\n", "Not currently implemented"); + exit(1); + } + } + } + } + + if (in_list) + printf("
    \n"); + + if (in_table) + printf("
    \n"); + + printf("\n\n"); + print_trailer(); + (void) close(conn); + exit(0); + /* NOTREACHED */ +} + +static int client_comm_connect(sock, dest_host, dest_port) + int sock; /* Type of communication to use. */ + char *dest_host; /* Server's host name. */ + int dest_port; /* Server's port. */ +{ + struct hostent *hp; + static struct sockaddr_in to_addr; + + /* Set up the destination socket address for message to send to. */ + to_addr.sin_family = AF_INET; + + if ((hp = gethostbyname(dest_host)) == 0) { + return (-1); + } + memcpy(&to_addr.sin_addr, hp->h_addr, hp->h_length); + to_addr.sin_port = htons(dest_port); + return connect(sock, (struct sockaddr *) &to_addr, sizeof(struct sockaddr_in)); +} diff --git a/src/client.cc b/src/client.cc new file mode 100644 index 00000000000..a36c64e07a9 --- /dev/null +++ b/src/client.cc @@ -0,0 +1,221 @@ + +static char rcsid[] = "$Id: client.cc,v 1.1 1996/02/22 06:23:53 wessels Exp $"; +/* + * client.c - Simple client to the Harvest cache. + * + *********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. 
+ * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. 
In June 1995 we changed this policy.  We now
+ *    allow people to use the core Harvest software (the code found in
+ *    the Harvest ``src/'' directory) for free.  We made this change
+ *    in the interest of encouraging the widest possible deployment of
+ *    the technology.  The Harvest software is really a reference
+ *    implementation of a set of protocols and formats, some of which
+ *    we intend to standardize.  We encourage commercial
+ *    re-implementations of code complying to this set of standards.
+ *
+ *
+ */
+#include "config.h"
+/* NOTE(review): the header names of the ten bare #include lines below are
+ * missing from this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifndef BUFSIZ
+#define BUFSIZ 8192
+#endif
+
+/* Local functions */
+static int client_comm_connect();
+static void usage();
+
+/*
+ * usage - print the command line synopsis on stderr and exit(1).
+ * Never returns.
+ */
+static void usage(progname)
+     char *progname;
+{
+    fprintf(stderr, "\
+Usage: %s [-rs] [-h host] [-p port] url\n\
+Options:\n\
+    -r         Force cache to reload URL.\n\
+    -s         Silent.  Do not print data to stdout.\n\
+    -h host    Retrieve URL from cache on hostname.  Default is localhost.\n\
+    -p port    Port number of cache.  Default is %d.\n\
+", progname, CACHE_HTTP_PORT);
+    exit(1);
+}
+
+/*
+ * main - send one HTTP/1.0 GET for the URL given as the last argument to
+ * the cache at host:port and copy the reply to stdout (unless -s/-n).
+ *
+ * Exits 0 once the reply has been read to end-of-file, 1 on any usage,
+ * connect, write or read error.
+ */
+int main(argc, argv)
+     int argc;
+     char *argv[];
+{
+    int conn, c, len, bytesWritten;
+    int port, to_stdout, reload;
+    /* msg is sized for a maximum-length url plus the fixed request text
+     * (48 bytes), so the sprintf() calls that build it cannot overflow. */
+    char url[BUFSIZ], msg[BUFSIZ + 64], buf[BUFSIZ], hostname[BUFSIZ];
+    extern char *optarg;
+
+    /* set the defaults */
+    strcpy(hostname, "localhost");
+    port = CACHE_HTTP_PORT;
+    to_stdout = 1;
+    reload = 0;
+
+    if (argc < 2)
+        usage(argv[0]);         /* need URL; usage() does not return */
+
+    /* The URL is always the last argument; refuse one that does not fit. */
+    if (strlen(argv[argc - 1]) >= sizeof(url)) {
+        fprintf(stderr, "client: ERROR: URL is too long (limit %d bytes).\n",
+            (int) sizeof(url) - 1);
+        exit(1);
+    }
+    strcpy(url, argv[argc - 1]);
+    if (url[0] == '-')
+        usage(argv[0]);
+    while ((c = getopt(argc, argv, "fsrnp:c:h:?")) != -1)
+        switch (c) {
+        case 'h':               /* host:arg */
+        case 'c':               /* backward compat */
+            if (optarg != NULL) {
+                if (strlen(optarg) >= sizeof(hostname)) {
+                    fprintf(stderr, "client: ERROR: host name is too long.\n");
+                    exit(1);
+                }
+                strcpy(hostname, optarg);
+            }
+            break;
+        case 's':               /* silent */
+        case 'n':               /* backward compat */
+            to_stdout = 0;
+            break;
+        case 'r':               /* reload */
+            reload = 1;
+            break;
+        case 'p':               /* port number */
+            /* anything that is not a number in 1..65535 selects the default */
+            if (sscanf(optarg, "%d", &port) != 1 || port < 1 || port > 65535)
+                port = CACHE_HTTP_PORT;
+            break;
+        case '?':               /* usage */
+        default:
+            usage(argv[0]);
+            break;
+        }
+
+    /* Connect to the server */
+    if ((conn = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
+        perror("client: socket");
+        exit(1);
+    }
+    if (client_comm_connect(conn, hostname, port) < 0) {
+        /* client_comm_connect() reports a failed name lookup as errno 0 */
+        if (errno == 0) {
+            fprintf(stderr, "client: ERROR: Cannot connect to %s:%d: Host unknown.\n", hostname, port);
+        } else {
+            int saved_errno = errno;
+            char tbuf[BUFSIZ + 64];     /* hostname plus 40 bytes of text */
+            sprintf(tbuf, "client: ERROR: Cannot connect to %s:%d",
+                hostname, port);
+            errno = saved_errno;        /* keep connect()'s error for perror */
+            perror(tbuf);
+        }
+        exit(1);
+    }
+    /* Build the HTTP request */
+    if (reload) {
+        sprintf(msg, "GET %s HTTP/1.0\r\nPragma: no-cache\r\nAccept: */*\r\n\r\n", url);
+    } else {
+        sprintf(msg, "GET %s HTTP/1.0\r\nAccept: */*\r\n\r\n", url);
+    }
+
+    /* Send the HTTP request */
+    bytesWritten = write(conn, msg, strlen(msg));
+    if (bytesWritten < 0) {
+        perror("client: ERROR: write");
+        exit(1);
+    } else if (bytesWritten != (int) strlen(msg)) {
+        fprintf(stderr, "client: ERROR: Cannot send request?: %s\n", msg);
+        exit(1);
+    }
+    /* Read the data */
+    while ((len = read(conn, buf, sizeof(buf))) > 0) {
+        if (to_stdout)
+            fwrite(buf, len, 1, stdout);
+    }
+    if (len < 0) {
+        /* a failed read used to be reported as success */
+        perror("client: ERROR: read");
+        (void) close(conn);
+        exit(1);
+    }
+    (void) close(conn);         /* done with socket */
+    exit(0);
+    /*NOTREACHED */
+}
+
+/*
+ * client_comm_connect - resolve dest_host and connect sock to it.
+ *
+ * Returns connect()'s result.  When the host name cannot be resolved it
+ * returns -1 with errno set to 0, which is how main() tells "Host unknown"
+ * apart from a connect() failure.
+ */
+static int client_comm_connect(sock, dest_host, dest_port)
+     int sock;                  /* Socket to connect. */
+     char *dest_host;           /* Server's host name. */
+     int dest_port;             /* Server's port. */
+{
+    struct hostent *hp;
+    static struct sockaddr_in to_addr;
+
+    /* Set up the destination socket address for message to send to. */
+    to_addr.sin_family = AF_INET;
+
+    if ((hp = gethostbyname(dest_host)) == 0) {
+        errno = 0;              /* gethostbyname() does not define errno */
+        return (-1);
+    }
+    memcpy(&to_addr.sin_addr, hp->h_addr, hp->h_length);
+    to_addr.sin_port = htons(dest_port);
+    return connect(sock, (struct sockaddr *) &to_addr, sizeof(struct sockaddr_in));
+}
diff --git a/src/comm.cc b/src/comm.cc
new file mode 100644
index 00000000000..2d58db4b787
--- /dev/null
+++ b/src/comm.cc
@@ -0,0 +1,1257 @@
+static char rcsid[] = "$Id: comm.cc,v 1.1 1996/02/22 06:23:53 wessels Exp $";
+/*
+ *  File:         comm.c
+ *  Description:  socket-based communication facility.  Adapted from DHT
+ *                authenticating message library.
+ *  Author:       John Noll, USC
+ *  Created:      Tue Apr 26 14:24:37 1994
+ *  Language:     C
+ **********************************************************************
+ *  Copyright (c) 1994, 1995.  All rights reserved.
+ *
+ *    The Harvest software was developed by the Internet Research Task
+ *    Force Research Group on Resource Discovery (IRTF-RD):
+ *
+ *          Mic Bowman of Transarc Corporation.
+ *          Peter Danzig of the University of Southern California.
+ *          Darren R. Hardy of the University of Colorado at Boulder.
+ *          Udi Manber of the University of Arizona.
+ *          Michael F. Schwartz of the University of Colorado at Boulder.
+ *          Duane Wessels of the University of Colorado at Boulder.
+ *
+ *    This copyright notice applies to software in the Harvest
+ *    ``src/'' directory only.
Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. 
+ *
+ *  HISTORY OF FREE SOFTWARE STATUS
+ *
+ *    Originally we required sites to license the software in cases
+ *    where they were going to build commercial products/services
+ *    around Harvest.  In June 1995 we changed this policy.  We now
+ *    allow people to use the core Harvest software (the code found in
+ *    the Harvest ``src/'' directory) for free.  We made this change
+ *    in the interest of encouraging the widest possible deployment of
+ *    the technology.  The Harvest software is really a reference
+ *    implementation of a set of protocols and formats, some of which
+ *    we intend to standardize.  We encourage commercial
+ *    re-implementations of code complying to this set of standards.
+ *
+ *
+ */
+
+#include "config.h"
+
+/* NOTE(review): the header names of the bare #include lines below are
+ * missing from this copy of the patch; restore them from the upstream file. */
+#include
+#include
+#include
+#include
+#include
+#if !defined(_HARVEST_LINUX_)
+#include
+#endif
+#include
+#include
+#include
+#include
+
+#ifdef _HARVEST_AIX_
+#include
+#endif
+
+#ifdef _HARVEST_SGI_
+#include
+#endif
+
+#include "debug.h"
+#include "comm.h"
+#include "ipcache.h"
+#include "cache_cf.h"
+#include "fdstat.h"
+#include "util.h"
+
+/* Block processing new client requests (accepts on ascii port) when we start
+ * running shy of free file descriptors.  For example, under SunOS, we'll keep
+ * 64 file descriptors free for disk-i/o and connections to remote servers */
+
+int RESERVED_FD;
+
+
+#define min(x,y) ((x)<(y)? (x) : (y))
+#define max(a,b) ((a)>(b)? (a) : (b))
+
+
+/* GLOBAL */
+time_t cached_curtime = 0L;     /* global time var set by select loop */
+FD_ENTRY *fd_table = NULL;      /* also used in disk.c */
+
+/* STATIC */
+static int *fd_lifetime = NULL;
+static fd_set send_sockets;
+static fd_set receive_sockets;
+static int (*app_handler) ();
+static void checkTimeouts();
+static void checkLifetimes();
+static void Reserve_More_FDs _PARAMS((void));
+static int commSetReuseAddr _PARAMS((int));
+static int examine_select _PARAMS((fd_set *, fd_set *, fd_set *));
+static int commSetNoLinger _PARAMS((int));
+
+/* EXTERN */
+extern int errno;
+extern int do_reuse;
+extern int getMaxFD();
+extern int theAsciiConnection;
+extern int theUdpConnection;
+extern int getConnectTimeout();
+extern int fdstat_are_n_free_fd _PARAMS((int));
+extern void fatal_dump _PARAMS((char *));
+extern int fd_of_first_client _PARAMS((StoreEntry *));
+char *fd_note();
+
+/*
+ * comm_handler - signal-style trampoline to the application handler.
+ * app_handler is only assigned inside the OLD_CODE section of comm_open(),
+ * so it may still be NULL here; do nothing in that case instead of
+ * jumping through a null pointer.
+ */
+void comm_handler()
+{
+    /* Call application installed handler. */
+    debug(5, "comm_handler:\n");
+    if (app_handler != NULL)
+        app_handler();
+}
+
+/*
+ * comm_hostname - return this machine's host name, preferring the official
+ * name from ipcache_gethostbyname() when that lookup succeeds.
+ *
+ * The answer is computed once and cached in a static buffer, which is what
+ * the caller receives.  Returns NULL if gethostname() fails.
+ */
+char *comm_hostname()
+{
+    static char host[HARVESTHOSTNAMELEN + 1];
+    static int present = 0;
+    struct hostent *h = NULL;
+
+    /* Get the host name and store it in host to return */
+    if (!present) {
+        host[0] = '\0';
+        if (gethostname(host, HARVESTHOSTNAMELEN) == -1) {
+            debug(1, "comm_hostname: gethostname failed: %s\n",
+                xstrerror());
+            return NULL;
+        } else {
+            /* gethostname() may leave a truncated name unterminated */
+            host[HARVESTHOSTNAMELEN] = '\0';
+            if ((h = ipcache_gethostbyname(host)) != NULL) {
+                /* DNS lookup successful */
+                /* use the official name from DNS lookup; it may be longer
+                 * than our buffer, so copy with a bound and terminate */
+                strncpy(host, h->h_name, HARVESTHOSTNAMELEN);
+                host[HARVESTHOSTNAMELEN] = '\0';
+            }
+            present = 1;
+        }
+    }
+    return host;
+}
+
+/*
+ * comm_hostname_direct - return gethostname()'s answer without any DNS
+ * lookup or caching.  The result lives in a static buffer that is
+ * overwritten by the next call.  Returns NULL if gethostname() fails.
+ */
+char *comm_hostname_direct()
+{
+    static char temp_host[HARVESTHOSTNAMELEN + 1];
+
+    temp_host[0] = '\0';
+    if (gethostname(temp_host, HARVESTHOSTNAMELEN) == -1) {
+        debug(1, "comm_hostname_direct: gethostname failed: %s\n",
+            xstrerror());
+        return NULL;
+    } else {
+        temp_host[HARVESTHOSTNAMELEN] = '\0';
+        return temp_host;
+    }
+}
+
+/* Return the local port associated with fd.
*/
+/*
+ * comm_port - the port is cached in fd_table[fd].port after the first
+ * successful getsockname().  Returns COMM_ERROR if the entry is not marked
+ * open or getsockname() fails.
+ */
+int comm_port(fd)
+     int fd;
+{
+    struct sockaddr_in addr;
+    int addr_len = 0;
+
+    if (fd_table[fd].port)
+        return fd_table[fd].port;
+
+    /* If the fd is closed already, just return */
+    if (!fd_table[fd].openned) {
+        debug(0, "comm_port: FD %d has been closed.\n", fd);
+        return (COMM_ERROR);
+    }
+    addr_len = sizeof(addr);
+    if (getsockname(fd, (struct sockaddr *) &addr, &addr_len)) {
+        debug(1, "comm_port: Failed to retrieve TCP/UDP port number for socket: FD %d: %s\n", fd, xstrerror());
+        return (COMM_ERROR);
+    }
+    debug(6, "comm_port: FD %d: sockaddr %u.\n", fd, addr.sin_addr.s_addr);
+    fd_table[fd].port = ntohs(addr.sin_port);
+
+    return fd_table[fd].port;
+}
+
+/*
+ * do_bind - bind socket s to host:port.  host is resolved with
+ * getAddress().  Returns COMM_OK or COMM_ERROR.
+ */
+static int do_bind(s, host, port)
+     int s;
+     char *host;
+     int port;
+{
+    struct sockaddr_in S;
+    struct in_addr *addr = NULL;
+
+    addr = getAddress(host);
+    if (addr == (struct in_addr *) NULL) {
+        debug(0, "do_bind: Unknown host: %s\n", host);
+        return COMM_ERROR;
+    }
+    memset(&S, '\0', sizeof(S));
+    S.sin_family = AF_INET;
+    S.sin_port = htons(port);
+    S.sin_addr = *addr;
+
+    if (bind(s, (struct sockaddr *) &S, sizeof(S)) == 0)
+        return COMM_OK;
+
+    debug(0, "do_bind: Cannot bind socket FD %d to %s:%d: %s\n",
+        s,
+        S.sin_addr.s_addr == htonl(INADDR_ANY) ? "*" : inet_ntoa(S.sin_addr),
+        port, xstrerror());
+    return COMM_ERROR;
+}
+
+/* Create a socket. Default is blocking, stream (TCP) socket.  IO_TYPE
+ * is OR of flags specified in comm.h.
+ *
+ * When port is non-zero the socket is bound to the first address in
+ * bind_addr_list that accepts it.  Returns the new descriptor, or
+ * COMM_ERROR.  On every failure after socket() succeeded the descriptor is
+ * released through comm_close(), so neither the fd nor its fd_table/fdstat
+ * bookkeeping is leaked (the OLD_CODE section is left as it was). */
+int comm_open(io_type, port, handler, note)
+     unsigned int io_type;
+     int port;
+     int (*handler) ();         /* Interrupt handler. */
+     char *note;
+{
+    int new_socket;
+    FD_ENTRY *conn = NULL;
+    int sock_type = io_type & COMM_DGRAM ? SOCK_DGRAM : SOCK_STREAM;
+    stoplist *p = NULL;
+
+    /* Create socket for accepting new connections. */
+    if ((new_socket = socket(AF_INET, sock_type, 0)) < 0) {
+        /* Increase the number of reserved fd's if calls to socket()
+         * are failing because the open file table is full.  This
+         * limits the number of simultaneous clients */
+        switch (errno) {
+        case ENFILE:
+        case EMFILE:
+            debug(1, "comm_open: socket failure: %s\n", xstrerror());
+            Reserve_More_FDs();
+            break;
+        default:
+            debug(0, "comm_open: socket failure: %s\n", xstrerror());
+        }
+        return (COMM_ERROR);
+    }
+    /* update fdstat */
+    fdstat_open(new_socket, Socket);
+
+    conn = &fd_table[new_socket];
+    memset(conn, '\0', sizeof(FD_ENTRY));
+    fd_note(new_socket, note);
+    conn->openned = 1;
+
+    if (fcntl(new_socket, F_SETFD, 1) < 0) {
+        debug(0, "comm_open: FD %d: failed to set close-on-exec flag: %s\n",
+            new_socket, xstrerror());
+    }
+    if (port > 0) {
+        if (commSetNoLinger(new_socket) < 0) {
+            debug(0, "comm_open: failed to turn off SO_LINGER: %s\n",
+                xstrerror());
+        }
+        if (do_reuse) {
+            commSetReuseAddr(new_socket);
+        }
+    }
+    if (port) {
+        for (p = bind_addr_list; p; p = p->next) {
+            if (do_bind(new_socket, p->key, port) == COMM_OK)
+                break;
+            if (p->next == (stoplist *) NULL) {
+                /* no address could be bound: give the descriptor back */
+                comm_close(new_socket);
+                return COMM_ERROR;
+            }
+        }
+    }
+    conn->port = port;
+
+#ifdef OLD_CODE
+    if (io_type & COMM_INTERRUPT) {
+#ifdef _HARVEST_HPUX_
+        pid_t my_pid = getpid();
+        int non_zero = 1;
+
+        /* Install interrupt handler. */
+        if ((app_handler = conn->handler = handler) == NULL)
+            return (COMM_NO_HANDLER);
+
+        /* Set up the environment for handling signals. */
+        signal(SIGIO, comm_handler);
+
+        /* Set the process receiving SIGIO/SIGURG signals to us. */
+        if (ioctl(new_socket, SIOCSPGRP, &my_pid) < 0) {
+            debug(0, "comm_open: Failure to set SIOCSPGRP: %s\n",
+                xstrerror());
+            return (COMM_ERROR);
+        }
+        /*
+         * Set status flags to allow receipt of asychronous I/O signals.
+         */
+
+        if (ioctl(new_socket, FIOASYNC, &non_zero)) {
+            debug(0, "comm_open: Failure to set FIOASYNC: %s\n",
+                xstrerror());
+            return COMM_ERROR;
+        }
+#else
+        /* Install interrupt handler. */
+        if ((app_handler = conn->handler = handler) == NULL)
+            return (COMM_NO_HANDLER);
+
+        /* Set up the environment for handling signals. */
+        signal(SIGIO, comm_handler);
+
+        /* Set the process receiving SIGIO/SIGURG signals to us. */
+        if (fcntl(new_socket, F_SETOWN, getpid()) < 0) {
+            debug(0, "comm_open: Failure to set F_SETOWN: %s\n",
+                xstrerror());
+            return (COMM_ERROR);
+        }
+        /* Set status flags to allow receipt of asychronous I/O signals. */
+        if (fcntl(new_socket, F_SETFL, FASYNC)) {
+            debug(0, "comm_open: Failure to set FASYNC: %s\n",
+                xstrerror());
+            return COMM_ERROR;
+        }
+#endif /* _HARVEST_HPUX_ */
+    }
+#endif /* OLD_CODE */
+    if (io_type & COMM_NONBLOCKING) {
+        /*
+         * Set up the flag NOT to have the socket to wait for message from
+         * network forever, but to return -1 when no message is coming in.
+         */
+#if defined(O_NONBLOCK) && !defined(_HARVEST_SUNOS_) && !defined(_HARVEST_SOLARIS_)
+        if (fcntl(new_socket, F_SETFL, O_NONBLOCK)) {
+            debug(0, "comm_open: FD %d: Failure to set O_NONBLOCK: %s\n",
+                new_socket, xstrerror());
+            comm_close(new_socket);
+            return (COMM_ERROR);
+        }
+#else
+        if (fcntl(new_socket, F_SETFL, FNDELAY)) {
+            debug(0, "comm_open: FD %d: Failure to set FNDELAY: %s\n",
+                new_socket, xstrerror());
+            comm_close(new_socket);
+            return (COMM_ERROR);
+        }
+#endif /* O_NONBLOCK */
+    }
+    conn->comm_type = io_type;
+    return new_socket;
+}
+
+   /*
+    * NOTE: set the listen queue to 50 and rely on the kernel to
+    * impose an upper limit.  Solaris' listen(3n) page says it has
+    * no limit on this parameter, but sys/socket.h sets SOMAXCONN
+    * to 5.  HP-UX currently has a limit of 20.  SunOS is 5 and
+    * OSF 3.0 is 8.
+    */
+/*
+ * comm_listen - start listening on sock.  Returns sock on success, or
+ * listen()'s negative result on failure.
+ */
+int comm_listen(sock)
+     int sock;
+{
+    int x;
+    FD_SET(sock, &receive_sockets);
+    if ((x = listen(sock, 50)) < 0) {
+        debug(0, "comm_listen: listen(%d, 50): %s\n",
+            sock, xstrerror());
+        return x;
+    }
+    return sock;
+}
+
+
+/* Connect SOCK to specified DEST_PORT at DEST_HOST.
+ * Returns COMM_ERROR if the host cannot be resolved, otherwise whatever
+ * comm_connect_addr() returns. */
+int comm_connect(sock, dest_host, dest_port)
+     int sock;                  /* Socket to connect. */
+     char *dest_host;           /* Server's host name. */
+     int dest_port;             /* Server's port. */
+{
+    struct hostent *hp = NULL;
+    static struct sockaddr_in to_addr;
+
+    /* Set up the destination socket address for message to send to. */
+    to_addr.sin_family = AF_INET;
+
+    if ((hp = ipcache_gethostbyname(dest_host)) == 0) {
+        debug(1, "comm_connect: Failure to lookup host: %s.\n", dest_host);
+        return (COMM_ERROR);
+    }
+    memcpy(&to_addr.sin_addr, hp->h_addr, hp->h_length);
+    to_addr.sin_port = htons(dest_port);
+    return comm_connect_addr(sock, &to_addr);
+}
+
+/*
+ * comm_set_fd_lifetime - give fd an absolute expiry of cached_curtime +
+ * lifetime seconds; a negative lifetime stores -1 ("no lifetime").
+ * Returns the value stored, or 0 when fd is negative.
+ */
+int comm_set_fd_lifetime(fd, lifetime)
+     int fd;
+     int lifetime;
+{
+    if (fd < 0)
+        return 0;
+    if (lifetime < 0)
+        return fd_lifetime[fd] = -1;
+    return fd_lifetime[fd] = (int) cached_curtime + lifetime;
+}
+
+/* comm_get_fd_lifetime - stored expiry for fd (-1 if none), 0 if fd < 0. */
+int comm_get_fd_lifetime(fd)
+     int fd;
+{
+    if (fd < 0)
+        return 0;
+    return fd_lifetime[fd];
+}
+
+/* comm_get_fd_timeout - fd_table[fd].timeout_time, or 0 if fd < 0. */
+int comm_get_fd_timeout(fd)
+     int fd;
+{
+    if (fd < 0)
+        return 0;
+    return fd_table[fd].timeout_time;
+}
+
+/*
+ * comm_connect_addr - connect sock to address.
+ *
+ * Returns COMM_OK when connected (or already connected), EINPROGRESS when
+ * a non-blocking connect is pending, COMM_ERROR otherwise.  Port 0 is
+ * rejected with errno cleared.  The fd lifetime is set from
+ * getClientLifetime() once connected and from getConnectTimeout() while
+ * the connect is pending.
+ */
+int comm_connect_addr(sock, address)
+     int sock;
+     struct sockaddr_in *address;
+{
+    int status = COMM_OK;
+    FD_ENTRY *conn = &fd_table[sock];
+    int len;
+    int x;
+    int lft;
+
+    /* sanity check */
+    if (ntohs(address->sin_port) == 0) {
+        debug(10, "comm_connect_addr: %s:%d: URL uses port 0?\n",
+            inet_ntoa(address->sin_addr), ntohs(address->sin_port));
+        errno = 0;
+        return COMM_ERROR;
+    }
+    /* Establish connection. */
+    if (connect(sock, (struct sockaddr *) address, sizeof(struct sockaddr_in)) < 0)
+        switch (errno) {
+        case EALREADY:
+            return COMM_ERROR;
+            /* NOTREACHED */
+        case EINPROGRESS:
+            status = EINPROGRESS;
+            break;
+        case EISCONN:
+            status = COMM_OK;
+            break;
+        case EINVAL:
+            /* replace EINVAL with the socket's pending error, if any */
+            len = sizeof(x);
+            if (getsockopt(sock, SOL_SOCKET, SO_ERROR, (char *) &x, &len) >= 0)
+                errno = x;
+            /* FALLTHROUGH */
+        default:
+            debug(1, "comm_connect_addr: %s:%d: socket failure: %s.\n",
+                inet_ntoa(address->sin_addr),
+                ntohs(address->sin_port),
+                xstrerror());
+            return COMM_ERROR;
+        }
+    /* set the lifetime for this client */
+    if (status == COMM_OK) {
+        lft = comm_set_fd_lifetime(sock, getClientLifetime());
+        debug(10, "comm_connect_addr: FD %d (lifetime %d): connected to %s:%d.\n",
+            sock, lft, inet_ntoa(address->sin_addr),
+            ntohs(address->sin_port));
+    } else if (status == EINPROGRESS) {
+        lft = comm_set_fd_lifetime(sock, getConnectTimeout());
+        debug(10, "comm_connect_addr: FD %d connection pending, lifetime %d\n",
+            sock, lft);
+    }
+    /* Add new socket to list of open sockets. */
+    FD_SET(sock, &send_sockets);
+    conn->sender = 1;
+
+    return status;
+}
+
+/* Wait for an incoming connection on FD.  FD should be a socket returned
+ * from comm_listen.
*/
+/*
+ * comm_accept - accept one connection on the listening descriptor fd.
+ *
+ * peer, when non-NULL, receives the remote address; me, when non-NULL,
+ * receives the local address reported by getsockname().  Returns the new
+ * descriptor, COMM_NOMESSAGE when accept() would block, or COMM_ERROR.
+ * The new descriptor inherits the listener's comm_type and is switched to
+ * non-blocking mode.
+ */
+int comm_accept(fd, peer, me)
+     int fd;
+     struct sockaddr_in *peer;
+     struct sockaddr_in *me;
+{
+    struct sockaddr_in addr;
+    int addr_len = sizeof(addr);
+    int new_fd;
+    FD_ENTRY *accepted;
+    FD_ENTRY *listen_entry = &fd_table[fd];
+
+    for (;;) {
+        new_fd = accept(fd, (struct sockaddr *) &addr, &addr_len);
+        if (new_fd >= 0)
+            break;
+        if (errno == EINTR)
+            continue;           /* if accept interrupted, try again */
+        if (errno == EWOULDBLOCK || errno == EAGAIN)
+            return COMM_NOMESSAGE;
+        if (errno == ENFILE || errno == EMFILE) {
+            Reserve_More_FDs();
+            return COMM_ERROR;
+        }
+        debug(1, "comm_accept: FD %d: accept failure: %s\n",
+            fd, xstrerror());
+        return COMM_ERROR;
+    }
+
+    if (peer != NULL)
+        *peer = addr;
+
+    if (me != NULL) {
+        /* reuse addr for the local end; it stays zeroed if the query fails */
+        addr_len = sizeof(addr);
+        memset(&addr, '\0', addr_len);
+        getsockname(new_fd, (struct sockaddr *) &addr, &addr_len);
+        *me = addr;
+    }
+    /* fdstat update */
+    fdstat_open(new_fd, Socket);
+    accepted = &fd_table[new_fd];
+    accepted->openned = 1;
+    accepted->sender = 0;       /* This is an accept, therefore receiver. */
+    accepted->comm_type = listen_entry->comm_type;
+
+    FD_SET(new_fd, &receive_sockets);
+    commSetNonBlocking(new_fd);
+
+    return new_fd;
+}
+
+/*
+ * comm_close - forget everything recorded about socket fd and close it.
+ * Returns -1 for a negative fd, otherwise close()'s result.  Being handed
+ * a descriptor that fdstat knows as a File is reported via fatal_dump().
+ */
+int comm_close(fd)
+     int fd;
+{
+    if (fd < 0)
+        return -1;
+
+    if (fdstat_type(fd) == File) {
+        debug(0, "FD %d: Someone called comm_close() on a File\n", fd);
+        fatal_dump(NULL);
+    }
+
+    FD_CLR(fd, &receive_sockets);
+    FD_CLR(fd, &send_sockets);
+
+    comm_set_fd_lifetime(fd, -1);       /* invalidate the lifetime */
+    debug(10, "comm_close: FD %d\n", fd);
+    /* update fdstat */
+    fdstat_close(fd);
+    memset(&fd_table[fd], '\0', sizeof(FD_ENTRY));
+    return close(fd);
+}
+
+/* use to clean up fdtable when socket is closed without
+ * using comm_close; always returns 0 */
+int comm_cleanup_fd_entry(fd)
+     int fd;
+{
+    memset(&fd_table[fd], 0, sizeof(FD_ENTRY));
+    return 0;
+}
+
+
+/* Send a udp datagram to specified PORT at HOST.
*/
+/*
+ * comm_udp_send - resolve host with ipcache_gethostbyname() and send
+ * buf[0..len) to it on fd.  Returns the byte count from sendto(), or
+ * COMM_ERROR if the lookup or the send fails.
+ */
+int comm_udp_send(fd, host, port, buf, len)
+     int fd;
+     char *host;
+     int port;
+     char *buf;
+     int len;
+{
+    struct hostent *hp = NULL;
+    static struct sockaddr_in to_addr;
+    int bytes_sent;
+
+    /* Set up the destination socket address for message to send to. */
+    to_addr.sin_family = AF_INET;
+
+    if ((hp = ipcache_gethostbyname(host)) == 0) {
+        debug(1, "comm_udp_send: gethostbyname failure: %s: %s\n",
+            host, xstrerror());
+        return (COMM_ERROR);
+    }
+    memcpy(&to_addr.sin_addr, hp->h_addr, hp->h_length);
+    to_addr.sin_port = htons(port);
+    if ((bytes_sent = sendto(fd, buf, len, 0, (struct sockaddr *) &to_addr,
+                sizeof(to_addr))) < 0) {
+        debug(1, "comm_udp_send: sendto failure: FD %d: %s\n",
+            fd, xstrerror());
+        return COMM_ERROR;
+    }
+    return bytes_sent;
+}
+
+/* Send a udp datagram to specified TO_ADDR.
+ * Returns the byte count from sendto(), or COMM_ERROR after logging the
+ * destination. */
+int comm_udp_sendto(fd, to_addr, addr_len, buf, len)
+     int fd;
+     struct sockaddr_in *to_addr;
+     int addr_len;
+     char *buf;
+     int len;
+{
+    int bytes_sent;
+
+    if ((bytes_sent = sendto(fd, buf, len, 0, (struct sockaddr *) to_addr, addr_len)) < 0) {
+        debug(1, "comm_udp_sendto: sendto failure: FD %d: %s\n", fd, xstrerror());
+        debug(1, "comm_udp_sendto: --> sin_family = %d\n", to_addr->sin_family);
+        /* sin_port is in network order: convert it with ntohs for display */
+        debug(1, "comm_udp_sendto: --> sin_port   = %d\n", ntohs(to_addr->sin_port));
+        debug(1, "comm_udp_sendto: --> sin_addr   = %s\n", inet_ntoa(to_addr->sin_addr));
+        return COMM_ERROR;
+    }
+    return bytes_sent;
+}
+
+/*
+ * comm_udp_recv - receive one datagram on fd into buf (at most size
+ * bytes).  Returns the length from recvfrom(), or COMM_ERROR.
+ */
+int comm_udp_recv(fd, buf, size, from_addr, from_size)
+     int fd;
+     char *buf;
+     int size;
+     struct sockaddr_in *from_addr;
+     int *from_size;            /* in: size of from_addr; out: size filled in. */
+{
+    int len = recvfrom(fd, buf, size, 0, (struct sockaddr *) from_addr,
+        from_size);
+    if (len < 0) {
+        debug(1, "comm_udp_recv: recvfrom failure: FD %d: %s\n", fd,
+            xstrerror());
+        return COMM_ERROR;
+    }
+    return len;
+}
+
+
+
+/* Select on all sockets; call handlers for those that are ready.
*/
+/*
+ * comm_select - run the select loop for up to sec seconds.
+ *
+ * Every descriptor with a read/write/except handler in fd_table is
+ * selected on with a one second poll.  A ready descriptor's handler is
+ * cleared from the table before it is called, so handlers are one-shot.
+ * Returns COMM_OK after one pass in which something was ready,
+ * COMM_TIMEOUT when the deadline (or failtime, if positive) passes, and
+ * COMM_ERROR when select() fails with anything but EINTR.  usec is not
+ * used.
+ */
+int comm_select(sec, usec, failtime)
+     long sec, usec;
+     time_t failtime;
+{
+    int fd;
+    int i;
+    fd_set readfds;
+    fd_set writefds;
+    fd_set exceptfds;
+    int num;
+    time_t timeout;
+    static time_t last_timeout = 0;
+    struct timeval poll_time;
+    struct timeval zero_tv;
+    int sel_fd_width;
+
+    /* assume all process are very fast (less than 1 second). Call
+     * time() only once */
+    cached_curtime = time(0L);
+    /* use only 1 second granularity */
+    zero_tv.tv_sec = 0;
+    zero_tv.tv_usec = 0;
+    timeout = cached_curtime + sec;
+
+
+    while (timeout > (cached_curtime = time(0L))) {
+        if (0 < failtime && failtime < cached_curtime)
+            break;
+
+        FD_ZERO(&readfds);
+        FD_ZERO(&writefds);
+        FD_ZERO(&exceptfds);
+
+        for (i = 0; i < fdstat_biggest_fd() + 1; i++) {
+            /* Check each open socket for a handler. */
+            if (fd_table[i].read_handler)
+                FD_SET(i, &readfds);
+            if (fd_table[i].write_handler)
+                FD_SET(i, &writefds);
+            if (fd_table[i].except_handler)
+                FD_SET(i, &exceptfds);
+        }
+        /* stop accepting new clients while we are short of descriptors */
+        if (!fdstat_are_n_free_fd(RESERVED_FD)) {
+            FD_CLR(theAsciiConnection, &readfds);
+        }
+        while (1) {
+            poll_time.tv_sec = 1;
+            poll_time.tv_usec = 0;
+            num = select(fdstat_biggest_fd() + 1,
+                &readfds, &writefds, &exceptfds, &poll_time);
+            if (num >= 0)
+                break;
+
+            if (errno != EINTR) {
+                debug(0, "comm_select: select failure: %s (errno %d).\n",
+                    xstrerror(), errno);
+                examine_select(&readfds, &writefds, &exceptfds);
+                return COMM_ERROR;
+            }
+            /* if select interrupted, try again */
+        }
+
+        debug(num ? 5 : 8, "comm_select: %d sockets ready at %d\n",
+            num, cached_curtime);
+
+        /* Check lifetime and timeout handlers ONCE each second.
+         * Replaces brain-dead check every time through the loop! */
+        if (cached_curtime > last_timeout) {
+            last_timeout = cached_curtime;
+            checkTimeouts();
+            checkLifetimes();
+        }
+        /* scan each socket but the accept socket. Poll this
+         * more frequently to minimize losses due to the 5 connect
+         * limit in SunOS */
+
+        if (num) {
+            for (fd = 0; (fd < (fdstat_biggest_fd() + 1)) && (num > 0); fd++) {
+
+                if (!(FD_ISSET(fd, &readfds) || FD_ISSET(fd, &writefds) ||
+                        FD_ISSET(fd, &exceptfds)))
+                    continue;
+                else
+                    --num;
+
+                /*
+                 * Admit more connections quickly until we hit the hard limit.
+                 * Don't forget to keep the UDP acks coming and going.
+                 */
+                {
+                    fd_set read_mask, write_mask;
+                    int (*tmp) () = NULL;
+
+                    FD_ZERO(&read_mask);
+                    FD_ZERO(&write_mask);
+
+                    if ((fdstat_are_n_free_fd(RESERVED_FD)) && (fd_table[theAsciiConnection].read_handler))
+                        FD_SET(theAsciiConnection, &read_mask);
+                    else
+                        FD_CLR(theAsciiConnection, &read_mask);
+                    if (theUdpConnection >= 0) {
+                        if (fd_table[theUdpConnection].read_handler)
+                            FD_SET(theUdpConnection, &read_mask);
+                        if (fd_table[theUdpConnection].write_handler)
+                            FD_SET(theUdpConnection, &write_mask);
+                    }
+                    sel_fd_width = max(theAsciiConnection, theUdpConnection) + 1;
+                    /* zero timeout: a pure poll of the two service sockets */
+                    if (select(sel_fd_width, &read_mask, &write_mask, NULL, &zero_tv) > 0) {
+                        if (FD_ISSET(theAsciiConnection, &read_mask)) {
+                            tmp = fd_table[theAsciiConnection].read_handler;
+                            fd_table[theAsciiConnection].read_handler = 0;
+                            tmp(theAsciiConnection, fd_table[theAsciiConnection].read_data);
+                        }
+                        if ((theUdpConnection >= 0)) {
+                            if (FD_ISSET(theUdpConnection, &read_mask)) {
+                                tmp = fd_table[theUdpConnection].read_handler;
+                                fd_table[theUdpConnection].read_handler = 0;
+                                tmp(theUdpConnection, fd_table[theUdpConnection].read_data);
+                            }
+                            if (FD_ISSET(theUdpConnection, &write_mask)) {
+                                tmp = fd_table[theUdpConnection].write_handler;
+                                fd_table[theUdpConnection].write_handler = 0;
+                                tmp(theUdpConnection, fd_table[theUdpConnection].write_data);
+                            }
+                        }
+                    }
+                }
+                /* the two service sockets were handled by the poll above */
+                if ((fd == theUdpConnection) || (fd == theAsciiConnection))
+                    continue;
+
+                if (FD_ISSET(fd, &readfds)) {
+                    debug(6, "comm_select: FD %d ready for reading\n", fd);
+                    if (fd_table[fd].read_handler) {
+                        int (*tmp) () = fd_table[fd].read_handler;
+                        fd_table[fd].read_handler = 0;
+                        debug(10, "calling read handler %p(%d,%p)\n",
+                            tmp, fd, fd_table[fd].read_data);
+                        tmp(fd, fd_table[fd].read_data);
+                    }
+                }
+                if (FD_ISSET(fd, &writefds)) {
+                    debug(5, "comm_select: FD %d ready for writing\n", fd);
+                    if (fd_table[fd].write_handler) {
+                        int (*tmp) () = fd_table[fd].write_handler;
+                        fd_table[fd].write_handler = 0;
+                        debug(10, "calling write handler %p(%d,%p)\n",
+                            tmp, fd, fd_table[fd].write_data);
+                        tmp(fd, fd_table[fd].write_data);
+                    }
+                }
+                if (FD_ISSET(fd, &exceptfds)) {
+                    debug(5, "comm_select: FD %d has an exception\n", fd);
+                    if (fd_table[fd].except_handler) {
+                        int (*tmp) () = fd_table[fd].except_handler;
+                        fd_table[fd].except_handler = 0;
+                        debug(10, "calling except handler %p(%d,%p)\n",
+                            tmp, fd, fd_table[fd].except_data);
+                        tmp(fd, fd_table[fd].except_data);
+                    }
+                }
+            }
+            return COMM_OK;
+        }
+    }
+
+    debug(8, "comm_select: time out: %d.\n", cached_curtime);
+    return COMM_TIMEOUT;
+}
+
+
+/* Select on fd to see if any io pending.
+ * Returns COMM_OK if fd is readable within sec/usec, COMM_TIMEOUT if not,
+ * COMM_NOMESSAGE or COMM_ERROR when select() fails. */
+int comm_pending(fd, sec, usec)
+     int fd;
+     long sec, usec;
+{
+    fd_set readfds;
+    int num;
+    struct timeval timeout;
+
+    /* Find a fd ready for reading. */
+    FD_ZERO(&readfds);
+    FD_SET(fd, &readfds);
+
+    while (1) {
+        timeout.tv_sec = (time_t) sec;
+        timeout.tv_usec = usec;
+        num = select(FD_SETSIZE, &readfds, NULL, NULL, &timeout);
+        if (num >= 0)
+            break;
+        switch (errno) {
+#if EAGAIN != EWOULDBLOCK
+        case EAGAIN:
+#endif
+        case EWOULDBLOCK:
+            return COMM_NOMESSAGE;
+        case EINTR:
+            break;              /* if select interrupted, try again */
+        default:
+            debug(1, "comm_pending: select failure: %s\n", xstrerror());
+            return COMM_ERROR;
+        }
+    }
+
+    debug(5, "comm_pending: %d sockets ready for reading\n", num);
+
+    if (num && FD_ISSET(fd, &readfds)) {
+        return COMM_OK;
+    }
+    return COMM_TIMEOUT;
+}
+
+/* comm_set_select_handler - comm_set_select_handler_plus_timeout() with a
+ * timeout of 0. */
+int comm_set_select_handler(fd, type, handler, client_data)
+     int fd;
+     unsigned int type;
+/* 01 - read; 10 - write; 100 - except; 1000 - timeout ; 10000 - lifetime */
+     int (*handler) ();
+     caddr_t client_data;
+{
+
+    return (comm_set_select_handler_plus_timeout(fd, type, handler, client_data, 0));
+}
+
+/* Should use var args here PBD */
+/*
+ * comm_set_select_handler_plus_timeout - install handler/client_data on fd
+ * for every event named in the type bit mask.  For COMM_SELECT_TIMEOUT the
+ * expiry is time() + timeout.  A NULL handler clears the slot.  Always
+ * returns 0.
+ */
+int comm_set_select_handler_plus_timeout(fd, type, handler, client_data, timeout)
+     int fd;
+     unsigned int type;
+/* 01 - read; 10 - write; 100 - except; 1000 - timeout ; 10000 - lifetime */
+     int (*handler) ();
+     caddr_t client_data;
+     time_t timeout;
+{
+    if (type & COMM_SELECT_TIMEOUT) {
+        fd_table[fd].timeout_time = (time(0L) + timeout);
+        fd_table[fd].timeout_delta = timeout;
+        fd_table[fd].timeout_handler = handler;
+        fd_table[fd].timeout_data = client_data;
+        if ((timeout <= 0) && handler) {
+            debug(2, "comm_set_select_handler_plus_timeout: Zero timeout doesn't make sense\n");
+        }
+    }
+    if (type & COMM_SELECT_READ) {
+        fd_table[fd].read_handler = handler;
+        fd_table[fd].read_data = client_data;
+    }
+    if (type & COMM_SELECT_WRITE) {
+        fd_table[fd].write_handler = handler;
+        fd_table[fd].write_data = client_data;
+    }
+    if (type & COMM_SELECT_EXCEPT) {
+        fd_table[fd].except_handler = handler;
+        fd_table[fd].except_data = client_data;
+    }
+    if (type & COMM_SELECT_LIFETIME) {
+        fd_table[fd].lifetime_handler = handler;
+        fd_table[fd].lifetime_data = client_data;
+    }
+    return 0;                   /* XXX What is meaningful? */
+}
+
+/*
+ * comm_get_select_handler - fetch the handler and data installed on fd for
+ * the event in type.  If several bits are set, the last matching test
+ * below wins.  If no bit matches, the outputs are left untouched.
+ */
+int comm_get_select_handler(fd, type, handler_ptr, client_data_ptr)
+     int fd;
+     unsigned int type;
+     int (**handler_ptr) ();
+     caddr_t *client_data_ptr;
+{
+    if (type & COMM_SELECT_TIMEOUT) {
+        *handler_ptr = fd_table[fd].timeout_handler;
+        *client_data_ptr = fd_table[fd].timeout_data;
+    }
+    if (type & COMM_SELECT_READ) {
+        *handler_ptr = fd_table[fd].read_handler;
+        *client_data_ptr = fd_table[fd].read_data;
+    }
+    if (type & COMM_SELECT_WRITE) {
+        *handler_ptr = fd_table[fd].write_handler;
+        *client_data_ptr = fd_table[fd].write_data;
+    }
+    if (type & COMM_SELECT_EXCEPT) {
+        *handler_ptr = fd_table[fd].except_handler;
+        *client_data_ptr = fd_table[fd].except_data;
+    }
+    if (type & COMM_SELECT_LIFETIME) {
+        *handler_ptr = fd_table[fd].lifetime_handler;
+        *client_data_ptr = fd_table[fd].lifetime_data;
+    }
+    return 0;                   /* XXX What is meaningful? */
+}
+
+
+/* commSetNoLinger - switch SO_LINGER off on fd; returns setsockopt()'s
+ * result. */
+static int commSetNoLinger(fd)
+     int fd;
+{
+    struct linger L;
+
+    L.l_onoff = 0;              /* off */
+    L.l_linger = 0;
+
+    debug(10, "commSetNoLinger: turning off SO_LINGER on FD %d\n", fd);
+    return setsockopt(fd, SOL_SOCKET, SO_LINGER, (char *) &L, sizeof(L));
+}
+
+/* commSetReuseAddr - switch SO_REUSEADDR on for fd; logs and returns
+ * setsockopt()'s result. */
+static int commSetReuseAddr(fd)
+     int fd;
+{
+    int on = 1;
+    int rc;
+
+    debug(10, "commSetReuseAddr: turning on SO_REUSEADDR on FD %d\n", fd);
+    rc = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (char *) &on, sizeof(on));
+    if (rc < 0)
+        debug(1, "commSetReuseAddr: FD=%d: %s\n", fd, xstrerror());
+    return rc;
+}
+
+/*
+ * commSetNonBlocking - put fd into non-blocking mode.  Returns 0, or
+ * COMM_ERROR if fcntl() fails.  The failure messages now name this
+ * function; they used to claim to come from comm_open.
+ */
+int commSetNonBlocking(fd)
+     int fd;
+{
+    debug(10, "commSetNonBlocking: setting FD %d to non-blocking i/o.\n",
+        fd);
+    /*
+     * Set up the flag NOT to have the socket to wait for message from
+     * network forever, but to return -1 when no message is coming in.
+     */
+
+#if defined(O_NONBLOCK) && !defined(_HARVEST_SUNOS_) && !defined(_HARVEST_SOLARIS_)
+    if (fcntl(fd, F_SETFL, O_NONBLOCK)) {
+        debug(0, "commSetNonBlocking: FD %d: error setting O_NONBLOCK: %s\n",
+            fd, xstrerror());
+        return (COMM_ERROR);
+    }
+#else
+    if (fcntl(fd, F_SETFL, FNDELAY)) {
+        debug(0, "commSetNonBlocking: FD %d: error setting FNDELAY: %s\n",
+            fd, xstrerror());
+        return (COMM_ERROR);
+    }
+#endif /* O_NONBLOCK */
+    return 0;
+}
+
+/*
+ * getAddressList - h_addr_list of name as returned by
+ * ipcache_gethostbyname(), or NULL (logged) when name is NULL or the
+ * lookup fails.  The list belongs to the hostent it came from.
+ */
+char **getAddressList(name)
+     char *name;
+{
+    struct hostent *hp = NULL;
+    if (name == NULL)
+        return NULL;
+    if ((hp = ipcache_gethostbyname(name)))
+        return hp->h_addr_list;
+    debug(0, "getAddressList: gethostbyname failure: %s: %s\n",
+        name, xstrerror());
+    return NULL;
+}
+
+/*
+ * getAddress - first address of name, copied into a static in_addr that is
+ * overwritten by the next call.  Returns NULL when name is NULL, the
+ * lookup fails or the address list is empty.
+ */
+struct in_addr *getAddress(name)
+     char *name;
+{
+    static struct in_addr first;
+    char **list = NULL;
+    if (name == NULL)
+        return NULL;
+    /* an empty list has a NULL first element: do not copy from it */
+    if ((list = getAddressList(name)) != NULL && *list != NULL) {
+        memcpy(&first.s_addr, *list, sizeof(first.s_addr));
+        return (&first);
+    }
+    debug(0, "getAddress: gethostbyname failure: %s: %s\n",
+        name, xstrerror());
+    return NULL;
+}
+
+/*
+ *  the fd_lifetime is used as a hardlimit to timeout dead sockets.
+ *  The basic problem is that many WWW clients are abusive and
+ *  it results in cached having lots of CLOSE_WAIT states.  Until
+ *  we can find a better solution, we give all asciiPort or
+ *  cached initiated clients a maximum lifetime.
+ */
+/* comm_init - allocate and clear fd_table and fd_lifetime for getMaxFD()
+ * descriptors and pick RESERVED_FD.  Always returns 0. */
+int comm_init()
+{
+    int i, max_fd = getMaxFD();
+
+    fd_table = (FD_ENTRY *) xmalloc(sizeof(FD_ENTRY) * max_fd);
+    memset(fd_table, '\0', sizeof(FD_ENTRY) * max_fd);  /* clear fd_table */
+    /* Keep a few file descriptors free so that we don't run out of FD's
+     * after accepting a client but before it opens a socket or a file.
+     * Since getMaxFD can be as high as several thousand, don't waste them */
+    RESERVED_FD = min(100, max_fd / 4);
+    /* hardwired lifetimes */
+    fd_lifetime = (int *) xmalloc(sizeof(int) * max_fd);
+    for (i = 0; i < max_fd; i++) {
+        comm_set_fd_lifetime(i, -1);    /* denotes invalid */
+    }
+    return 0;
+}
+
+
+/*
+ * examine_select - debug routine.
+ *
+ * I spend the day chasing this core dump that occurs when both the client
+ * and the server side of a cache fetch simultaneously abort the
+ * connection.  While I haven't really studied the code to figure out how
+ * it happens, the snippet below may prevent the cache from exiting:
+ *
+ * Call this from where the select loop fails.
+ *
+ * Each descriptor present in any of the three sets is probed on its own
+ * with a zero-timeout select().  A descriptor for which that fails has its
+ * handlers dropped and is removed from the caller's sets.  The probe uses
+ * a single read set (the same fd_set used to be passed for all three
+ * arguments), and the scan stops at FD_SETSIZE because an fd_set cannot
+ * describe anything larger.  Always returns 0.
+ */
+static int examine_select(readfds, writefds, exceptfds)
+     fd_set *readfds, *writefds, *exceptfds;
+{
+    int fd = 0;
+    int max_fd = getMaxFD();
+    fd_set probe;
+    int num;
+    struct timeval tv;
+
+    if (max_fd > FD_SETSIZE)
+        max_fd = FD_SETSIZE;
+
+    debug(0, "examine_select: Examining open file descriptors...\n");
+    for (fd = 0; fd < max_fd; ++fd) {
+        if (!FD_ISSET(fd, readfds) &&
+            !FD_ISSET(fd, writefds) &&
+            !FD_ISSET(fd, exceptfds))
+            continue;
+        FD_ZERO(&probe);
+        FD_SET(fd, &probe);
+        tv.tv_sec = tv.tv_usec = 0;
+        num = select(FD_SETSIZE, &probe, NULL, NULL, &tv);
+        if (num < 0) {
+            debug(0, "WARNING: FD %d has handlers, but it's invalid.\n", fd);
+            debug(0, "Timeout handler:%p read:%p write:%p except:%p\n",
+                fd_table[fd].timeout_handler,
+                fd_table[fd].read_handler,
+                fd_table[fd].write_handler,
+                fd_table[fd].except_handler);
+            fd_table[fd].timeout_handler = 0;
+            fd_table[fd].read_handler = 0;
+            fd_table[fd].write_handler = 0;
+            fd_table[fd].except_handler = 0;
+            FD_CLR(fd, readfds);
+            FD_CLR(fd, writefds);
+            FD_CLR(fd, exceptfds);
+        }
+    }
+    debug(0, "examine_select: Finished examining open file descriptors.\n");
+    return 0;
+}
+
+/*
+ * fd_note - with s == NULL return the note stored for fd; otherwise store
+ * (at most FD_ASCII_NOTE_SZ - 1 bytes of) s and return NULL.
+ */
+char *fd_note(fd, s)
+     int fd;
+     char *s;
+{
+    if (s == NULL)
+        return (fd_table[fd].ascii_note);
+    strncpy(fd_table[fd].ascii_note, s, FD_ASCII_NOTE_SZ - 1);
+    return (NULL);
+}
+
+/* checkTimeouts - call (once) the timeout handler of every descriptor
+ * whose timeout_time is not later than cached_curtime. */
+static void checkTimeouts()
+{
+    int fd;
+    /* scan for timeout */
+    for (fd = 0; fd < (fdstat_biggest_fd() + 1); ++fd) {
+        if ((fd_table[fd].timeout_handler) &&
+            (fd_table[fd].timeout_time <= cached_curtime)) {
+            int (*tmp) () = fd_table[fd].timeout_handler;
+            debug(5, "comm_select: timeout on socket %d at %d\n",
+                fd, cached_curtime);
+            fd_table[fd].timeout_handler = 0;
+            tmp(fd, fd_table[fd].timeout_data);
+        }
+    }
+}
+
+/*
+ * checkLifetimes - enforce the hard per-descriptor lifetime.
+ *
+ * For each expired descriptor the lifetime handler is preferred, then the
+ * read handler, then the write handler.  Whichever is used is cleared
+ * first.  Afterwards the descriptor is closed with comm_close() if it is
+ * still marked open, otherwise its table entry is wiped.
+ *
+ * NOTE(review): on the read/write path the descriptor is close(2)d before
+ * the handler runs and may be closed again by comm_close() afterwards; if
+ * the handler opens a new descriptor that reuses the number, that one
+ * looks like it would be closed instead -- confirm before relying on it.
+ */
+static void checkLifetimes()
+{
+    int fd;
+    int max_fd = getMaxFD();
+    time_t lft;
+
+    /* scan for hardwired lifetime expires, do the timeouts first though */
+    for (fd = 0; fd < max_fd; fd++) {
+        lft = comm_get_fd_lifetime(fd);
+        if ((lft != -1) && (lft < cached_curtime)) {
+            int use_lifetime_handler = 0;
+            int use_read = 0;
+            int (*tmp_local) () = NULL;
+
+            if (fd_table[fd].lifetime_handler != NULL) {
+                use_lifetime_handler = 1;
+                tmp_local = fd_table[fd].lifetime_handler;
+                fd_table[fd].lifetime_handler = 0;      /* reset it */
+            } else if (fd_table[fd].read_handler != NULL) {
+                use_read = 1;
+                tmp_local = fd_table[fd].read_handler;
+                fd_table[fd].read_handler = 0;  /* reset it */
+            } else if (fd_table[fd].write_handler != NULL) {
+                use_read = 0;
+                tmp_local = fd_table[fd].write_handler;
+                fd_table[fd].write_handler = 0; /* reset it */
+            } else {
+                use_read = 0;
+                tmp_local = NULL;
+            }
+            if (tmp_local) {
+                if (use_lifetime_handler) {
+                    debug(2, "comm_select: FD %d lifetime expire: %d < %d (Lifetime handler %p)\n",
+                        fd, lft, cached_curtime, tmp_local);
+                } else {
+                    debug(2, "comm_select: FD %d lifetime expire: %d < %d (%s handler %p)\n",
+                        fd, lft, cached_curtime,
+                        use_read ? "read" : "write", tmp_local);
+                }
+            } else {
+                debug(1, "comm_select: FD %d lifetime expire: %d < %d (handler not available.)\n",
+                    fd, lft, cached_curtime);
+            }
+
+            if (tmp_local != NULL) {
+                if (use_lifetime_handler) {
+                    tmp_local(fd, fd_table[fd].lifetime_data);
+                } else {
+                    /*
+                     * we close(2) first so that the handler fails and
+                     * deallocates the structure.
+                     */
+                    (void) close(fd);
+                    tmp_local(fd, use_read ? fd_table[fd].read_data :
+                        fd_table[fd].write_data);
+                }
+                if (fd_table[fd].openned) {
+                    /* hmm.. still openned. do full comm_close */
+                    debug(5, "comm_select: FD %d lifetime expire: %d < %d : Handler did not close the socket.\n comm_select will do.\n",
+                        fd, lft, cached_curtime);
+                    comm_close(fd);
+                } else {
+                    /* seems like handle closed it.
+                     * clean up fd_table just to make sure */
+                    debug(5, "comm_select: FD %d lifetime expire: %d : Handler closed the socket.\n",
+                        fd, lft);
+                    /* just to make sure here */
+                    comm_cleanup_fd_entry(fd);
+                }
+            } else {
+                /* no handle. do full comm_close */
+                debug(5, "comm_select: FD %d lifetime expire: %d < %d : No handler to close the socket.\n comm_select will do.\n",
+                    fd, lft, cached_curtime);
+                comm_close(fd);
+            }
+        }
+    }
+}
+
+/*
+ * Reserve_More_FDs() called when accept(), open(), or socket is failing
+ *
+ * Raises RESERVED_FD by one until it exceeds getMaxFD() - 64; the step
+ * that crosses that limit is logged.
+ */
+static void Reserve_More_FDs()
+{
+    if (RESERVED_FD < getMaxFD() - 64) {
+        RESERVED_FD = RESERVED_FD + 1;
+    } else if (RESERVED_FD == getMaxFD() - 64) {
+        RESERVED_FD = RESERVED_FD + 1;
+        debug(0, "Don't you have a tiny open-file table size of %d\n",
+            getMaxFD() - RESERVED_FD);
+    }
+}
+
+/*
+ * fd_of_first_client - descriptor recorded in e's memory object, provided
+ * it is positive and fd_table still links that descriptor to e;
+ * otherwise -1.
+ */
+int fd_of_first_client(e)
+     StoreEntry *e;
+{
+    int fd;
+
+    fd = store_mem_obj(e, fd_of_first_client);
+
+    if (fd > 0) {
+        if (e == fd_table[fd].store_entry) {
+            return (fd);
+        }
+    }
+    return (-1);
+}
diff --git a/src/debug.cc b/src/debug.cc
new file mode 100644
index 00000000000..a7ab8b5f1b2
--- /dev/null
+++ b/src/debug.cc
@@ -0,0 +1,266 @@
+static char rcsid[] = "$Id: debug.cc,v 1.1 1996/02/22 06:23:54 wessels Exp $";
+/*
+
* File: debug.c + * Description: implementation of trace facility for debugging + * Author: John Noll, USC + * Created: Tue Jan 8 11:05:41 1991 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. 
+ * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include +#include +#if defined(__STRICT_ANSI__) +#include +#else +#include +#endif +#include +#include /* For MAXPATHLEN. */ + +#include "debug.h" +#include "util.h" +#include "cache_cf.h" + +static char *_db_modules = 0; /* colon separated list of modules to debug. 
*/
+int _db_level = 0;
+char *_db_file = __FILE__;
+int _db_line = 0;
+
+extern time_t cached_curtime;
+extern char *mkrfc850();
+extern int unbuffered_logs;     /* main.c */
+
+int syslog_enable = 0;
+int stderr_enable = 0;
+FILE *debug_log = NULL;
+static char *debug_log_file = NULL;
+static time_t last_cached_curtime = 0;
+static char the_time[81];
+
+/*
+ * _db_print - emit one debug message: _db_print(level, format, ...).
+ *
+ * Does nothing until debug_log is set.  The message is emitted when
+ * level <= _db_level and, if _db_modules is set, when the basename of
+ * _db_file occurs in it.  Output is "[time] module:line:\t message".  It
+ * goes to debug_log, to syslog as well for level 0 when syslog_enable is
+ * set, and to stderr as well when stderr_enable is set.
+ *
+ * The message is expanded exactly once into a bounded buffer and that text
+ * is sent to every destination: a va_list may not be traversed again after
+ * a v*printf call, and the expanded text must never be used as a format
+ * string.  Messages longer than BUFSIZ - 1 bytes are truncated.
+ */
+#if defined(__STRICT_ANSI__)
+void _db_print(int level,...)
+{
+    char *format;
+    va_list args;
+#else
+void _db_print(va_alist)
+     va_dcl
+{
+    char *format;
+    int level;
+    va_list args;
+#endif
+    static char tmpbuf[BUFSIZ];
+    char *module = NULL;
+    char *s = NULL;
+    int n;
+
+    if (debug_log == NULL)
+        return;
+
+#if defined(__STRICT_ANSI__)
+    /* strict ANSI compliance requires the second arg to va_start - we don't */
+    va_start(args, level);
+    format = va_arg(args, char *);
+#else
+    va_start(args);
+    level = va_arg(args, int);
+    format = va_arg(args, char *);
+#endif
+
+    /* Obtain module name from filename. */
+    if ((module = strrchr(_db_file, '/')) != NULL)
+        module++;
+    else
+        module = _db_file;
+
+    if (_db_level >= level) {
+        if (!_db_modules || strstr(_db_modules, module)) {
+            /* don't compute the curtime too much */
+            if (last_cached_curtime != cached_curtime) {
+                last_cached_curtime = cached_curtime;
+                the_time[0] = '\0';
+                s = mkhttpdlogtime(&cached_curtime);
+                strncpy(the_time, s, sizeof(the_time) - 1);
+                the_time[sizeof(the_time) - 1] = '\0';
+            }
+            /* prefix first, then the caller's message right behind it */
+            n = snprintf(tmpbuf, sizeof(tmpbuf), "[%s] %s:%d:\t ",
+                the_time, module, _db_line);
+            if (n < 0)
+                n = 0;
+            else if (n > (int) sizeof(tmpbuf) - 1)
+                n = (int) sizeof(tmpbuf) - 1;
+            vsnprintf(tmpbuf + n, sizeof(tmpbuf) - n, format, args);
+
+            /* level 0 go to syslog */
+            if ((level == 0) && syslog_enable)
+                syslog(LOG_ERR, "%s", tmpbuf);
+            /* write to log file */
+            fputs(tmpbuf, debug_log);
+            if (unbuffered_logs)
+                fflush(debug_log);
+
+            /* if requested, dump it to stderr also */
+            if (stderr_enable) {
+                fputs(tmpbuf, stderr);
+                fflush(stderr);
+            }
+        }
+    }
+    va_end(args);
+}
+
+/*
+ * _db_init - set up the debug facility.
+ *
+ * The debug level comes from the environment variable
+ * "<prefix>_debug_level" when set, otherwise from initial_level.  The
+ * module filter comes from DHT_DEBUG_MODULES.  logfile is opened for
+ * appending; when it is NULL or cannot be opened, messages go to stderr
+ * and stderr_enable is cleared so that they are not printed twice.
+ * syslog is opened when syslog_enable is set.
+ */
+void _db_init(prefix, initial_level, logfile)
+     char *prefix;
+     int initial_level;
+     char *logfile;
+{
+    char *db_level_str, db_buf[MAXPATHLEN];
+
+    /* bounded: prefix is caller supplied */
+    snprintf(db_buf, sizeof(db_buf), "%s_debug_level", prefix);
+    if ((db_level_str = getenv(db_buf)) != NULL)
+        _db_level = atoi(db_level_str);
+    else
+        _db_level = initial_level;
+
+    _db_modules = getenv("DHT_DEBUG_MODULES");
+
+    /* open error logging file */
+    if (logfile != NULL) {
+        if (debug_log_file)
+            free(debug_log_file);
+        debug_log_file = strdup(logfile);       /* keep a static copy */
+        debug_log = fopen(logfile, "a+");
+        if (!debug_log) {
+            fprintf(stderr, "WARNING: Cannot write log file: %s\n", logfile);
+            perror(logfile);
+            fprintf(stderr, " messages will be sent to 'stderr'.\n");
+            fflush(stderr);
+            debug_log = stderr;
+            /* avoid redundancy */
+            stderr_enable = 0;
+        }
+    } else {
+        fprintf(stderr, "WARNING: No log file specified?\n");
+        fprintf(stderr, " messages will be sent to 'stderr'.\n");
+        fflush(stderr);
+        debug_log = stderr;
+        /* avoid redundancy */
+        stderr_enable = 0;
+    }
+
+    if (syslog_enable) {
+        openlog("cached", LOG_PID | LOG_NDELAY | LOG_CONS, LOG_LOCAL4);
+    }
+}
+
+/* gack!  would be nice to use _db_init() instead */
+/*
+ * _db_rotate_log - rotate the debug log and reopen it.
+ *
+ * Does nothing when no log file name was recorded.  Existing files
+ * <log>.0 .. <log>.N-2 are renamed one number up (N being
+ * getLogfileRotateNumber()), the current log becomes <log>.0 when N > 0,
+ * and the log is reopened for appending, falling back to stderr on failure.
+ */
+void _db_rotate_log()
+{
+    int i;
+    static char from[MAXPATHLEN];
+    static char to[MAXPATHLEN];
+
+    if (debug_log_file == NULL)
+        return;
+
+    /* Rotate numbers 0 through N up one */
+    for (i = getLogfileRotateNumber(); i > 1;) {
+        i--;
+        snprintf(from, sizeof(from), "%s.%d", debug_log_file, i - 1);
+        snprintf(to, sizeof(to), "%s.%d", debug_log_file, i);
+        rename(from, to);
+    }
+    /* Rotate the current log to .0 */
+    if (getLogfileRotateNumber() > 0) {
+        snprintf(to, sizeof(to), "%s.%d", debug_log_file, 0);
+        rename(debug_log_file, to);
+    }
+    /* Close and reopen the log.  It may have been renamed "manually"
+     * before HUP'ing us.  debug_log is stderr when an earlier open failed;
+     * stderr must stay open because the fallback below writes to it. */
+    if (debug_log != NULL && debug_log != stderr)
+        fclose(debug_log);
+    debug_log = fopen(debug_log_file, "a+");
+    if (debug_log == NULL) {
+        fprintf(stderr, "WARNING: Cannot write log file: %s\n",
+            debug_log_file);
+        perror(debug_log_file);
+        fprintf(stderr, " messages will be sent to 'stderr'.\n");
+        fflush(stderr);
+        debug_log = stderr;
+        /* avoid redundancy */
+        stderr_enable = 0;
+    }
+}
diff --git a/src/disk.cc b/src/disk.cc
new file mode 100644
index 00000000000..1c63337f0c3
--- /dev/null
+++ b/src/disk.cc
@@ -0,0 +1,736 @@
+static char rcsid[] = "$Id: disk.cc,v 1.1 1996/02/22 06:23:54 wessels Exp $";
+/*
+ * File: disk.c
+ * Description: non blocking disk i/o
+ * Author: Anawat Chankhunthod, USC
+ * Created: Wed May 25 23:01:01 PDT 1994
+ * Language: C
+ **********************************************************************
+ * Copyright (c) 1994, 1995. All rights reserved.
+ *
+ * The Harvest software was developed by the Internet Research Task
+ * Force Research Group on Resource Discovery (IRTF-RD):
+ *
+ * Mic Bowman of Transarc Corporation.
+ * Peter Danzig of the University of Southern California.
+ * Darren R. Hardy of the University of Colorado at Boulder.
+ * Udi Manber of the University of Arizona.
+ * Michael F. Schwartz of the University of Colorado at Boulder.
+ * Duane Wessels of the University of Colorado at Boulder.
+ *
+ * This copyright notice applies to software in the Harvest
+ * ``src/'' directory only. Users should consult the individual
+ * copyright notices in the ``components/'' subdirectories for
+ * copyright information about other software bundled with the
+ * Harvest source code distribution.
+ *
+ * TERMS OF USE
+ *
+ * The Harvest software may be used and re-distributed without
+ * charge, provided that the software origin and research team are
+ * cited in any use of the system.
Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. 
We made this change
+ * in the interest of encouraging the widest possible deployment of
+ * the technology. The Harvest software is really a reference
+ * implementation of a set of protocols and formats, some of which
+ * we intend to standardize. We encourage commercial
+ * re-implementations of code complying to this set of standards.
+ *
+ *
+ */
+#include "config.h"
+
+#include
+#include
+#include
+#if !defined(_HARVEST_LINUX_)
+#include
+#endif
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ansihelp.h"
+#include "comm.h"
+#include "disk.h"
+#include "fdstat.h"
+#include "cache_cf.h"
+#include "util.h"
+#include "debug.h"
+
+#define DISK_LINE_LEN 1024      /* size of the line buffer used by file_walk() */
+#define MAX_FILE_NAME_LEN 256   /* size of FileEntry.filename */
+
+typedef struct _dread_ctrl {    /* state of one read started by file_read() */
+    int fd;
+    off_t offset;               /* file position diskHandleRead seeks to before reading */
+    int req_len;                /* number of bytes the caller asked for */
+    char *buf;                  /* caller-supplied destination buffer */
+    int cur_len;                /* bytes placed in buf so far */
+    int end_of_file;            /* set to 1 when read() returns 0 */
+    int (*handler) _PARAMS((int fd, char *buf, int size, int errflag, caddr_t data,
+        int offset));           /* called on completion, EOF or error */
+    caddr_t client_data;        /* passed through to handler */
+} dread_ctrl;
+
+typedef struct _dwalk_ctrl {    /* state of one walk started by file_walk() */
+    int fd;
+    off_t offset;               /* file position of the next unread line */
+    char *buf;                  /* line buffer */
+    int cur_len;                /* line len */
+    int (*handler) _PARAMS((int fd, int errflag, caddr_t data));        /* called at EOF or on error */
+    caddr_t client_data;        /* passed through to handler */
+    int (*line_handler) _PARAMS((int fd, char *buf, int size, caddr_t line_data));      /* called once per line */
+    caddr_t line_data;          /* passed through to line_handler */
+} dwalk_ctrl;
+
+typedef struct _dwrite_q {      /* one queued buffer waiting to be written */
+    caddr_t buf;
+    int len;                    /* total bytes in buf */
+    int cur_offset;             /* bytes of buf already written */
+    struct _dwrite_q *next;
+} dwrite_q;
+
+typedef struct _FileEntry {     /* per-descriptor disk state; one slot of file_table */
+    char filename[MAX_FILE_NAME_LEN];
+    enum {
+        NO, YES
+    } at_eof;                   /* YES after a write, NO after a read or walk */
+    enum {
+        NOT_OPEN, OPEN
+    } open_stat;
+    enum {
+        NOT_REQUEST, REQUEST
+    } close_request;            /* REQUEST when file_close() was refused and is pending */
+    enum {
+        NOT_PRESENT, PRESENT
+    } write_daemon;             /* PRESENT while diskHandleWrite is rescheduled */
+    enum {
+        UNLOCK, LOCK
+    } write_lock;
+    int access_code;            /* use to verify write lock */
+    enum {
+        NO_WRT_PENDING, WRT_PENDING
+    } write_pending;            /* WRT_PENDING while write_q holds data */
+    void (*wrt_handle) ();      /* completion callback given to file_write() */
+    void *wrt_handle_data;      /* passed through to wrt_handle */
+    dwrite_q *write_q;          /* head of the pending write queue */
+    dwrite_q *write_q_tail;     /* last element of the pending write queue */
+} FileEntry;
+
+
+/* table for FILE variable, write lock and queue. Indexed by fd.
*/
+FileEntry *file_table;
+static int disk_initialized = 0;
+
+extern int getMaxFD();
+extern void fatal_dump _PARAMS((char *));
+
+/* initialize table */
+/*
+ * disk_init - allocate file_table with getMaxFD() entries and put every
+ * entry in the closed/unlocked/idle state.  Safe to call repeatedly; only
+ * the first call does any work.  Always returns 0.
+ */
+int disk_init()
+{
+    int fd, max_fd = getMaxFD();
+
+    if (disk_initialized)
+        return 0;
+
+    file_table = (FileEntry *) xmalloc(sizeof(FileEntry) * max_fd);
+    memset(file_table, '\0', sizeof(FileEntry) * max_fd);
+
+    for (fd = 0; fd < max_fd; fd++) {
+        file_table[fd].filename[0] = '\0';
+        file_table[fd].at_eof = NO;
+        file_table[fd].open_stat = NOT_OPEN;
+        file_table[fd].close_request = NOT_REQUEST;
+        file_table[fd].write_daemon = NOT_PRESENT;
+        file_table[fd].write_lock = UNLOCK;
+        file_table[fd].access_code = 0;
+        file_table[fd].write_pending = NO_WRT_PENDING;
+        file_table[fd].write_q = file_table[fd].write_q_tail = NULL;
+    }
+    disk_initialized = 1;
+    return 0;
+}
+
+/* Open a disk file.  Return a file descriptor */
+/*
+ * file_open - open path with the given mode (plus O_NDELAY), switch the
+ * descriptor to non-blocking mode and register it in fdstat, file_table
+ * and fd_table.  Returns the descriptor, or DISK_ERROR when open() or
+ * fcntl() fails.  The handler argument is not used.
+ *
+ * Non-blocking mode is set before any table is touched, so that a failing
+ * fcntl() can be undone with a plain close() instead of leaking the
+ * descriptor and leaving it marked open.
+ *
+ * Paths of MAX_FILE_NAME_LEN characters or more are stored truncated to
+ * MAX_FILE_NAME_LEN - 1 characters so the stored name is always terminated.
+ */
+int file_open(path, handler, mode)
+     char *path;                /* path to file */
+     int (*handler) ();         /* Interrupt handler. */
+     int mode;
+{
+    FD_ENTRY *conn;
+    int fd;
+
+    /* lazy initialization */
+    if (!disk_initialized)
+        disk_init();
+
+    /* Open file */
+    if ((fd = open(path, mode | O_NDELAY, 0644)) < 0) {
+        debug(0, "file_open: error opening file %s: %s\n",
+            path, xstrerror());
+        return (DISK_ERROR);
+    }
+    /* set non-blocking mode */
+#if defined(O_NONBLOCK) && !defined(_HARVEST_SUNOS_) && !defined(_HARVEST_SOLARIS_)
+    if (fcntl(fd, F_SETFL, O_NONBLOCK) < 0) {
+        debug(0, "file_open: FD %d: Failure to set O_NONBLOCK: %s\n",
+            fd, xstrerror());
+        close(fd);
+        return DISK_ERROR;
+    }
+#else
+    if (fcntl(fd, F_SETFL, FNDELAY) < 0) {
+        debug(0, "file_open: FD %d: Failure to set FNDELAY: %s\n",
+            fd, xstrerror());
+        close(fd);
+        return DISK_ERROR;
+    }
+#endif /* O_NONBLOCK */
+
+    /* update fdstat */
+    fdstat_open(fd, File);
+
+    /* init table */
+    strncpy(file_table[fd].filename, path, MAX_FILE_NAME_LEN - 1);
+    file_table[fd].filename[MAX_FILE_NAME_LEN - 1] = '\0';
+    file_table[fd].at_eof = NO;
+    file_table[fd].open_stat = OPEN;
+    file_table[fd].close_request = NOT_REQUEST;
+    file_table[fd].write_lock = UNLOCK;
+    file_table[fd].write_pending = NO_WRT_PENDING;
+    file_table[fd].write_daemon = NOT_PRESENT;
+    file_table[fd].access_code = 0;
+    file_table[fd].write_q = NULL;
+
+    conn = &fd_table[fd];
+    memset(conn, 0, sizeof(FD_ENTRY));
+
+    conn->port = 0;
+    conn->handler = NULL;
+    conn->comm_type = COMM_NONBLOCKING;
+
+    return fd;
+}
+
+/*
+ * file_update_open - register a descriptor that is already open under the
+ * name path: same fdstat, file_table and fd_table bookkeeping as
+ * file_open(), without open() or fcntl().  Returns fd.
+ */
+int file_update_open(fd, path)
+     int fd;
+     char *path;                /* path to file */
+{
+    FD_ENTRY *conn;
+
+    /* lazy initialization */
+    if (!disk_initialized)
+        disk_init();
+
+    /* update fdstat */
+    fdstat_open(fd, File);
+
+    /* init table; the stored name is always terminated */
+    strncpy(file_table[fd].filename, path, MAX_FILE_NAME_LEN - 1);
+    file_table[fd].filename[MAX_FILE_NAME_LEN - 1] = '\0';
+    file_table[fd].at_eof = NO;
+    file_table[fd].open_stat = OPEN;
+    file_table[fd].close_request = NOT_REQUEST;
+    file_table[fd].write_lock = UNLOCK;
+    file_table[fd].write_pending = NO_WRT_PENDING;
+    file_table[fd].write_daemon = NOT_PRESENT;
+    file_table[fd].access_code = 0;
+    file_table[fd].write_q = NULL;
+
+    conn = &fd_table[fd];
+    memset(conn, 0, sizeof(FD_ENTRY));
+
+    conn->port = 0;
+    conn->handler = NULL;
+
+    conn->comm_type = COMM_NONBLOCKING;
+
+    return fd;
+}
+
+
+/* close a disk file.
*/
+/*
+ * file_close - close a descriptor opened through file_open().
+ *
+ * The close is refused, recorded as a pending request and answered with
+ * DISK_ERROR when the entry is not open, a write daemon is present, or
+ * writes are still pending.  Otherwise the file_table and fd_table
+ * entries are reset, fdstat is updated, the lifetime is invalidated, the
+ * descriptor is closed and DISK_OK is returned.
+ */
+int file_close(fd)
+     int fd;                    /* file descriptor */
+{
+    FileEntry *fe = &file_table[fd];
+
+    /* the write back queue has to drain first: remember the request */
+    if ((fe->open_stat != OPEN) ||
+        (fe->write_daemon != NOT_PRESENT) ||
+        (fe->write_pending != NO_WRT_PENDING)) {
+        fe->close_request = REQUEST;
+        return DISK_ERROR;
+    }
+    fe->open_stat = NOT_OPEN;
+    fe->write_lock = UNLOCK;
+    fe->write_daemon = NOT_PRESENT;
+    fe->filename[0] = '\0';
+
+    if (fdstat_type(fd) == Socket) {
+        debug(0, "FD %d: Someone called file_close() on a socket\n", fd);
+        fatal_dump(NULL);
+    }
+    /* update fdstat */
+    fdstat_close(fd);
+    memset(&fd_table[fd], '\0', sizeof(FD_ENTRY));
+    comm_set_fd_lifetime(fd, -1);       /* invalidate the lifetime */
+    close(fd);
+    return DISK_OK;
+}
+
+
+/* return a opened fd associate with given path name. */
+/* return DISK_FILE_NOT_FOUND if not found.
*/
+/*
+ * file_get_fd - linear search of file_table, descriptors 1 up to
+ * getMaxFD() - 1, for an open entry whose stored name equals filename in
+ * its first MAX_FILE_NAME_LEN characters.
+ */
+int file_get_fd(filename)
+     char *filename;
+{
+    int fd, max_fd = getMaxFD();
+    for (fd = 1; fd < max_fd; fd++) {
+        if (file_table[fd].open_stat == OPEN) {
+            if (strncmp(file_table[fd].filename, filename, MAX_FILE_NAME_LEN) == 0) {
+                return fd;
+            }
+        }
+    }
+    return DISK_FILE_NOT_FOUND;
+}
+
+/* grab a writing lock for file */
+/*
+ * Returns DISK_WRT_LOCK_FAIL when the file is already locked.  Otherwise
+ * marks it locked, advances the access code (modulo 65536) and returns
+ * the new code, which the holder must present to write or unlock.
+ */
+int file_write_lock(fd)
+     int fd;
+{
+    if (file_table[fd].write_lock == LOCK) {
+        debug(0, "trying to lock a locked file\n");
+        return DISK_WRT_LOCK_FAIL;
+    } else {
+        file_table[fd].write_lock = LOCK;
+        file_table[fd].access_code += 1;
+        file_table[fd].access_code %= 65536;
+        return file_table[fd].access_code;
+    }
+}
+
+
+/* release a writing lock for file */
+/*
+ * Returns DISK_OK when access_code matches the entry's current code,
+ * DISK_WRT_WRONG_CODE (lock untouched) otherwise.
+ */
+int file_write_unlock(fd, access_code)
+     int fd;
+     int access_code;
+{
+    if (file_table[fd].access_code == access_code) {
+        file_table[fd].write_lock = UNLOCK;
+        return DISK_OK;
+    } else {
+        debug(0, "trying to unlock the file with the wrong access code\n");
+        return DISK_WRT_WRONG_CODE;
+    }
+}
+
+
+/* write handler */
+/*
+ * diskHandleWrite - write out the queue of entry, block after block.
+ *
+ * Seeks to the end of the file first unless the previous operation was a
+ * write.  A would-block error or a partial write reschedules this handler
+ * and returns DISK_OK.  When the last block completes, the queue is
+ * emptied, the completion handler (if any) is called with DISK_OK and a
+ * pending close request is honoured.  Any other write error flushes the
+ * whole queue, reports DISK_NO_SPACE_LEFT or DISK_ERROR to the completion
+ * handler once per queued block, and returns DISK_ERROR.
+ */
+int diskHandleWrite(fd, entry)
+     int fd;
+     FileEntry *entry;
+{
+    int len;
+    int write_errno;
+    dwrite_q *q;
+    int block_complete = 0;
+
+    if (file_table[fd].at_eof == NO)
+        lseek(fd, 0, SEEK_END);
+
+    for (;;) {
+        len = write(fd, entry->write_q->buf + entry->write_q->cur_offset,
+            entry->write_q->len - entry->write_q->cur_offset);
+        /* keep the reason of a failure: logging, freeing and callbacks
+         * below may all overwrite errno before it is examined again */
+        write_errno = errno;
+
+        file_table[fd].at_eof = YES;
+
+        if (len < 0) {
+            switch (write_errno) {
+#if EAGAIN != EWOULDBLOCK
+            case EAGAIN:
+#endif
+            case EWOULDBLOCK:
+                /* just reschedule itself, try again */
+                comm_set_select_handler(fd,
+                    COMM_SELECT_WRITE,
+                    (PF) diskHandleWrite,
+                    (caddr_t) entry);
+                entry->write_daemon = PRESENT;
+                return DISK_OK;
+            default:
+                /* disk i/o failure--flushing all outstanding writes */
+                debug(1, "diskHandleWrite: disk write error %s\n",
+                    xstrerror());
+                entry->write_daemon = NOT_PRESENT;
+                entry->write_pending = NO_WRT_PENDING;
+                /* call finish handler */
+                do {
+                    q = entry->write_q;
+                    entry->write_q = q->next;
+                    if (!entry->wrt_handle) {
+                        safe_free(q->buf);
+                    } else {
+                        /* XXXXXX
+                         * Notice we call the handler multiple times but
+                         * the write handler (in page mode) doesn't know
+                         * the buf ptr so it'll be hard to deallocate
+                         * memory.
+                         * XXXXXX */
+                        entry->wrt_handle(fd,
+                            write_errno == ENOSPC ? DISK_NO_SPACE_LEFT : DISK_ERROR,
+                            entry->wrt_handle_data);
+                    }
+                    safe_free(q);
+                } while (entry->write_q);
+                /* every element was freed: do not leave the tail dangling */
+                entry->write_q_tail = NULL;
+                return DISK_ERROR;
+            }
+        }
+        entry->write_q->cur_offset += len;
+        block_complete = (entry->write_q->cur_offset >= entry->write_q->len);
+
+        if (block_complete && (!entry->write_q->next)) {
+            /* No more data */
+            if (!entry->wrt_handle)
+                safe_free(entry->write_q->buf);
+            safe_free(entry->write_q);
+            entry->write_q = entry->write_q_tail = NULL;
+            entry->write_pending = NO_WRT_PENDING;
+            entry->write_daemon = NOT_PRESENT;
+            /* call finish handle */
+            if (entry->wrt_handle) {
+                entry->wrt_handle(fd, DISK_OK, entry->wrt_handle_data);
+            }
+            /* Close it if requested */
+            if (file_table[fd].close_request == REQUEST) {
+                file_close(fd);
+            }
+            return DISK_OK;
+        } else if ((block_complete) && (entry->write_q->next)) {
+            /* Do next block */
+
+            /* XXXXX THESE PRIMITIVES ARE WEIRD XXXXX
+             * If we have multiple blocks to send, we
+             * only call the completion handler once,
+             * so it becomes our job to free buffer space
+             */
+
+            q = entry->write_q;
+            entry->write_q = entry->write_q->next;
+            if (!entry->wrt_handle)
+                safe_free(q->buf);
+            safe_free(q);
+            /* Schedule next write
+             * comm_set_select_handler(fd, COMM_SELECT_WRITE, (PF) diskHandleWrite,
+             * (caddr_t) entry);
+             */
+            entry->write_daemon = PRESENT;
+            /* Repeat loop */
+        } else {                /* !Block_completed; block incomplete */
+            /* reschedule */
+            comm_set_select_handler(fd, COMM_SELECT_WRITE, (PF) diskHandleWrite,
+                (caddr_t) entry);
+            entry->write_daemon = PRESENT;
+            return DISK_OK;
+        }
+    }
+}
+
+
+
+/* write block to a file */
+/* write back queue. Only one writer at a time. */
+/* call a handle when writing is complete.
*/
+/*
+ * file_write - queue len bytes at ptr_to_buf for writing to fd.
+ *
+ * Returns DISK_ERROR when fd is not open and DISK_WRT_WRONG_CODE when the
+ * file is locked and access_code is not the lock holder's code.  Otherwise
+ * the buffer is appended to the write queue, handle/handle_data replace
+ * the entry's completion callback, diskHandleWrite is scheduled unless a
+ * write daemon is already present, and DISK_OK is returned.  The buffer is
+ * not copied.
+ */
+int file_write(fd, ptr_to_buf, len, access_code, handle, handle_data)
+     int fd;
+     caddr_t ptr_to_buf;
+     int len;
+     int access_code;
+     void (*handle) ();
+     void *handle_data;
+{
+    dwrite_q *wq;
+
+    if (file_table[fd].open_stat != OPEN) {
+        return DISK_ERROR;
+    }
+    if ((file_table[fd].write_lock == LOCK) &&
+        (file_table[fd].access_code != access_code)) {
+        debug(0, "file write: access code checked failed. Sync problem.\n");
+        return DISK_WRT_WRONG_CODE;
+    }
+    /* if we got here. Caller is eligible to write. */
+    wq = (dwrite_q *) xcalloc(1, sizeof(dwrite_q));
+
+    wq->buf = ptr_to_buf;
+
+    wq->len = len;
+    wq->cur_offset = 0;
+    wq->next = NULL;
+    file_table[fd].wrt_handle = handle;
+    file_table[fd].wrt_handle_data = handle_data;
+
+    /* add to queue */
+    file_table[fd].write_pending = WRT_PENDING;
+    if (!(file_table[fd].write_q)) {
+        /* empty queue */
+        file_table[fd].write_q = file_table[fd].write_q_tail = wq;
+
+    } else {
+        file_table[fd].write_q_tail->next = wq;
+        file_table[fd].write_q_tail = wq;
+    }
+
+    if (file_table[fd].write_daemon == NOT_PRESENT) {
+        /* got to start write routine for this fd */
+        comm_set_select_handler(fd, COMM_SELECT_WRITE, (PF) diskHandleWrite,
+            (caddr_t) & file_table[fd]);
+    }
+    return DISK_OK;
+}
+
+
+
+/* Read from FD */
+/*
+ * diskHandleRead - perform one read step of a file_read() request.
+ *
+ * Seeks to ctrl_dat->offset and reads into the unfilled part of the
+ * buffer.  A would-block error reschedules the handler with the request
+ * state untouched.  Any other error reports DISK_ERROR to the request's
+ * handler and returns DISK_ERROR.  End of file reports DISK_EOF.  After a
+ * successful read the handler is rescheduled until req_len bytes have
+ * arrived, then DISK_OK is reported.  ctrl_dat is freed whenever the
+ * request's handler has been called.
+ */
+int diskHandleRead(fd, ctrl_dat)
+     int fd;
+     dread_ctrl *ctrl_dat;
+{
+    int len;
+
+    /* go to requested position. */
+    lseek(fd, ctrl_dat->offset, SEEK_SET);
+    file_table[fd].at_eof = NO;
+    len = read(fd, ctrl_dat->buf + ctrl_dat->cur_len,
+        ctrl_dat->req_len - ctrl_dat->cur_len);
+
+    if (len < 0)
+        switch (errno) {
+#if EAGAIN != EWOULDBLOCK
+        case EAGAIN:
+#endif
+        case EWOULDBLOCK:
+            /* Nothing was read: try again later.  Returning here keeps
+             * len == -1 from being added to cur_len, which would make the
+             * next read store data one byte too early. */
+            comm_set_select_handler(fd, COMM_SELECT_READ, (PF) diskHandleRead,
+                (caddr_t) ctrl_dat);
+            return DISK_OK;
+        default:
+            debug(1, "diskHandleRead: FD %d: error reading: %s\n",
+                fd, xstrerror());
+            ctrl_dat->handler(fd, ctrl_dat->buf,
+                ctrl_dat->cur_len, DISK_ERROR,
+                ctrl_dat->client_data, ctrl_dat->offset);
+            safe_free(ctrl_dat);
+            return DISK_ERROR;
+    } else if (len == 0) {
+        /* EOF */
+        ctrl_dat->end_of_file = 1;
+        /* call handler */
+        ctrl_dat->handler(fd, ctrl_dat->buf, ctrl_dat->cur_len, DISK_EOF,
+            ctrl_dat->client_data, ctrl_dat->offset);
+        safe_free(ctrl_dat);
+        return DISK_OK;
+    }
+    ctrl_dat->cur_len += len;
+    ctrl_dat->offset = lseek(fd, 0L, SEEK_CUR);
+
+    /* reschedule if need more data. */
+    if (ctrl_dat->cur_len < ctrl_dat->req_len) {
+        comm_set_select_handler(fd, COMM_SELECT_READ, (PF) diskHandleRead,
+            (caddr_t) ctrl_dat);
+        return DISK_OK;
+    } else {
+        /* all data we need is here. */
+        /* call handler */
+        ctrl_dat->handler(fd, ctrl_dat->buf, ctrl_dat->cur_len, DISK_OK,
+            ctrl_dat->client_data, ctrl_dat->offset);
+        safe_free(ctrl_dat);
+        return DISK_OK;
+    }
+}
+
+
+/* start read operation */
+/* buffer must be allocated from the caller.
+ * It must have at least req_len space in there.
+ * call handler when a reading is complete.
*/
+/*
+ * file_read - start an asynchronous read of req_len bytes from fd at
+ * offset into buf.  Allocates the request state and schedules
+ * diskHandleRead, which calls handler(fd, buf, size, errflag,
+ * client_data, offset) when the read finishes.  Always returns DISK_OK.
+ */
+int file_read(fd, buf, req_len, offset, handler, client_data)
+     int fd;
+     caddr_t buf;
+     int req_len;
+     int offset;
+     FILE_READ_HD handler;
+     caddr_t client_data;
+{
+    dread_ctrl *ctrl_dat;
+
+    ctrl_dat = (dread_ctrl *) xmalloc(sizeof(dread_ctrl));
+    memset(ctrl_dat, '\0', sizeof(dread_ctrl));
+    ctrl_dat->fd = fd;
+    ctrl_dat->offset = offset;
+    ctrl_dat->req_len = req_len;
+    ctrl_dat->buf = buf;
+    ctrl_dat->cur_len = 0;
+    ctrl_dat->end_of_file = 0;
+    ctrl_dat->handler = handler;
+    ctrl_dat->client_data = client_data;
+
+    comm_set_select_handler(fd, COMM_SELECT_READ, (PF) diskHandleRead,
+        (caddr_t) ctrl_dat);
+
+    return DISK_OK;
+}
+
+
+/* Read from FD and pass a line to routine. Walk to EOF. */
+/*
+ * diskHandleWalk - perform one step of a file_walk() request.
+ *
+ * Reads up to DISK_LINE_LEN - 1 bytes at walk_dat->offset, hands every
+ * complete line in that chunk (newline excluded) to the line handler,
+ * advances the offset past the consumed lines and reschedules itself.
+ * At end of file the completion handler is called with DISK_EOF; on a
+ * read error, with DISK_ERROR.  The walk state is freed in both cases.
+ *
+ * A chunk that contains no newline at all is skipped as a whole.  This is
+ * what discards a trailing partial line.  Without it the offset would
+ * never move and the walk would be rescheduled forever.  A line of
+ * DISK_LINE_LEN - 1 bytes or more therefore loses its leading chunk(s).
+ */
+int diskHandleWalk(fd, walk_dat)
+     int fd;
+     dwalk_ctrl *walk_dat;
+{
+    int len;
+    int end_pos;
+    int st_pos;
+    int used_bytes;
+    char temp_line[DISK_LINE_LEN];
+
+    lseek(fd, walk_dat->offset, SEEK_SET);
+    file_table[fd].at_eof = NO;
+    len = read(fd, walk_dat->buf, DISK_LINE_LEN - 1);
+
+    if (len < 0)
+        switch (errno) {
+#if EAGAIN != EWOULDBLOCK
+        case EAGAIN:
+#endif
+        case EWOULDBLOCK:
+            /* nothing was read: try again later from the same offset */
+            comm_set_select_handler(fd, COMM_SELECT_READ, (PF) diskHandleWalk,
+                (caddr_t) walk_dat);
+            return DISK_OK;
+        default:
+            debug(1, "diskHandleWalk: FD %d: error readingd: %s\n",
+                fd, xstrerror());
+            walk_dat->handler(fd, DISK_ERROR, walk_dat->client_data);
+            safe_free(walk_dat->buf);
+            safe_free(walk_dat);
+            return DISK_ERROR;
+    } else if (len == 0) {
+        /* EOF */
+        walk_dat->handler(fd, DISK_EOF, walk_dat->client_data);
+        safe_free(walk_dat->buf);
+        safe_free(walk_dat);
+        return DISK_OK;
+    }
+    /* emulate fgets here. Cut the into separate line. newline is excluded */
+    /* it throws last partial line, if exist, away. */
+    used_bytes = st_pos = end_pos = 0;
+    while (end_pos < len) {
+        if (walk_dat->buf[end_pos] == '\n') {
+            /* new line found */
+            strncpy(temp_line, walk_dat->buf + st_pos, end_pos - st_pos);
+            temp_line[end_pos - st_pos] = '\0';
+            used_bytes += end_pos - st_pos + 1;
+
+            /* invoke line handler */
+            walk_dat->line_handler(fd, temp_line, strlen(temp_line),
+                walk_dat->line_data);
+
+            /* skip to next line */
+            st_pos = end_pos + 1;
+        }
+        end_pos++;
+    }
+
+    /* no complete line in this chunk: drop it so the walk makes progress */
+    if (used_bytes == 0)
+        used_bytes = len;
+
+    /* update file pointer to the next to be read character */
+    walk_dat->offset += used_bytes;
+
+    /* reschedule it for next line. */
+    comm_set_select_handler(fd, COMM_SELECT_READ, (PF) diskHandleWalk,
+        (caddr_t) walk_dat);
+    return DISK_OK;
+}
+
+
+/* start walk through whole file operation
+ * read one block and chop it to a line and pass it to provided
+ * handler one line at a time.
+ * call a completion handler when done. */
+int file_walk(fd, handler, client_data, line_handler, line_data)
+     int fd;
+     FILE_WALK_HD handler;
+     caddr_t client_data;
+     FILE_WALK_LHD line_handler;
+     caddr_t line_data;
+
+{
+    dwalk_ctrl *walk_dat;
+
+    walk_dat = (dwalk_ctrl *) xmalloc(sizeof(dwalk_ctrl));
+    memset(walk_dat, '\0', sizeof(dwalk_ctrl));
+    walk_dat->fd = fd;
+    walk_dat->offset = 0;
+    walk_dat->buf = (caddr_t) xcalloc(1, DISK_LINE_LEN);
+    walk_dat->cur_len = 0;
+    walk_dat->handler = handler;
+    walk_dat->client_data = client_data;
+    walk_dat->line_handler = line_handler;
+    walk_dat->line_data = line_data;
+
+    comm_set_select_handler(fd, COMM_SELECT_READ, (PF) diskHandleWalk,
+        (caddr_t) walk_dat);
+    return DISK_OK;
+}
+
+/*
+ * diskFileName - return the name recorded for fd in file_table, or 0 when
+ * the recorded name is empty.
+ */
+char *diskFileName(fd)
+     int fd;
+{
+    if (file_table[fd].filename[0])
+        return (file_table[fd].filename);
+    else
+        return (0);
+}
diff --git a/src/dnsserver.cc b/src/dnsserver.cc
new file mode 100644
index 00000000000..72adad0f06a
--- /dev/null
+++ b/src/dnsserver.cc
@@ -0,0 +1,299 @@
+static char rcsid[] = "$Id: dnsserver.cc,v 1.1 1996/02/22 06:23:54 wessels Exp $";
+/*
+ * File: dnsserver.c
+ *
Description: dnsserver process for non-blocking DNS lookup. + * Author: Anawat Chankhunthod + * Created: + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. 
+ * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "util.h" + +extern int h_errno; + +int do_debug = 0; + +/* error messages from gethostbyname() */ +#define my_h_msgs(x) (\ + ((x) == HOST_NOT_FOUND) ? \ + "Host not found (authoritative)" : \ + ((x) == TRY_AGAIN) ? 
\ + "Host not found (non-authoritative)" : \ + ((x) == NO_RECOVERY) ? \ + "Non recoverable errors" : \ + ((x) == NO_DATA) ? \ + "Valid name, no data record of requested type" : \ + ((x) == NO_ADDRESS) ? \ + "No address, look for MX record" : \ + "Unknown DNS problem") + +/* + * Modified to use UNIX domain sockets between cached and the dnsservers to + * save an FD per DNS server, Hong Mei, USC. + * + * Before forking a dnsserver, cached creates and listens on a UNIX domain + * socket. After the fork(), cached closes its end of the rendezvous socket + * but then immediately connects to it to establish the connection to the + * dnsserver process. We use AF_UNIX to prevent other folks from + * connecting to our little dnsservers after we fork but before we connect + * to them. + * + * Cached creates UNIX domain sockets named dns.PID.NN, e.g. dns.19215.11 + * + * In ipcache_init(): + * . dnssocket = ipcache_opensocket(getDnsProgram()) + * . dns_child_table[i]->inpipe = dnssocket + * . dns_child_table[i]->outpipe = dnssocket + * + * The dnsserver inherits a socket (socket_from_cache) from cached which it + * uses to rendezvous with. The child takes responsibility for cleaning up + * the UNIX domain pathnames by setting a few signal handlers. 
+ * + */ + +int main(argc, argv) + int argc; + char *argv[]; +{ + char request[256]; + char msg[256]; + struct hostent *result = NULL; + FILE *logfile = NULL; + long start; + long stop; + char *t = NULL; + char buf[256]; + int socket_from_cache, fd; + int a1, a2, a3, a4; + int addr_count = 0; + int alias_count = 0; + int i; + char *dnsServerPathname = NULL; + int c; + extern char *optarg; + + while ((c = getopt(argc, argv, "vhdp:")) != -1) + switch (c) { + case 'v': + case 'h': + printf("dnsserver version %s\n", HARVEST_VERSION); + exit(0); + break; + case 'd': + sprintf(buf, "dnsserver.%d.log", (int) getpid()); + logfile = fopen(buf, "a"); + do_debug++; + if (!logfile) + fprintf(stderr, "Could not open dnsserver's log file\n"); + break; + case 'p': + dnsServerPathname = xstrdup(optarg); + break; + default: + fprintf(stderr, "usage: dnsserver -h -d -p socket-filename\n"); + exit(1); + break; + } + + socket_from_cache = 3; + + /* accept DNS look up from ipcache */ + if (dnsServerPathname) { + fd = accept(socket_from_cache, (struct sockaddr *) 0, (int *) 0); + unlink(dnsServerPathname); + if (fd < 0) { + fprintf(stderr, "dnsserver: accept: %s\n", xstrerror()); + exit(1); + } + close(socket_from_cache); + + /* point stdout to fd */ + dup2(fd, 1); + dup2(fd, 0); + close(fd); + } + while (1) { + memset(request, '\0', 256); + + /* read from ipcache */ + if (fgets(request, 255, stdin) == (char *) NULL) + exit(1); + if ((t = strrchr(request, '\n')) != NULL) + *t = '\0'; /* strip NL */ + if ((t = strrchr(request, '\r')) != NULL) + *t = '\0'; /* strip CR */ + if (strcmp(request, "$shutdown") == 0) { + exit(0); + } + if (strcmp(request, "$hello") == 0) { + printf("$alive\n"); + printf("$end\n"); + fflush(stdout); + continue; + } + /* check if it's already an IP address in text form. 
*/ + if (sscanf(request, "%d.%d.%d.%d", &a1, &a2, &a3, &a4) == 4) { + printf("$name %s\n", request); + printf("$h_name %s\n", request); + printf("$h_len %d\n", 4); + printf("$ipcount %d\n", 1); + printf("%s\n", request); + printf("$aliascount %d\n", 0); + printf("$end\n"); + fflush(stdout); + continue; + } + start = time(NULL); + result = gethostbyname(request); + if (!result) { + if (h_errno == TRY_AGAIN) { + sleep(2); + result = gethostbyname(request); /* try a little harder */ + } + } + stop = time(NULL); + + msg[0] = '\0'; + if (!result) { + if (h_errno == TRY_AGAIN) { + sprintf(msg, "Name Server for domain '%s' is unavailable.", + request); + } else { + sprintf(msg, "DNS Domain '%s' is invalid: %s.\n", + request, my_h_msgs(h_errno)); + } + } + if (!result || (strlen(result->h_name) == 0)) { + if (logfile) { + fprintf(logfile, "%s %d\n", request, (int) (stop - start)); + fflush(logfile); + } + printf("$fail %s\n", request); + printf("$message %s\n", msg[0] ? msg : "Unknown Error"); + printf("$end\n"); + fflush(stdout); + continue; + } else { + + printf("$name %s\n", request); + printf("$h_name %s\n", result->h_name); + printf("$h_len %d\n", result->h_length); + + addr_count = alias_count = 0; + while (result->h_addr_list[addr_count] && addr_count < 255) + ++addr_count; + printf("$ipcount %d\n", addr_count); + for (i = 0; i < addr_count; i++) { + struct in_addr addr; + memcpy((char *) &addr, result->h_addr_list[i], result->h_length); + printf("%s\n", inet_ntoa(addr)); + } + +#ifdef SEND_ALIASES + while ((alias_count < 255) && result->h_aliases[alias_count]) + ++alias_count; +#endif + printf("$aliascount %d\n", alias_count); + for (i = 0; i < alias_count; i++) { + printf("%s\n", result->h_aliases[i]); + } + + printf("$end\n"); + fflush(stdout); + continue; + } + } + + exit(0); + /*NOTREACHED */ +} diff --git a/src/filemap.cc b/src/filemap.cc new file mode 100644 index 00000000000..6eae757cc84 --- /dev/null +++ b/src/filemap.cc @@ -0,0 +1,226 @@ +static char 
rcsid[] = "filemap.c,v 1.5.6.3 1995/12/10 23:02:47 duane Exp"; +/* + * File: main.c + * Description: main loop for cache + * Author: John Noll, USC + * Created: Mon Dec 13 10:10:28 1993 (John Noll, USC) sfdif + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. 
Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +/* + * Maintain a bitmap of the allocated file numbers. This + * eliminates the call to stat() to find available file numbers + * + * We use a bitmap where the bit position corresponds to file number. + * On create, we allocate the bit map, and then test, set, and reset it + * in a handful of 1-liners. 
+ */ +#include "config.h" +#include + +#include "ansihelp.h" +#include "filemap.h" +#include "util.h" +#include "debug.h" + +/* Number of bits in a long */ +#if SIZEOF_LONG == 8 +#define LONG_BIT_SHIFT 6 +#define BITS_IN_A_LONG 0x40 +#define LONG_BIT_MASK 0x3F +#define ALL_ONES (unsigned long) 0xFFFFFFFFFFFFFFFF +#elif SIZEOF_LONG == 4 +#define LONG_BIT_SHIFT 5 +#define BITS_IN_A_LONG 0x20 +#define LONG_BIT_MASK 0x1F +#define ALL_ONES (unsigned long) 0xFFFFFFFF +#else +#define LONG_BIT_SHIFT 5 +#define BITS_IN_A_LONG 0x20 +#define LONG_BIT_MASK 0x1F +#define ALL_ONES (unsigned long) 0xFFFFFFFF +#endif + +extern int storeGetSwapSpace _PARAMS((int)); +extern void fatal_dump _PARAMS((char *)); + +static fileMap *fm = NULL; +

/*
 * file_map_create - allocate the (single, file-static) bitmap for n file
 * numbers and return it.
 *
 * The word count is rounded UP so every file number below n has a bit;
 * the old "n >> LONG_BIT_SHIFT" dropped the last partial word and let
 * the bit routines index past the array when n was not a multiple of
 * the word size.  Padding bits at or above n are pre-set so that
 * file_map_allocate() can never hand them out.  They are not counted in
 * n_files_in_map.
 */
fileMap *file_map_create(n)
     int n;			/* Number of files */
{
    int i;

    fm = xcalloc(1, sizeof(fileMap));

    fm->max_n_files = n;
    fm->nwords = (n + BITS_IN_A_LONG - 1) >> LONG_BIT_SHIFT;
    debug(1, "file_map_create: creating space for %d files\n", n);
    debug(5, "--> %d words of %d bytes each\n",
	fm->nwords, (int) sizeof(unsigned long));
    fm->file_map = (unsigned long *) xcalloc(fm->nwords, sizeof(unsigned long));

    /* mark the unused tail of the last word as permanently taken */
    for (i = n; i < (fm->nwords << LONG_BIT_SHIFT); i++)
	fm->file_map[i >> LONG_BIT_SHIFT] |=
	    ((unsigned long) 1 << (i & LONG_BIT_MASK));
    return (fm);
}

/*
 * file_map_bit_set - mark file_number as in use and return it.
 *
 * Bumps n_files_in_map unconditionally (even if the bit was already
 * set).  Logs a one-time warning once more than 7/8 of the numbers are
 * used, and asks the store to free space when fewer than 100 remain.
 */
int file_map_bit_set(file_number)
     int file_number;
{
    /* unsigned 1: shifting a signed 1L into the sign bit is undefined */
    unsigned long bitmask = ((unsigned long) 1 << (file_number & LONG_BIT_MASK));

#ifdef XTRA_DEBUG
    if (fm->file_map[file_number >> LONG_BIT_SHIFT] & bitmask)
	debug(0, "file_map_bit_set: WARNING: file number %d is already set!\n",
	    file_number);
#endif

    fm->file_map[file_number >> LONG_BIT_SHIFT] |= bitmask;

    fm->n_files_in_map++;
    if (!fm->toggle && (fm->n_files_in_map > ((fm->max_n_files * 7) >> 3))) {
	fm->toggle++;
	debug(0, "You should increment MAX_SWAP_FILE\n");
    } else if (fm->n_files_in_map > (fm->max_n_files - 100)) {
	debug(0, "You've run out of swap file numbers. Freeing 1MB\n");
	storeGetSwapSpace(1000000);
    }
    return (file_number);
}

/*
 * file_map_bit_reset - mark file_number as free and decrement
 * n_files_in_map (unconditionally, even if the bit was already clear).
 */
void file_map_bit_reset(file_number)
     int file_number;
{
    unsigned long bitmask = ((unsigned long) 1 << (file_number & LONG_BIT_MASK));

    fm->file_map[file_number >> LONG_BIT_SHIFT] &= ~bitmask;
    fm->n_files_in_map--;
}

/*
 * file_map_bit_test - return 1 if file_number is in use, else 0.
 */
int file_map_bit_test(file_number)
     int file_number;
{
    unsigned long bitmask = ((unsigned long) 1 << (file_number & LONG_BIT_MASK));
    /* be sure the return value is an int, not a u_long */
    return (fm->file_map[file_number >> LONG_BIT_SHIFT] & bitmask ? 1 : 0);
}

/*
 * file_map_allocate - claim a free file number, preferring suggestion.
 *
 * If suggestion is free it is taken.  Otherwise words are scanned
 * circularly starting at suggestion's word, and the lowest free bit of
 * the first non-full word is taken.  When every word is full the
 * process is terminated through fatal_dump().
 */
int file_map_allocate(suggestion)
     int suggestion;
{
    int word;
    int bit;
    int count;

    if (!file_map_bit_test(suggestion)) {
	fm->last_file_number_allocated = suggestion;
	return file_map_bit_set(suggestion);
    }
    word = suggestion >> LONG_BIT_SHIFT;
    for (count = 0; count < fm->nwords; count++) {
	if (fm->file_map[word] != ALL_ONES)
	    break;
	word = (word + 1) % fm->nwords;
    }

    for (bit = 0; bit < BITS_IN_A_LONG; bit++) {
	suggestion = ((unsigned long) word << LONG_BIT_SHIFT) | bit;
	if (!file_map_bit_test(suggestion)) {
	    fm->last_file_number_allocated = suggestion;
	    return file_map_bit_set(suggestion);
	}
    }

    debug(0, "file_map_allocate: All %d files are in use!\n", fm->max_n_files);
    debug(0, "You need to recompile with a larger value for MAX_SWAP_FILE\n");
    fatal_dump(NULL);
    /* NOTREACHED -- the return only keeps every path of a non-void
     * function well defined. */
    return (-1);
}

#ifdef TEST

#define TEST_SIZE (1<<16)
main(argc, argv)
{
    int i;

    fm = file_map_create(TEST_SIZE);

    for (i = 0; i < TEST_SIZE; ++i) {
	file_map_bit_set(i);
	if (!file_map_bit_test(i))
	    fatal_dump(NULL);
	file_map_bit_reset(i);
    }
}
#endif
diff --git a/src/ftp.cc b/src/ftp.cc new file mode 100644 index 00000000000..b829795d7b9 --- /dev/null +++ b/src/ftp.cc @@ -0,0 +1,746 @@ +static char rcsid[] = "$Id: ftp.cc,v 1.1 1996/02/22 06:23:54 wessels Exp $"; +/* + * File: ftp.c + * Description: state machine for ftp retrieval protocol. Based on John's + * gopher retrieval module. 
+ * Author: Anawat Chankhunthod, USC + * Created: Tue May 28 10:57:11 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. 
+ * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. 
+ * + * + */ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include /* for WNOHANG */ +#include + +#include "ansihelp.h" +#include "comm.h" +#include "store.h" +#include "stat.h" +#include "url.h" +#include "mime.h" +#include "fdstat.h" +#include "cache_cf.h" +#include "ttl.h" +#include "util.h" +#include "ftp.h" + +#define FTP_DELETE_GAP (64*1024) + +ftpget_thread *FtpgetThread = NULL; +ftpget_thread **FtpgetThreadTailP = &FtpgetThread; + +static char ftpASCII[] = "A"; +static char ftpBinary[] = "I"; + +typedef struct _Ftpdata { + StoreEntry *entry; + char host[HARVESTHOSTNAMELEN + 1]; + char type_id; + char request[MAX_URL]; + char *type; + char *mime_hdr; + int cpid; + int ftp_fd; +} FtpData; + +extern char *tmp_error_buf; +extern time_t cached_curtime; + +static int ftp_open_pipe(); +static int ftp_close_pipe(); +

/*
 * ftp_url_parser - split an ftp:// or file:// URL into its parts.
 *
 *   url       input URL
 *   host      out: host name        (caller buffer, HARVESTHOSTNAMELEN+1)
 *   request   out: decoded url-path (caller buffer, MAX_URL)
 *   user      out: decoded login    (caller buffer, MAX_URL)
 *   password  out: decoded password (caller buffer, MAX_URL)
 *
 * The buffer sizes above are those used by ftpStart(), the only caller
 * visible in this file -- NOTE(review): confirm for any other caller.
 *
 * Returns 0 on success and -1 when the URL cannot be parsed, is not
 * ftp/file, is MAX_URL bytes or longer, or names a host longer than
 * HARVESTHOSTNAMELEN.  When no "user:password@" prefix is present the
 * login defaults to anonymous / harvest@.
 *
 * XXX: this does not support FTP on a different port!
 */
int ftp_url_parser(url, host, request, user, password)
     char *url;
     char *host;
     char *request;
     char *user;
     char *password;
{
    static char atypebuf[MAX_URL];
    static char hostbuf[MAX_URL];
    static char hostpart[MAX_URL];
    char *tmp = NULL;
    int t;

    /* initialize everything */
    atypebuf[0] = hostbuf[0] = hostpart[0] = '\0';
    request[0] = host[0] = user[0] = password[0] = '\0';

    /*
     * Every field scanned below is a substring of url, so rejecting an
     * over-long url is what keeps the unbounded %s / %[ conversions
     * inside their MAX_URL-sized destinations.
     */
    if (strlen(url) >= MAX_URL)
	return -1;

    t = sscanf(url, "%[a-zA-Z]://%[^/]%s", atypebuf, hostbuf, request);
    if ((t < 2) ||
	!(!strcasecmp(atypebuf, "ftp") || !strcasecmp(atypebuf, "file"))) {
	return -1;
    } else if (t == 2) {	/* no request */
	strcpy(request, "/");
    } else {
	tmp = url_convert_hex(request);		/* convert %xx to char */
	strncpy(request, tmp, MAX_URL);
	request[MAX_URL - 1] = '\0';	/* strncpy does not guarantee this */
	safe_free(tmp);
    }

    /* url address format is something like this:
     * [ userid [ : password ] @ ] host
     * or possibly even
     * [ [ userid ] [ : [ password ] ] @ ] host
     *
     * So we must try to make sense of it.  */

    /* XXX: this only support [user:passwd@]host */
    t = sscanf(hostbuf, "%[^:]:%[^@]@%s", user, password, hostpart);
    if (t < 3) {
	strcpy(hostpart, user);	/* no login/passwd information */
	strcpy(user, "anonymous");
	strcpy(password, "harvest@");
    }
    /* the host goes through a MAX_URL scratch buffer so its length can
     * be checked before it lands in the caller's smaller buffer */
    if (strlen(hostpart) > HARVESTHOSTNAMELEN)
	return -1;
    strcpy(host, hostpart);

    /* we need to convert user and password for URL encodings */
    tmp = url_convert_hex(user);
    strcpy(user, tmp);
    safe_free(tmp);

    tmp = url_convert_hex(password);
    strcpy(password, tmp);
    safe_free(tmp);

    return 0;
}

/*
 * ftpCachable - return 0 if url contains any key from ftp_stoplist as a
 * substring, 1 otherwise.  type and mime_hdr are not consulted.
 */
int ftpCachable(url, type, mime_hdr)
     char *url;
     char *type;
     char *mime_hdr;
{
    stoplist *p = NULL;

    /* scan stop list */
    for (p = ftp_stoplist; p; p = p->next) {
	if (strstr(url, p->key))
	    return 0;
    }

    /* else cachable */
    return 1;
}

/*
 * ftpLifetimeExpire - timeout/lifetime handler for an ftpget pipe.
 *
 * Aborts the store entry with a "Transaction Timeout" (210) error page,
 * closes the pipe to ftpget, closes fd, and frees data.
 */
void ftpLifetimeExpire(fd, data)
     int fd;
     FtpData *data;
{
    StoreEntry *entry = NULL;
    entry = data->entry;
    /* the format used to have no conversion for the url argument */
    debug(4, "ftpLifeTimeExpire: FD %d: %s\n", fd, entry->url);
    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
	entry->url,
	entry->url,
	"FTP",
	210,
	"Transaction Timeout",
	"The Network/Remote site may be down or too slow. Try again later.",
	HARVEST_VERSION,
	comm_hostname());
    storeAbort(entry, tmp_error_buf);
    ftp_close_pipe(data->ftp_fd, data->cpid);
    comm_close(fd);
#ifdef LOG_ERRORS
    CacheInfo->log_append(CacheInfo,
	entry->url,
	"0.0.0.0",
	store_mem_obj(entry, e_current_len),
	"ERR_210",		/* FTP LIFETIME EXPIRE */
	data->type ? data->type : "NULL");
#endif
    safe_free(data);
}

+/* This will be called when data is ready to be read from fd. Read until + * error or connection closed. 
*/ +int ftpReadReply(fd, data) + int fd; + FtpData *data; +{ + static char buf[4096]; + int len; + int clen; + int off; + StoreEntry *entry = NULL; + + entry = data->entry; + if (entry->flag & DELETE_BEHIND) { + if (storeClientWaiting(entry)) { + /* check if we want to defer reading */ + clen = store_mem_obj(entry, e_current_len); + off = store_mem_obj(entry, e_lowest_offset); + if ((clen - off) > FTP_DELETE_GAP) { + debug(3, "ftpReadReply: Read deferred for Object: %s\n", + entry->key); + debug(3, " Current Gap: %d bytes\n", + clen - off); + + /* reschedule, so it will automatically be reactivated when + * Gap is big enough. */ + comm_set_select_handler(fd, + COMM_SELECT_READ, + (PF) ftpReadReply, + (caddr_t) data); + return 0; + } + } else { + /* we can terminate connection right now */ + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "FTP", + 319, + "No Client", + "All Clients went away before tranmission is complete and object is too big to cache.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + ftp_close_pipe(data->ftp_fd, data->cpid); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_319", /* FTP NO CLIENTS, BIG OBJECT */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return 0; + } + } + len = read(fd, buf, 4096); + debug(5, "ftpReadReply FD %d, len=%d\n", fd, len); + + if (len < 0 || ((len == 0) && (store_mem_obj(entry, e_current_len) == 0))) { + if (len < 0) + debug(1, "ftpReadReply - error reading: %s\n", xstrerror()); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "FTP", + 305, + "Read Error.", + "Network/Remote Site is down. 
Try again later.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + ftp_close_pipe(data->ftp_fd, data->cpid); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_305", /* FTP READ ERROR */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + } else if (len == 0) { + /* Connection closed; retrieval done. */ + /* If ftpget failed, arrange so the object gets ejected and + * doesn't get to disk. */ + if (ftp_close_pipe(data->ftp_fd, data->cpid) != 0) { + entry->expires = cached_curtime + getNegativeTTL(); + BIT_RESET(entry->flag, CACHABLE); + BIT_SET(entry->flag, RELEASE_REQUEST); + } else if (!(entry->flag & DELETE_BEHIND)) { + entry->expires = cached_curtime + ttlSet(entry); + } + /* update fdstat and fdtable */ + comm_close(fd); + storeComplete(entry); + safe_free(data); + } else if (((store_mem_obj(entry, e_current_len) + len) > getFtpMax()) && + !(entry->flag & DELETE_BEHIND)) { + /* accept data, but start to delete behind it */ + storeStartDeleteBehind(entry); + + storeAppend(entry, buf, len); + comm_set_select_handler(fd, + COMM_SELECT_READ, + (PF) ftpReadReply, + (caddr_t) data); + + } else if (entry->flag & CLIENT_ABORT_REQUEST) { + /* append the last bit of info we get */ + storeAppend(entry, buf, len); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "FTP", + 307, + "Client Aborted", + "Client(s) dropped connection before transmission is complete.\nObject fetching is aborted.\n", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + ftp_close_pipe(data->ftp_fd, data->cpid); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_307", /* FTP CLIENT ABORT */ + data->type ? 
data->type : "NULL"); +#endif + safe_free(data); + } else { + storeAppend(entry, buf, len); + comm_set_select_handler(fd, + COMM_SELECT_READ, + (PF) ftpReadReply, + (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, + COMM_SELECT_TIMEOUT, + (PF) ftpLifetimeExpire, + (caddr_t) data, + getReadTimeout()); + } + return 0; +} + + +#ifdef OLD_CODE +/* This will be called when request write is complete. Schedule read of reply. */ +int ftpSendComplete(fd, buf, size, errflag, data) + int fd; + char *buf; + int size; + int errflag; + FtpData *data; +{ + debug(1, "ftpSendComplete - THIS SHOULD NOT HAPPEN fd: %d size: %d errflag: %d\n", + fd, size, errflag); + return 0; +} + +/* This will be called when connect completes. Write request. */ +int ftpSendRequest(fd, data) + int fd; + FtpData *data; +{ + debug(1, "ftpSendRequest - THIS SHOULD NOT HAPPEN fd: %d\n", fd); + return 0; +} +#endif + +int ftpStart(unusedfd, url, entry) + int unusedfd; + char *url; + StoreEntry *entry; +{ + static char user[MAX_URL]; + static char password[MAX_URL]; + FtpData *data = NULL; + + debug(3, "FtpStart: FD %d \n", unusedfd, url); + + data = (FtpData *) xcalloc(1, sizeof(FtpData)); + data->entry = entry; + + /* Parse url. */ + user[0] = password[0] = '\0'; + if (ftp_url_parser(url, data->host, data->request, user, password)) { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "FTP", + 309, + "Invalid URL syntax: Cannot parse.", + "Please contact your system manager for further help.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_309", /* FTP INVALID URL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + debug(5, "FtpStart - host:%s, request:%s, user:%s, passwd:%s\n", + data->host, data->request, user, password); + + /* Create socket. 
*/ + data->ftp_fd = ftp_open_pipe(getFtpProgram(), + data->host, + data->request, + user, + password, + "r", + &(data->cpid), + getFtpOptions()); + + if (data->ftp_fd < 0) { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "FTP", + 308, + "Cannot connect to FTP slave process", + "Please contact your system manager for further help.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_308", /* FTP FTPGET FAIL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* Pipe created ok */ + + debug(5, "startftp - conn %d EINPROGRESS\n", data->ftp_fd); + + fdstat_open(data->ftp_fd, Pipe); + commSetNonBlocking(data->ftp_fd); + (void) fd_note(data->ftp_fd, entry->url); + + /* Install connection complete handler. */ + fd_note(data->ftp_fd, entry->url); + comm_set_select_handler(data->ftp_fd, COMM_SELECT_WRITE, 0, 0); + comm_set_fd_lifetime(data->ftp_fd, getClientLifetime()); + comm_set_select_handler(data->ftp_fd, + COMM_SELECT_LIFETIME, + (PF) ftpLifetimeExpire, + (caddr_t) data); + comm_set_select_handler(data->ftp_fd, + COMM_SELECT_READ, + (PF) ftpReadReply, + (caddr_t) data); + comm_set_select_handler_plus_timeout(data->ftp_fd, + COMM_SELECT_TIMEOUT, + (PF) ftpLifetimeExpire, + (caddr_t) data, getReadTimeout()); + return COMM_OK; +} + +/* + * ftp_open_pipe - This opens a pipe to the ftpget command. + * It currently supports read-only pipes and hardcoded args. The child + * process only has stdin from /dev/null, stdout to the pipe, + * and stderr inherited from the parent. cpid is set to the + * pid of the child process or to -1 on error. Returns a read-only + * file descriptor to the read end of the pipe, or -1 on error. + * + * Allows process to make many ftp_open_pipe() calls. 
-DH + */ +static int ftp_open_pipe(p1, p2, p3, p4, p5, type, cpid, opts) + char *p1, *p2, *p3, *p4, *p5, *type, *opts; + int *cpid; +{ + int pfd[2]; + int pid; + int fd; + char *transfer = NULL; + ftpget_thread *thread = NULL; + static char tbuf[64]; + int got_timeout = 0; + int got_negttl = 0; + int argc; + char *argv[64]; + static char *w_space = "\n\t "; + char *s = NULL; + + if (p3[strlen(p3) - 1] == '/') + transfer = ftpASCII; + else { + char *ext; + ext_table_entry *e; + + if ((ext = strrchr(p3, '.')) != NULL) { + ext++; + transfer = ((e = mime_ext_to_type(ext)) && + strncmp(e->mime_type, "text", 4) == 0) ? ftpASCII : + ftpBinary; + } else + transfer = ftpBinary; + } + + *cpid = -1; /* initialize first */ + + if (type == NULL || strcmp(type, "r") != 0) { + debug(0, "ftp_open_pipe: type %s unsupported.\n", + type ? type : "(null)"); + return (-1); /* unsupported */ + } + pfd[0] = pfd[1] = -1; /* For debugging */ + if (pipe(pfd) < 0) { + debug(0, "ftp_open_pipe: pipe: %s\n", xstrerror()); + if (pfd[0] > -1) + close(pfd[0]); + if (pfd[1] > -1) + close(pfd[1]); + return (-1); + } + if ((pid = fork()) < 0) { + debug(0, "ftp_open_pipe: fork: %s\n", xstrerror()); + close(pfd[0]); + close(pfd[1]); + return (-1); + } + if (pid != 0) { /* parent */ + *cpid = pid; /* pass child pid */ + (void) close(pfd[1]); /* close the write pipe */ + fcntl(pfd[0], F_SETFD, 1); /* set close-on-exec */ + thread = (ftpget_thread *) xcalloc(1, sizeof(ftpget_thread)); + thread->pid = pid; + thread->fd = pfd[0]; + *FtpgetThreadTailP = thread; + FtpgetThreadTailP = (&(thread->next)); + return (pfd[0]); /* return read FD */ + } + /* child */ + close(0); + if (open("/dev/null", O_RDONLY, 0) < 0) + debug(0, "ftp_open_pipe: /dev/null: %s\n", xstrerror()); + if (dup2(pfd[1], 1) < 0) { /* stdout -> write pipe */ + debug(0, "ftp_open_pipe: dup2(%d,%d): %s\n", pfd[1], 1, xstrerror()); + _exit(1); + } + /* stderr is inherited */ + + /* close all file desc, and make sure we close the read pipe */ + 
for (fd = 3; fd < fdstat_biggest_fd(); fd++) + (void) close(fd); + (void) close(pfd[0]); + (void) close(pfd[1]); + + /* + * Remove leading slash from FTP url-path so that we can + * handle ftp://user:pw@host/path objects where path and /path + * are quite different. -DW + */ + if (!strcmp(p3, "/")) + *p3 = '.'; + if (*p3 == '/') + p3++; + + + /* + * Run the ftpget command: + * p1 is the ftpget program, need execlp() to use PATH + * p2 is the remote host + * p3 is the remote file + * transfer is "A" for ASCII and "I" for binary transfer + * p4 is the username + * p5 is the password + */ + + argc = 0; + argv[argc++] = xstrdup(p1); + for (s = strtok(opts, w_space); s; s = strtok(NULL, w_space)) { + argv[argc++] = xstrdup(s); + if (!strncmp(s, "-t", 2)) + got_timeout = 1; + if (!strncmp(s, "-n", 2)) + got_negttl = 1; + } + if (!got_timeout) { + argv[argc++] = xstrdup("-t"); + sprintf(tbuf, "%d", getReadTimeout()); + argv[argc++] = xstrdup(tbuf); + } + if (!got_negttl) { + argv[argc++] = xstrdup("-n"); + sprintf(tbuf, "%d", getNegativeTTL()); + argv[argc++] = xstrdup(tbuf); + } + argv[argc++] = xstrdup("-h"); /* httpify */ + argv[argc++] = xstrdup("-"); /* stdout */ + argv[argc++] = xstrdup(p2); /* hostname */ + argv[argc++] = xstrdup(p3); /* pathname */ + argv[argc++] = xstrdup(transfer); /* A or I */ + argv[argc++] = xstrdup(p4); /* username */ + argv[argc++] = xstrdup(p5); /* password */ + argv[argc++] = NULL; /* terminate */ + execvp(p1, argv); + perror(p1); + _exit(1); + /* NOTREACHED */ +} + +/* + * ftp_close_pipe - closes the pipe opened by ftp_open_pipe. + * Non-blocking. -DH + * + * Return 0 if ftpget exits successfully, or 1 upon failure. 
+ */ +static int ftp_close_pipe(fd, cpid) + int fd; + int cpid; +{ + int status; + int ret; + ftpget_thread *t = NULL; + ftpget_thread **T = NULL; + ftpget_thread *match = NULL; + ftpget_thread *next = NULL; + + (void) close(fd); /* close stdio ptr -- should generate SIGCHLD */ + + /* + * Look through the ftpget-thread list for an entry with + * the same pid and FILE ptr. These entries are added in + * ftp_open_pipe() + */ + for (t = FtpgetThread; t; t = t->next) { + if (t->pid == cpid && t->fd == fd) { + match = t; + break; + } + } + + /* + * If the matched entry is in state FTPGET_THREAD_WAITED + * then the child process was wait()'ed for in the + * generic SIGCHLD handler. That handler will have + * filled in status and return values + */ + if (match && match->state == FTPGET_THREAD_WAITED) { + ret = match->wait_retval; + status = match->status; + debug(3, "Check Thread: Match found, wait_retval=%d status=0x%x\n", + ret, status); + } else { + /* No match found, do the wait() ourselves */ + ret = waitpid(cpid, &status, WNOHANG); /* non-blocking wait */ + } + + if (match) { + /* remove match from the linked list */ + for (T = &FtpgetThread, t = FtpgetThread; t; t = next) { + next = t->next; + if (t == match) { + *T = t->next; + xfree(t); + } else { + T = &(t->next); + } + } + FtpgetThreadTailP = T; + } + if (ret == 0) + return 0; + if (ret < 0) { + return 1; + } + if (ret != cpid) { + return 1; + } + if (WIFSIGNALED(status)) { + debug(0, "%s exited due to signal %d\n", + getFtpProgram(), WTERMSIG(status)); + return 1; + } + if (WIFEXITED(status) && WEXITSTATUS(status) > 0) { + if (WEXITSTATUS(status) < 10) { + /* SOFT ERROR -- DONT CACHE */ + debug(1, "%s returned exit status %d\n", + getFtpProgram(), WEXITSTATUS(status)); + return 1; + } else { + /* HARD ERROR -- DO CACHE */ + debug(5, "%s returned exit status %d\n", + getFtpProgram(), WEXITSTATUS(status)); + return 0; + } + } + return 0; +} diff --git a/src/gopher.cc b/src/gopher.cc new file mode 100644 index 
00000000000..d2116dd7c74 --- /dev/null +++ b/src/gopher.cc @@ -0,0 +1,1191 @@ +static char rcsid[] = "$Id: gopher.cc,v 1.1 1996/02/22 06:23:54 wessels Exp $"; +/* + * File: gopher.c + * Description: state machine for gopher retrieval protocol. Based on Anawat's + * gopher retrieval module. + * Author: John Noll, USC, Anawat + * Created: Thu Apr 28 10:57:11 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. 
We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. 
+ * + * + */ +#include "config.h" +#include +#include +#include +#include +#include + +#include "ansihelp.h" +#include "comm.h" +#include "store.h" +#include "stat.h" +#include "url.h" +#include "mime.h" +#include "cache_cf.h" +#include "ttl.h" +#include "util.h" +#include "stmem.h" +#include "ipcache.h" +#include "icp.h" + +extern char *dns_error_message; +extern time_t cached_curtime; +extern char *tmp_error_buf; + +/* gopher type code from rfc. Anawat. */ +#define GOPHER_FILE '0' +#define GOPHER_DIRECTORY '1' +#define GOPHER_CSO '2' +#define GOPHER_ERROR '3' +#define GOPHER_MACBINHEX '4' +#define GOPHER_DOSBIN '5' +#define GOPHER_UUENCODED '6' +#define GOPHER_INDEX '7' +#define GOPHER_TELNET '8' +#define GOPHER_BIN '9' +#define GOPHER_REDUNT '+' +#define GOPHER_3270 'T' +#define GOPHER_GIF 'g' +#define GOPHER_IMAGE 'I' + +#define GOPHER_HTML 'h' /* HTML */ +#define GOPHER_INFO 'i' +#define GOPHER_WWW 'w' /* W3 address */ +#define GOPHER_SOUND 's' + +#define GOPHER_PLUS_IMAGE ':' +#define GOPHER_PLUS_MOVIE ';' +#define GOPHER_PLUS_SOUND '<' + +#define GOPHER_PORT 70 +#define GOPHER_DELETE_GAP (64*1024) + +#define TAB '\t' +#define TEMP_BUF_SIZE SM_PAGE_SIZE +#define MAX_CSO_RESULT 1024 + +typedef struct gopher_ds { + StoreEntry *entry; + char host[HARVESTHOSTNAMELEN + 1]; + enum { + NORMAL, + HTML_DIR, + HTML_INDEX_RESULT, + HTML_CSO_RESULT, + HTML_INDEX_PAGE, + HTML_CSO_PAGE + } conversion; + int HTML_header_added; + int port; + char *mime_hdr; + char type_id; + char request[MAX_URL]; + int data_in; + int cso_recno; + int len; + char *buf; /* pts to a 4k page */ + char *icp_page_ptr; /* Pts to gopherStart buffer that needs to be freed */ + char *icp_rwd_ptr; /* Pts to icp rw structure that needs to be freed */ +} GopherData; + +GopherData *CreateGopherData(); + +static void freeGopherData _PARAMS((GopherData *)); + +char def_gopher_bin[] = "www/unknown"; +char def_gopher_text[] = "text/plain"; + +/* figure out content type from file extension */ +static void 
gopher_mime_content(buf, name, def) + char *buf; + char *name; + char *def; +{ + static char temp[MAX_URL]; + char *ext1 = NULL; + char *ext2 = NULL; + char *str = NULL; + ext_table_entry *e = NULL; + + ext2 = NULL; + strcpy(temp, name); + for (ext1 = temp; *ext1; ext1++) + if (isupper(*ext1)) + *ext1 = tolower(*ext1); + if ((ext1 = strrchr(temp, '.')) == NULL) { + /* use default */ + sprintf(buf + strlen(buf), "Content-Type: %s\r\n", def); + return; + } + /* try extension table */ + *ext1++ = 0; + if (strcmp("gz", ext1) == 0 || strcmp("z", ext1) == 0) { + ext2 = ext1; + if ((ext1 = strrchr(temp, '.')) == NULL) { + ext1 = ext2; + ext2 = NULL; + } else + ext1++; + } + if ((e = mime_ext_to_type(ext1)) == NULL) { + /* mime_ext_to_type() can return a NULL */ + if (ext2 && (e = mime_ext_to_type(ext2))) { + str = e->mime_type; + ext2 = NULL; + } else { + str = def; + } + } else { + str = e->mime_type; + } + sprintf(buf + strlen(buf), "Content-Type: %s\r\n", str); + if (ext2 && (e = mime_ext_to_type(ext2))) { + sprintf(buf + strlen(buf), "Content-Encoding: %s\r\n", + e->mime_encoding); + } +} + + + +/* create MIME Header for Gopher Data */ +void gopherMimeCreate(data) + GopherData *data; +{ + static char tempMIME[MAX_MIME]; + + sprintf(tempMIME, "\ +HTTP/1.0 200 OK Gatewaying\r\n\ +Server: HarvestCache/%s\r\n\ +MIME-version: 1.0\r\n", HARVEST_VERSION); + + switch (data->type_id) { + + case GOPHER_DIRECTORY: + case GOPHER_INDEX: + case GOPHER_HTML: + case GOPHER_WWW: + case GOPHER_CSO: + strcat(tempMIME, "Content-Type: text/html\r\n"); + break; + case GOPHER_GIF: + case GOPHER_IMAGE: + case GOPHER_PLUS_IMAGE: + strcat(tempMIME, "Content-Type: image/gif\r\n"); + break; + case GOPHER_SOUND: + case GOPHER_PLUS_SOUND: + strcat(tempMIME, "Content-Type: audio/basic\r\n"); + break; + case GOPHER_PLUS_MOVIE: + strcat(tempMIME, "Content-Type: video/mpeg\r\n"); + break; + case GOPHER_MACBINHEX: + case GOPHER_DOSBIN: + case GOPHER_UUENCODED: + case GOPHER_BIN: + /* Rightnow We have 
no idea what it is. */ + gopher_mime_content(tempMIME, data->request, def_gopher_bin); + break; + + case GOPHER_FILE: + default: + gopher_mime_content(tempMIME, data->request, def_gopher_text); + break; + + } + + strcat(tempMIME, "\r\n"); + storeAppend(data->entry, tempMIME, strlen(tempMIME)); +} + +/* Parse a gopher url into components. By Anawat. */ +int gopher_url_parser(url, host, port, type_id, request) + char *url; + char *host; + int *port; + char *type_id; + char *request; +{ + static char atypebuf[MAX_URL]; + static char hostbuf[MAX_URL]; + char *tmp = NULL; + int t; + + atypebuf[0] = hostbuf[0] = '\0'; + host[0] = request[0] = '\0'; + (*port) = 0; + (*type_id) = 0; + + t = sscanf(url, "%[a-zA-Z]://%[^/]/%c%s", atypebuf, hostbuf, + type_id, request); + if ((t < 2) || strcasecmp(atypebuf, "gopher")) { + return -1; + } else if (t == 2) { + (*type_id) = GOPHER_DIRECTORY; + request[0] = '\0'; + } else if (t == 3) { + request[0] = '\0'; + } else { + /* convert %xx to char */ + tmp = url_convert_hex(request); + strncpy(request, tmp, MAX_URL); + safe_free(tmp); + } + + host[0] = '\0'; + if (sscanf(hostbuf, "%[^:]:%d", host, port) < 2) + (*port) = GOPHER_PORT; + + return 0; +} + +int gopherCachable(url, type, mime_hdr) + char *url; + char *type; + char *mime_hdr; +{ + stoplist *p = NULL; + GopherData *data = NULL; + int cachable = 1; + + /* scan stop list */ + for (p = gopher_stoplist; p; p = p->next) + if (strstr(url, p->key)) + return 0; + + /* use as temp data structure to parse gopher URL */ + data = CreateGopherData(); + + /* parse to see type */ + gopher_url_parser(url, data->host, &data->port, &data->type_id, data->request); + + switch (data->type_id) { + case GOPHER_INDEX: + case GOPHER_CSO: + case GOPHER_TELNET: + case GOPHER_3270: + cachable = 0; + break; + default: + cachable = 1; + } + freeGopherData(data); + + return cachable; +} + +void gopherEndHTML(data) + GopherData *data; +{ + static char tmpbuf[TEMP_BUF_SIZE]; + + if (!data->data_in) { + 
sprintf(tmpbuf, "

    Server Return Nothing.

    \n"); + storeAppend(data->entry, tmpbuf, strlen(tmpbuf)); + return; + } +} + + +/* Convert Gopher to HTML */ +/* Borrow part of code from libwww2 came with Mosaic distribution */ +void gopherToHTML(data, inbuf, len) + GopherData *data; + char *inbuf; + int len; +{ + char *pos = inbuf; + char *lpos = NULL; + char *tline = NULL; + static char line[TEMP_BUF_SIZE]; + static char tmpbuf[TEMP_BUF_SIZE]; + static char outbuf[TEMP_BUF_SIZE << 4]; + char *name = NULL; + char *selector = NULL; + char *host = NULL; + char *port = NULL; + char *escaped_selector = NULL; + char *icon_type = NULL; + char gtype; + StoreEntry *entry = NULL; + + memset(outbuf, '\0', sizeof(outbuf)); + memset(tmpbuf, '\0', sizeof(outbuf)); + memset(line, '\0', sizeof(outbuf)); + + entry = data->entry; + + if (data->conversion == HTML_INDEX_PAGE) { + sprintf(outbuf, "Gopher Index %s

    %s
    Gopher Search

    This is a searchable Gopher index.Use the search function of your browser to enter search terms. \n", entry->url, entry->url); + + storeAppend(entry, outbuf, strlen(outbuf)); + /* now let start sending stuff to client */ + BIT_RESET(entry->flag, DELAY_SENDING); + data->data_in = 1; + + return; + } + if (data->conversion == HTML_CSO_PAGE) { + sprintf(outbuf, "CSO Search of %s

    %s
    CSO Search

    A CSO database usually contains a phonebook or directory. Use the search function of your browser to enter search terms.\n", + entry->url, entry->url); + + storeAppend(entry, outbuf, strlen(outbuf)); + /* now let start sending stuff to client */ + BIT_RESET(entry->flag, DELAY_SENDING); + data->data_in = 1; + + return; + } + inbuf[len] = '\0'; + + if (!data->HTML_header_added) { + if (data->conversion == HTML_CSO_RESULT) + strcat(outbuf, "

    CSO Searchs Result

    \n
    \n");
    +	else
    +	    strcat(outbuf, "

    Gopher Menu

    \n
    \n");
    +	data->HTML_header_added = 1;
    +    }
    +    while ((pos != NULL) && (pos < inbuf + len)) {
    +
    +	if (data->len != 0) {
    +	    /* there is something left from last tx. */
    +	    strncpy(line, data->buf, data->len);
    +	    lpos = (char *) memccpy(line + data->len, inbuf, '\n', len);
    +	    if (lpos)
    +		*lpos = '\0';
    +	    else {
    +		/* there is no complete line in inbuf */
    +		/* copy it to temp buffer */
    +		if (data->len + len > TEMP_BUF_SIZE) {
    +		    debug(1, "GopherHTML: Buffer overflow. Lost some data on URL: %s\n",
    +			entry->url);
    +		    len = TEMP_BUF_SIZE - data->len;
    +		}
    +		memcpy(data->buf + data->len, inbuf, len);
    +		data->len += len;
    +		return;
    +	    }
    +
    +	    /* skip one line */
    +	    pos = (char *) memchr(pos, '\n', 256);
    +	    if (pos)
    +		pos++;
    +
    +	    /* we're done with the remain from last tx. */
    +	    data->len = 0;
    +	    *(data->buf) = '\0';
    +	} else {
    +
    +	    lpos = (char *) memccpy(line, pos, '\n', len - (pos - inbuf));
    +	    if (lpos)
    +		*lpos = '\0';
    +	    else {
    +		/* there is no complete line in inbuf */
    +		/* copy it to temp buffer */
    +		if ((len - (pos - inbuf)) > TEMP_BUF_SIZE) {
    +		    debug(1, "GopherHTML: Buffer overflow. Lost some data on URL: %s\n",
    +			entry->url);
    +		    len = TEMP_BUF_SIZE;
    +		}
    +		if (len > (pos - inbuf)) {
    +		    memcpy(data->buf, pos, len - (pos - inbuf));
    +		    data->len = len - (pos - inbuf);
    +		}
    +		break;
    +	    }
    +
    +	    /* skip one line */
    +	    pos = (char *) memchr(pos, '\n', 256);
    +	    if (pos)
    +		pos++;
    +
    +	}
    +
    +	/* at this point. We should have one line in buffer to process */
    +
    +	if (*line == '.') {
    +	    /* skip it */
    +	    memset(line, '\0', TEMP_BUF_SIZE);
    +	    continue;
    +	}
    +	switch (data->conversion) {
    +
    +	case HTML_INDEX_RESULT:
    +	case HTML_DIR:{
    +		tline = line;
    +		gtype = *tline++;
    +		name = tline;
    +		selector = strchr(tline, TAB);
    +		if (selector) {
    +		    *selector++ = '\0';
    +		    host = strchr(selector, TAB);
    +		    if (host) {
    +			*host++ = '\0';
    +			port = strchr(host, TAB);
    +			if (port) {
    +			    char *junk;
    +			    port[0] = ':';
    +			    junk = strchr(host, TAB);
    +			    if (junk)
    +				*junk++ = 0;	/* Chop port */
    +			    else {
    +				junk = strchr(host, '\r');
    +				if (junk)
    +				    *junk++ = 0;	/* Chop port */
    +				else {
    +				    junk = strchr(host, '\n');
    +				    if (junk)
    +					*junk++ = 0;	/* Chop port */
    +				}
    +			    }
    +			    if ((port[1] == '0') && (!port[2]))
    +				port[0] = 0;	/* 0 means none */
    +			}
    +			/* escape a selector here */
    +			escaped_selector = url_escape(selector);
    +
    +			switch (gtype) {
    +			case GOPHER_DIRECTORY:
    +			    icon_type = "internal-gopher-menu";
    +			    break;
    +			case GOPHER_FILE:
    +			    icon_type = "internal-gopher-text";
    +			    break;
    +			case GOPHER_INDEX:
    +			case GOPHER_CSO:
    +			    icon_type = "internal-gopher-index";
    +			    break;
    +			case GOPHER_IMAGE:
    +			case GOPHER_GIF:
    +			case GOPHER_PLUS_IMAGE:
    +			    icon_type = "internal-gopher-image";
    +			    break;
    +			case GOPHER_SOUND:
    +			case GOPHER_PLUS_SOUND:
    +			    icon_type = "internal-gopher-sound";
    +			    break;
    +			case GOPHER_PLUS_MOVIE:
    +			    icon_type = "internal-gopher-movie";
    +			    break;
    +			case GOPHER_TELNET:
    +			case GOPHER_3270:
    +			    icon_type = "internal-gopher-telnet";
    +			    break;
    +			case GOPHER_BIN:
    +			case GOPHER_MACBINHEX:
    +			case GOPHER_DOSBIN:
    +			case GOPHER_UUENCODED:
    +			    icon_type = "internal-gopher-binary";
    +			    break;
    +			default:
    +			    icon_type = "internal-gopher-unknown";
    +			    break;
    +			}
    +
    +
    +			memset(tmpbuf, '\0', TEMP_BUF_SIZE);
    +			if ((gtype == GOPHER_TELNET) || (gtype == GOPHER_3270)) {
    +			    if (strlen(escaped_selector) != 0)
    +				sprintf(tmpbuf, " %s\n",
    +				    icon_type, escaped_selector, host, name);
    +			    else
    +				sprintf(tmpbuf, " %s\n",
    +				    icon_type, host, name);
    +
    +			} else {
    +			    sprintf(tmpbuf, " %s\n",
    +				icon_type, host, gtype, escaped_selector, name);
    +			}
    +			safe_free(escaped_selector);
    +			strcat(outbuf, tmpbuf);
    +			data->data_in = 1;
    +		    } else {
    +			memset(line, '\0', TEMP_BUF_SIZE);
    +			continue;
    +		    }
    +		} else {
    +		    memset(line, '\0', TEMP_BUF_SIZE);
    +		    continue;
    +		}
    +		break;
    +	    }			/* HTML_DIR, HTML_INDEX_RESULT */
    +
    +
    +	case HTML_CSO_RESULT:{
    +		int t;
    +		int code;
    +		int recno;
    +		char result[MAX_CSO_RESULT];
    +
    +		tline = line;
    +
    +		if (tline[0] == '-') {
    +		    t = sscanf(tline, "-%d:%d:%[^\n]", &code, &recno, result);
    +		    if (t < 3)
    +			break;
    +
    +		    if (code != 200)
    +			break;
    +
    +		    if (data->cso_recno != recno) {
    +			sprintf(tmpbuf, "

    Record# %d
    %s

    \n
    ", recno, result);
    +			data->cso_recno = recno;
    +		    } else {
    +			sprintf(tmpbuf, "%s\n", result);
    +		    }
    +		    strcat(outbuf, tmpbuf);
    +		    data->data_in = 1;
    +		    break;
    +		} else {
    +		    /* handle some error codes */
    +		    t = sscanf(tline, "%d:%[^\n]", &code, result);
    +
    +		    if (t < 2)
    +			break;
    +
    +		    switch (code) {
    +
    +		    case 200:{
    +			    /* OK */
    +			    /* Do nothing here */
    +			    break;
    +			}
    +
    +		    case 102:	/* Number of matches */
    +		    case 501:	/* No Match */
    +		    case 502:	/* Too Many Matches */
    +			{
    +			    /* Print the message the server returns */
    +			    sprintf(tmpbuf, "

    %s

    \n
    ", result);
    +			    strcat(outbuf, tmpbuf);
    +			    data->data_in = 1;
    +			    break;
    +			}
    +
    +
    +		    }
    +		}
    +
    +	    }			/* HTML_CSO_RESULT */
    +	default:
    +	    break;		/* do nothing */
    +
    +	}			/* switch */
    +
    +    }				/* while loop */
    +
    +    if ((int) strlen(outbuf) > 0) {
    +	storeAppend(entry, outbuf, strlen(outbuf));
    +	/* now let start sending stuff to client */
    +	BIT_RESET(entry->flag, DELAY_SENDING);
    +    }
    +    return;
    +}
    +
    +
    +/*
    + * Read-timeout handler for the gopher connection on fd.
    + *
    + * Aborts the store entry with error 203, returns data->icp_page_ptr to
    + * the 4k page pool and frees data->icp_rwd_ptr when they are set, closes
    + * fd and releases the per-request state.  Always returns 0.
    + */
    +int gopherReadReplyTimeout(fd, data)
    +     int fd;
    +     GopherData *data;
    +{
    +    StoreEntry *e = data->entry;
    +
    +    debug(4, "GopherReadReplyTimeout: Timeout on %d\n url: %s\n",
    +	fd, e->url);
    +
    +    /* build the error page and hand it to the store */
    +    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	e->url, e->url,
    +	"Gopher", 203,
    +	"Read timeout",
    +	"Network/Remote site may be down.  Try again later.",
    +	HARVEST_VERSION, comm_hostname());
    +    storeAbort(e, tmp_error_buf);
    +
    +    /* buffers of a request write that may still be outstanding */
    +    if (data->icp_page_ptr != NULL) {
    +	put_free_4k_page(data->icp_page_ptr);
    +    }
    +    if (data->icp_rwd_ptr != NULL) {
    +	safe_free(data->icp_rwd_ptr);
    +    }
    +
    +    comm_close(fd);
    +#ifdef LOG_ERRORS
    +    /* GOPHER READ TIMEOUT */
    +    CacheInfo->log_append(CacheInfo, e->url, "0.0.0.0",
    +	store_mem_obj(e, e_current_len), "ERR_203", "GET");
    +#endif
    +    freeGopherData(data);
    +    return 0;
    +}
    +
    +/*
    + * This will be called when socket lifetime is expired.
    + *
    + * fd   - descriptor whose lifetime ran out
    + * data - per-request gopher state; data->entry is the store entry
    + *
    + * Aborts the store entry with error 210, returns data->icp_page_ptr to
    + * the 4k page pool and frees data->icp_rwd_ptr when they are set, clears
    + * the read/write select handlers on fd, closes it and releases the
    + * per-request state.
    + */
    +void gopherLifetimeExpire(fd, data)
    +     int fd;
    +     GopherData *data;
    +{
    +    StoreEntry *entry = NULL;
    +    entry = data->entry;
    +    /* The format has to consume entry->url: with only "%d" in it the
    +     * URL argument was passed but never printed. */
    +    debug(4, "gopherLifeTimeExpire: FD %d: <URL:%s>\n", fd, entry->url);
    +    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	entry->url,
    +	entry->url,
    +	"GOPHER",
    +	210,
    +	"Transaction Timeout",
    +	"The Network/Remote site may be down or too slow.  Try again later.",
    +	HARVEST_VERSION,
    +	comm_hostname());
    +    storeAbort(entry, tmp_error_buf);
    +    if (data->icp_page_ptr)
    +	put_free_4k_page(data->icp_page_ptr);
    +    if (data->icp_rwd_ptr)
    +	safe_free(data->icp_rwd_ptr);
    +    /* drop our handlers before the descriptor is closed */
    +    comm_set_select_handler(fd, COMM_SELECT_READ | COMM_SELECT_WRITE, 0, 0);
    +    comm_close(fd);
    +#ifdef LOG_ERRORS
    +    CacheInfo->log_append(CacheInfo,
    +	entry->url,
    +	"0.0.0.0",
    +	store_mem_obj(entry, e_current_len),
    +	"ERR_210",		/* GOPHER LIFETIME EXPIRE */
    +	"GET");
    +#endif
    +    freeGopherData(data);
    +}
    +
    +
    +
    +
    +/* This will be called when data is ready to be read from fd.  Read until
    + * error or connection closed.
    + *
    + * fd   - descriptor the gopher reply is read from
    + * data - per-request gopher state; data->entry is the store entry
    + *
    + * Each call does at most one read() of up to TEMP_BUF_SIZE - 1 bytes.
    + * Depending on the outcome the entry is aborted (errors 205, 207, 219),
    + * completed, or appended to, in which case this function and
    + * gopherReadReplyTimeout are installed again as handlers on fd.
    + * Always returns 0. */
    +int gopherReadReply(fd, data)
    +     int fd;
    +     GopherData *data;
    +{
    +    char *buf = NULL;
    +    int len;
    +    int clen;
    +    int off;
    +    StoreEntry *entry = NULL;
    +    /* In delete-behind mode decide first whether to read at all. */
    +    entry = data->entry;
    +    if (entry->flag & DELETE_BEHIND) {
    +	if (storeClientWaiting(entry)) {
    +	    clen = store_mem_obj(entry, e_current_len);
    +	    off = store_mem_obj(entry, e_lowest_offset);
    +	    if ((clen - off) > GOPHER_DELETE_GAP) {
    +		debug(3, "gopherReadReply: Read deferred for Object: %s\n",
    +		    entry->key);
    +		debug(3, "                Current Gap: %d bytes\n",
    +		    clen - off);
    +
    +		/* Too much data is still held (gap above GOPHER_DELETE_GAP):
    +		 * skip this read and re-install ourselves, so the gap is
    +		 * checked again on the next read event. */
    +		comm_set_select_handler(fd,
    +		    COMM_SELECT_READ,
    +		    (PF) gopherReadReply,
    +		    (caddr_t) data);
    +/* don't install read timeout until we are below the GAP */
    +#ifdef INSTALL_READ_TIMEOUT_ABOVE_GAP
    +		comm_set_select_handler_plus_timeout(fd,
    +		    COMM_SELECT_TIMEOUT,
    +		    (PF) gopherReadReplyTimeout,
    +		    (caddr_t) data,
    +		    getReadTimeout());
    +#else
    +		comm_set_select_handler_plus_timeout(fd,
    +		    COMM_SELECT_TIMEOUT,
    +		    (PF) NULL,
    +		    (caddr_t) NULL,
    +		    (time_t) 0);
    +#endif
    +		return 0;
    +	    }
    +	} else {
    +	    /* we can terminate connection right now */
    +	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +		entry->url,
    +		entry->url,
    +		"Gopher",
    +		219,
    +		"No Client",
    +		"All Clients went away before tranmission is complete and object is too big to cache.",
    +		HARVEST_VERSION,
    +		comm_hostname());
    +	    storeAbort(entry, tmp_error_buf);
    +	    comm_close(fd);
    +#ifdef LOG_ERRORS
    +	    CacheInfo->log_append(CacheInfo,
    +		entry->url,
    +		"0.0.0.0",
    +		store_mem_obj(entry, e_current_len),
    +		"ERR_219",	/* GOPHER NO CLIENTS, BIG OBJ */
    +		"GET");
    +#endif
    +	    freeGopherData(data);
    +	    return 0;
    +	}
    +    }
    +    buf = get_free_4k_page();	/* given back by put_free_4k_page() at the end */
    +    len = read(fd, buf, TEMP_BUF_SIZE - 1);	/* leave one space for \0 in gopherToHTML */
    +    debug(5, "gopherReadReply - fd: %d read len:%d\n", fd, len);
    +    /* Read failure, or end of file before anything was stored: abort. */
    +    if (len < 0 || ((len == 0) && (store_mem_obj(entry, e_current_len) == 0))) {
    +	/* NOTE(review): for len == 0 no read error occurred, so the text
    +	 * from xstrerror() is presumably stale here -- confirm. */
    +	debug(1, "gopherReadReply - error reading: %s\n",
    +	    xstrerror());
    +	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	    entry->url,
    +	    entry->url,
    +	    "Gopher",
    +	    205,
    +	    "Read error",
    +	    "Network/Remote Site is down.  Try again later.",
    +	    HARVEST_VERSION,
    +	    comm_hostname());
    +	storeAbort(entry, tmp_error_buf);
    +	comm_close(fd);
    +#ifdef LOG_ERRORS
    +	CacheInfo->log_append(CacheInfo,
    +	    entry->url,
    +	    "0.0.0.0",
    +	    store_mem_obj(entry, e_current_len),
    +	    "ERR_205",		/* GOPHER READ FAIL */
    +	    "GET");
    +#endif
    +	freeGopherData(data);
    +
    +    } else if (len == 0) {
    +	/* Connection closed; retrieval done. */
    +	/* For converted replies gopherEndHTML() appends a notice when no
    +	 * data was produced at all. */
    +	if (data->conversion != NORMAL)
    +	    gopherEndHTML(data);
    +	if (!(entry->flag & DELETE_BEHIND))
    +	    entry->expires = cached_curtime + ttlSet(entry);
    +	BIT_RESET(entry->flag, DELAY_SENDING);
    +	storeComplete(entry);
    +	comm_close(fd);
    +	freeGopherData(data);
    +
    +    } else if (((store_mem_obj(entry, e_current_len) + len) > getGopherMax()) &&
    +	!(entry->flag & DELETE_BEHIND)) {
    +	/*  accept data, but start to delete behind it */
    +	storeStartDeleteBehind(entry);
    +
    +	if (data->conversion != NORMAL) {
    +	    gopherToHTML(data, buf, len);
    +	} else {
    +	    storeAppend(entry, buf, len);
    +	}
    +	comm_set_select_handler(fd, COMM_SELECT_READ, (PF) gopherReadReply, (caddr_t) data);
    +	comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, (PF) gopherReadReplyTimeout,
    +	    (caddr_t) data, getReadTimeout());
    +
    +    } else if (entry->flag & CLIENT_ABORT_REQUEST) {
    +	/* append the last bit of info we got */
    +	if (data->conversion != NORMAL) {
    +	    gopherToHTML(data, buf, len);
    +	} else {
    +	    storeAppend(entry, buf, len);
    +	}
    +	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	    entry->url,
    +	    entry->url,
    +	    "Gopher",
    +	    207,
    +	    "Client Aborted",
    +	    "Client(s) dropped connection before transmission is complete.\nObject fetching is aborted.\n",
    +	    HARVEST_VERSION,
    +	    comm_hostname());
    +	if (data->conversion != NORMAL)
    +	    gopherEndHTML(data);
    +	BIT_RESET(entry->flag, DELAY_SENDING);
    +	storeAbort(entry, tmp_error_buf);
    +	comm_close(fd);
    +#ifdef LOG_ERRORS
    +	CacheInfo->log_append(CacheInfo,
    +	    entry->url,
    +	    "0.0.0.0",
    +	    store_mem_obj(entry, e_current_len),
    +	    "ERR_207",		/* GOPHER CLIENT ABORT */
    +	    "GET");
    +#endif
    +	freeGopherData(data);
    +
    +    } else {
    +	/* ordinary case: store the data and wait for more */
    +	if (data->conversion != NORMAL) {
    +	    gopherToHTML(data, buf, len);
    +	} else {
    +	    storeAppend(entry, buf, len);
    +	}
    +	comm_set_select_handler(fd, COMM_SELECT_READ, (PF) gopherReadReply, (caddr_t) data);
    +	comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, (PF) gopherReadReplyTimeout,
    +	    (caddr_t) data, getReadTimeout());
    +    }
    +    put_free_4k_page(buf);
    +    return 0;
    +}
    +
    +/*
    + * Write-completion callback for the request queued by gopherSendRequest.
    + *
    + * fd      - descriptor the request was written to
    + * buf     - request page; given back to the 4k page pool here when set
    + * size    - only reported in the debug line
    + * errflag - non-zero when the write failed
    + * data    - per-request gopher state
    + *
    + * On failure the store entry is aborted with error 201 and the request
    + * state is released.  Otherwise the MIME header is appended, the reply
    + * conversion is chosen and the read and read-timeout handlers are
    + * installed on fd.  Always returns 0.
    + */
    +int gopherSendComplete(fd, buf, size, errflag, data)
    +     int fd;
    +     char *buf;
    +     int size;
    +     int errflag;
    +     GopherData *data;
    +{
    +    StoreEntry *e = data->entry;
    +
    +    debug(5, "gopherSendComplete - fd: %d size: %d errflag: %d\n",
    +	fd, size, errflag);
    +
    +    if (errflag) {
    +	sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	    e->url, e->url,
    +	    "Gopher", 201,
    +	    "Cannot connect to the original site",
    +	    "The remote site may be down.",
    +	    HARVEST_VERSION, comm_hostname());
    +	storeAbort(e, tmp_error_buf);
    +	comm_close(fd);
    +#ifdef LOG_ERRORS
    +	/* GOPHER CONNECT FAIL */
    +	CacheInfo->log_append(CacheInfo, e->url, "0.0.0.0",
    +	    store_mem_obj(e, e_current_len), "ERR_201", "GET");
    +#endif
    +	freeGopherData(data);
    +	if (buf != NULL) {
    +	    put_free_4k_page(buf);	/* Allocated by gopherSendRequest. */
    +	}
    +	return 0;
    +    }
    +
    +    /* The request went out: emit the MIME header first, whether or not
    +     * the reply is going to be converted to HTML. */
    +    gopherMimeCreate(data);
    +
    +    /* Pass the reply through untouched unless HTML was requested and the
    +     * item type is one that gets converted. */
    +    data->conversion = NORMAL;
    +    if (BIT_TEST(e->flag, REQ_HTML)) {
    +	switch (data->type_id) {
    +	case GOPHER_DIRECTORY:
    +	    data->conversion = HTML_DIR;
    +	    break;
    +	case GOPHER_INDEX:
    +	    data->conversion = HTML_INDEX_RESULT;
    +	    break;
    +	case GOPHER_CSO:
    +	    data->conversion = HTML_CSO_RESULT;
    +	    data->cso_recno = 0;
    +	    break;
    +	default:
    +	    break;
    +	}
    +	if (data->conversion != NORMAL) {
    +	    /* converted replies are held back until converted */
    +	    BIT_SET(e->flag, DELAY_SENDING);
    +	    data->HTML_header_added = 0;
    +	}
    +    }
    +
    +    /* Schedule read reply. */
    +    comm_set_select_handler(fd, COMM_SELECT_READ,
    +	(PF) gopherReadReply, (caddr_t) data);
    +    comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT,
    +	(PF) gopherReadReplyTimeout, (caddr_t) data, getReadTimeout());
    +    comm_set_fd_lifetime(fd, -1);	/* disable */
    +
    +    if (buf != NULL) {
    +	put_free_4k_page(buf);	/* Allocated by gopherSendRequest. */
    +    }
    +    data->icp_page_ptr = NULL;
    +    data->icp_rwd_ptr = NULL;
    +    return 0;
    +}
    +
    +/*
    + * This will be called when connect completes.  Write request.
    + *
    + * fd   - connected descriptor to the gopher server
    + * data - per-request gopher state; icp_page_ptr and icp_rwd_ptr are
    + *        set here
    + *
    + * The request is formatted into a page from get_free_4k_page() and
    + * handed to icpWrite() with gopherSendComplete as completion callback:
    + *
    + *   CSO    "query " + text after the leading '?' + CR LF "quit" CR LF
    + *   INDEX  selector with its first '?' replaced by a TAB, then CR LF
    + *   other  selector, then CR LF
    + *
    + * Always returns 0.
    + */
    +int gopherSendRequest(fd, data)
    +     int fd;
    +     GopherData *data;
    +{
    +#define CR '\015'
    +#define LF '\012'
    +    int len;
    +    static char query[MAX_URL];
    +    char *buf = get_free_4k_page();
    +    /* The page is treated as TEMP_BUF_SIZE bytes, as gopherReadReply
    +     * does for pages from the same allocator. */
    +    int page_size = (int) TEMP_BUF_SIZE;
    +
    +    data->icp_page_ptr = buf;
    +
    +    if (data->type_id == GOPHER_CSO) {
    +	/* query is static: clear it, so a request without a leading '?'
    +	 * cannot resend the text left over from an earlier request. */
    +	query[0] = '\0';
    +	sscanf(data->request, "?%s", query);
    +	/* "query ", CR LF, "quit", CR LF and the NUL take 15 bytes; the
    +	 * precision keeps the rest inside the page. */
    +	sprintf(buf, "query %.*s%c%cquit%c%c",
    +	    page_size - 15, query, CR, LF, CR, LF);
    +    } else {
    +	if (data->type_id == GOPHER_INDEX) {
    +	    /* selector and search words are separated by a TAB */
    +	    char *c_ptr = strchr(data->request, '?');
    +	    if (c_ptr) {
    +		*c_ptr = '\t';
    +	    }
    +	}
    +	/* CR LF and the NUL take 3 bytes */
    +	sprintf(buf, "%.*s%c%c", page_size - 3, data->request, CR, LF);
    +    }
    +    /* Send exactly the formatted text: the terminating NUL is not part
    +     * of the request and must not go out on the wire. */
    +    len = (int) strlen(buf);
    +
    +    debug(5, "gopherSendRequest - fd: %d\n", fd);
    +    data->icp_rwd_ptr = icpWrite(fd, buf, len, 30, gopherSendComplete, data);
    +    return 0;
    +}
    +
    +/* Common failure exit for gopherStart().  Closes `sock' unless it is
    + * COMM_ERROR (no socket), aborts the store entry with the standard
    + * retrieval-error page, logs `logcode' when LOG_ERRORS is defined and
    + * releases the request state.  Always returns COMM_ERROR so callers can
    + * write `return gopherStartFail(...)'. */
    +static int gopherStartFail(data, sock, code, logcode, msg, detail)
    +     GopherData *data;
    +     int sock;
    +     int code;
    +     char *logcode;
    +     char *msg;
    +     char *detail;
    +{
    +    StoreEntry *entry = data->entry;
    +
    +    if (sock != COMM_ERROR)
    +	comm_close(sock);
    +    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	entry->url,
    +	entry->url,
    +	"Gopher",
    +	code,
    +	msg,
    +	detail,
    +	HARVEST_VERSION,
    +	comm_hostname());
    +    storeAbort(entry, tmp_error_buf);
    +#ifdef LOG_ERRORS
    +    CacheInfo->log_append(CacheInfo,
    +	entry->url,
    +	"0.0.0.0",
    +	store_mem_obj(entry, e_current_len),
    +	logcode,
    +	"GET");
    +#endif
    +    freeGopherData(data);
    +    return COMM_ERROR;
    +}
    +
    +/* Begin retrieving a Gopher URL into `entry'.
    + *
    + *  unusedfd  ignored
    + *  url       URL to fetch; handed to gopher_url_parser()
    + *  entry     store entry that receives the object or an error page
    + *
    + * Returns COMM_OK when the request was answered locally (search page) or
    + * a connection was started, COMM_ERROR after the entry has been aborted
    + * with one of the error pages 208 (bad URL), 211 (no socket), 202 (DNS)
    + * or 204 (connect). */
    +int gopherStart(unusedfd, url, entry)
    +     int unusedfd;
    +     char *url;
    +     StoreEntry *entry;
    +{
    +    int sock, status;
    +    /* Create state structure. */
    +    GopherData *data = CreateGopherData();
    +
    +    data->entry = entry;
    +
    +    debug(3, "gopherStart - url: %s\n", url);
    +
    +    /* Parse url. */
    +    if (gopher_url_parser(url, data->host, &data->port,
    +	    &data->type_id, data->request))
    +	return gopherStartFail(data, COMM_ERROR, 208,
    +	    "ERR_208",		/* GOPHER INVALID URL */
    +	    "Invalid URL syntax: Cannot parse.",
    +	    "Contact your system administrator for further help.");
    +
    +    /* Create socket. */
    +    sock = comm_open(COMM_NONBLOCKING, 0, 0, url);
    +    if (sock == COMM_ERROR) {
    +	debug(4, "gopherStart: Failed because we're out of sockets.\n");
    +	return gopherStartFail(data, sock, 211,
    +	    "ERR_211",		/* GOPHER NO FD'S */
    +	    "Cached short of file-descriptors, sorry",
    +	    "");
    +    }
    +    /* The host must already be in the IP cache: the lookup is expected
    +     * to have been done before this routine is called.  Otherwise we
    +     * cannot check the return code of connect. */
    +    if (!ipcache_gethostbyname(data->host)) {
    +	debug(4, "gopherStart: Called without IP entry in ipcache. OR lookup failed.\n");
    +	return gopherStartFail(data, sock, 202,
    +	    "ERR_202",		/* GOPHER DNS FAIL */
    +	    "DNS name lookup failure",
    +	    dns_error_message);
    +    }
    +    if (((data->type_id == GOPHER_INDEX) || (data->type_id == GOPHER_CSO))
    +	&& (strchr(data->request, '?') == NULL)
    +	&& (BIT_TEST(entry->flag, REQ_HTML))) {
    +	/* Index or CSO URL without a query word: generate the search
    +	 * page for the client ourselves.  No connection is needed. */
    +	gopherMimeCreate(data);
    +
    +	/* type_id is known to be INDEX or CSO here */
    +	if (data->type_id == GOPHER_CSO)
    +	    data->conversion = HTML_CSO_PAGE;
    +	else
    +	    data->conversion = HTML_INDEX_PAGE;
    +
    +	gopherToHTML(data, (char *) NULL, 0);
    +	storeComplete(entry);
    +	freeGopherData(data);
    +	comm_close(sock);
    +	return COMM_OK;
    +    }
    +    /* Open connection. */
    +    status = comm_connect(sock, data->host, data->port);
    +    if (status != 0 && status != EINPROGRESS)
    +	return gopherStartFail(data, sock, 204,
    +	    "ERR_204",		/* GOPHER CONNECT FAIL */
    +	    "Cannot connect to the original site",
    +	    "The remote site may be down.");
    +    if (status == EINPROGRESS)
    +	debug(5, "startGopher - conn %d EINPROGRESS\n", sock);
    +
    +    /* Install connection complete handler. */
    +    comm_set_select_handler(sock, COMM_SELECT_LIFETIME,
    +	(PF) gopherLifetimeExpire, (caddr_t) data);
    +    comm_set_select_handler(sock, COMM_SELECT_WRITE,
    +	(PF) gopherSendRequest, (caddr_t) data);
    +
    +    return COMM_OK;
    +}
    +
    +
    +/* Allocate one GopherData with xcalloc() and attach a page from
    + * get_free_4k_page() as its buffer.  Release with freeGopherData(). */
    +GopherData *CreateGopherData()
    +{
    +    GopherData *state;
    +
    +    state = (GopherData *) xcalloc(1, sizeof(*state));
    +    state->buf = get_free_4k_page();
    +    return state;
    +}
    +
    +/* Release a GopherData obtained from CreateGopherData(): hand its page
    + * back with put_free_4k_page() and free the structure itself.  A NULL
    + * `gd', or a structure that has no page, is tolerated. */
    +static void freeGopherData(gd)
    +     GopherData *gd;
    +{
    +    if (gd == NULL)
    +	return;
    +    if (gd->buf)
    +	put_free_4k_page(gd->buf);
    +    safe_free(gd);
    +}
    diff --git a/src/http.cc b/src/http.cc
    new file mode 100644
    index 00000000000..efda8a1cb07
    --- /dev/null
    +++ b/src/http.cc
    @@ -0,0 +1,853 @@
    +/* RCS identification string for this file ($Id$ keyword expansion). */
    +static char rcsid[] = "$Id: http.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $";
    +/* 
    + *  File:         http.c
    + *  Description:  state machine for http retrieval protocol.  
    + *                Based on John's gopher retrieval module.
    + *  Author:       Anawat Chankhunthod, USC
    + *  Created:      Tue May 28 10:57:11 1994
    + *  Language:     C
    + **********************************************************************
    + *  Copyright (c) 1994, 1995.  All rights reserved.
    + *  
    + *    The Harvest software was developed by the Internet Research Task
    + *    Force Research Group on Resource Discovery (IRTF-RD):
    + *  
    + *          Mic Bowman of Transarc Corporation.
    + *          Peter Danzig of the University of Southern California.
    + *          Darren R. Hardy of the University of Colorado at Boulder.
    + *          Udi Manber of the University of Arizona.
    + *          Michael F. Schwartz of the University of Colorado at Boulder.
    + *          Duane Wessels of the University of Colorado at Boulder.
    + *  
    + *    This copyright notice applies to software in the Harvest
    + *    ``src/'' directory only.  Users should consult the individual
    + *    copyright notices in the ``components/'' subdirectories for
    + *    copyright information about other software bundled with the
    + *    Harvest source code distribution.
    + *  
    + *  TERMS OF USE
    + *    
    + *    The Harvest software may be used and re-distributed without
    + *    charge, provided that the software origin and research team are
    + *    cited in any use of the system.  Most commonly this is
    + *    accomplished by including a link to the Harvest Home Page
    + *    (http://harvest.cs.colorado.edu/) from the query page of any
    + *    Broker you deploy, as well as in the query result pages.  These
    + *    links are generated automatically by the standard Broker
    + *    software distribution.
    + *    
    + *    The Harvest software is provided ``as is'', without express or
    + *    implied warranty, and with no support nor obligation to assist
    + *    in its use, correction, modification or enhancement.  We assume
    + *    no liability with respect to the infringement of copyrights,
    + *    trade secrets, or any patents, and are not responsible for
    + *    consequential damages.  Proper use of the Harvest software is
    + *    entirely the responsibility of the user.
    + *  
    + *  DERIVATIVE WORKS
    + *  
    + *    Users may make derivative works from the Harvest software, subject 
    + *    to the following constraints:
    + *  
    + *      - You must include the above copyright notice and these 
    + *        accompanying paragraphs in all forms of derivative works, 
    + *        and any documentation and other materials related to such 
    + *        distribution and use acknowledge that the software was 
    + *        developed at the above institutions.
    + *  
    + *      - You must notify IRTF-RD regarding your distribution of 
    + *        the derivative work.
    + *  
    + *      - You must clearly notify users that your are distributing 
    + *        a modified version and not the original Harvest software.
    + *  
    + *      - Any derivative product is also subject to these copyright 
    + *        and use restrictions.
    + *  
    + *    Note that the Harvest software is NOT in the public domain.  We
    + *    retain copyright, as specified above.
    + *  
    + *  HISTORY OF FREE SOFTWARE STATUS
    + *  
    + *    Originally we required sites to license the software in cases
    + *    where they were going to build commercial products/services
    + *    around Harvest.  In June 1995 we changed this policy.  We now
    + *    allow people to use the core Harvest software (the code found in
    + *    the Harvest ``src/'' directory) for free.  We made this change
    + *    in the interest of encouraging the widest possible deployment of
    + *    the technology.  The Harvest software is really a reference
    + *    implementation of a set of protocols and formats, some of which
    + *    we intend to standardize.  We encourage commercial
    + *    re-implementations of code complying to this set of standards.  
    + *  
    + *  
    + */
    +#include "config.h"
    +#include <errno.h>
    +#include <stdlib.h>
    +#include <string.h>
    +#include <unistd.h>
    +
    +#include "ansihelp.h"
    +#include "comm.h"
    +#include "store.h"
    +#include "stat.h"
    +#include "url.h"
    +#include "ipcache.h"
    +#include "cache_cf.h"
    +#include "ttl.h"
    +#include "icp.h"
    +#include "util.h"
    +
    +/* Port http_url_parser() falls back to when the URL names none. */
    +#define HTTP_PORT         80
    +/* For DELETE_BEHIND objects httpReadReply() defers reading while
    + * (e_current_len - e_lowest_offset) is larger than this many bytes. */
    +#define HTTP_DELETE_GAP   (64*1024)
    +
    +/* NOTE(review): declaring errno by hand clashes with platforms where
    + * errno is a macro - confirm this is still wanted. */
    +extern int errno;
    +/* Text used as the detail line of the "DNS name lookup failure" pages. */
    +extern char *dns_error_message;
    +/* Time value added to ttlSet() to compute entry->expires; presumably
    + * the cache's notion of "now" - verify where it is defined. */
    +extern time_t cached_curtime;
    +
    +/* Per-request state for one HTTP retrieval.  Allocated by httpStart()
    + * or proxyhttpStart(), passed to every select handler as its `data'
    + * argument, and released with safe_free() by whichever handler ends
    + * the request. */
    +typedef struct _httpdata {
    +    StoreEntry *entry;		/* store entry receiving the reply */
    +    char host[HARVESTHOSTNAMELEN + 1];	/* host to connect to */
    +    int port;			/* port to connect to */
    +    char *type;			/* request method string, e.g. "GET" */
    +    char *mime_hdr;		/* request header text, may be NULL */
    +    char type_id;		/* not referenced by the code in this file's
    +				 * visible part - TODO confirm use */
    +    char request[MAX_URL + 1];	/* what is sent after the method: the
    +				 * parsed request part of the URL, or
    +				 * the whole URL when going via a proxy */
    +    char *icp_page_ptr;		/* Used to send proxy-http request: 
    +				 * put_free_8k_page(me) if the lifetime
    +				 * expires */
    +    char *icp_rwd_ptr;		/* When a lifetime expires during the
    +				 * middle of an icpwrite, don't lose the
    +				 * icpReadWriteData */
    +} HttpData;
    +
    +/* Shared scratch buffer the error pages are formatted into before
    + * being handed to storeAbort(). */
    +extern char *tmp_error_buf;
    +
    +/* Request method names; proxyhttpStart() indexes this table with
    + * entry->type_id. */
    +char *HTTP_OPS[] =
    +{"GET", "POST", "HEAD", ""};
    +
    +/* Split an "http://host[:port][/request]" URL into its parts.
    + *
    + *  url      URL to parse
    + *  host     receives the host name; must have room for
    + *           HARVESTHOSTNAMELEN + 1 bytes (as HttpData.host has)
    + *  port     receives the port, HTTP_PORT when the URL gives none
    + *  request  receives everything from the first '/' after the host up
    + *           to the first white space, or "/" when there is no such
    + *           part; must have room for MAX_URL + 1 bytes
    + *
    + * Returns 0 on success.  Returns -1 when the scheme is not http, the
    + * "scheme://host" part is missing, the URL is MAX_URL bytes or longer,
    + * or the host name is longer than HARVESTHOSTNAMELEN. */
    +int http_url_parser(url, host, port, request)
    +     char *url;
    +     char *host;
    +     int *port;
    +     char *request;
    +{
    +    static char hostbuf[MAX_URL];
    +    static char atypebuf[MAX_URL];
    +    char *colon;
    +    size_t hostlen;
    +    int t;
    +
    +    /* initialize everything */
    +    (*port) = 0;
    +    atypebuf[0] = hostbuf[0] = request[0] = host[0] = '\0';
    +
    +    /* The conversions below carry no field widths; bounding the whole
    +     * URL keeps every piece inside its MAX_URL sized buffer. */
    +    if (strlen(url) >= MAX_URL)
    +	return -1;
    +
    +    t = sscanf(url, "%[a-zA-Z]://%[^/]%s", atypebuf, hostbuf, request);
    +    if ((t < 2) || (strcasecmp(atypebuf, "http") != 0))
    +	return -1;
    +    if (t == 2)
    +	strcpy(request, "/");
    +
    +    /* Copy the host by hand so an over-long name is rejected instead of
    +     * overrunning the caller's buffer. */
    +    colon = strchr(hostbuf, ':');
    +    hostlen = colon ? (size_t) (colon - hostbuf) : strlen(hostbuf);
    +    if (hostlen > HARVESTHOSTNAMELEN)
    +	return -1;
    +    memcpy(host, hostbuf, hostlen);
    +    host[hostlen] = '\0';
    +
    +    /* No host before the ':', no ':' at all, or no number after it:
    +     * use the default port. */
    +    if (hostlen == 0 || colon == NULL || sscanf(colon + 1, "%d", port) != 1)
    +	(*port) = HTTP_PORT;
    +    return 0;
    +}
    +
    +/* Decide whether the reply to a request may be cached.
    + *
    + *  url       request URL, matched against the http_stoplist keys
    + *  type      request method string
    + *  mime_hdr  request header text, may be NULL
    + *
    + * Returns 1 when cachable, 0 otherwise. */
    +int httpCachable(url, type, mime_hdr)
    +     char *url;
    +     char *type;
    +     char *mime_hdr;
    +{
    +    stoplist *item;
    +
    +    /* Only methods beginning with GET or HEAD are cachable. */
    +    if (!(strncasecmp(type, "GET", 3) == 0 ||
    +	    strncasecmp(type, "HEAD", 4) == 0))
    +	return 0;
    +
    +    /* A request carrying "Authorization" is never cached. */
    +    if (mime_hdr != NULL && strstr(mime_hdr, "Authorization") != NULL)
    +	return 0;
    +
    +    /* Any stop list key occurring in the URL makes it uncachable. */
    +    for (item = http_stoplist; item; item = item->next) {
    +	if (strstr(url, item->key) != NULL)
    +	    return 0;
    +    }
    +
    +    return 1;
    +}
    +
    +/* This will be called when timeout on read. */
    +void httpReadReplyTimeout(fd, data)
    +     int fd;
    +     HttpData *data;
    +{
    +    StoreEntry *entry = NULL;
    +
    +    entry = data->entry;
    +    debug(4, "httpReadReplyTimeout: FD %d: \n", fd, entry->url);
    +    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	entry->url,
    +	entry->url,
    +	"HTTP",
    +	103,
    +	"Read timeout",
    +	"The Network/Remote site may be down.  Try again later.",
    +	HARVEST_VERSION,
    +	comm_hostname());
    +
    +    if (data->icp_rwd_ptr)
    +	safe_free(data->icp_rwd_ptr);
    +    if (data->icp_page_ptr) {
    +	put_free_8k_page(data->icp_page_ptr);
    +	data->icp_page_ptr = NULL;
    +    }
    +    storeAbort(entry, tmp_error_buf);
    +    comm_set_select_handler(fd, COMM_SELECT_READ, 0, 0);
    +    comm_close(fd);
    +#ifdef LOG_ERRORS
    +    CacheInfo->log_append(CacheInfo,
    +	entry->url,
    +	"0.0.0.0",
    +	store_mem_obj(entry, e_current_len),
    +	"ERR_103",		/* HTTP READ TIMEOUT */
    +	data->type ? data->type : "NULL");
    +#endif
    +    safe_free(data);
    +}
    +
    +/* This will be called when socket lifetime is expired. */
    +void httpLifetimeExpire(fd, data)
    +     int fd;
    +     HttpData *data;
    +{
    +    StoreEntry *entry = NULL;
    +
    +    entry = data->entry;
    +    debug(4, "httpLifeTimeExpire: FD %d: \n", fd, entry->url);
    +
    +    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +	entry->url,
    +	entry->url,
    +	"HTTP",
    +	110,
    +	"Transaction Timeout",
    +	"The Network/Remote site may be down or too slow.  Try again later.",
    +	HARVEST_VERSION,
    +	comm_hostname());
    +
    +    if (data->icp_page_ptr) {
    +	put_free_8k_page(data->icp_page_ptr);
    +	data->icp_page_ptr = NULL;
    +    }
    +    if (data->icp_rwd_ptr)
    +	safe_free(data->icp_rwd_ptr);
    +    storeAbort(entry, tmp_error_buf);
    +    comm_set_select_handler(fd, COMM_SELECT_READ | COMM_SELECT_WRITE, 0, 0);
    +    comm_close(fd);
    +#ifdef LOG_ERRORS
    +    CacheInfo->log_append(CacheInfo,
    +	entry->url,
    +	"0.0.0.0",
    +	store_mem_obj(entry, e_current_len),
    +	"ERR_110",		/* HTTP LIFETIME EXPIRE */
    +	data->type ? data->type : "NULL");
    +#endif
    +    safe_free(data);
    +}
    +
    +
    +
    +/* This will be called when data is ready to be read from fd.  Read until
    + * error or connection closed. */
    +void httpReadReply(fd, data)
    +     int fd;
    +     HttpData *data;
    +{
    +    static char buf[4096];
    +    int len;
    +    int clen;
    +    int off;
    +    StoreEntry *entry = NULL;
    +
    +    entry = data->entry;
    +    if (entry->flag & DELETE_BEHIND) {
    +	if (storeClientWaiting(entry)) {
    +	    /* check if we want to defer reading */
    +	    clen = store_mem_obj(entry, e_current_len);
    +	    off = store_mem_obj(entry, e_lowest_offset);
    +	    if ((clen - off) > HTTP_DELETE_GAP) {
    +		debug(3, "httpReadReply: Read deferred for Object: %s\n",
    +		    entry->key);
    +		debug(3, "                Current Gap: %d bytes\n",
    +		    clen - off);
    +
    +		/* reschedule, so it will be automatically reactivated
    +		 * when Gap is big enough. */
    +		comm_set_select_handler(fd,
    +		    COMM_SELECT_READ,
    +		    (PF) httpReadReply,
    +		    (caddr_t) data);
    +
    +/* don't install read timeout until we are below the GAP */
    +#ifdef INSTALL_READ_TIMEOUT_ABOVE_GAP
    +		comm_set_select_handler_plus_timeout(fd,
    +		    COMM_SELECT_TIMEOUT,
    +		    (PF) httpReadReplyTimeout,
    +		    (caddr_t) data,
    +		    getReadTimeout());
    +#else
    +		comm_set_select_handler_plus_timeout(fd,
    +		    COMM_SELECT_TIMEOUT,
    +		    (PF) NULL,
    +		    (caddr_t) NULL,
    +		    (time_t) 0);
    +#endif
    +		return;
    +	    }
    +	} else {
    +	    /* we can terminate connection right now */
    +	    sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG,
    +		entry->url,
    +		entry->url,
    +		"HTTP",
    +		119,
    +		"No Client",
    +		"All Clients went away before tranmission is complete and object is too big to cache.",
    +		HARVEST_VERSION,
    +		comm_hostname());
    +	    storeAbort(entry, tmp_error_buf);
    +	    comm_close(fd);
    +#ifdef LOG_ERRORS
    +	    CacheInfo->log_append(CacheInfo,
    +		entry->url,
    +		"0.0.0.0",
    +		store_mem_obj(entry, e_current_len),
    +		"ERR_119",	/* HTTP NO CLIENTS, BIG OBJ */
    +		data->type ? data->type : "NULL");
    +#endif
    +	    safe_free(data);
    +	    return;
    +	}
    +    }
    +    len = read(fd, buf, 4096);
    +    debug(5, "httpReadReply: FD %d: len %d.\n", fd, len);
    +
    +    if (len < 0 || ((len == 0) && (store_mem_obj(entry, e_current_len) == 0))) {
    +	/* XXX we we should log when len==0 and current_len==0 */
    +	debug(2, "httpReadReply: FD %d: read failure: %s.\n",
    +	    fd, xstrerror());
    +	if (errno == ECONNRESET) {
    +	    /* Connection reset by peer */
    +	    /* consider it as a EOF */
    +	    if (!(entry->flag & DELETE_BEHIND))
    +		entry->expires = cached_curtime + ttlSet(entry);
    +	    sprintf(tmp_error_buf, "\n

    Warning: The Remote Server sent RESET at the end of transmission.\n"); + storeAppend(entry, tmp_error_buf, strlen(tmp_error_buf)); + storeComplete(entry); + } else { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 105, + "Read error", + "Network/Remote site is down. Try again later.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + } + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_105", /* HTTP READ ERROR */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + } else if (len == 0) { + /* Connection closed; retrieval done. */ + if (!(entry->flag & DELETE_BEHIND)) + entry->expires = cached_curtime + ttlSet(entry); + storeComplete(entry); + comm_close(fd); + safe_free(data); + } else if (((store_mem_obj(entry, e_current_len) + len) > getHttpMax()) && + !(entry->flag & DELETE_BEHIND)) { + /* accept data, but start to delete behind it */ + storeStartDeleteBehind(entry); + + storeAppend(entry, buf, len); + comm_set_select_handler(fd, COMM_SELECT_READ, + (PF) httpReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, + (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout()); + + } else if (entry->flag & CLIENT_ABORT_REQUEST) { + /* append the last bit of info we get */ + storeAppend(entry, buf, len); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 107, + "Client Aborted", + "Client(s) dropped connection before transmission is complete.\nObject fetching is aborted.\n", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_107", /* HTTP CLIENT ABORT */ + data->type ? 
data->type : "NULL"); +#endif + safe_free(data); + } else { + storeAppend(entry, buf, len); + comm_set_select_handler(fd, COMM_SELECT_READ, + (PF) httpReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, + (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout()); + } +} + +/* This will be called when request write is complete. Schedule read of + * reply. */ +void httpSendComplete(fd, buf, size, errflag, data) + int fd; + char *buf; + int size; + int errflag; + HttpData *data; +{ + StoreEntry *entry = NULL; + + entry = data->entry; + debug(5, "httpSendComplete: FD %d: size %d: errflag %d.\n", + fd, size, errflag); + + if (buf) { + put_free_8k_page(buf); /* Allocated by httpSendRequest. */ + buf = NULL; + } + data->icp_page_ptr = NULL; /* So lifetime expire doesn't re-free */ + data->icp_rwd_ptr = NULL; /* Don't double free in lifetimeexpire */ + + if (errflag) { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 101, + "Cannot connect to the original site", + "The remote site may be down.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_101", /* HTTP CONNECT FAIL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return; + } else { + /* Schedule read reply. */ + comm_set_select_handler(fd, COMM_SELECT_READ, + (PF) httpReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, + (PF) httpReadReplyTimeout, (caddr_t) data, getReadTimeout()); + comm_set_fd_lifetime(fd, -1); /* disable lifetime DPW */ + + } +} + +/* This will be called when connect completes. Write request. 
*/ +void httpSendRequest(fd, data) + int fd; + HttpData *data; +{ + char *xbuf = NULL; + char *ybuf = NULL; + char *buf = NULL; + char *t = NULL; + char *post_buf = NULL; + static char *crlf = "\r\n"; + static char *HARVEST_PROXY_TEXT = "via Harvest Cache version"; + int len = 0; + int buflen; + + debug(5, "httpSendRequest: FD %d: data %p.\n", fd, data); + buflen = strlen(data->type) + strlen(data->request); + if (data->mime_hdr) + buflen += strlen(data->mime_hdr); + buflen += 512; /* lots of extra */ + + if (!strcasecmp(data->type, "POST") && data->mime_hdr) { + if ((t = strstr(data->mime_hdr, "\r\n\r\n"))) { + post_buf = xstrdup(t + 4); + *(t + 4) = '\0'; + } + } + /* Since we limit the URL read to a 4K page, I doubt that the + * mime header could be longer than an 8K page */ + buf = (char *) get_free_8k_page(); + data->icp_page_ptr = buf; + if (buflen > DISK_PAGE_SIZE) { + debug(0, "Mime header length %d is breaking ICP code\n", buflen); + } + memset(buf, '\0', buflen); + + sprintf(buf, "%s %s ", data->type, data->request); + len = strlen(buf); + if (data->mime_hdr) { /* we have to parse the MIME header */ + xbuf = xstrdup(data->mime_hdr); + for (t = strtok(xbuf, crlf); t; t = strtok(NULL, crlf)) { + if (strncasecmp(t, "User-Agent:", 11) == 0) { + ybuf = (char *) get_free_4k_page(); + memset(ybuf, '\0', SM_PAGE_SIZE); + sprintf(ybuf, "%s %s %s", t, HARVEST_PROXY_TEXT, HARVEST_VERSION); + t = ybuf; + } + if (strncasecmp(t, "If-Modified-Since:", 18) == 0) + continue; + if (len + (int) strlen(t) > buflen - 10) + continue; + strcat(buf, t); + strcat(buf, crlf); + len += strlen(t) + 2; + } + xfree(xbuf); + if (ybuf) { + put_free_4k_page(ybuf); + ybuf = NULL; + } + } + strcat(buf, crlf); + len += 2; + if (post_buf) { + strcat(buf, post_buf); + len += strlen(post_buf); + xfree(post_buf); + } + debug(6, "httpSendRequest: FD %d: buf '%s'\n", fd, buf); + data->icp_rwd_ptr = icpWrite(fd, buf, len, 30, httpSendComplete, data); +} + +void httpConnInProgress(fd, data) + int 
fd; + HttpData *data; +{ + StoreEntry *entry = data->entry; + + if (comm_connect(fd, data->host, data->port) != COMM_OK) + switch (errno) { + case EINPROGRESS: + case EALREADY: + /* schedule this handler again */ + comm_set_select_handler(fd, + COMM_SELECT_WRITE, + (PF) httpConnInProgress, + (caddr_t) data); + return; + case EISCONN: + break; /* cool, we're connected */ + default: + comm_close(fd); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 104, + "Cannot connect to the original site", + "The remote site may be down.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_104", /* HTTP CONNECT FAIL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return; + } + /* Call the real write handler, now that we're fully connected */ + comm_set_select_handler(fd, COMM_SELECT_WRITE, + (PF) httpSendRequest, (caddr_t) data); +} + +int proxyhttpStart(e, url, entry) + edge *e; + char *url; + StoreEntry *entry; +{ + + /* Create state structure. */ + int sock, status; + HttpData *data = (HttpData *) xmalloc(sizeof(HttpData)); + + debug(3, "proxyhttpStart: \n", url); + debug(10, "proxyhttpStart: HTTP request header:\n%s\n", + store_mem_obj(entry, mime_hdr)); + + memset(data, '\0', sizeof(HttpData)); + data->entry = entry; + + strncpy(data->request, url, sizeof(data->request) - 1); + data->type = HTTP_OPS[entry->type_id]; + data->port = e->ascii_port; + data->mime_hdr = store_mem_obj(entry, mime_hdr); + strncpy(data->host, e->host, sizeof(data->host) - 1); + + if (e->proxy_only) + storeStartDeleteBehind(entry); + + /* Create socket. 
*/ + sock = comm_open(COMM_NONBLOCKING, 0, 0, url); + if (sock == COMM_ERROR) { + debug(4, "proxyhttpStart: Failed because we're out of sockets.\n"); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 111, + "Cached short of file-descriptors, sorry", + "", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_111", /* HTTP NO FD'S */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* check if IP is already in cache. It must be. + * It should be done before this route is called. + * Otherwise, we cannot check return code for connect. */ + if (!ipcache_gethostbyname(data->host)) { + debug(4, "proxyhttpstart: Called without IP entry in ipcache. OR lookup failed.\n"); + comm_close(sock); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 102, + "DNS name lookup failure", + dns_error_message, + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_102", /* HTTP DNS FAIL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* Open connection. */ + if ((status = comm_connect(sock, data->host, data->port))) { + if (status != EINPROGRESS) { + comm_close(sock); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 104, + "Cannot connect to the original site", + "The remote site may be down.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_104", /* HTTP CONNECT FAIL */ + data->type ? 
data->type : "NULL"); +#endif + safe_free(data); + e->last_fail_time = cached_curtime; + e->neighbor_up = 0; + return COMM_ERROR; + } else { + debug(5, "proxyhttpStart: FD %d: EINPROGRESS.\n", sock); + comm_set_select_handler(sock, COMM_SELECT_LIFETIME, + (PF) httpLifetimeExpire, (caddr_t) data); + comm_set_select_handler(sock, COMM_SELECT_WRITE, + (PF) httpConnInProgress, (caddr_t) data); + return COMM_OK; + } + } + /* Install connection complete handler. */ + fd_note(sock, entry->url); + comm_set_select_handler(sock, COMM_SELECT_LIFETIME, + (PF) httpLifetimeExpire, (caddr_t) data); + comm_set_select_handler(sock, COMM_SELECT_WRITE, + (PF) httpSendRequest, (caddr_t) data); + return COMM_OK; + +} + +int httpStart(unusedfd, url, type, mime_hdr, entry) + int unusedfd; + char *url; + char *type; + char *mime_hdr; + StoreEntry *entry; +{ + /* Create state structure. */ + int sock, status; + HttpData *data = (HttpData *) xmalloc(sizeof(HttpData)); + + debug(3, "httpStart: %s \n", type, url); + debug(10, "httpStart: mime_hdr '%s'\n", mime_hdr); + + memset(data, '\0', sizeof(HttpData)); + data->entry = entry; + data->type = type; + data->mime_hdr = mime_hdr; + + /* Parse url. */ + if (http_url_parser(url, data->host, &data->port, data->request)) { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 110, + "Invalid URL syntax: Cannot parse.", + "Contact your system administrator for further help.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_110", /* HTTP INVALID URL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* Create socket. 
*/ + sock = comm_open(COMM_NONBLOCKING, 0, 0, url); + if (sock == COMM_ERROR) { + debug(4, "httpStart: Failed because we're out of sockets.\n"); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 111, + "Cached short of file-descriptors, sorry", + "", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_111", /* HTTP NO FD'S */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* check if IP is already in cache. It must be. + * It should be done before this route is called. + * Otherwise, we cannot check return code for connect. */ + if (!ipcache_gethostbyname(data->host)) { + debug(4, "httpstart: Called without IP entry in ipcache. OR lookup failed.\n"); + comm_close(sock); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 108, + "DNS name lookup failure", + dns_error_message, + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_108", /* HTTP DNS FAIL */ + data->type ? data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* Open connection. */ + if ((status = comm_connect(sock, data->host, data->port))) { + if (status != EINPROGRESS) { + comm_close(sock); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "HTTP", + 109, + "Cannot connect to the original site", + "The remote site may be down.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_109", /* HTTP CONNECT FAIL */ + data->type ? 
data->type : "NULL"); +#endif + safe_free(data); + return COMM_ERROR; + } else { + debug(5, "httpStart: FD %d: EINPROGRESS.\n", sock); + comm_set_select_handler(sock, COMM_SELECT_LIFETIME, + (PF) httpLifetimeExpire, (caddr_t) data); + comm_set_select_handler(sock, COMM_SELECT_WRITE, + (PF) httpConnInProgress, (caddr_t) data); + return COMM_OK; + } + } + /* Install connection complete handler. */ + fd_note(sock, entry->url); + comm_set_select_handler(sock, COMM_SELECT_LIFETIME, + (PF) httpLifetimeExpire, (caddr_t) data); + comm_set_select_handler(sock, COMM_SELECT_WRITE, + (PF) httpSendRequest, (caddr_t) data); + return COMM_OK; +} diff --git a/src/ipcache.cc b/src/ipcache.cc new file mode 100644 index 00000000000..4fdcb4dcddb --- /dev/null +++ b/src/ipcache.cc @@ -0,0 +1,1596 @@ +static char rcsid[] = "$Id: ipcache.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: ipcache.c + * Description: ip address cache. speed up gethostbyname() + * Author: Anawat Chankhunthod + * Created: + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. 
+ * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. 
In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "ansihelp.h" /* goes first */ +#include "debug.h" +#include "comm.h" +#include "fdstat.h" +#include "icp.h" +#include "cache_cf.h" +#include "ipcache.h" +#include "autoconf.h" +#include "stat.h" +#include "hash.h" +#include "util.h" + +struct hostent *gethostbyname(); +int urlcmp _PARAMS((char *s1, char *s2)); + +#define MAX_LINELEN (4096) +char ipcache_status_char _PARAMS((ipcache_entry *)); +int ipcache_hash_entry_count(); + +#define IP_POS_TTL 86400 /* one day */ +#define IP_NEG_TTL 120 /* 2 minutes */ +#define MAX_IP 1024 /* Maximum cached IP */ +#define IP_LOW_WATER 70 +#define IP_HIGH_WATER 90 +#define MAX_HOST_NAME 256 +#define IP_INBUF 4096 + +long ipcache_low = 180; +long ipcache_high = 200; + +typedef struct _ip_pending { + int fd; + IPH handler; + caddr_t data; + struct _ip_pending *next; +} IpPending; + + +typedef struct _ipcache_list { + ipcache_entry *entry; + struct _ipcache_list *next; +} ipcache_list; + + +typedef struct _dnsserver_entry { + int id; + int alive; + int inpipe; + int outpipe; + int pending_count; /* counter of outstanding request */ + long lastcall; + long answer; + unsigned int offset; + unsigned int size; + char *ip_inbuf; + /* global ipcache_entry list for pending entry */ + ipcache_list *global_pending; + ipcache_list *global_pending_tail; +} 
dnsserver_entry; + +typedef struct _line_entry { + char *line; + struct _line_entry *next; +} line_entry; + +#define TEST_SITE 5 +static char *test_site[TEST_SITE] = +{ + "internic.net", + "usc.edu", + "cs.colorado.edu", + "mit.edu", + "yale.edu" +}; + +static char w_space[] = " \t\n"; +static dnsserver_entry **dns_child_table = NULL; +static int last_dns_dispatched = 2; +static struct hostent *static_result = NULL; +#ifdef USE_DNS_PIPE +static int dnspipe[2]; +#endif +static int dns_child_alive = 0; +static int ipcache_initialized = 0; + +char *dns_error_message = NULL; /* possible error message */ +HashID ip_table = 0; + +extern int do_dns_test; +extern time_t cached_curtime; +extern int getMaxFD(); +extern int getDnsChildren(); +extern void fatal_dump _PARAMS((char *)); + +void update_dns_child_alive() +{ + int i; + + dns_child_alive = 0; + for (i = 0; i < getDnsChildren(); ++i) { + if (dns_child_table[i]->alive) { + dns_child_alive = 1; + break; + } + } +} + +int ipcache_testname() +{ + int success, i; + + for (success = i = 0; i < TEST_SITE; i++) { + if (gethostbyname(test_site[i]) != NULL) + ++success; + } + return (success == 0) ? 
-1 : 0; +} + + +#ifdef USE_DNS_PIPE +/* open a process and pipes to it */ +int ipcache_openpipe(dnspipe, command) + int dnspipe[2]; + char *command; +{ + int childpid, pipe1[2], pipe2[2]; + + if (pipe(pipe1) < 0 || pipe(pipe2) < 0) { + debug(0, "ipcache_openpipe: pipe failure: %s\n", xstrerror()); + return -1; + } + if ((childpid = fork()) < 0) { + debug(0, "ipcache_openpipe: fork failure: %s\n", xstrerror()); + return -1; + } else if (childpid > 0) { /* parent */ + close(pipe1[1]); /* read from pipe 1 */ + close(pipe2[0]); /* write to pipe 2 */ + + /* return file descriptor */ + dnspipe[0] = pipe1[0]; /* read file descriptor */ + dnspipe[1] = pipe2[1]; /* write file descriptor */ + + fcntl(dnspipe[0], F_SETFD, 1); /* set close-on-exec */ + fcntl(dnspipe[1], F_SETFD, 1); /* set close-on-exec */ + + return 0; + + } else { /* child */ + int fd; + + close(pipe1[0]); /* write to pipe 1 */ + close(pipe2[1]); /* read from pipe 2 */ + + dup2(pipe1[1], 1); /* point stdout to pipe1 */ + dup2(pipe2[0], 0); /* point stdin to pipe2 */ + for (fd = 3; fd < getMaxFD(); fd++) + (void) close(fd); + + /* use a dummy argument 0 */ + (void) execlp(command, "(dnsserver)", (char *) NULL); + perror(command); + _exit(-1); + } + /* NOTREACHED */ +} + + +#else /* USE_DNS_PIPE */ + + +/* + * open a UNIX domain socket for rendevouing with dnsservers + */ +int ipcache_create_dnsserver(command) + char *command; +{ + int pid; + struct sockaddr_un addr; + static int n_dnsserver = 0; + char socketname[256]; + int cfd; /* socket for child (dnsserver) */ + int sfd; /* socket for server (cached) */ + int fd; + + if ((cfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + debug(0, "ipcache_create_dnsserver: socket: %s\n", xstrerror()); + return -1; + } + memset(&addr, '\0', sizeof(addr)); + addr.sun_family = AF_UNIX; + sprintf(socketname, "dns/dns%d.%d", (int) getpid(), n_dnsserver++); + strcpy(addr.sun_path, socketname); + debug(4, "ipcache_create_dnsserver: path is %s\n", addr.sun_path); + + if (bind(cfd, 
(struct sockaddr *) &addr, sizeof(addr)) < 0) { + close(cfd); + debug(0, "ipcache_create_dnsserver: bind: %s\n", xstrerror()); + return -1; + } + debug(4, "ipcache_create_dnsserver: bind to local host.\n"); + listen(cfd, 1); + + if ((pid = fork()) < 0) { + debug(0, "ipcache_create_dnsserver: fork: %s\n", xstrerror()); + close(cfd); + return -1; + } + if (pid > 0) { /* parent */ + close(cfd); /* close shared socket with child */ + + /* open new socket for parent process */ + if ((sfd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { + debug(0, "ipcache_create_dnsserver: socket: %s\n", xstrerror()); + return -1; + } + fcntl(sfd, F_SETFD, 1); /* set close-on-exec */ + memset(&addr, '\0', sizeof(addr)); + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, socketname); + if (connect(sfd, (struct sockaddr *) &addr, sizeof(addr)) < 0) { + close(sfd); + debug(0, "ipcache_create_dnsserver: connect: %s\n", xstrerror()); + return -1; + } + debug(4, "ipcache_create_dnsserver: FD %d connected to %s #%d.\n", + sfd, command, n_dnsserver); + return sfd; + } + /* child */ + + dup2(cfd, 3); + for (fd = 4; fd < getMaxFD(); fd++) { + (void) close(fd); + } + + execlp(command, "(dnsserver)", "-p", socketname, NULL); + perror(command); + _exit(1); + /* NOTREACHED */ +} + +#endif /* else USE_DNS_PIPE */ + +/* removes the given ipcache entry */ +int ipcache_release(e) + ipcache_entry *e; +{ + ipcache_entry *result = 0; + int i; + + debug(5, "ipcache_release: ipcache_count before: %d \n", meta_data.ipcache_count); + + if (e != NULL && ip_table) { /* sometimes called with NULL e */ + hash_link *table_entry = hash_lookup(ip_table, e->name); + if (table_entry) { + result = (ipcache_entry *) table_entry; + debug(5, "HASH table count before delete: %d\n", ipcache_hash_entry_count()); + if (hash_remove_link(ip_table, table_entry)) { + debug(3, "ipcache_release: Cannot delete '%s' from hash table %d\n", e->name, ip_table); + } + debug(5, "HASH table count after delete: %d\n", ipcache_hash_entry_count()); 
+ if (result) { + if (result->status == PENDING) { + debug(1, "ipcache_release: Try to release entry with PENDING status. ignored.\n"); + debug(5, "ipcache_release: ipcache_count: %d \n", meta_data.ipcache_count); + return -1; + } + if (result->status == CACHED) { + if (result->addr_count) + for (i = 0; i < (int) result->addr_count; i++) + safe_free(result->entry.h_addr_list[i]); + if (result->entry.h_addr_list) + safe_free(result->entry.h_addr_list); + if (result->alias_count) + for (i = 0; i < (int) result->alias_count; i++) + safe_free(result->entry.h_aliases[i]); + if (result->entry.h_aliases) + safe_free(result->entry.h_aliases); + safe_free(result->entry.h_name); + debug(5, "ipcache_release: Released IP cached record for '%s'.\n", e->name); + } + /* XXX: we're having mem mgmt problems; zero, then free */ + safe_free(result->name); + memset(result, '\0', sizeof(ipcache_entry)); + safe_free(result); + } + --meta_data.ipcache_count; + debug(5, "ipcache_release: ipcache_count when return: %d \n", meta_data.ipcache_count); + return meta_data.ipcache_count; + } + } + debug(3, "ipcache_release: can't delete entry\n"); + return -1; /* can't delete entry */ +} + +/* return match for given name */ +ipcache_entry *ipcache_get(name) + char *name; +{ + hash_link *e; + static ipcache_entry *result; + + result = NULL; + if (ip_table) { + if ((e = hash_lookup(ip_table, name)) != NULL) + result = (ipcache_entry *) e; + } + if (result == NULL) + return NULL; + + if (((result->timestamp + result->ttl) < cached_curtime) && + (result->status != PENDING)) { /* expired? 
*/ + ipcache_release(result); + return NULL; + } + return result; +} + + +/* get the first ip entry in the storage */ +ipcache_entry *ipcache_GetFirst() +{ + static hash_link *entryPtr; + + if ((!ip_table) || ((entryPtr = hash_first(ip_table)) == NULL)) + return NULL; + return ((ipcache_entry *) entryPtr); +} + + +/* get the next ip entry in the storage for a given search pointer */ +ipcache_entry *ipcache_GetNext() +{ + static hash_link *entryPtr; + + if ((!ip_table) || ((entryPtr = hash_next(ip_table)) == NULL)) + return NULL; + return ((ipcache_entry *) entryPtr); +} + +int ipcache_compareLastRef(e1, e2) + ipcache_entry **e1, **e2; +{ + if (!e1 || !e2) + fatal_dump(NULL); + + if ((*e1)->lastref > (*e2)->lastref) + return (1); + + if ((*e1)->lastref < (*e2)->lastref) + return (-1); + + return (0); +} + + + +/* finds the LRU and deletes */ +int ipcache_purgelru() +{ + ipcache_entry *e; + int local_ip_count = 0; + int local_ip_notpending_count = 0; + int removed = 0; + int i; + ipcache_entry **LRU_list; + int LRU_list_count = 0; + int LRU_cur_size = meta_data.ipcache_count; + + LRU_list = (ipcache_entry **) xcalloc(LRU_cur_size, sizeof(ipcache_entry *)); + + e = NULL; + + for (e = ipcache_GetFirst(); e; e = ipcache_GetNext()) { + local_ip_count++; + + if (LRU_list_count >= LRU_cur_size) { + /* have to realloc */ + LRU_cur_size += 16; + debug(3, "ipcache_purgelru: Have to grow LRU_list to %d. 
This shouldn't happen.\n", + LRU_cur_size); + LRU_list = (ipcache_entry **) xrealloc((char *) LRU_list, + LRU_cur_size * sizeof(ipcache_entry *)); + } + if ((e->status != PENDING) && (e->pending_head == NULL)) { + local_ip_notpending_count++; + LRU_list[LRU_list_count++] = e; + } + } + + debug(3, "ipcache_purgelru: ipcache_count: %5d\n", meta_data.ipcache_count); + debug(3, " actual count : %5d\n", local_ip_count); + debug(3, " high W mark : %5d\n", ipcache_high); + debug(3, " low W mark : %5d\n", ipcache_low); + debug(3, " not pending : %5d\n", local_ip_notpending_count); + debug(3, " LRU candidated : %5d\n", LRU_list_count); + + /* sort LRU candidate list */ + qsort((char *) LRU_list, LRU_list_count, sizeof(e), (int (*)(const void *, const void *)) ipcache_compareLastRef); + + for (i = 0; LRU_list[i] && (meta_data.ipcache_count > ipcache_low) + && i < LRU_list_count; + ++i) { + ipcache_release(LRU_list[i]); + removed++; + } + + debug(3, " removed : %5d\n", removed); + safe_free(LRU_list); + return (removed > 0) ? 0 : -1; +} + + +/* create blank ipcache_entry */ +ipcache_entry *ipcache_create() +{ + static ipcache_entry *ipe; + static ipcache_entry *new; + debug(5, "ipcache_create: when enter. ipcache_count == %d\n", meta_data.ipcache_count); + + if (meta_data.ipcache_count > ipcache_high) { + if (ipcache_purgelru() < 0) { + debug(1, "ipcache_create: Cannot release needed IP entry via LRU: %d > %d, removing first entry...\n", meta_data.ipcache_count, MAX_IP); + ipe = ipcache_GetFirst(); + if (!ipe) { + debug(1, "ipcache_create: First entry is a null pointer ???\n"); + /* have to let it grow beyond limit here */ + } else if (ipe && ipe->status != PENDING) { + ipcache_release(ipe); + } else { + debug(1, "ipcache_create: First entry is also PENDING entry.\n"); + /* have to let it grow beyond limit here */ + } + } + } + meta_data.ipcache_count++; + debug(5, "ipcache_create: before return. 
ipcache_count == %d\n", meta_data.ipcache_count); + new = (ipcache_entry *) xcalloc(1, sizeof(ipcache_entry)); + /* set default to 4, in case parser fail to get token $h_length from + * dnsserver. */ + new->entry.h_length = 4; + return new; + +} + +void ipcache_add_to_hash(e) + ipcache_entry *e; +{ + if (!ipcache_initialized) + ipcache_init(); + if (hash_join(ip_table, (hash_link *) e)) { + debug(1, "ipcache_add_to_hash: Cannot add %s (%p) to hash table %d.\n", + e->name, e, ip_table); + } + debug(5, "ipcache_add_to_hash: name <%s>\n", e->name); + debug(5, " ipcache_count: %d\n", meta_data.ipcache_count); +} + + +void ipcache_add(name, e, data, cached) + char *name; + ipcache_entry *e; + struct hostent *data; + int cached; +{ + int addr_count, alias_count, i; + + debug(10, "ipcache_add: Adding name '%s' (%s).\n", name, + cached ? "cached" : "not cached"); + + e->name = xstrdup(name); + if (cached) { + + /* count for IPs */ + addr_count = 0; + while ((addr_count < 255) && data->h_addr_list[addr_count]) + ++addr_count; + + e->addr_count = addr_count; + + /* count for Alias */ + alias_count = 0; + if (data->h_aliases) + while ((alias_count < 255) && data->h_aliases[alias_count]) + ++alias_count; + + e->alias_count = alias_count; + + /* copy ip addresses information */ + e->entry.h_addr_list = (char **) xcalloc(addr_count + 1, sizeof(char *)); + for (i = 0; i < addr_count; i++) { + e->entry.h_addr_list[i] = (char *) xcalloc(1, data->h_length); + memcpy(e->entry.h_addr_list[i], data->h_addr_list[i], data->h_length); + } + + if (alias_count) { + /* copy aliases information */ + e->entry.h_aliases = (char **) xcalloc(alias_count + 1, sizeof(char *)); + for (i = 0; i < alias_count; i++) { + e->entry.h_aliases[i] = (char *) xcalloc(1, strlen(data->h_aliases[i]) + 1); + strcpy(e->entry.h_aliases[i], data->h_aliases[i]); + } + } + e->entry.h_length = data->h_length; + e->entry.h_name = xstrdup(data->h_name); + e->lastref = e->timestamp = cached_curtime; + e->status = CACHED; 
+ e->ttl = IP_POS_TTL; + } else { + e->lastref = e->timestamp = cached_curtime; + e->status = NEGATIVE_CACHED; + e->ttl = IP_NEG_TTL; + } + + ipcache_add_to_hash(e); +} + + +/* exactly the same to ipcache_add, + * except it does NOT + * - create entry->name (assume it's there already.) + * - add the entry to the hash (it's should be in hash table already.). + * + * Intend to be used by ipcache_cleanup_pendinglist. + */ +void ipcache_update_content(name, e, data, cached) + char *name; + ipcache_entry *e; + struct hostent *data; + int cached; +{ + int addr_count, alias_count, i; + + debug(10, "ipcache_update: Updating name '%s' (%s).\n", name, + cached ? "cached" : "not cached"); + + if (cached) { + + /* count for IPs */ + addr_count = 0; + while ((addr_count < 255) && data->h_addr_list[addr_count]) + ++addr_count; + + e->addr_count = addr_count; + + /* count for Alias */ + alias_count = 0; + while ((alias_count < 255) && data->h_aliases[alias_count]) + ++alias_count; + + e->alias_count = alias_count; + + /* copy ip addresses information */ + e->entry.h_addr_list = (char **) xcalloc(addr_count + 1, sizeof(char *)); + for (i = 0; i < addr_count; i++) { + e->entry.h_addr_list[i] = (char *) xcalloc(1, data->h_length); + memcpy(e->entry.h_addr_list[i], data->h_addr_list[i], data->h_length); + } + + /* copy aliases information */ + e->entry.h_aliases = (char **) xcalloc(alias_count + 1, sizeof(char *)); + for (i = 0; i < alias_count; i++) { + e->entry.h_aliases[i] = (char *) xcalloc(1, strlen(data->h_aliases[i]) + 1); + strcpy(e->entry.h_aliases[i], data->h_aliases[i]); + } + + e->entry.h_length = data->h_length; + e->entry.h_name = xstrdup(data->h_name); + e->lastref = e->timestamp = cached_curtime; + e->status = CACHED; + e->ttl = IP_POS_TTL; + } else { + e->lastref = e->timestamp = cached_curtime; + e->status = NEGATIVE_CACHED; + e->ttl = IP_NEG_TTL; + } + +} + + + +/* walks down the pending list, calling handlers */ +void ipcache_call_pending(entry) + ipcache_entry 
*entry; +{ + IpPending *p; + int nhandler = 0; + + entry->lastref = cached_curtime; + + while (entry->pending_head != NULL) { + p = entry->pending_head; + entry->pending_head = entry->pending_head->next; + if (entry->pending_head == NULL) + entry->pending_tail = NULL; + if (p->handler != NULL) { + nhandler++; + p->handler(p->fd, (entry->status == CACHED) ? + &(entry->entry) : NULL, p->data); + } + memset(p, '\0', sizeof(IpPending)); + safe_free(p); + } + entry->pending_head = entry->pending_tail = NULL; /* nuke list */ + debug(10, "ipcache_call_pending: Called %d handlers.\n", nhandler); +} + +void ipcache_call_pending_badname(fd, handler, data) + int fd; + IPH handler; + caddr_t data; +{ + debug(4, "ipcache_call_pending_badname: Bad Name: Calling handler with NULL result.\n"); + handler(fd, NULL, data); +} + + +/* call when dnsserver is broken, have to switch to blocking mode. + * All pending lookup will be looked up by blocking call. + */ +int ipcache_cleanup_pendinglist(data) + dnsserver_entry *data; +{ + ipcache_list *p; + struct hostent *s_result = NULL; + + while (data->global_pending != NULL) { + s_result = gethostbyname(data->global_pending->entry->name); + ipcache_update_content(data->global_pending->entry->name, + data->global_pending->entry, s_result, s_result ? 
1 : 0); + ipcache_call_pending(data->global_pending->entry); + p = data->global_pending; + data->global_pending = data->global_pending->next; + /* XXX: we're having mem mgmt problems; zero, then free */ + memset(p, '\0', sizeof(ipcache_list)); + safe_free(p); + } + data->global_pending = data->global_pending_tail = NULL; /* nuke */ + return 0; +} + +/* free all lines in the list */ +void free_lines(line) + line_entry *line; +{ + line_entry *tmp; + + while (line) { + tmp = line; + line = line->next; + safe_free(tmp->line); + safe_free(tmp); + } +} + +/* return entry in global pending list that has entry which key match to name */ +ipcache_list *globalpending_search(name, global_pending) + char *name; + ipcache_list *global_pending; +{ + static ipcache_list *p; + + if (name == NULL) + return NULL; + + for (p = global_pending; p != NULL; p = p->next) { + /* XXX: this is causing core dumps! p->entry is corrupt */ + if (p->entry && p->entry->name && + strcmp(p->entry->name, name) == 0) { + return p; + } + } + return NULL; + +} + +/* remove entry from global pending list */ +void globalpending_remove(p, data) + ipcache_list *p; + dnsserver_entry *data; +{ + ipcache_list *q, *r; + + r = q = data->global_pending; + while (q && (p != q)) { + r = q; /* r is the node before the one to kill */ + q = q->next; /* q (and 'p') is the node to kill */ + } + + if (q == NULL) { /* 'p' is not in the list? */ + debug(1, "globalpending_remove: Failure while deleting entry from global pending list.\n"); + return; + } + /* nuke p from the list; do this carefully... 
*/ + if (p == data->global_pending) { /* p is head */ + if (p->next != NULL) { /* nuke head */ + data->global_pending = p->next; + } else { /* nuke whole list */ + data->global_pending = NULL; + data->global_pending_tail = NULL; + } + } else if (p == data->global_pending_tail) { /* p is tail */ + data->global_pending_tail = r; /* tail is prev */ + data->global_pending_tail->next = NULL; /* last node */ + } else { /* p in middle */ + r->next = p->next; + } + + /* we need to delete all references to p */ + /* XXX: we're having mem mgmt probs; zero then free DRH */ + memset(p, '\0', sizeof(ipcache_list)); + /* XXX: what about freeing p->entry? DRH */ + safe_free(p); + + if (data->pending_count > 0) + data->pending_count--; + +} + +/* scan through buffer and do a conversion if possible + * return number of char used */ +int ipcache_parsebuffer(buf, offset, data) + char *buf; + unsigned int offset; + dnsserver_entry *data; +{ + char *pos = NULL; + char *tpos = NULL; + char *endpos = NULL; + char *token = NULL; + char *tmp_ptr = NULL; + line_entry *line_head = NULL; + line_entry *line_tail = NULL; + line_entry *line_cur = NULL; + ipcache_list *plist = NULL; + + *dns_error_message = '\0'; + + pos = buf; + while (pos < (buf + offset)) { + + /* no complete record here */ + if ((endpos = strstr(pos, "$end\n")) == NULL) { + debug(2, "ipcache_parsebuffer: DNS response incomplete.\n"); + break; + } + line_head = line_tail = NULL; + + while (pos < endpos) { + /* add the next line to the end of the list */ + line_cur = (line_entry *) xcalloc(1, sizeof(line_entry)); + + if ((tpos = memchr(pos, '\n', 4096)) == NULL) { + debug(2, "ipcache_parsebuffer: DNS response incomplete.\n"); + return -1; + } + *tpos = '\0'; + line_cur->line = xstrdup(pos); + debug(7, "ipcache_parsebuffer: %s\n", line_cur->line); + *tpos = '\n'; + + if (line_tail) + line_tail->next = line_cur; + if (line_head == NULL) + line_head = line_cur; + line_tail = line_cur; + line_cur = NULL; + + /* update pointer */ + 
pos = tpos + 1; + } + pos = endpos + 5; /* strlen("$end\n") */ + + /* + * At this point, the line_head is a linked list with each + * link node containing another line of the DNS response. + * Start parsing... + */ + if (strstr(line_head->line, "$alive")) { + data->answer = cached_curtime; + free_lines(line_head); + debug(10, "ipcache_parsebuffer: $alive succeeded.\n"); + } else if (strstr(line_head->line, "$fail")) { + /* + * The $fail messages look like: + * $fail host\n$message msg\n$end\n + */ + token = strtok(line_head->line, w_space); /* skip first token */ + token = strtok(NULL, w_space); + + line_cur = line_head->next; + if (line_cur && !strncmp(line_cur->line, "$message", 8)) { + strcpy(dns_error_message, line_cur->line + 8); + } + if (token == NULL) { + debug(1, "ipcache_parsebuffer: Invalid $fail for DNS table?\n"); + } else { + plist = globalpending_search(token, data->global_pending); + if (plist) { + plist->entry->lastref = plist->entry->timestamp = cached_curtime; + plist->entry->ttl = IP_NEG_TTL; + plist->entry->status = NEGATIVE_CACHED; + ipcache_call_pending(plist->entry); + globalpending_remove(plist, data); + debug(10, "ipcache_parsebuffer: $fail succeeded: %s.\n", + dns_error_message[0] ? 
dns_error_message : "why?"); + } else { + debug(1, "ipcache_parsebuffer: No entry in DNS table?\n"); + } + } + free_lines(line_head); + } else if (strstr(line_head->line, "$name")) { + tmp_ptr = line_head->line; + /* skip the first token */ + token = strtok(tmp_ptr, w_space); + tmp_ptr = NULL; + token = strtok(tmp_ptr, w_space); + if (!token) { + debug(1, "ipcache_parsebuffer: Invalid OPCODE for DNS table?\n"); + } else { + plist = globalpending_search(token, data->global_pending); + if (plist) { + int ipcount, aliascount; + ipcache_entry *e = plist->entry; + + if (e->status != PENDING) { + debug(4, "ipcache_parsebuffer: DNS record already resolved.\n"); + } else { + e->lastref = e->timestamp = cached_curtime; + e->ttl = IP_POS_TTL; + e->status = CACHED; + + line_cur = line_head->next; + + /* get $h_name */ + if (line_cur == NULL || + !strstr(line_cur->line, "$h_name")) { + debug(1, "ipcache_parsebuffer: DNS record in invalid format? No $h_name.\n"); + /* abandon this record */ + break; + } + tmp_ptr = line_cur->line; + /* skip the first token */ + token = strtok(tmp_ptr, w_space); + tmp_ptr = NULL; + token = strtok(tmp_ptr, w_space); + e->entry.h_name = xstrdup(token); + + line_cur = line_cur->next; + + /* get $h_length */ + if (line_cur == NULL || + !strstr(line_cur->line, "$h_len")) { + debug(1, "ipcache_parsebuffer: DNS record in invalid format? No $h_len.\n"); + /* abandon this record */ + break; + } + tmp_ptr = line_cur->line; + /* skip the first token */ + token = strtok(tmp_ptr, w_space); + tmp_ptr = NULL; + token = strtok(tmp_ptr, w_space); + e->entry.h_length = atoi(token); + + line_cur = line_cur->next; + + /* get $ipcount */ + if (line_cur == NULL || + !strstr(line_cur->line, "$ipcount")) { + debug(1, "ipcache_parsebuffer: DNS record in invalid format? 
No $ipcount.\n"); + /* abandon this record */ + break; + } + tmp_ptr = line_cur->line; + /* skip the first token */ + token = strtok(tmp_ptr, w_space); + tmp_ptr = NULL; + token = strtok(tmp_ptr, w_space); + e->addr_count = ipcount = atoi(token); + + if (ipcount == 0) { + e->entry.h_addr_list = NULL; + } else { + e->entry.h_addr_list = (char **) xcalloc(ipcount, sizeof(char *)); + } + + /* get ip addresses */ + { + int i = 0; + line_cur = line_cur->next; + while (i < ipcount) { + if (line_cur == NULL) { + debug(1, "ipcache_parsebuffer: DNS record in invalid format? No $ipcount data.\n"); + break; + } + e->entry.h_addr_list[i] = (char *) xcalloc(1, e->entry.h_length); + *((unsigned long *) e->entry.h_addr_list[i]) = inet_addr(line_cur->line); + line_cur = line_cur->next; + i++; + } + } + + /* get $aliascount */ + if (line_cur == NULL || + !strstr(line_cur->line, "$aliascount")) { + debug(1, "ipcache_parsebuffer: DNS record in invalid format? No $aliascount.\n"); + /* abandon this record */ + break; + } + tmp_ptr = line_cur->line; + /* skip the first token */ + token = strtok(tmp_ptr, w_space); + tmp_ptr = NULL; + token = strtok(tmp_ptr, w_space); + e->alias_count = aliascount = atoi(token); + + if (aliascount == 0) { + e->entry.h_aliases = NULL; + } else { + e->entry.h_aliases = (char **) xcalloc(aliascount, sizeof(char *)); + } + + /* get aliases */ + { + int i = 0; + line_cur = line_cur->next; + while (i < aliascount) { + if (line_cur == NULL) { + debug(1, "ipcache_parsebuffer: DNS record in invalid format? 
No $aliascount data.\n"); + break; + } + e->entry.h_aliases[i] = xstrdup(line_cur->line); + line_cur = line_cur->next; + i++; + } + } + + ipcache_call_pending(e); + globalpending_remove(plist, data); + debug(10, "ipcache_parsebuffer: $name succeeded.\n"); + } + } else { + debug(1, "ipcache_parsebuffer: No entries in DNS $name record?\n"); + } + } + free_lines(line_head); + } else { + free_lines(line_head); + debug(1, "ipcache_parsebuffer: Invalid OPCODE for DNS table?\n"); + return -1; + } + } + return (int) (pos - buf); +} + + +int ipcache_dnsHandleRead(fd, data) + int fd; + dnsserver_entry *data; +{ + int char_scanned; + int len = read(fd, data->ip_inbuf + data->offset, data->size - data->offset); + + debug(5, "ipcache_dnsHandleRead: Result from DNS ID %d.\n", data->id); + + if (len == 0) { + debug(1, "ipcache_dnsHandleRead: Connection from DNSSERVER is closed.\n"); + debug(1, " Disabling this server ID %d.\n", data->id); + data->alive = 0; + update_dns_child_alive(); + ipcache_cleanup_pendinglist(data); + return 0; + } + data->offset += len; + data->ip_inbuf[data->offset] = '\0'; + + if (strstr(data->ip_inbuf, "$end\n")) { + /* end of record found */ + char_scanned = ipcache_parsebuffer(data->ip_inbuf, data->offset, data); + if (char_scanned > 0) { + /* update buffer */ + memcpy(data->ip_inbuf, data->ip_inbuf + char_scanned, data->offset - char_scanned); + data->offset -= char_scanned; + data->ip_inbuf[data->offset] = '\0'; + } + } + /* reschedule */ + comm_set_select_handler(data->inpipe, COMM_SELECT_READ, + (PF) ipcache_dnsHandleRead, (caddr_t) data); + return 0; +} + +int ipcache_nbgethostbyname(name, fd, handler, data) + char *name; + int fd; + IPH handler; + caddr_t data; +{ + ipcache_entry *e; + IpPending *pending; + dnsserver_entry *dns; + + debug(4, "ipcache_nbgethostbyname: FD %d: Name '%s'.\n", fd, name); + + if (name == NULL || name[0] == '\0') { + debug(4, "ipcache_nbgethostbyname: Invalid name!\n"); + ipcache_call_pending_badname(fd, handler, data); 
+ return 0; + } + if ((e = ipcache_get(name)) != NULL && (e->status != PENDING)) { + /* hit here */ + debug(4, "ipcache_nbgethostbyname: Hit for name '%s'.\n", name); + pending = (IpPending *) xcalloc(1, sizeof(IpPending)); + pending->fd = fd; + pending->handler = handler; + pending->data = data; + pending->next = NULL; + if (e->pending_head == NULL) { /* empty list */ + e->pending_head = e->pending_tail = pending; + } else { /* add to tail of list */ + e->pending_tail->next = pending; + e->pending_tail = e->pending_tail->next; + } + ipcache_call_pending(e); + return 0; + } + debug(4, "ipcache_nbgethostbyname: Name '%s': MISS or PENDING.\n", name); + + pending = (IpPending *) xcalloc(1, sizeof(IpPending)); + pending->fd = fd; + pending->handler = handler; + pending->data = data; + pending->next = NULL; + if (e == NULL) { + /* No entry, create the new one */ + debug(5, "ipcache_nbgethostbyname: Creating new entry for '%s'...\n", + name); + e = ipcache_create(); + e->name = xstrdup(name); + e->status = PENDING; + e->pending_tail = e->pending_head = pending; + ipcache_add_to_hash(e); + } else { + /* There is an entry. 
Add handler to list */ + debug(5, "ipcache_nbgethostbyname: Adding handler to pending list for '%s'.\n", name); + if (e->pending_head == NULL) { /* empty list */ + e->pending_head = e->pending_tail = pending; + } else { /* add to tail of list */ + e->pending_tail->next = pending; + e->pending_tail = e->pending_tail->next; + } + return 0; + } + + if (dns_child_alive) { + int i, j, min_dns = 0, min_count = 255, alive = 0; + + j = last_dns_dispatched; + /* select DNS server with the lowest number of pending */ + for (i = 0; i < getDnsChildren(); ++i) { + j += 1; + j %= getDnsChildren(); + if ((dns_child_table[j]->alive) && + (dns_child_table[j]->pending_count < min_count)) { + min_dns = j; + min_count = dns_child_table[j]->pending_count; + } + alive = dns_child_table[j]->alive | alive; + } + + if (alive == 0) { + dns_child_alive = 0; /* all dead */ + last_dns_dispatched = 0; /* use entry 0 */ + } else { + last_dns_dispatched = min_dns; + } + } else { + last_dns_dispatched = 0; + } + + dns = dns_child_table[last_dns_dispatched]; + debug(5, "ipcache_nbgethostbyname: Dispatched DNS %d.\n", + last_dns_dispatched); + + /* add to global pending list */ + if (dns->global_pending == NULL) { /* new list */ + dns->global_pending = (ipcache_list *) xcalloc(1, sizeof(ipcache_list)); + dns->global_pending->entry = e; + dns->global_pending->next = NULL; + dns->global_pending_tail = dns->global_pending; + } else { /* add to end of list */ + ipcache_list *p = (ipcache_list *) xcalloc(1, sizeof(ipcache_list)); + p->entry = e; + p->next = NULL; + dns->global_pending_tail->next = p; + dns->global_pending_tail = dns->global_pending_tail->next; + } + + if (dns_child_alive) { + char *buf = (char *) xcalloc(1, 256); + strncpy(buf, name, 254); + strcat(buf, "\n"); + dns->pending_count++; + file_write(dns->outpipe, + buf, + strlen(buf), + 0, /* Lock */ + 0, /* Handler */ + 0); /* Handler-data */ + + debug(5, "ipcache_nbgethostbyname: Request sent DNS server ID %d.\n", last_dns_dispatched); + 
} else { + /* do a blocking mode */ + debug(4, "ipcache_nbgethostbyname: Fall back to blocking mode. Server's dead...\n"); + ipcache_cleanup_pendinglist(dns); + } + return 0; +} + + +/* initialize the ipcache */ +void ipcache_init() +{ + int i, dnssocket; + char fd_note_buf[FD_ASCII_NOTE_SZ]; + + debug(3, "ipcache_init: Called. ipcache_initialized=%d getDnsChildren()=%d\n", ipcache_initialized, getDnsChildren()); + + if (ipcache_initialized) + return; + +#ifndef USE_DNS_PIPE + if (mkdir("dns", 0755) < 0 && errno != EEXIST) { + debug(0, "ipcache_init: mkdir %s\n", xstrerror()); + } +#endif + last_dns_dispatched = getDnsChildren() - 1; + dns_error_message = xcalloc(1, 256); + + /* test naming lookup */ + if (!do_dns_test) { + debug(4, "ipcache_init: Skipping DNS name lookup tests, -D flag given.\n"); + } else if (ipcache_testname() < 0) { + debug(0, "ipcache_init: DNS name lookup appears to be broken on this machine.\n"); + fprintf(stderr, "ipcache_init: DNS name lookup appears to be broken on this machine.\n"); + exit(-1); + } else { + debug(4, "ipcache_init: Successful DNS name lookup tests...\n"); + } + + ip_table = hash_create(urlcmp, 229); /* small hash table */ + /* init static area */ + static_result = (struct hostent *) xcalloc(1, sizeof(struct hostent)); + static_result->h_length = 4; + /* Need a terminating NULL address (h_addr_list[1]) */ + static_result->h_addr_list = (char **) xcalloc(2, sizeof(char *)); + static_result->h_addr_list[0] = (char *) xcalloc(1, 4); + static_result->h_name = (char *) xcalloc(1, MAX_HOST_NAME + 1); + /* start up companion process */ + dns_child_table = (dnsserver_entry **) xcalloc(getDnsChildren(), sizeof(dnsserver_entry)); + dns_child_alive = 0; + debug(1, "ipcache_init: Starting %d 'dns_server' processes\n", + getDnsChildren()); + for (i = 0; i < getDnsChildren(); ++i) { + dns_child_table[i] = (dnsserver_entry *) xcalloc(1, sizeof(dnsserver_entry)); + if ((dnssocket = ipcache_create_dnsserver(getDnsProgram())) < 0) { + 
debug(1, "ipcache_init: WARNING: Cannot run 'dnsserver' process.\n"); + debug(1, " Fallling back to the blocking version.\n"); + dns_child_table[i]->alive = 0; + } else { + dns_child_alive = 1; + dns_child_table[i]->id = i; + dns_child_table[i]->inpipe = dnssocket; + dns_child_table[i]->outpipe = dnssocket; + dns_child_table[i]->lastcall = cached_curtime; + dns_child_table[i]->pending_count = 0; + dns_child_table[i]->size = IP_INBUF - 1; /* spare one for \0 */ + dns_child_table[i]->offset = 0; + dns_child_table[i]->alive = 1; + dns_child_table[i]->ip_inbuf = (char *) xcalloc(1, IP_INBUF); + + /* update fd_stat */ + + file_update_open(dns_child_table[i]->inpipe, fd_note_buf); + + sprintf(fd_note_buf, "%s #%d", + getDnsProgram(), + dns_child_table[i]->id); + + fd_note(dns_child_table[i]->inpipe, fd_note_buf); + commSetNonBlocking(dns_child_table[i]->inpipe); + + /* clear unused handlers */ + comm_set_select_handler(dns_child_table[i]->inpipe, COMM_SELECT_WRITE, 0, 0); + comm_set_select_handler(dns_child_table[i]->outpipe, COMM_SELECT_READ, 0, 0); + + /* set handler for incoming result */ + comm_set_select_handler(dns_child_table[i]->inpipe, COMM_SELECT_READ, + (PF) ipcache_dnsHandleRead, (caddr_t) dns_child_table[i]); + debug(3, "ipcache_init: 'dns_server' %d started\n", i); + } + } + ipcache_high = (long) (((float) MAX_IP * + (float) IP_HIGH_WATER) / (float) 100); + ipcache_low = (long) (((float) MAX_IP * + (float) IP_LOW_WATER) / (float) 100); + + + ipcache_initialized = 1; +} + +/* clean up the pending entries in dnsserver */ +/* return 1 if we found the host, 0 otherwise */ +int ipcache_unregister(name, fd) + char *name; + int fd; +{ + ipcache_entry *e; + IpPending *p, *q; + + e = ipcache_get(name); + if (!e) { + /* not found any where */ + return 0; + } + /* look for matched fd */ + for (q = p = e->pending_head; p; q = p, p = p->next) { + if (p->fd == fd) { + break; + } + } + + if (p == NULL) { + /* Can not find this ipcache_entry, weird */ + debug(1, 
"ipcache_unregister: Failed to unregister FD %d from name: %s, can't find this FD.\n", + fd, name); + return 0; + } + /* found */ + if (p == e->pending_head) { + /* it's at the head of the queue */ + if (p->next) { + /* there is something along the line */ + e->pending_head = p->next; + free(p->data); + free(p); + } else { + /* it is the only entry */ + e->pending_head = e->pending_tail = NULL; + free(p->data); + free(p); + } + } else if (p == e->pending_tail) { + /* it's at the tail */ + e->pending_tail = q; + q->next = NULL; + free(p->data); + free(p); + } else { + /* it's in the middle */ + /* skip it in the list */ + q->next = p->next; + free(p->data); + free(p); + } + return 1; +} + +#ifdef OLD_CODE +/* return 0 if address match one of address listed for that host */ +int ipcache_check(name, address, insist) + char *name; + void *address; + int insist; /* insist another lookup if that name is not in a cache */ +{ + ipcache_entry *e; + int i; + + e = ipcache_get(name); + if (!e) + if (insist != 1) + return -1; + else { + /* do a blocking gethostbyname */ + ipcache_gethostbyname(name); + e = ipcache_get(name); + if (!e) { + debug(1, "ipcache_check: Failed to lookup up %s even with insist flag.\n", name); + return -1; + } + } + if (e->status != CACHED) + return -1; + + for (i = 0; i < (int) e->addr_count; i++) { + if (memcmp(e->entry.h_addr_list[i], address, + e->entry.h_length) == 0) + return 0; + } + + return -1; + +} +#endif /* OLD_CODE */ + + +struct hostent *ipcache_gethostbyname(name) + char *name; +{ + ipcache_entry *result; + unsigned int a1, a2, a3, a4; + struct hostent *s_result = NULL; + + if (!ipcache_initialized) + ipcache_init(); + + if (!name) { + debug(5, "ipcache_gethostbyname: Invalid argument?\n"); + return (NULL); + } + if (!(result = ipcache_get(name))) { + /* cache miss */ + if (name) { + debug(5, "ipcache_gethostbyname: IPcache miss for '%s'.\n", name); + } + /* check if it's already a IP address in text form. 
*/ + if (sscanf(name, "%u.%u.%u.%u", &a1, &a2, &a3, &a4) == 4) { + *((unsigned long *) static_result->h_addr_list[0]) = inet_addr(name); + strncpy(static_result->h_name, name, MAX_HOST_NAME); + return static_result; + } else { + s_result = gethostbyname(name); + } + + if (s_result && s_result->h_name && (s_result->h_name[0] != '\0')) { + /* good address, cached */ + debug(10, "ipcache_gethostbyname: DNS success: cache for '%s'.\n", name); + ipcache_add(name, ipcache_create(), s_result, 1); + result = ipcache_get(name); + return &(result->entry); + } else { + /* bad address, negative cached */ + debug(3, "ipcache_gethostbyname: DNS failure: negative cache for '%s'.\n", name); + ipcache_add(name, ipcache_create(), s_result, 0); + return NULL; + } + + } + /* cache hit */ + debug(5, "ipcache_gethostbyname: Hit for '%s'.\n", name ? name : "NULL"); + result->lastref = cached_curtime; + return (result->status == CACHED) ? &(result->entry) : NULL; +} + + +#ifdef OLD_CODE +/* return 0 if name match one of name listed for that host */ +int ipcache_checkname(refname, cname) + char *refname; + char *cname; +{ + ipcache_entry *e, *f; + int i, j; + + if (strcasecmp(refname, cname) == 0) + return 0; + + (void) ipcache_gethostbyname(cname); + e = ipcache_get(cname); + if (!e) { + /* try to look up */ + e = ipcache_get(cname); + if (!e) + return -1; + } + if (e->status != CACHED) + return -1; + + if (strcasecmp(refname, e->entry.h_name) == 0) + return 0; + for (i = 0; i < (int) e->alias_count; i++) { + if (strcasecmp(refname, e->entry.h_aliases[i]) == 0) + return 0; + } + + /* try ipentry of ref */ + + f = ipcache_get(refname); + if (!f) { + /* try to look up */ + (void) ipcache_gethostbyname(cname); + f = ipcache_get(refname); + if (!f) + return -1; + } + /* PBD POSSIBLE BUG FIX? 
ANAWAT, PLEASE CHECK */ + if (f->entry.h_name && strcasecmp(f->entry.h_name, cname) == 0) + return 0; + + /* check name from ref look up again all cname aliases */ + for (i = 0; i < (int) e->alias_count; i++) { + if (strcasecmp(f->entry.h_name, e->entry.h_aliases[i]) == 0) + return 0; + } + + /* check aliases from ref look up again all cname aliases */ + for (j = 0; j < (int) f->alias_count; j++) { + for (i = 0; i < (int) e->alias_count; i++) { + if (strcasecmp(f->entry.h_aliases[j], e->entry.h_aliases[i]) == 0) + return 0; + } + } + return -1; + +} +#endif /* OLD_CODE */ + + +/* process objects list */ +void stat_ipcache_get(sentry, obj) + StoreEntry *sentry; + cacheinfo *obj; +{ + char buffer[MAX_LINELEN]; + ipcache_entry *e = NULL; + int i; + int ttl; + char status; + + sprintf(buffer, "{IP Cache Contents:\n\n"); + storeAppend(sentry, buffer, strlen(buffer)); + + for (e = ipcache_GetFirst(); (e); e = ipcache_GetNext()) { + if (e) { + ttl = (e->ttl - cached_curtime + e->lastref); + status = ipcache_status_char(e); + if (status == 'P') + ttl = 0; + + sprintf(buffer, " {%s %c %d %d", + e->name, status, ttl, e->addr_count); + storeAppend(sentry, buffer, strlen(buffer)); + + for (i = 0; i < (int) e->addr_count; i++) { + struct in_addr addr; + memcpy((char *) &addr, e->entry.h_addr_list[i], e->entry.h_length); + + sprintf(buffer, "%s ", inet_ntoa(addr)); + storeAppend(sentry, buffer, strlen(buffer)); + } + for (i = 0; i < (int) e->alias_count; i++) { + sprintf(buffer, "%s ", e->entry.h_aliases[i]); + storeAppend(sentry, buffer, strlen(buffer)); + } + if (e->entry.h_name && strncmp(e->name, e->entry.h_name, MAX_LINELEN)) { + sprintf(buffer, "%s ", e->entry.h_name); + storeAppend(sentry, buffer, strlen(buffer)); + } + sprintf(buffer, "}\n"); + storeAppend(sentry, buffer, strlen(buffer)); + } + } + sprintf(buffer, "}\n"); + storeAppend(sentry, buffer, strlen(buffer)); + +} + +char ipcache_status_char(e) + ipcache_entry *e; +{ + switch (e->status) { + case CACHED: + 
return ('C'); + case PENDING: + return ('P'); + case NEGATIVE_CACHED: + return ('N'); + default: + debug(1, "ipcache_status_char: unexpected IP cache status.\n"); + } + return ('X'); +} + +int ipcache_hash_entry_count() +{ + ipcache_entry *e; + int local_ip_count = 0; + + e = NULL; + + for (e = ipcache_GetFirst(); e; e = ipcache_GetNext()) { + local_ip_count++; + } + + return local_ip_count; +} diff --git a/src/main.cc b/src/main.cc new file mode 100644 index 00000000000..85cd7a08440 --- /dev/null +++ b/src/main.cc @@ -0,0 +1,522 @@ +static char rcsid[] = "$Id: main.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: main.c + * Description: main loop for cache + * Author: John Noll, USC + * Created: Mon Dec 13 10:10:28 1993 (John Noll, USC) sfdif + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. 
Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. 
We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ansihelp.h" +#include "cache_cf.h" +#include "debug.h" +#include "comm.h" +#include "icp.h" +#include "stat.h" +#include "stack.h" +#include "fdstat.h" +#include "ipcache.h" +#include "util.h" + +/* WRITE_PID_FILE - tries to write a cached.pid file on startup */ +#ifndef WRITE_PID_FILE +#define WRITE_PID_FILE +#endif + +time_t cached_starttime = (time_t) 0; +time_t next_cleaning = (time_t) 0; +int theAsciiConnection = -1; +int theBinaryConnection = -1; +int theUdpConnection = -1; +int do_reuse = 1; +int debug_level = 0; +int catch_signals = 1; +int do_dns_test = 1; +char *tmp_error_buf = NULL; +char *config_file = NULL; +int vhost_mode = 0; +int unbuffered_logs = 0; /* debug and hierarhcy buffered by default */ + +extern time_t cached_curtime; +extern void (*failure_notify) (); /* for error reporting from xmalloc */ +extern int do_mallinfo; +extern void hash_init _PARAMS((int)); +extern int disk_init(); +extern void stmemInit(); +extern int storeMaintainSwapSpace(); +extern void fatal_dump _PARAMS((char *)); +extern void fatal _PARAMS((char *)); +extern void kill_zombie(); + +static int asciiPortNumOverride = 0; +static int binaryPortNumOverride = 0; +static int udpPortNumOverride = 0; + +void raise_debug_lvl(), reset_debug_lvl(); +void death(), deathb(), shut_down(), rotate_logs(); +void sig_child(); + +int main(argc, argv) + int argc; + char **argv; +{ + int c; + int malloc_debug_level = 0; + int debug_level_overwrite = 0; + extern char *optarg; + int 
errcount = 0; + static int neighbors = 0; + char *s = NULL; + int n; /* # of GC'd objects */ + time_t last_maintain = 0; + +#ifdef WRITE_PID_FILE + FILE *pid_fp = NULL; + static char pidfn[MAXPATHLEN]; +#endif + + cached_starttime = cached_curtime = time((time_t *) NULL); + failure_notify = fatal_dump; + + /* try to use as many file descriptors as possible */ + /* System V uses RLIMIT_NOFILE and BSD uses RLIMIT_OFILE */ +#if defined(HAVE_SETRLIMIT) + { + struct rlimit rl; + +#if defined(RLIMIT_NOFILE) + if (getrlimit(RLIMIT_NOFILE, &rl) < 0) { + perror("getrlimit: RLIMIT_NOFILE"); + } else { + rl.rlim_cur = rl.rlim_max; /* set it to the max */ + if (setrlimit(RLIMIT_NOFILE, &rl) < 0) { + perror("setrlimit: RLIMIT_NOFILE"); + } + } +#elif defined(RLIMIT_OFILE) + if (getrlimit(RLIMIT_OFILE, &rl) < 0) { + perror("getrlimit: RLIMIT_OFILE"); + } else { + rl.rlim_cur = rl.rlim_max; /* set it to the max */ + if (setrlimit(RLIMIT_OFILE, &rl) < 0) { + perror("setrlimit: RLIMIT_OFILE"); + } + } +#endif + } +#endif + +#if USE_MALLOPT + /* set malloc option */ + /* use small block algorithm for faster allocation */ + /* grain of small block */ + mallopt(M_GRAIN, 16); + /* biggest size that is considered a small block */ + mallopt(M_MXFAST, 4096); + /* number of holding small block */ + mallopt(M_NLBLKS, 100); +#endif + + /* allocate storage for error messages */ + tmp_error_buf = (char *) xcalloc(1, 8192); +/*init comm module */ + comm_init(); + +#ifdef DAEMON + if (daemonize()) { + fprintf(stderr, "Error: couldn't create daemon process\n"); + exit(0); + } + /* signal( SIGHUP, restart ); *//* restart if/when proc dies */ +#endif /* DAEMON */ + + /* we have to init fdstat here. 
*/ + fdstat_init(PREOPEN_FD); + fdstat_open(0, LOG); + fdstat_open(1, LOG); + fdstat_open(2, LOG); + fd_note(0, "STDIN"); + fd_note(1, "STDOUT"); + fd_note(2, "STDERR"); + + if ((s = getenv("HARVEST_HOME")) != NULL) { + config_file = (char *) xcalloc(1, strlen(s) + 64); + sprintf(config_file, "%s/lib/cached.conf", s); + } else { + config_file = xstrdup("/usr/local/harvest/lib/cached.conf"); + } + + /* enable syslog by default */ + syslog_enable = 1; + /* disable stderr debug printout by default */ + stderr_enable = 0; + /* preinit for debug module */ + debug_log = stderr; + hash_init(0); + + while ((c = getopt(argc, argv, "vCDRVseif:a:p:u:d:m:zh?")) != -1) + switch (c) { + case 'v': + printf("Harvest Cache: Version %s\n", HARVEST_VERSION); + exit(0); + /* NOTREACHED */ + case 'V': + vhost_mode = 1; + break; + case 'C': + catch_signals = 0; + break; + case 'D': + do_dns_test = 0; + break; + case 's': + syslog_enable = 0; + break; + case 'e': + stderr_enable = 1; + break; + case 'R': + do_reuse = 0; + break; + case 'f': + xfree(config_file); + config_file = xstrdup(optarg); + break; + case 'a': + asciiPortNumOverride = atoi(optarg); + break; + case 'p': + binaryPortNumOverride = atoi(optarg); + break; + case 'u': + udpPortNumOverride = atoi(optarg); + break; + case 'd': + stderr_enable = 1; + debug_level_overwrite = 1; + debug_level = atoi(optarg); + unbuffered_logs = 1; + break; + case 'm': + malloc_debug_level = atoi(optarg); + break; + case 'z': + zap_disk_store = 1; + break; + case '?': + case 'h': + default: + printf("\ +Usage: cached [-Rsehvz] [-f config-file] [-d debug-level] [-[apu] port]\n\ + -e Print messages to stderr.\n\ + -h Print help message.\n\ + -s Disable syslog output.\n\ + -v Print version.\n\ + -z Zap disk storage -- deletes all objects in disk cache.\n\ + -C Do not catch fatal signals.\n\ + -D Disable initial DNS tests.\n\ + -R Do not set REUSEADDR on port.\n\ + -f file Use given config-file instead of\n\ + $HARVEST_HOME/lib/cached.conf.\n\ + -d 
level Use given debug-level, prints messages to stderr.\n\ + -a port Specify ASCII port number (default: %d).\n\ + -u port Specify UDP port number (default: %d).\n", + CACHE_HTTP_PORT, CACHE_ICP_PORT); + + exit(1); + break; + } + + if (catch_signals) { + signal(SIGSEGV, death); + signal(SIGBUS, deathb); + } + signal(SIGPIPE, SIG_IGN); + signal(SIGCHLD, sig_child); + signal(SIGHUP, rotate_logs); + signal(SIGTERM, shut_down); + signal(SIGINT, shut_down); + + parseConfigFile(config_file); + + if (!neighbors) { + neighbors_create(); + ++neighbors; + }; + + if (asciiPortNumOverride > 0) + setAsciiPortNum(asciiPortNumOverride); + if (binaryPortNumOverride > 0) + setBinaryPortNum(binaryPortNumOverride); + if (udpPortNumOverride > 0) + setUdpPortNum(udpPortNumOverride); + + if (!debug_level_overwrite) { + debug_level = getDebugLevel(); + } + /* to toggle debugging */ +#ifdef SIGUSR1 + signal(SIGUSR1, raise_debug_lvl); +#endif +#ifdef SIGUSR2 + signal(SIGUSR2, reset_debug_lvl); +#endif + +#ifdef NO_LOGGING + _db_init("cached", 0, getCacheLogFile()); +#else + _db_init("cached", debug_level, getCacheLogFile()); +#endif + fdstat_open(fileno(debug_log), LOG); + fd_note(fileno(debug_log), getCacheLogFile()); + + debug(0, "Starting Harvest Cache (version %s)...\n", HARVEST_VERSION); + + /* init ipcache */ + ipcache_init(); + + /* init neighbors */ + neighbors_init(); + + +#if defined(MALLOC_DBG) + malloc_debug(malloc_debug_level); +#endif + + theAsciiConnection = comm_open(COMM_NONBLOCKING, + getAsciiPortNum(), + 0, + "Ascii Port"); + if (theAsciiConnection < 0) { + fatal("Cannot open ascii Port\n"); + } + fdstat_open(theAsciiConnection, Socket); + fd_note(theAsciiConnection, "HTTP (Ascii) socket"); + comm_listen(theAsciiConnection); + comm_set_select_handler(theAsciiConnection, + COMM_SELECT_READ, + asciiHandleConn, + 0); + debug(1, "Accepting HTTP (ASCII) connections on FD %d.\n", + theAsciiConnection); + + if (!httpd_accel_mode || getAccelWithProxy()) { +#ifdef 
KEEP_BINARY_CONN + theBinaryConnection = comm_open(COMM_NONBLOCKING, + binaryPortNum, + 0, + "Binary Port"); + + if (theBinaryConnection < 0) { + fatal("Cannot open Binary Port\n"); + } + comm_listen(theBinaryConnection); + comm_set_select_handler(theBinaryConnection, + COMM_SELECT_READ, + icpHandleTcp, + 0); + debug(1, "Binary connection opened on fd %d\n", theBinaryConnection); +#endif + if (getUdpPortNum() > -1) { + theUdpConnection = comm_open(COMM_NONBLOCKING | COMM_DGRAM, + getUdpPortNum(), + 0, + "Ping Port"); + if (theUdpConnection < 0) + fatal("Cannot open UDP Port\n"); + fdstat_open(theUdpConnection, Socket); + fd_note(theUdpConnection, "ICP (UDP) socket"); + comm_set_select_handler(theUdpConnection, + COMM_SELECT_READ, + icpHandleUdp, + 0); + debug(1, "Accepting ICP (UDP) connections on FD %d.\n", + theUdpConnection); + } + } + if (theUdpConnection > 0) { + /* Now that the fd's are open, initialize neighbor connections */ + if (!httpd_accel_mode || getAccelWithProxy()) { + neighbors_open(theUdpConnection); + } + } + /* do suid checking here */ + check_suid(); + + /* module initialization */ + disk_init(); + stat_init(&CacheInfo, getAccessLogFile()); + storeInit(); + stmemInit(); + +#ifdef WRITE_PID_FILE + /* Try to write the pid to cached.pid in the same directory as + * cached.conf */ + memset(pidfn, '\0', MAXPATHLEN); + strcpy(pidfn, config_file); + if ((s = strrchr(pidfn, '/')) != NULL) + strcpy(s, "/cached.pid"); + else + strcpy(pidfn, "/usr/local/harvest/lib/cached.pid"); + pid_fp = fopen(pidfn, "w"); + if (pid_fp != NULL) { + fprintf(pid_fp, "%d\n", (int) getpid()); + fclose(pid_fp); + } +#endif + + /* after this point we want to see the mallinfo() output */ + do_mallinfo = 1; + debug(0, "Ready to serve requests.\n"); + + /* main loop */ + if (getCleanRate() > 0) + next_cleaning = time(0L) + getCleanRate(); + while (1) { + /* maintain cache storage */ + if (cached_curtime > last_maintain) { + storeMaintainSwapSpace(); + last_maintain = 
cached_curtime; + } + switch (comm_select((long) 60, (long) 0, next_cleaning)) { + case COMM_OK: + /* do nothing */ + break; + case COMM_ERROR: + errcount++; + debug(0, "Select loop Error. Retry. %d\n", errcount); + if (errcount == 10) + fatal_dump("Select Loop failed.!\n"); + break; + case COMM_TIMEOUT: + /* this happens after 1 minute of idle time, or + * when next_cleaning has arrived */ + /* garbage collection */ + if (getCleanRate() > 0 && cached_curtime >= next_cleaning) { + debug(1, "Performing a garbage collection...\n"); + n = storePurgeOld(); + debug(1, "Garbage collection done, %d objects removed\n", n); + next_cleaning = cached_curtime + getCleanRate(); + } + /* house keeping */ +#ifdef THIS_BREAKS_FTP + kill_zombie(); +#endif + break; + default: + debug(0, "MAIN: Internal error -- this should never happen.\n"); + break; + } + } + /* NOTREACHED */ + exit(0); +} + +void raise_debug_lvl() +{ + extern int _db_level; + _db_level = 10; + +#if defined(_HARVEST_SYSV_SIGNALS_) && defined(SIGUSR1) + signal(SIGUSR1, raise_debug_lvl); +#endif +} + +void reset_debug_lvl() +{ + extern int _db_level; + _db_level = debug_level; + +#if defined(_HARVEST_SYSV_SIGNALS_) && defined(SIGUSR2) + signal(SIGUSR2, reset_debug_lvl); +#endif +} diff --git a/src/mime.cc b/src/mime.cc new file mode 100644 index 00000000000..11edaea7c61 --- /dev/null +++ b/src/mime.cc @@ -0,0 +1,167 @@ +static char rcsid[] = "$Id: mime.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: mime.c + * Description: Mime Module + * Author: Anawat Chankhunthod, USC + * Created: Mon Dec 12 16:09:40 PST 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. 
Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. 
+ * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include + +#include "ansihelp.h" /* goes first */ +#include "debug.h" +#include "mime.h" +#include "mime_table.h" + +extern time_t cached_curtime; + +int mime_refresh_request(mime) + char *mime; +{ + if (strstr(mime, "no-cache")) + return 1; + + return 0; +} + +ext_table_entry * + mime_ext_to_type(extension) + char *extension; +{ + int i, low, high, comp; + char ext[16], *cp; + + if (!extension || strlen(extension) >= (sizeof(ext) - 1)) + return NULL; + strcpy(ext, extension); + for (cp = ext; *cp; cp++) + if (isupper(*cp)) + *cp = tolower(*cp); + low = 0; + high = EXT_TABLE_LEN - 1; + while (low <= high) { + i = (low + high) / 2; + if ((comp = strcmp(ext, ext_mime_table[i].name)) == 0) + return &ext_mime_table[i]; + if (comp > 0) + low = i + 1; + else + high = i - 1; + } + return NULL; +} + +/* + * mk_mime_hdr - Generates a MIME header using the given parameters. 
+ * You can call mk_mime_hdr with a 'lmt = time(NULL) - ttl' to + * generate a fake Last-Modified-Time for the header. + * 'ttl' is the number of seconds relative to the current time + * that the object is valid. + * + * Returns the MIME header in the provided 'result' buffer, and + * returns non-zero on error, or 0 on success. + */ +int mk_mime_hdr(result, ttl, size, lmt, type) + char *result, *type; + int size; + time_t ttl, lmt; +{ + extern char *mkrfc850(); + time_t expiretime; + time_t t; + char date[100]; + char expire[100]; + char last_modified_time[100]; + + if (result == NULL) + return 1; + + t = cached_curtime; + expiretime = t + ttl; + + date[0] = expire[0] = last_modified_time[0] = result[0] = '\0'; + strncpy(date, mkrfc850(&t), 100); + strncpy(expire, mkrfc850(&expiretime), 100); + strncpy(last_modified_time, mkrfc850(&lmt), 100); + + sprintf(result, "Content-Type: %s\r\nContent-Size: %d\r\nDate: %s\r\nExpires: %s\r\nLast-Modified-Time: %s\r\n", type, size, date, expire, last_modified_time); + return 0; +} diff --git a/src/neighbors.cc b/src/neighbors.cc new file mode 100644 index 00000000000..46215e554f7 --- /dev/null +++ b/src/neighbors.cc @@ -0,0 +1,849 @@ +static char rcsid[] = "$Id: neighbors.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: icp.c + * Description: Keeps track of other caches and cache clients + * Author: Peter Danzig, Chuck and Anawat Chankhunthod USC + * Created: Mon May 16 14:18:36 PDT 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. 
Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. 
+ *
+ * - Any derivative product is also subject to these copyright
+ * and use restrictions.
+ *
+ * Note that the Harvest software is NOT in the public domain. We
+ * retain copyright, as specified above.
+ *
+ * HISTORY OF FREE SOFTWARE STATUS
+ *
+ * Originally we required sites to license the software in cases
+ * where they were going to build commercial products/services
+ * around Harvest. In June 1995 we changed this policy. We now
+ * allow people to use the core Harvest software (the code found in
+ * the Harvest ``src/'' directory) for free. We made this change
+ * in the interest of encouraging the widest possible deployment of
+ * the technology. The Harvest software is really a reference
+ * implementation of a set of protocols and formats, some of which
+ * we intend to standardize. We encourage commercial
+ * re-implementations of code complying to this set of standards.
+ *
+ *
+ */
+#include "config.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "ansihelp.h"
+#include "comm.h"
+#include "store.h"
+#include "icp.h"
+#include "proto.h"
+#include "neighbors.h"
+#include "ipcache.h"
+#include "cache_cf.h"
+#include "util.h"
+#include "disk.h"
+
+static neighbors *friends = NULL;
+
+extern time_t cached_curtime;
+extern int unbuffered_logs; /* main.c */
+extern char *tmp_error_buf; /* main.c */
+extern int icpUdpSend _PARAMS((int, char *, icp_common_t *, struct sockaddr_in *, icp_opcode));
+extern int getFromOrgSource _PARAMS((int fd, StoreEntry * entry));
+extern int getFromCache _PARAMS((int fd, StoreEntry * entry, edge * e));
+extern int getFromDefaultSource _PARAMS((int fd, StoreEntry * entry));
+extern void fatal_dump _PARAMS((char *));
+extern void fatal _PARAMS((char *));
+
+static struct neighbor_cf *Neighbor_cf = NULL;
+
+static icp_common_t echo_hdr;
+static short echo_port;         /* UDP echo port, kept in HOST byte order */
+FILE *cache_hierarchy_log = NULL;
+
+/* Log names for hier_code values; the last entry is used for any code
+ * that hierarchy_log_append() clamps to HIER_MAX. */
+static char *hier_strings[] =
+{
+    "NONE",
+    "DIRECT",
+    "NEIGHBOR_HIT",
+    "PARENT_HIT",
+    "SINGLE_PARENT",
+    "NO_PARENT_DIRECT",
+    "FIRST_PARENT_MISS",
+    "LOCAL_IP_DIRECT",
+    "DEAD_PARENT",
+    "DEAD_NEIGHBOR",
+    "REVIVE_PARENT",
+    "REVIVE_NEIGHBOR",
+    "NO_DIRECT_FAIL",
+    "SOURCE_FASTEST",
+    "INVALID CODE"
+};
+
+
+/* Find the configured edge whose address list contains the sender's IP
+ * and whose UDP port equals the sender's port.  Returns NULL when the
+ * packet did not come from a known neighbor or parent.  'header' is
+ * not examined. */
+edge *whichEdge(header, from)
+     icp_common_t *header;
+     struct sockaddr_in *from;
+{
+    int j;
+    int port;
+    struct in_addr ip;
+    edge *e = NULL;
+
+    port = ntohs(from->sin_port);
+    ip = from->sin_addr;
+
+    debug(3, "whichEdge: from %s port %d\n", inet_ntoa(ip), port);
+
+    for (e = friends->edges_head; e; e = e->next) {
+        for (j = 0; j < e->n_addresses; j++) {
+            if (ip.s_addr == e->addresses[j].s_addr && port == e->udp_port) {
+                return e;
+            }
+        }
+    }
+    return (NULL);
+}
+
+
+/* Append one line to the hierarchy log (no-op when the log is closed).
+ * The line is "<time> <url> [TIMEOUT_]<code name>[ <cache_host>]", where
+ * <time> is "[httpd-style time]" when emulate_httpd_log is set and the
+ * numeric cached_curtime otherwise. */
+void hierarchy_log_append(url, code, timeout, cache_host)
+     char *url;
+     hier_code code;
+     int timeout;
+     char *cache_host;
+{
+    static time_t last_time = 0;
+    static char time_str[128];
+    char *s = NULL;
+
+    if (!cache_hierarchy_log)
+        return;
+
+    if (code > HIER_MAX)
+        code = HIER_MAX;
+
+    if (emulate_httpd_log) {
+        /* the formatted time is cached and only rebuilt when the clock moves */
+        if (cached_curtime != last_time) {
+            s = mkhttpdlogtime(&cached_curtime);
+            strcpy(time_str, s);
+            last_time = cached_curtime;
+        }
+        fprintf(cache_hierarchy_log, "[%s] ", time_str);
+    } else {
+        /* time_t is not necessarily long; cast to match %ld */
+        fprintf(cache_hierarchy_log, "%ld ", (long) cached_curtime);
+    }
+    fprintf(cache_hierarchy_log, "%s %s%s",
+        url,
+        timeout ? "TIMEOUT_" : "",
+        hier_strings[code]);
+    if (cache_host)
+        fprintf(cache_hierarchy_log, " %s", cache_host);
+    fprintf(cache_hierarchy_log, "\n");
+
+    if (unbuffered_logs)
+        fflush(cache_hierarchy_log);
+}
+
+/* Decide from the edge's domain list whether 'host' would be queried
+ * through edge 'e'.  An edge without a domain list is always pinged.
+ * The first list entry that is a case-insensitive suffix of 'host'
+ * decides the result; otherwise the result is the inverse of the last
+ * entry's do_ping flag. */
+static int edgeWouldBePinged(e, host)
+     edge *e;
+     char *host;
+{
+    int offset;
+    dom_list *d = NULL;
+    int do_ping = 1;
+
+    if (e->domains == NULL)
+        return do_ping;
+
+    do_ping = 0;
+    for (d = e->domains; d; d = d->next) {
+        /* a domain longer than the host name cannot be its suffix */
+        if ((offset = strlen(host) - strlen(d->domain)) < 0) {
+            do_ping = !d->do_ping;
+            continue;
+        }
+        if (strcasecmp(d->domain, host + offset) == 0) {
+            /* found a match, no need to check any more domains */
+            do_ping = d->do_ping;
+            break;
+        } else {
+            do_ping = !d->do_ping;
+        }
+    }
+    return do_ping;
+}
+
+/* Return the parent for 'host' when exactly one edge would be pinged and
+ * that edge is a parent; otherwise NULL.  When 'n' is non-NULL it
+ * receives the number of edges (of any type) that would be pinged, and
+ * the scan always runs to the end of the list to obtain that count. */
+edge *getSingleParent(host, n)
+     char *host;
+     int *n;
+{
+    edge *p = NULL;
+    edge *e = NULL;
+    int count = 0;
+
+    if (n == NULL && friends->n_parent < 1)
+        return NULL;
+    for (e = friends->edges_head; e; e = e->next) {
+        if (!edgeWouldBePinged(e, host))
+            continue;
+        count++;
+        if (e->type != is_a_parent) {
+            /* we matched a neighbor, not a parent. There
+             * can be no single parent */
+            if (n == NULL)
+                return NULL;
+            continue;
+        }
+        if (p) {
+            /* already have a parent, this makes the second,
+             * so there can be no single parent */
+            if (n == NULL)
+                return NULL;
+            continue;
+        }
+        p = e;
+    }
+    /* Ok, all done checking the edges. If only one parent matched, then
+     * p will already point to it */
+    if (n)
+        *n = count;
+    if (count == 1)
+        return p;
+    return NULL;
+}
+
+/* Return the first parent edge that would be pinged for 'host', or NULL. */
+edge *getFirstParent(host)
+     char *host;
+{
+    edge *e = NULL;
+
+    if (friends->n_parent < 1)
+        return NULL;
+    for (e = friends->edges_head; e; e = e->next) {
+        if (e->type != is_a_parent)
+            continue;
+        if (edgeWouldBePinged(e, host))
+            return e;
+    }
+    return NULL;
+}
+
+/* Return the edge following 'e' in the edge list (NULL at the end). */
+edge *getNextEdge(edge * e)
+{
+    return e->next;
+}
+
+/* Return the head of the edge list (NULL when no edges are installed). */
+edge *getFirstEdge()
+{
+    return friends->edges_head;
+}
+
+/* Allocate an edge for 'host' and append it to the edge list.  A 'type'
+ * of "parent" makes it a parent; any other value makes it a neighbor.
+ * The 'domains' list is stored in the edge, not copied. */
+void neighbors_install(host, type, ascii_port, udp_port, proxy_only, domains)
+     char *host;
+     char *type;
+     int ascii_port;
+     int udp_port;
+     int proxy_only;
+     dom_list *domains;
+{
+    edge *e;
+
+    debug(1, "Adding a %s: %s\n", type, host);
+
+    e = (edge *) xcalloc(1, sizeof(edge));
+
+    e->ack_deficit = 0;
+    e->ascii_port = ascii_port;
+    e->udp_port = udp_port;
+    e->proxy_only = proxy_only;
+    e->host = xstrdup(host);
+    e->domains = domains;
+    e->num_pings = 0;
+    e->pings_sent = 0;
+    e->pings_acked = 0;
+    e->neighbor_up = 1;
+    e->hits = 0;
+    e->misses = 0;
+
+    if (!strcmp(type, "parent")) {
+        friends->n_parent++;
+        e->type = is_a_parent;
+    } else {
+        friends->n_neighbor++;
+        e->type = is_a_neighbor;
+    }
+
+    /* Append edge */
+    if (!friends->edges_head)
+        friends->edges_head = e;
+    if (friends->edges_tail)
+        friends->edges_tail->next = e;
+    friends->edges_tail = e;
+
+    friends->n++;
+}
+
+/* Prepare all installed edges for use on UDP socket 'fd': open the
+ * hierarchy log, resolve each edge's addresses (fatal on failure), and
+ * build the per-edge query header and the shared echo header. */
+void neighbors_open(fd)
+     int fd;
+{
+    int j;
+    struct sockaddr_in our_socket_name;
+    struct sockaddr_in *ap;
+    int sock_name_length = sizeof(our_socket_name);
+    int log_fd;
+    char *fname = NULL;
+    char **list = NULL;
+    edge *e = NULL;
+    struct in_addr *ina = NULL;
+
+    /* Start from a zeroed address so that a failed getsockname() yields
+     * a well-defined (all zero) shostid instead of stack garbage. */
+    memset(&our_socket_name, '\0', sizeof(our_socket_name));
+    if (getsockname(fd, (struct sockaddr *) &our_socket_name,
+            &sock_name_length) == -1) {
+        debug(1, "getsockname(%d,%p,%p) failed.\n",
+            fd, &our_socket_name, &sock_name_length);
+    }
+    friends->fd = fd;
+
+    /* open log file */
+    if ((fname = getHierarchyLogFile())) {
+        log_fd = file_open(fname, NULL, O_WRONLY | O_CREAT | O_APPEND);
+        if (log_fd < 0) {
+            debug(1, "%s: %s\n", fname, xstrerror());
+            debug(1, "Hierachical logging is disabled.\n");
+        } else if (!(cache_hierarchy_log = fdopen(log_fd, "a"))) {
+            debug(1, "%s: %s\n", fname, xstrerror());
+            debug(1, "Hierachical logging is disabled.\n");
+        }
+    }
+    /* Prepare neighbor connections, one at a time */
+    for (e = friends->edges_head; e; e = e->next) {
+        debug(2, "Finding IP addresses for '%s'\n", e->host);
+        if ((list = getAddressList(e->host)) == NULL) {
+            sprintf(tmp_error_buf, "DNS lookup for '%s' failed! Cannot continue.\n",
+                e->host);
+            fatal(tmp_error_buf);
+        }
+        e->n_addresses = 0;
+        for (j = 0; *list && j < EDGE_MAX_ADDRESSES; j++) {
+            ina = &e->addresses[j];
+            memcpy(&(ina->s_addr), *list, 4);
+            list++;
+            e->n_addresses++;
+        }
+        if (e->n_addresses < 1) {
+            sprintf(tmp_error_buf, "No IP addresses found for '%s'; Cannot continue.\n", e->host);
+            fatal(tmp_error_buf);
+        }
+        for (j = 0; j < e->n_addresses; j++) {
+            debug(2, "--> IP address #%d: %s\n", j, inet_ntoa(e->addresses[j]));
+        }
+        e->rtt = 1000;
+
+        /* Prepare query packet for future use */
+        e->header.opcode = ICP_OP_QUERY;
+        e->header.version = ICP_VERSION_CURRENT;
+        e->header.length = 0;
+        e->header.reqnum = 0;
+        memset(e->header.auth, '\0', sizeof(u_num32) * ICP_AUTH_SIZE);
+        e->header.shostid = our_socket_name.sin_addr.s_addr;
+
+        ap = &e->in_addr;
+        memset(ap, '\0', sizeof(struct sockaddr_in));
+        ap->sin_family = AF_INET;
+        ap->sin_addr = e->addresses[0];
+        ap->sin_port = htons(e->udp_port);
+
+        if (e->type == is_a_parent) {
+            debug(3, "parent_install: host %s addr %s port %d\n",
+                e->host, inet_ntoa(ap->sin_addr),
+                e->udp_port);
+            e->neighbor_up = 1;
+        } else {
+            debug(3, "neighbor_install: host %s addr %s port %d\n",
+                e->host, inet_ntoa(ap->sin_addr),
+                e->udp_port);
+            e->neighbor_up = 1;
+        }
+
+        /* do this only the first time thru */
+        if (0 == echo_hdr.opcode) {
+            struct servent *sep;
+
+            echo_hdr.opcode = ICP_OP_SECHO;
+            echo_hdr.version = ICP_VERSION_CURRENT;
+            echo_hdr.length = 0;
+            echo_hdr.reqnum = 0;
+            memset(echo_hdr.auth, '\0', sizeof(u_num32) * ICP_AUTH_SIZE);
+            echo_hdr.shostid = our_socket_name.sin_addr.s_addr;
+
+            sep = getservbyname("echo", "udp");
+            /* s_port is in network byte order.  echo_port is kept in
+             * host order so that it compares correctly with
+             * e->udp_port and matches the literal fallback of 7. */
+            echo_port = sep ? ntohs(sep->s_port) : 7;
+        }
+    }
+}
+
+/* Allocate the (single) neighbors table, remember it in 'friends' and
+ * return it. */
+neighbors *neighbors_create()
+{
+    neighbors *f;
+
+    f = (neighbors *) xcalloc(1, sizeof(neighbors));
+    f->n = 0;
+    f->n_parent = 0;
+    f->n_neighbor = 0;
+    f->edges_head = (edge *) NULL;
+    f->edges_tail = (edge *) NULL;
+    f->first_ping = (edge *) NULL;
+    return (friends = f);
+}
+
+
+/* Send an ICP query (or a DECHO for edges listening on the echo port)
+ * for proto->url to every edge that would be pinged for proto->host,
+ * and optionally an SECHO to the origin host.  Returns the number of
+ * pings sent to edges that are still considered alive. */
+int neighborsUdpPing(proto)
+     protodispatch_data *proto;
+{
+    char *t = NULL;
+    char *host = proto->host;
+    char *url = proto->url;
+    StoreEntry *entry = proto->entry;
+    struct hostent *hep = NULL;
+    struct sockaddr_in to_addr;
+    edge *e = NULL;
+    int i;
+
+    store_mem_obj(entry, e_pings_n_pings) = 0;
+    store_mem_obj(entry, e_pings_n_acks) = 0;
+    store_mem_obj(entry, e_pings_first_miss) = NULL;
+
+    if (friends->edges_head == (edge *) NULL)
+        return 0;
+
+    /* visit every edge once, starting at first_ping and wrapping around */
+    for (i = 0, e = friends->first_ping; i++ < friends->n; e = e->next) {
+        if (e == (edge *) NULL)
+            e = friends->edges_head;
+        debug(5, "neighborsUdpPing: Edge %s\n", e->host);
+
+        /* Don't resolve refreshes through neighbors because we don't resolve
+         * misses through neighbors */
+        if ((e->type == is_a_neighbor) && (entry->flag & REFRESH_REQUEST))
+            continue;
+
+        /* skip dumb caches where we failed to connect() w/in the last 60s */
+        if (e->udp_port == echo_port &&
+            (cached_curtime - e->last_fail_time < 60))
+            continue;
+
+        if (!edgeWouldBePinged(e, host))
+            continue;           /* next edge */
+
+        debug(4, "neighborsUdpPing: pinging cache %s for \n",
+            e->host, url);
+
+        e->header.reqnum++;
+
+        if (e->udp_port == echo_port) {
+            debug(4, "neighborsUdpPing: Looks like a dumb cache, send DECHO ping\n");
+            icpUdpSend(friends->fd, url, &echo_hdr, &e->in_addr, ICP_OP_DECHO);
+        } else {
+            icpUdpSend(friends->fd, url, &e->header, &e->in_addr, ICP_OP_QUERY);
+        }
+
+        e->ack_deficit++;
+        e->num_pings++;
+        e->pings_sent++;
+
+        if (e->ack_deficit < HIER_MAX_DEFICIT) {
+            /* consider it's alive. count it */
+            e->neighbor_up = 1;
+            store_mem_obj(entry, e_pings_n_pings)++;
+        } else {
+            /* consider it's dead. send a ping but don't count it. */
+            e->neighbor_up = 0;
+            if (e->ack_deficit > (HIER_MAX_DEFICIT << 1))
+                /* do this to prevent wrap around but we still want it
+                 * to move a bit so we can debug it easier. */
+                e->ack_deficit = HIER_MAX_DEFICIT + 1;
+            debug(6, "cache %s is considered dead but send PING anyway, hope it comes up soon.\n",
+                inet_ntoa(e->in_addr.sin_addr));
+            /* log it once at the threshold */
+            if ((e->ack_deficit == HIER_MAX_DEFICIT)) {
+                if (e->type == is_a_neighbor) {
+                    hierarchy_log_append("Detect: ",
+                        HIER_DEAD_NEIGHBOR, 0,
+                        e->host);
+                } else {
+                    hierarchy_log_append("Detect: ",
+                        HIER_DEAD_PARENT, 0,
+                        e->host);
+                }
+            }
+        }
+        friends->first_ping = e->next;
+    }
+
+    /* only do source_ping if we have neighbors */
+    if (echo_hdr.opcode) {
+        if (proto->source_ping && (hep = ipcache_gethostbyname(host))) {
+            debug(6, "neighborsUdpPing: Send to original host\n");
+            debug(6, "neighborsUdpPing: url=%s, host=%s, t=%d\n",
+                url, host, t);
+            /* clear the whole address, including its padding */
+            memset(&to_addr, '\0', sizeof(to_addr));
+            to_addr.sin_family = AF_INET;
+            memcpy(&to_addr.sin_addr, hep->h_addr, hep->h_length);
+            /* echo_port is host order; sin_port must be network order */
+            to_addr.sin_port = htons(echo_port);
+            echo_hdr.reqnum = cached_curtime;
+            debug(6, "neighborsUdpPing - url: %s to url-host %s \n",
+                url, inet_ntoa(to_addr.sin_addr));
+            /* send to original site */
+            icpUdpSend(friends->fd, url, &echo_hdr, &to_addr, ICP_OP_SECHO);
+        } else {
+            debug(6, "neighborsUdpPing: Source Ping is disabled.\n");
+        }
+    }
+    return (store_mem_obj(entry, e_pings_n_pings));
+}
+
+
+/* I should attach these records to the entry. We take the first
+ * hit we get our wait until everyone misses. The timeout handler
+ * call needs to nip this shopping list or call one of the misses.
+ *
+ * If a hit process is already started, then sobeit
+ */
+void neighborsUdpAck(fd, url, header, from, entry)
+     int fd;
+     char *url;
+     icp_common_t *header;
+     struct sockaddr_in *from;
+     StoreEntry *entry;
+{
+    edge *e = NULL;
+
+    debug(6, "neighborsUdpAck: url=%s (%d chars), header=0x%x, from=0x%x, ent=0x%x\n",
+        url, strlen(url), header, from, entry);
+    debug(6, " hdr: opcode=%d, ver=%d, shostid=%x, len=%d, rn=0x%x\n",
+        header->opcode, header->version, header->shostid,
+        header->length, header->reqnum);
+    debug(6, " from: fam=%d, port=%d, addr=0x%x\n",
+        from->sin_family, from->sin_port, from->sin_addr.s_addr);
+
+    /* look up for neighbor/parent entry */
+    e = whichEdge(header, from);
+
+    if (e) {
+        /* reset the deficit. It's alive now. */
+        /* Don't care about exact count. */
+        if ((e->ack_deficit >= HIER_MAX_DEFICIT)) {
+            if (e->type == is_a_neighbor) {
+                hierarchy_log_append("Detect: ",
+                    HIER_REVIVE_NEIGHBOR, 0, e->host);
+            } else {
+                hierarchy_log_append("Detect: ",
+                    HIER_REVIVE_PARENT, 0, e->host);
+            }
+        }
+        e->ack_deficit = 0;
+        e->neighbor_up = 1;
+        e->pings_acked++;
+    }
+    /* check if someone is already fetching it */
+    if (BIT_TEST(entry->flag, REQ_DISPATCHED) || (entry->ping_status != WAITING)) {
+        if (entry->ping_status == DONE) {
+            debug(5, "There is already a cache/source dispatched for this object\n");
+            debug(5, "--> \n", entry->url);
+            debug(5, "--> entry->flag & REQ_DISPATCHED = %d\n",
+                BIT_TEST(entry->flag, REQ_DISPATCHED));
+            debug(5, "--> entry->ping_status = %d\n", entry->ping_status);
+        } else {
+            debug(5, "The ping already timed out.\n");
+            debug(5, "--> \n", entry->url);
+            debug(5, "--> entry->flag & REQ_DISPATCHED = %lx\n",
+                BIT_TEST(entry->flag, REQ_DISPATCHED));
+            debug(5, "--> entry->ping_status = %d\n", entry->ping_status);
+        }
+        return;
+    }
+    debug(6, "neighborsUdpAck - url: %s to us %s \n",
+        url, e ? inet_ntoa(e->in_addr.sin_addr) : "url-host");
+
+    if (header->opcode == ICP_OP_SECHO) {
+        /* receive ping back from source or from non-cached cache */
+        if (e) {
+            debug(6, "Got SECHO from non-cached cache:%s\n",
+                inet_ntoa(e->in_addr.sin_addr));
+            debug(6, "This is not supposed to happen. Ignored.\n");
+        } else {
+            /* if we reach here, source is the one has the fastest respond. */
+            /* fetch directly from source */
+            debug(6, "Source is the first to respond.\n");
+            hierarchy_log_append(entry->url,
+                HIER_SOURCE_FASTEST,
+                0,
+                inet_ntoa(from->sin_addr));
+            BIT_SET(entry->flag, REQ_DISPATCHED);
+            entry->ping_status = DONE;
+            getFromOrgSource(0, entry);
+        }
+        return;
+    }
+    if (header->opcode == ICP_OP_HIT) {
+        /* If an edge is not found, count it as a MISS message. */
+        if (!e) {
+            /* count it as a MISS message */
+            store_mem_obj(entry, e_pings_n_acks)++;
+            return;
+        }
+        /* GOT a HIT here */
+        debug(6, "HIT: Getting %s from host: %s\n", entry->url, e->host);
+        if (e->type == is_a_neighbor) {
+            hierarchy_log_append(entry->url, HIER_NEIGHBOR_HIT, 0, e->host);
+        } else {
+            hierarchy_log_append(entry->url, HIER_PARENT_HIT, 0, e->host);
+        }
+        BIT_SET(entry->flag, REQ_DISPATCHED);
+        entry->ping_status = DONE;
+        getFromCache(0, entry, e);
+        e->hits++;
+        return;
+    } else if ((header->opcode == ICP_OP_MISS) || (header->opcode == ICP_OP_DECHO)) {
+        /* everytime we get here, count it as a miss */
+        store_mem_obj(entry, e_pings_n_acks)++;
+        if (e)
+            e->misses++;
+
+        if (header->opcode == ICP_OP_DECHO) {
+            /* receive ping back from non-cached cache */
+
+            if (e) {
+                debug(6, "Got DECHO from non-cached cache:%s\n",
+                    inet_ntoa(e->in_addr.sin_addr));
+                debug(6, "Good.");
+
+                if (e->type == is_a_parent) {
+                    if (store_mem_obj(entry, e_pings_first_miss) == NULL) {
+                        debug(6, "OK. We got dumb-cached parent as the first miss here.\n");
+                        store_mem_obj(entry, e_pings_first_miss) = e;
+                    }
+                } else {
+                    debug(6, "Dumb Cached as a neighbor does not make sense.\n");
+                    debug(6, "Count it anyway.\n");
+                }
+
+
+            } else {
+                debug(6, "Got DECHO from non-cached cache: But the host is not in the list.\n");
+                debug(6, "Count it anyway.\n");
+            }
+
+        } else {
+            /* ICP_OP_MISS from a cache */
+            if ((store_mem_obj(entry, e_pings_first_miss) == NULL) && e && e->type == is_a_parent) {
+                store_mem_obj(entry, e_pings_first_miss) = e;
+
+            }
+        }
+
+        if (store_mem_obj(entry, e_pings_n_acks) == store_mem_obj(entry, e_pings_n_pings)) {
+            BIT_SET(entry->flag, REQ_DISPATCHED);
+            entry->ping_status = DONE;
+            debug(6, "Receive MISSes from all neighbors and parents\n");
+            /* pass in fd=0 here so getFromCache() looks up the real FD
+             * and resets the timeout handler */
+            getFromDefaultSource(0, entry);
+            return;
+        }
+    } else {
+        debug(0, "neighborsUdpAck: WHY ARE WE HERE? header->opcode = %d\n",
+            header->opcode);
+    }
+}
+
+/* Record one cache_host configuration line.  The entry is appended to
+ * the pending Neighbor_cf list, which neighbors_init() later turns into
+ * edges.  'host' and 'type' are copied. */
+void neighbors_cf_add(host, type, ascii_port, udp_port, proxy_only)
+     char *host;
+     char *type;
+     int ascii_port;
+     int udp_port;
+     int proxy_only;
+{
+    struct neighbor_cf *t, *u;
+
+    t = (struct neighbor_cf *) xcalloc(sizeof(struct neighbor_cf), 1);
+    t->host = xstrdup(host);
+    t->type = xstrdup(type);
+    t->ascii_port = ascii_port;
+    t->udp_port = udp_port;
+    t->proxy_only = proxy_only;
+    t->next = (struct neighbor_cf *) NULL;
+
+    if (Neighbor_cf == (struct neighbor_cf *) NULL) {
+        Neighbor_cf = t;
+    } else {
+        for (u = Neighbor_cf; u->next; u = u->next);
+        u->next = t;
+    }
+}
+
+/* Attach a domain restriction to the pending entry for 'host'.  A
+ * leading '!' in 'domain' marks the domain as one that must not be
+ * pinged.  Returns 1 when the host was found, 0 otherwise. */
+int neighbors_cf_domain(host, domain)
+     char *host;
+     char *domain;
+{
+    struct neighbor_cf *t;
+    dom_list *l;
+    dom_list **L;
+
+    for (t = Neighbor_cf; t; t = t->next) {
+        if (strcmp(t->host, host) == 0)
+            break;
+    }
+
+    if (t == NULL)
+        return 0;
+
+    l = (dom_list *) xmalloc(sizeof(dom_list));
+    l->do_ping = 1;
+    if (*domain == '!') {       /* check for !.edu */
+        l->do_ping = 0;
+        domain++;
+    }
+    l->domain = xstrdup(domain);
+    l->next = NULL;
+    /* append at the tail so that domains keep their configured order */
+    for (L = &(t->domains); *L; L = &((*L)->next));
+    *L = l;
+
+    return 1;
+}
+
+/* Turn every pending Neighbor_cf entry into an installed edge, except
+ * entries that name this cache itself (same host name and ASCII port).
+ * The pending list is consumed: all nodes are freed and the list head
+ * is reset, so later neighbors_cf_add()/neighbors_cf_domain() calls
+ * never walk freed memory. */
+void neighbors_init()
+{
+    struct neighbor_cf *t, *next;
+    dom_list *d, *dnext;
+
+    for (t = Neighbor_cf; t; t = next) {
+        next = t->next;
+        if (strncmp(t->host, comm_hostname(), HARVESTHOSTNAMELEN) ||
+            t->ascii_port != getAsciiPortNum()) {
+            /* the edge takes over ownership of t->domains */
+            neighbors_install(t->host, t->type,
+                t->ascii_port, t->udp_port, t->proxy_only,
+                t->domains);
+        } else {
+            debug(0, "neighbors_init: skipping cache_host %s %s %d %d\n",
+                t->type, t->host, t->ascii_port, t->udp_port);
+            debug(0, "neighbors_init: because it seems to be identical to this cached\n");
+            /* nobody else refers to the skipped entry's domain list */
+            for (d = t->domains; d; d = dnext) {
+                dnext = d->next;
+                xfree(d->domain);
+                xfree(d);
+            }
+        }
+        xfree(t->host);
+        xfree(t->type);
+        xfree(t);
+    }
+    Neighbor_cf = NULL;
+}
+
+/* Rotate the hierarchy log: shift fname.N files up by one, move the
+ * current log to fname.0 (when rotation is enabled), then close and
+ * reopen the log.  On a failed reopen cache_hierarchy_log is left NULL,
+ * which disables hierarchy logging. */
+void neighbors_rotate_log()
+{
+    int i;
+    static char from[MAXPATHLEN];
+    static char to[MAXPATHLEN];
+    char *fname = NULL;
+    int log_fd;
+
+    if ((fname = getHierarchyLogFile()) == NULL)
+        return;
+
+    debug(1, "neighbors_rotate_log: Rotating.\n");
+
+    /* Rotate numbers 0 through N up one */
+    for (i = getLogfileRotateNumber(); i > 1;) {
+        i--;
+        sprintf(from, "%s.%d", fname, i - 1);
+        sprintf(to, "%s.%d", fname, i);
+        rename(from, to);
+    }
+    /* Rotate the current log to .0 */
+    if (getLogfileRotateNumber() > 0) {
+        sprintf(to, "%s.%d", fname, 0);
+        rename(fname, to);
+    }
+    /* Close and reopen the log. It may have been renamed "manually"
+     * before HUP'ing us. */
+    /* The log may never have been opened, and must not stay pointing at
+     * a closed stream if the reopen below fails. */
+    if (cache_hierarchy_log) {
+        fclose(cache_hierarchy_log);
+        cache_hierarchy_log = NULL;
+    }
+    log_fd = file_open(fname, NULL, O_WRONLY | O_CREAT | O_APPEND);
+    if (log_fd < 0) {
+        debug(0, "rotate_logs: %s: %s\n", fname, xstrerror());
+        debug(1, "Hierachical logging is disabled.\n");
+    } else if ((cache_hierarchy_log = fdopen(log_fd, "a")) == NULL) {
+        debug(0, "rotate_logs: %s: %s\n",
+            fname, xstrerror());
+        debug(1, "Hierachical logging is disabled.\n");
+    }
+}
diff --git a/src/recv-announce.cc b/src/recv-announce.cc
new file mode 100644
index 00000000000..bb0474ead29
--- /dev/null
+++ b/src/recv-announce.cc
@@ -0,0 +1,91 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define RECV_BUF_SIZE 8192
+
+/*
+ * This program must be run from inetd. First add something like this
+ * to /etc/services:
+ *
+ * cached_announce 3131/udp # harvest cached announcements
+ *
+ * And then add something like this to /etc/inetd/conf:
+ *
+ * cached_announce dgram udp wait cached /tmp/recv-announce recv-announce /tmp/recv-announce.log
+ *
+ *
+ * A single instance of this process will continue to handle incoming
+ * requests. If it dies, or is killed, inetd should restart it when the
+ * next message arrives.
+ *
+ */
+
+/*
+ * usage: recv-announce logfile
+ */
+
+/* Signal handler: flush the log and exit cleanly. */
+void sig_handle()
+{
+    fflush(stdout);
+    close(2);
+    close(1);
+    close(0);
+    exit(0);
+}
+
+
+/* Receive announcement datagrams on descriptor 0 (supplied by inetd) and
+ * append each one, with the sender's address and name, to the log file
+ * given as argv[1] (default /tmp/recv-announce.log). */
+int main(argc, argv)
+     int argc;
+     char *argv[];
+{
+    char buf[RECV_BUF_SIZE];
+    struct sockaddr_in R;
+    int len;
+    struct hostent *hp = NULL;
+    char logfile[BUFSIZ];
+    char ip[4];
+
+    /* signal numbers start at 1; 0 is not a valid signal */
+    for (len = 1; len < 32; len++) {
+        signal(len, sig_handle);
+    }
+
+
+    if (argc > 1) {
+        /* refuse a name that does not fit rather than overflow or
+         * silently log to a truncated path */
+        if (strlen(argv[1]) >= sizeof(logfile)) {
+            fprintf(stderr, "%s: log file name too long\n", argv[1]);
+            exit(1);
+        }
+        strcpy(logfile, argv[1]);
+    } else
+        strcpy(logfile, "/tmp/recv-announce.log");
+
+    /* reopen stdout on the log file, then make stderr a copy of it */
+    close(1);
+    if (open(logfile, O_WRONLY | O_CREAT | O_APPEND, 0660) < 0) {
+        perror(logfile);
+        exit(1);
+    }
+    close(2);
+    dup(1);
+
+
+    while (1) {
+        memset(buf, '\0', RECV_BUF_SIZE);
+        memset(&R, '\0', len = sizeof(R));
+
+        /* leave the last byte untouched so that buf is always a
+         * NUL-terminated string for fputs() below */
+        if (recvfrom(0, buf, RECV_BUF_SIZE - 1, 0, (struct sockaddr *) &R, &len) < 0) {
+            perror("recv");
+            exit(2);
+        }
+        memcpy(ip, &R.sin_addr.s_addr, 4);
+        hp = gethostbyaddr(ip, 4, AF_INET);
+        printf("==============================================================================\n");
+        printf("Received from %s [%s]\n",
+            inet_ntoa(R.sin_addr),
+            (hp && hp->h_name) ? hp->h_name : "Unknown");
+        fputs(buf, stdout);
+        fflush(stdout);
+    }
+}
diff --git a/src/send-announce.cc b/src/send-announce.cc
new file mode 100644
index 00000000000..64dc50ece3d
--- /dev/null
+++ b/src/send-announce.cc
@@ -0,0 +1,182 @@
+#include "config.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "util.h"
+
+#define DATABUF_SIZE 8192       /* size of the announcement buffer */
+
+char *databuf = NULL;
+int quiet = 0;
+int debug = 0;
+char *announce_to_host = "sd.cache.nlanr.net";
+int announce_to_port = 3131;
+
+int http_port = CACHE_HTTP_PORT;
+int icp_port = CACHE_ICP_PORT;
+
+
+/* Append 's' to the announcement buffer.  Text that does not fit is
+ * dropped instead of being written past the end of the buffer. */
+static void databuf_append(s)
+     char *s;
+{
+    size_t used;
+
+    if (databuf == NULL || s == NULL)
+        return;
+    used = strlen(databuf);
+    if (used >= DATABUF_SIZE - 1)
+        return;
+    strncat(databuf, s, DATABUF_SIZE - 1 - used);
+}
+
+/* Read the cache configuration file 'fname'.
+ *   cache_announce  - the program exits quietly unless the value is "on"
+ *   announce_to     - host[:port] the announcement is sent to
+ *   announce_*      - any other such line is copied into the announcement
+ *   ascii_port, udp_port - ports reported in the announcement
+ * Returns 1 on success and 0 when the file cannot be opened. */
+int read_config(fname)
+     char *fname;
+{
+    FILE *fp = NULL;
+    char buf[BUFSIZ];
+    char munge[BUFSIZ];
+    char *t = NULL;
+    char *tag = NULL;
+    char *w_space = " \t\n";
+
+    if ((fp = fopen(fname, "r")) == (FILE *) NULL)
+        return 0;
+
+    while (fgets(buf, BUFSIZ, fp)) {
+        /* strip comments */
+        if ((t = strchr(buf, '#')))
+            *t = '\0';
+        if (buf[0] == '\0')
+            continue;
+        /* strtok() modifies its input; tokenize a copy so that 'buf'
+         * can still be appended verbatim */
+        strcpy(munge, buf);
+        if ((tag = strtok(munge, w_space)) == NULL)
+            continue;
+        if (!strcasecmp(tag, "cache_announce")) {
+            if ((t = strtok(NULL, w_space)) == NULL)
+                exit(0);
+            if (strcasecmp(t, "on"))
+                exit(0);
+        } else if (!strcasecmp(tag, "announce_to")) {
+            if ((t = strtok(NULL, w_space)) == NULL)
+                continue;
+            announce_to_host = xstrdup(t);
+            if ((t = strchr(announce_to_host, ':'))) {
+                announce_to_port = atoi(t + 1);
+                *t = '\0';
+            }
+        } else if (!strncasecmp(tag, "announce_", 9)) {
+            databuf_append(buf);
+        } else if (!strcasecmp(tag, "ascii_port")) {
+            if ((t = strtok(NULL, w_space)))
+                http_port = atoi(t);
+        } else if (!strcasecmp(tag, "udp_port")) {
+            if ((t = strtok(NULL, w_space)))
+                icp_port = atoi(t);
+        }
+    }
+    fclose(fp);
+    return 1;
+}
+
+
+/* Complete the announcement text and send it as one UDP datagram to
+ * 'host':'port'.  In debug mode the text is printed instead of sent.
+ * Returns 1 when the datagram was sent, 0 otherwise. */
+int send_packet(host, port)
+     char *host;
+     int port;
+{
+    char buf[512];
+    time_t t;
+    int s;
+    struct sockaddr_in R;
+    struct sockaddr_in L;
+    struct hostent *hp = NULL;
+
+    sprintf(buf, "cache_version HARVEST/%s\n", HARVEST_VERSION);
+    databuf_append(buf);
+    /* the precision keeps an over-long host name from overflowing buf */
+    sprintf(buf, "Running on %.255s %d %d\n",
+        getfullhostname(),
+        http_port,
+        icp_port);
+    databuf_append(buf);
+    t = time(NULL);
+    sprintf(buf, "generated %d [%.255s]\n",
+        (int) t, mkhttpdlogtime(&t));
+    databuf_append(buf);
+
+    if ((hp = gethostbyname(host)) == NULL) {
+        if (!quiet)
+            fprintf(stderr, "%s: Unknown host\n", host);
+        return 0;
+    }
+    memset(&L, '\0', sizeof(L));
+    L.sin_family = AF_INET;
+    L.sin_port = 0;
+    L.sin_addr.s_addr = INADDR_ANY;
+
+    if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
+        if (!quiet)
+            perror("socket");
+        return 0;
+    }
+    if (bind(s, (struct sockaddr *) &L, sizeof(L)) < 0) {
+        if (!quiet)
+            perror("bind");
+        close(s);               /* do not leak the socket on failure */
+        return 0;
+    }
+    memset(&R, '\0', sizeof(R));
+    R.sin_family = AF_INET;
+    R.sin_port = htons(port);
+    memcpy(&R.sin_addr, hp->h_addr_list[0], 4);
+
+    if (debug) {
+        close(s);
+        printf("This would be sent to %s [%s] port %d\n",
+            host, inet_ntoa(R.sin_addr), port);
+        puts(databuf);
+        return 0;
+    }
+    if (sendto(s, databuf, strlen(databuf), 0, (struct sockaddr *) &R, sizeof(R)) < 0) {
+        if (!quiet)
+            perror(host);
+        close(s);               /* do not leak the socket on failure */
+        return 0;
+    }
+    close(s);
+    return 1;
+}
+
+
+/* usage: send-announce -d -q -h [cached.conf]
+ * The configuration file is argv[1], else $HARVEST_HOME/lib/cached.conf,
+ * else /usr/local/harvest/lib/cached.conf. */
+int main(argc, argv)
+     int argc;
+     char *argv[];
+
+{
+    char config[256];
+    char *s = NULL;
+    int c;
+    extern int optind;
+    while ((c = getopt(argc, argv, "dqh")) != -1) {
+        switch (c) {
+        case 'd':
+            debug = 1;
+            break;
+        case 'q':
+            quiet = 1;
+            break;
+        case 'h':
+            fprintf(stderr, "usage: %s -d -q -h [cached.conf]\n",
+                argv[0]);
+            exit(0);
+            break;
+        }
+    }
+    argv += (optind - 1);
+    argc -= (optind - 1);
+
+    /* every path is checked against the size of 'config' before copying */
+    if (argc > 1) {
+        if (strlen(argv[1]) >= sizeof(config)) {
+            if (!quiet)
+                fprintf(stderr, "%s: config file name too long\n", argv[1]);
+            exit(1);
+        }
+        strcpy(config, argv[1]);
+    } else if ((s = getenv("HARVEST_HOME"))) {
+        if (strlen(s) + strlen("/lib/cached.conf") >= sizeof(config)) {
+            if (!quiet)
+                fprintf(stderr, "%s: HARVEST_HOME is too long\n", s);
+            exit(1);
+        }
+        sprintf(config, "%s/lib/cached.conf", s);
+    } else {
+        strcpy(config, "/usr/local/harvest/lib/cached.conf");
+    }
+
+    databuf = (char *) xcalloc(DATABUF_SIZE, 1);
+    if (!read_config(config)) {
+        if (!quiet)
+            perror(config);
+        exit(1);
+    }
+    send_packet(announce_to_host, announce_to_port);
+    return 0;
+}
diff --git a/src/stat.cc b/src/stat.cc
new file mode 100644
index 00000000000..1381f2eb4ae
--- /dev/null
+++ b/src/stat.cc
@@ -0,0
+1,1343 @@ +static char rcsid[] = "$Id: stat.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: stat.c + * Description: stat module for object cache + * Author: Anawat Chankhunthod, USC + * Created: May 12 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. 
Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include +#include /* for lseek. should be removed later. 
*/
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _HARVEST_HPUX_
+#include
+#define getrusage(a, b) syscall(SYS_GETRUSAGE, a, b)
+#define getpagesize( ) sysconf(_SC_PAGE_SIZE)
+#endif /* _HARVEST_HPUX_ */
+
+#include "comm.h"
+#include "store.h"
+#include "stat.h"
+#include "disk.h"
+#include "ipcache.h"
+#include "cache_cf.h"
+#include "hash.h"
+#include "util.h"
+#include "neighbors.h"
+#include "fdstat.h"
+
+extern int emulate_httpd_log;
+
+#define MIN_BUFSIZE (4096)
+#define MAX_LINELEN (4096)
+#define max(a,b) ((a)>(b)? (a): (b))
+
+typedef struct _log_read_data_t {
+    StoreEntry *sentry;
+} log_read_data_t;
+
+typedef struct _cached_read_data_t {
+    StoreEntry *sentry;
+    int fd;
+} cached_read_data_t;
+
+/* GLOBALS */
+Meta_data meta_data;
+unsigned long nconn = 0;
+
+extern int getMaxFD(), fdstat_biggest_fd();
+extern int RESERVED_FD;
+extern char *mkrfc850();
+extern time_t cached_curtime;
+extern time_t cached_starttime;
+extern time_t next_cleaning;
+extern int httpd_accel_mode;
+extern int debug_level;
+extern int hash_links_allocated;
+extern void stat_ipcache_get _PARAMS((StoreEntry * e, cacheinfo * obj));
+extern int fdstat_are_n_free_fd _PARAMS((int));
+extern int comm_get_fd_timeout _PARAMS((int));
+extern int file_write_lock _PARAMS((int));
+extern void fatal _PARAMS((char *));
+char *stat_describe();
+char *mem_describe();
+char *ttl_describe();
+char *flags_describe();
+char *elapsed_time();
+char *diskFileName();
+
+/* LOCALS */
+static char *open_bracket = "{\n";
+static char *close_bracket = "}\n";
+
+/* Report per-protocol utilization.  Slot 0 of obj->proto_stat_data is
+ * rebuilt as the sum of slots 1..PROTOCOL_SUPPORTED, then one "{...}"
+ * line per slot is appended to 'sentry', wrapped in open/close brackets. */
+void stat_utilization_get(obj, sentry)
+     cacheinfo *obj;
+     StoreEntry *sentry;
+{
+    static char line[MAX_LINELEN];
+    proto_stat *total = &obj->proto_stat_data[0];
+    proto_stat *src = NULL;
+    proto_stat *cur = NULL;
+    int uptime = (int) (cached_curtime - cached_starttime);
+    int idx;
+
+    storeAppend(sentry, open_bracket, (int) strlen(open_bracket));
+
+    /* reset the totals slot before summing */
+    total->object_count = 0;
+    total->kb.max = 0;
+    total->kb.min = 0;
+    total->kb.avg = 0;
+    total->kb.now = 0;
+    total->hit = 0;
+    total->miss = 0;
+    total->refcount = 0;
+    total->transferbyte = 0;
+
+    /* accumulate every supported protocol into the totals slot */
+    idx = 1;
+    while (idx <= PROTOCOL_SUPPORTED) {
+        src = &obj->proto_stat_data[idx];
+
+        total->object_count += src->object_count;
+        total->kb.max += src->kb.max;
+        total->kb.min += src->kb.min;
+        total->kb.avg += src->kb.avg;
+        total->kb.now += src->kb.now;
+        total->hit += src->hit;
+        total->miss += src->miss;
+        total->refcount += src->refcount;
+        total->transferbyte += src->transferbyte;
+        ++idx;
+    }
+
+    /* emit one line per slot, totals first */
+    for (idx = 0; idx < PROTOCOL_SUPPORTED + PROTOCOL_EXTRA; ++idx) {
+        cur = &obj->proto_stat_data[idx];
+        /* the ratio is only recomputed once there has been a hit */
+        if (cur->hit != 0) {
+            cur->hitratio =
+                (float) cur->hit /
+                ((float) cur->hit +
+                (float) cur->miss);
+        }
+        sprintf(line, "{%s %d %d %d %d %4.2f %d %d %d}\n",
+            cur->protoname,
+            cur->object_count,
+            cur->kb.max,
+            cur->kb.now,
+            cur->kb.min,
+            cur->hitratio,
+            (uptime ? cur->transferbyte / uptime : 0),
+            cur->refcount,
+            cur->transferbyte);
+        storeAppend(sentry, line, strlen(line));
+    }
+
+    storeAppend(sentry, close_bracket, strlen(close_bracket));
+}
+
+
+/* return total bytes of all registered and known objects.
+ * may not reflect the retrieving object....
+ * something need to be done here to get more accurate cache size */ +int cache_size_get(obj) + cacheinfo *obj; +{ + int size = 0; + int proto_id; + /* sum all size, exclude total */ + for (proto_id = 1; proto_id <= PROTOCOL_SUPPORTED + PROTOCOL_EXTRA - 1; + ++proto_id) { + size += obj->proto_stat_data[proto_id].kb.now; + } + return (size); +} + +/* process general IP cache information */ +void stat_general_get(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + + /* have to use old method for this guy, + * otherwise we have to make ipcache know about StoreEntry */ + stat_ipcache_get(sentry, obj); +} + + +/* process objects list */ +void stat_objects_get(obj, sentry, vm_or_not) + cacheinfo *obj; + StoreEntry *sentry; + int vm_or_not; +{ + static char tempbuf[MAX_LINELEN]; + static char space[40], space2[40]; + int npend = 0; + StoreEntry *entry; + int N = 0; + int obj_size; + + storeAppend(sentry, open_bracket, (int) strlen(open_bracket)); + + for (entry = storeGetFirst(); + entry != NULL; + entry = storeGetNext()) { + if (vm_or_not && (entry->mem_status == NOT_IN_MEMORY) && + (entry->swap_status == SWAP_OK)) + continue; + if ((++N & 0xFF) == 0) { + cached_curtime = time(NULL); + debug(3, "stat_objects_get: Processed %d objects...\n", N); + } + obj_size = entry->object_len; + npend = storePendingNClients(entry); + if (entry->mem_obj) + obj_size = store_mem_obj(entry, e_current_len); + tempbuf[0] = '\0'; + sprintf(tempbuf, "{ %s %d %s %s %s %s %d %ld %s %s }\n", + entry->url, + obj_size, + elapsed_time(entry, (int) entry->timestamp, space), + flags_describe(entry), + elapsed_time(entry, (int) entry->lastref, space2), + ttl_describe(entry, (int) entry->expires), + npend, + entry->refcount, + mem_describe(entry), + stat_describe(entry)); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + } + storeAppend(sentry, close_bracket, strlen(close_bracket)); +} + + +/* process a requested object into a manager format */ +void stat_get(obj, req, sentry) + cacheinfo *obj; 
+ char *req; + StoreEntry *sentry; +{ + + if (strncmp(req, "objects", strlen("objects")) == 0) { + stat_objects_get(obj, sentry, 0); + } else if (strncmp(req, "vm_objects", strlen("vm_objects")) == 0) { + stat_objects_get(obj, sentry, 1); + } else if (strncmp(req, "general", strlen("general")) == 0) { + stat_general_get(obj, sentry); + } else if (strncmp(req, "utilization", strlen("utilization")) == 0) { + stat_utilization_get(obj, sentry); + } +} + + +/* generate logfile status information */ +void log_status_get(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + static char tempbuf[MAX_LINELEN]; + + if (obj->logfile_status == LOG_ENABLE) { + sprintf(tempbuf, "{\"Logfile is Enabled. Filename: %s\"}\n", + obj->logfilename); + } else { + sprintf(tempbuf, "{\"Logfile is Disabled.\"}\n"); + } + storeAppend(sentry, tempbuf, strlen(tempbuf)); +} + + + +/* log convert handler */ +/* call for each line in file, use fileWalk routine */ +int logReadHandler(fd_unused, buf, size_unused, data) + int fd_unused; + char *buf; + int size_unused; + log_read_data_t *data; +{ + static char tempbuf[MAX_LINELEN]; + + sprintf(tempbuf, "{%s}\n", buf); + return storeAppend(data->sentry, + tempbuf, + (int) strlen(tempbuf) % MAX_LINELEN); +} + +/* log convert end handler */ +/* call when a walk is completed or error. 
*/ +void logReadEndHandler(fd_unused, errflag_unused, data) + int fd_unused; + int errflag_unused; + log_read_data_t *data; +{ + storeAppend(data->sentry, close_bracket, strlen(close_bracket)); + storeComplete(data->sentry); + safe_free(data); +} + + + +/* start converting logfile to processed format */ +void log_get_start(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + char tmp[3]; + log_read_data_t *data = NULL; + + if (obj->logfile_status == LOG_DISABLE) { + /* Manufacture status when logging is disabled */ + log_status_get(obj, sentry); + storeComplete(sentry); + return; + } + data = (log_read_data_t *) xmalloc(sizeof(log_read_data_t)); + memset(data, '\0', sizeof(log_read_data_t)); + data->sentry = sentry; + strcpy(tmp, open_bracket); + storeAppend(sentry, tmp, 2); + file_walk(obj->logfile_fd, (FILE_WALK_HD) logReadEndHandler, + (caddr_t) data, (FILE_WALK_LHD) logReadHandler, (caddr_t) data); + return; +} + + +/* cached convert handler */ +/* call for each line in file, use fileWalk routine */ +int cachedReadHandler(fd_unused, buf, size_unused, data) + int fd_unused; + char *buf; + int size_unused; + cached_read_data_t *data; +{ + static char tempbuf[MAX_LINELEN]; + tempbuf[0] = '\0'; + sprintf(tempbuf, "{\"%s\"}\n", buf); + return storeAppend(data->sentry, + tempbuf, + (int) strlen(tempbuf) % MAX_LINELEN); +} + +/* cached convert end handler */ +/* call when a walk is completed or error. 
*/ +void cachedReadEndHandler(fd_unused, errflag_unused, data) + int fd_unused; + int errflag_unused; + cached_read_data_t *data; +{ + storeAppend(data->sentry, close_bracket, strlen(close_bracket)); + storeComplete(data->sentry); + file_close(data->fd); + safe_free(data); +} + + +/* start convert cached.conf file to processed format */ +void cached_get_start(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + cached_read_data_t *data; + extern char *config_file; + + data = (cached_read_data_t *) xmalloc(sizeof(cached_read_data_t)); + memset(data, '\0', sizeof(cached_read_data_t)); + data->sentry = sentry; + data->fd = file_open((char *) config_file, NULL, O_RDONLY); + storeAppend(sentry, open_bracket, (int) strlen(open_bracket)); + file_walk(data->fd, (FILE_WALK_HD) cachedReadEndHandler, (caddr_t) data, + (FILE_WALK_LHD) cachedReadHandler, (caddr_t) data); +} + + +void dummyhandler(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + static char *msg = "{ \"Not_Implemented_yet.\"}\n"; + storeAppend(sentry, msg, strlen(msg)); +} + +void server_list(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + static char tempbuf[MAX_LINELEN]; + edge *e = NULL; + dom_list *d = NULL; + + storeAppend(sentry, open_bracket, (int) strlen(open_bracket)); + + if (getFirstEdge() == (edge *) NULL) { + sprintf(tempbuf, "{There are no neighbors installed.}\n"); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + } + for (e = getFirstEdge(); e; e = getNextEdge(e)) { + if (e->host == NULL) + fatal_dump("Found an edge without a hostname!\n"); + sprintf(tempbuf, "\n{Hostname: %s}\n", e->host); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{Edge type: %s}\n", + e->type == is_a_parent ? "parent" : "neighbor"); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{Status: %s}\n", + e->neighbor_up ? 
"Up" : "Down"); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{UDP PORT: %d}\n", e->udp_port); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{ASCII PORT: %d}\n", e->ascii_port); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{ACK DEFICIT: %d}\n", e->ack_deficit); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{PINGS SENT: %d}\n", e->num_pings); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + sprintf(tempbuf, "{PINGS ACKED: %d}\n", e->pings_acked); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + if (e->last_fail_time) { + sprintf(tempbuf, "{Last failed connect() at: %s}\n", + mkhttpdlogtime(&(e->last_fail_time))); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + } + sprintf(tempbuf, "{DOMAIN LIST: "); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + for (d = e->domains; d; d = d->next) { + if (d->do_ping) + sprintf(tempbuf, "%s ", d->domain); + else + sprintf(tempbuf, "!%s ", d->domain); + storeAppend(sentry, tempbuf, strlen(tempbuf)); + } + storeAppend(sentry, close_bracket, strlen(close_bracket)); /* } */ + } + storeAppend(sentry, close_bracket, strlen(close_bracket)); +} + + + +void info_get(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + char *tod = NULL; + static char tempbuf[MAX_LINELEN]; + static char line[MAX_LINELEN]; + +#if defined(HAVE_GETRUSAGE) && defined(RUSAGE_SELF) + struct rusage rusage; +#endif + +#if USE_MALLINFO + struct mallinfo mp; +#endif + + memset(line, '\0', SM_PAGE_SIZE); + + storeAppend(sentry, open_bracket, (int) strlen(open_bracket)); + + sprintf(line, "{Harvest Object Cache: Version %s}\n", HARVEST_VERSION); + storeAppend(sentry, line, strlen(line)); + + tod = mkrfc850(&cached_starttime); + + sprintf(line, "{Start Time:\t%s}\n", tod); + storeAppend(sentry, line, strlen(line)); + + tod = mkrfc850(&cached_curtime); + sprintf(line, "{Current Time:\t%s}\n", tod); + storeAppend(sentry, line, strlen(line)); + + /* 
-------------------------------------------------- */ + + sprintf(line, "{Connection information for cached:}\n"); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tNumber of connections:\t%lu}\n", nconn); + storeAppend(sentry, line, strlen(line)); + + { + float f; + f = cached_curtime - cached_starttime; + sprintf(line, "{\tConnections per hour:\t%.1f}\n", f == 0.0 ? 0.0 : + (nconn / (f / 3600))); + storeAppend(sentry, line, strlen(line)); + } + + /* -------------------------------------------------- */ + + + + sprintf(line, "{Cache information for cached:}\n"); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tStorage Swap size:\t%d MB}\n", storeGetSwapSize() >> 10); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tStorage Mem size:\t%d KB}\n", storeGetMemSize() >> 10); + storeAppend(sentry, line, strlen(line)); + + tod = mkrfc850(&next_cleaning); + sprintf(line, "{\tStorage Expiration at:\t%s}\n", tod); + storeAppend(sentry, line, strlen(line)); + +#if defined(HAVE_GETRUSAGE) && defined(RUSAGE_SELF) + sprintf(line, "{Resource usage for cached:}\n"); + storeAppend(sentry, line, strlen(line)); + + getrusage(RUSAGE_SELF, &rusage); + sprintf(line, "{\tCPU Usage: user %d sys %d}\n{\tProcess Size: rss %d KB}\n", + rusage.ru_utime.tv_sec, rusage.ru_stime.tv_sec, + rusage.ru_maxrss * getpagesize() >> 10); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tPage faults with physical i/o:\t%d}\n", + rusage.ru_majflt); + storeAppend(sentry, line, strlen(line)); + +#endif + +#if USE_MALLINFO + mp = mallinfo(); + + sprintf(line, "{Memory usage for cached via mallinfo():}\n"); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\ttotal space in arena:\t%d KB}\n", mp.arena >> 10); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tnumber of ordinary blocks:\t%d}\n", mp.ordblks); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tnumber of small blocks:\t%d}\n", mp.smblks); + 
storeAppend(sentry, line, strlen(line)); + if (mp.hblks) { + sprintf(line, "{\tnumber of holding blocks:\t%d}\n", mp.hblks); + storeAppend(sentry, line, strlen(line)); + } + if (mp.hblkhd) { + sprintf(line, "{\tspace in holding block headers:\t%d}\n", mp.hblkhd); + storeAppend(sentry, line, strlen(line)); + } + if (mp.usmblks) { + sprintf(line, "{\tspace in small blocks in use:\t%d}\n", mp.usmblks); + storeAppend(sentry, line, strlen(line)); + } + if (mp.fsmblks) { + sprintf(line, "{\tspace in free blocks:\t%d}\n", mp.fsmblks); + storeAppend(sentry, line, strlen(line)); + } + sprintf(line, "{\tspace in ordinary blocks in use:\t%d KB}\n", + mp.uordblks >> 10); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tspace in free ordinary blocks:\t%d KB}\n", mp.fordblks >> 10); + storeAppend(sentry, line, strlen(line)); + if (mp.keepcost) { + sprintf(line, "{\tcost of enabling keep option:\t%d}\n", mp.keepcost); + storeAppend(sentry, line, strlen(line)); + } +#if LNG_MALLINFO + sprintf(line, "{\tmax size of small blocks:\t%d}\n", mp.mxfast); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tnumber of small blocks in a holding block:\t%d}\n", + mp.nlblks); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tsmall block rounding factor:\t%d}\n", mp.grain); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tspace (including overhead) allocated in ord. 
blks:\t%d}\n" + ,mp.uordbytes); + sprintf(line, "{\tnumber of ordinary blocks allocated:\t%d}\n", + mp.allocated); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tbytes used in maintaining the free tree:\t%d}\n", + mp.treeoverhead); + storeAppend(sentry, line, strlen(line)); + +#endif /* LNG_MALLINFO */ + +#endif /* USE_MALLINFO */ + + sprintf(line, "{File descriptor usage for cached:}\n"); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tMax number of file desc available:\t%d}\n", getMaxFD()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tLargest file desc currently in use:\t%d}\n", + fdstat_biggest_fd()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tAvailable number of file descriptors :\t%d}\n", + fdstat_are_n_free_fd(0)); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{\tReserved number of file descriptors :\t%d}\n", + RESERVED_FD); + storeAppend(sentry, line, strlen(line)); + + { + int i, max_fd = getMaxFD(); + char *s = NULL; + + sprintf(line, "{\tActive file descriptors:}\n"); + storeAppend(sentry, line, strlen(line)); + + for (i = 0; i < max_fd; i++) { + int lft, to; + if (!fdstat_isopen(i)) + continue; + line[0] = '\0'; + switch (fdstat_type(i)) { + case Socket: + /* the lifetime should be greater than curtime */ + lft = comm_get_fd_lifetime(i); + to = comm_get_fd_timeout(i); + sprintf(line, "{\t\t(%3d = %3ld, %3ld) NET %s}\n", + i, + lft > 0 ? lft - cached_curtime : -1, + max((to - cached_curtime), 0), + fd_note(i, NULL)); + break; + case File: + sprintf(line, "{\t\t(%3d = FILE) %s}\n", i, + (s = diskFileName(i)) ? 
s : "Unknown"); + break; + case Pipe: + sprintf(line, "{\t\t(%3d = PIPE) %s}\n", i, fd_note(i, NULL)); + break; + case LOG: + sprintf(line, "{\t\t(%3d = LOG) %s}\n", i, fd_note(i, NULL)); + break; + case Unknown: + default: + sprintf(line, "{\t\t(%3d = UNKNOWN) %s}\n", i, fd_note(i, NULL)); + break; + } + storeAppend(sentry, line, strlen(line)); + } + } + + + sprintf(line, "{Stop List:}\n"); + storeAppend(sentry, line, strlen(line)); + if (http_stoplist) { + stoplist *p; + p = http_stoplist; + sprintf(line, "{\tHTTP:}\n"); + storeAppend(sentry, line, strlen(line)); + while (p) { + sprintf(line, "{\t\t%s}\n", p->key); + storeAppend(sentry, line, strlen(line)); + p = p->next; + } + } + if (gopher_stoplist) { + stoplist *p; + p = gopher_stoplist; + sprintf(line, "{\tGOPHER:}\n"); + storeAppend(sentry, line, strlen(line)); + while (p) { + sprintf(line, "{\t\t%s}\n", p->key); + storeAppend(sentry, line, strlen(line)); + p = p->next; + } + } + if (ftp_stoplist) { + stoplist *p; + p = ftp_stoplist; + sprintf(line, "{\tFTP:}\n"); + storeAppend(sentry, line, strlen(line)); + while (p) { + sprintf(line, "{\t\t%s}\n", p->key); + storeAppend(sentry, line, strlen(line)); + p = p->next; + } + } + sprintf(line, "{Internal Data Structures:}\n"); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{Meta Data:}\n"); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tStoreEntry %ld x %d}\n", sizeof(StoreEntry), + meta_data.store_entries); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tStoreMemObject %ld x %d}\n", sizeof(MemObject), + meta_data.store_in_mem_objects); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tIPCacheEntry %ld x %d}\n", sizeof(ipcache_entry), + meta_data.ipcache_count); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tHash link %ld x %d}\n", sizeof(hash_link), + meta_data.hash_links = hash_links_allocated); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tURL strings %d}\n", 
meta_data.url_strings); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tHot Object Cache Items %d}\n", meta_data.hot_vm); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tPool for disk I/O %d KB (Free %d KB)}\n", + (disk_stats.total_pages_allocated * disk_stats.page_size) / (1 << 10), + ((disk_stats.total_pages_allocated - disk_stats.n_pages_in_use) * disk_stats.page_size) / + (1 << 10) + ); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\t\tPool for in-memory objects %d KB (Free %d KB)}\n", + (sm_stats.total_pages_allocated * sm_stats.page_size) / (1 << 10), + ((sm_stats.total_pages_allocated - sm_stats.n_pages_in_use) * sm_stats.page_size) / (1 << 10)); + storeAppend(sentry, line, strlen(line)); + sprintf(line, "{\tTotal Accounted %ld KB}\n", + (meta_data.store_entries * sizeof(StoreEntry) + + meta_data.store_in_mem_objects * sizeof(MemObject) + + meta_data.ipcache_count * sizeof(ipcache_entry) + + meta_data.hash_links * sizeof(hash_link) + + sm_stats.total_pages_allocated * sm_stats.page_size + + disk_stats.total_pages_allocated * disk_stats.page_size + + meta_data.url_strings) >> 10); + storeAppend(sentry, line, strlen(line)); + + storeAppend(sentry, close_bracket, strlen(close_bracket)); +} + + +void parameter_get(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; + +{ + /* be careful if an object is bigger than 4096, + * need more malloc here */ + static char line[MAX_LINELEN]; + + memset(line, '\0', MAX_LINELEN); + + storeAppend(sentry, open_bracket, (int) strlen(open_bracket)); + + sprintf(line, "{VM-Max %d \"# Maximum hot-vm cache (MB)\"}\n", + getCacheMemMax() / (1 << 20)); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{VM-High %d \"# High water mark hot-vm cache (%%)\"}\n", + getCacheMemHighWaterMark()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{VM-Low %d \"# Low water-mark hot-vm cache (%%)\"}\n", + getCacheMemLowWaterMark()); + storeAppend(sentry, line, strlen(line)); 
+ + sprintf(line, "{Swap-Max %d \"# Maximum disk cache (MB)\"}\n", + getCacheSwapMax() / (1 << 10)); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{Swap-High %d \"# High Water mark disk cache (%%)\"}\n", + getCacheSwapHighWaterMark()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{Swap-Low %d \"# Low water mark disk cache (%%)\"}\n", + getCacheSwapLowWaterMark()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{HTTP-Max %d\"# Maximum size HTTP objects (KB)\"}\n", + getHttpMax() / (1 << 10)); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{HTTP-TTL %d \"# Http object default TTL (hrs)\"}\n", getHttpTTL() / 3600); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{Gopher-Max %d \"# Maximum size gopher objects (KB)\"}\n", + getGopherMax() / (1 << 10)); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{Gopher-TTL %d \"# TTL for gopher objects (hrs)\"}\n", getGopherTTL() / 3600); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{FTP-Max %d \"# Maximum size FTP objects (KB)\"}\n", + getFtpMax() / (1 << 10)); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{FTP-TTL %d \"# TTL for FTP objects (hrs)\"}\n", getFtpTTL() / 3600); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{Neg-TTL %d \"# TTL for negative cache (s)\"}\n", + getNegativeTTL()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{ReadTimeout %d \"# Maximum idle connection (s)\"}\n", getReadTimeout()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{ClientLifetime %d \"# Lifetime for incoming ascii port requests or outgoing clients (s)\"}\n", getClientLifetime()); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{CleanRate %d \"# Rate for periodic object expiring\"}\n", + getCleanRate()); + storeAppend(sentry, line, strlen(line)); + + /* Cachemgr.cgi expects an integer in the second field of the string */ + sprintf(line, "{HttpAccelMode %d \"# 
Is operating as an HTTP accelerator\"}\n", + httpd_accel_mode); + storeAppend(sentry, line, strlen(line)); + + sprintf(line, "{DebugLevel %d \"# Cache debug level\"}\n", + debug_level); + storeAppend(sentry, line, strlen(line)); + + /* end of stats */ + storeAppend(sentry, close_bracket, strlen(close_bracket)); +} + + +void log_append(obj, url, id, size, action, method) + cacheinfo *obj; + char *url; + char *id; + int size; + char *action; + char *method; +{ + static char tmp[6000]; /* MAX_URL is 4096 */ + time_t t; + char *buf; + + t = cached_curtime = time(NULL); + +#ifdef LOG_FQDN + /* ENABLE THIS IF YOU WANT A *SLOW* CACHE, OR + * JUST WRITE A PERL SCRIPT TO MUCK YOUR LOGS */ + { + int ipx[4]; + unsigned long ipy; + struct hostent *h = NULL; + if (sscanf(id, "%d.%d.%d.%d", &ipx[0], &ipx[1], &ipx[2], &ipx[3]) == 4) { + ipy = inet_addr(id); + if (h = gethostbyaddr((char *) &ipy, 4, AF_INET)) { + id = xstrdup(h->h_name); + } + } + } +#endif + + if (obj->logfile_status == LOG_ENABLE) { + if (emulate_httpd_log) + sprintf(tmp, "%s - - [%s] \"%s %s\" %s %d\n", + id, mkhttpdlogtime(&t), method, url, action, size); + else + sprintf(tmp, "%d %s %s %d %s\n", (int) t, url, id, size, action); + + + if (file_write(obj->logfile_fd, buf = xstrdup(tmp), strlen(tmp), + obj->logfile_access, NULL, NULL) != DISK_OK) { + debug(1, "log_append: File write failed.\n"); + safe_free(buf); + } + } +} + +void log_enable(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + static char tempbuf[MAX_LINELEN]; + + if (obj->logfile_status == LOG_DISABLE) { + obj->logfile_status = LOG_ENABLE; + + /* open the logfile */ + obj->logfile_fd = file_open(obj->logfilename, NULL, O_RDWR | O_CREAT); + if (obj->logfile_fd == DISK_ERROR) { + debug(0, "Cannot open logfile: %s\n", obj->logfilename); + obj->logfile_status = LOG_DISABLE; + } + obj->logfile_access = file_write_lock(obj->logfile_fd); + + } + /* at the moment, store one char to make a storage manager happy */ + sprintf(tempbuf, " "); + 
storeAppend(sentry, tempbuf, strlen(tempbuf)); +} + +void log_disable(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + static char tempbuf[MAX_LINELEN]; + + if (obj->logfile_status == LOG_ENABLE) + file_close(obj->logfile_fd); + + obj->logfile_status = LOG_DISABLE; + /* at the moment, store one char to make a storage manager happy */ + sprintf(tempbuf, " "); + storeAppend(sentry, tempbuf, strlen(tempbuf)); +} + + + +void log_clear(obj, sentry) + cacheinfo *obj; + StoreEntry *sentry; +{ + static char tempbuf[MAX_LINELEN]; + + + /* what should be done here. Erase file ??? or move it to another name */ + /* At the moment, just erase it. */ + /* bug here need to be fixed. what if there are still data in memory. Need flush here */ + if (obj->logfile_status == LOG_ENABLE) + file_close(obj->logfile_fd); + + unlink(obj->logfilename); + + /* reopen it anyway */ + obj->logfile_fd = file_open(obj->logfilename, NULL, O_RDWR | O_CREAT); + if (obj->logfile_fd == DISK_ERROR) { + debug(0, "Cannot open logfile: %s\n", obj->logfilename); + obj->logfile_status = LOG_DISABLE; + } + /* at the moment, store one char to make a storage manager happy */ + sprintf(tempbuf, " "); + storeAppend(sentry, tempbuf, strlen(tempbuf)); +} + + + +void proto_newobject(obj, proto_id, size, restart) + cacheinfo *obj; + int proto_id; + int size; + int restart; +{ + proto_stat *p = &obj->proto_stat_data[proto_id]; + + p->object_count++; + + /* Account for 1KB granularity */ + p->kb.now += ((size + 1023) >> 10); + + if (p->kb.now > p->kb.max) + p->kb.max = p->kb.now; + if (restart) + p->kb.min = p->kb.now; +} + + +void proto_purgeobject(obj, proto_id, size) + cacheinfo *obj; + int proto_id; + int size; +{ + proto_stat *p = &obj->proto_stat_data[proto_id]; + + p->object_count--; + + /* Scale down to KB */ + p->kb.now -= ((size + 1023) >> 10); + + if (p->kb.now < p->kb.min) + p->kb.min = p->kb.now; +} + +/* update stat for each particular protocol when an object is fetched */ +void 
proto_touchobject(obj, proto_id, size) + cacheinfo *obj; + int proto_id; + int size; +{ + obj->proto_stat_data[proto_id].refcount++; + obj->proto_stat_data[proto_id].transferbyte += (1023 + size) >> 10; +} + +void proto_hit(obj, proto_id) + cacheinfo *obj; + int proto_id; +{ + obj->proto_stat_data[proto_id].hit++; +} + +void proto_miss(obj, proto_id) + cacheinfo *obj; + int proto_id; +{ + obj->proto_stat_data[proto_id].miss++; +} + +int proto_url_to_id(url) + char *url; +{ + if (strncmp(url, "http:", 5) == 0) + return HTTP_ID; + if (strncmp(url, "ftp:", 4) == 0) + return FTP_ID; + if (strncmp(url, "gopher:", 7) == 0) + return GOPHER_ID; + if (strncmp(url, "cache_object:", 13) == 0) + return CACHEOBJ_ID; + if (strncmp(url, "abort:", 6) == 0) + return ABORT_ID; + if (strncmp(url, "news:", 5) == 0) + return NOTIMPLE_ID; + if (strncmp(url, "file:", 5) == 0) + return NOTIMPLE_ID; + return NOTIMPLE_ID; +} + + + +void stat_init(object, logfilename) + cacheinfo **object; + char *logfilename; +{ + cacheinfo *obj = NULL; + int i; + + obj = (cacheinfo *) xmalloc(sizeof(cacheinfo)); + memset(obj, '\0', sizeof(cacheinfo)); + + obj->stat_get = stat_get; + obj->info_get = info_get; + obj->cache_size_get = cache_size_get; + + obj->log_get_start = log_get_start; + obj->log_status_get = log_status_get; + obj->log_append = log_append; + obj->log_clear = log_clear; + obj->log_enable = log_enable; + obj->log_disable = log_disable; + obj->logfile_status = LOG_ENABLE; + + obj->cached_get_start = cached_get_start; + + obj->parameter_get = parameter_get; + obj->server_list = server_list; + + memcpy(obj->logfilename, logfilename, (int) (strlen(logfilename) + 1) % 256); + obj->logfile_fd = file_open(obj->logfilename, NULL, O_RDWR | O_CREAT); + if (obj->logfile_fd == DISK_ERROR) { + debug(0, "Cannot open logfile: %s\n", obj->logfilename); + obj->logfile_status = LOG_DISABLE; + fatal("Cannot open logfile.\n"); + } + obj->logfile_access = file_write_lock(obj->logfile_fd); + + obj->proto_id = 
proto_url_to_id; + obj->proto_newobject = proto_newobject; + obj->proto_purgeobject = proto_purgeobject; + obj->proto_touchobject = proto_touchobject; + obj->proto_hit = proto_hit; + obj->proto_miss = proto_miss; + obj->NotImplement = dummyhandler; + + for (i = 0; i < PROTOCOL_SUPPORTED + PROTOCOL_EXTRA; ++i) { + + switch (i) { + + case TOTAL_ID: + strcpy(obj->proto_stat_data[i].protoname, "TOTAL"); + break; + + case HTTP_ID: + strcpy(obj->proto_stat_data[i].protoname, "HTTP"); + break; + + case GOPHER_ID: + strcpy(obj->proto_stat_data[i].protoname, "GOPHER"); + break; + + case FTP_ID: + strcpy(obj->proto_stat_data[i].protoname, "FTP"); + break; + + case CACHEOBJ_ID: + strcpy(obj->proto_stat_data[i].protoname, "CACHEMGR"); + break; + + case ABORT_ID: + strcpy(obj->proto_stat_data[i].protoname, "ABORTED"); + break; + + case NOTIMPLE_ID: + default: + strcpy(obj->proto_stat_data[i].protoname, "UNKNOWN"); + break; + } + + obj->proto_stat_data[i].object_count = 0; + obj->proto_stat_data[i].hit = 0; + obj->proto_stat_data[i].miss = 0; + obj->proto_stat_data[i].hitratio = 0.0; + obj->proto_stat_data[i].transferrate = 0; + obj->proto_stat_data[i].refcount = 0; + obj->proto_stat_data[i].transferbyte = 0; + + obj->proto_stat_data[i].kb.max = 0; + obj->proto_stat_data[i].kb.min = 0; + obj->proto_stat_data[i].kb.avg = 0; + obj->proto_stat_data[i].kb.now = 0; + + } + + *object = obj; +} + +char *stat_describe(entry) + StoreEntry *entry; +{ + static char state[256]; + + state[0] = '\0'; + switch (entry->status) { + case STORE_OK: + strncat(state, "STORE-OK", sizeof(state)); + break; + case STORE_PENDING: + strncat(state, "ST-PEND", sizeof(state)); + break; + case STORE_ABORTED: + strncat(state, "ABORTED", sizeof(state)); + break; + default: + strncat(state, "YEEHAH", sizeof(state)); + break; + } + strncat(state, "/", sizeof(state)); + + switch (entry->ping_status) { + case WAITING: + strncat(state, "PING-WAIT", sizeof(state)); + break; + case TIMEOUT: + strncat(state, 
"PING-TIMEOUT", sizeof(state)); + break; + case DONE: + strncat(state, "PING-DONE", sizeof(state)); + break; + case NOPING: + strncat(state, "NO-PING", sizeof(state)); + break; + default: + strncat(state, "YEEHAH", sizeof(state)); + break; + } + return (state); +} + +char *mem_describe(entry) + StoreEntry *entry; +{ + static char where[100]; + + where[0] = '\0'; + if (entry->swap_file_number >= 0) + sprintf(where, "D%d", entry->swap_file_number); + if (entry->swap_status == SWAPPING_OUT) + strncat(where, "/SWAP-OUT", sizeof(where)); + if (entry->swap_status == SWAP_OK) + strncat(where, "/SWAP-OK", sizeof(where)); + else + strncat(where, "/NO-SWAP", sizeof(where)); + + if (entry->mem_status == SWAPPING_IN) + strncat(where, "/SWAP-IN", sizeof(where)); + else if (entry->mem_status == IN_MEMORY) + strncat(where, "/IN-MEM", sizeof(where)); + else /* STORE_PENDING */ + strncat(where, "/OUT-MEM", sizeof(where)); + return (where); +} + + +char *ttl_describe(entry, expires) + StoreEntry *entry; + int expires; +{ + int hh, mm, ss; + static char TTL[60]; + int ttl; + + TTL[0] = '\0'; + strcpy(TTL, "UNKNOWN"); /* sometimes the TTL isn't set below */ + ttl = expires - cached_curtime; + if (ttl < 0) + strcpy(TTL, "EXPIRED"); + else { + + hh = ttl / 3600; + ttl -= hh * 3600; + mm = ttl / 60; + ttl -= mm * 60; + ss = ttl; + + sprintf(TTL, "% 6d:%02d:%02d", hh, mm, ss); + } + return (TTL); +} + +char *elapsed_time(entry, since, TTL) + StoreEntry *entry; + int since; + char *TTL; +{ + int hh, mm, ss, ttl; + + TTL[0] = '\0'; + strcpy(TTL, "UNKNOWN"); /* sometimes TTL doesn't get set */ + ttl = cached_curtime - since; + if (since == 0) { + strcpy(TTL, "NEVER"); + } else if (ttl < 0) { + strcpy(TTL, "EXPIRED"); + } else { + hh = ttl / 3600; + ttl -= hh * 3600; + mm = ttl / 60; + ttl -= mm * 60; + ss = ttl; + sprintf(TTL, "% 6d:%02d:%02d", hh, mm, ss); + } + return (TTL); +} + + +char *flags_describe(entry) + StoreEntry *entry; +{ + static char FLAGS[32]; + char LOCK_CNT[32]; + + 
strcpy(FLAGS, "F:"); + if (BIT_TEST(entry->flag, KEY_CHANGE)) + strncat(FLAGS, "K", sizeof(FLAGS) - 1); + if (BIT_TEST(~entry->flag, CACHABLE)) + strncat(FLAGS, "C", sizeof(FLAGS) - 1); + if (BIT_TEST(entry->flag, REFRESH_REQUEST)) + strncat(FLAGS, "R", sizeof(FLAGS) - 1); + if (BIT_TEST(entry->flag, RELEASE_REQUEST)) + strncat(FLAGS, "Z", sizeof(FLAGS) - 1); + if (BIT_TEST(entry->flag, ABORT_MSG_PENDING)) + strncat(FLAGS, "A", sizeof(FLAGS) - 1); + if (BIT_TEST(entry->flag, DELAY_SENDING)) + strncat(FLAGS, "D", sizeof(FLAGS) - 1); + if (BIT_TEST(entry->flag, IP_LOOKUP_PENDING)) + strncat(FLAGS, "P", sizeof(FLAGS) - 1); + if (entry->lock_count) + strncat(FLAGS, "L", sizeof(FLAGS) - 1); + if (entry->lock_count) { + sprintf(LOCK_CNT, "%d", entry->lock_count); + strncat(FLAGS, LOCK_CNT, sizeof(FLAGS) - 1); + } + return (FLAGS); +} + +void stat_rotate_log() +{ + int i; + static char from[MAXPATHLEN]; + static char to[MAXPATHLEN]; + char *fname = NULL; + + if ((fname = CacheInfo->logfilename) == NULL) + return; + + debug(1, "stat_rotate_log: Rotating\n"); + + /* Rotate numbers 0 through N up one */ + for (i = getLogfileRotateNumber(); i > 1;) { + i--; + sprintf(from, "%s.%d", fname, i - 1); + sprintf(to, "%s.%d", fname, i); + rename(from, to); + } + /* Rotate the current log to .0 */ + if (getLogfileRotateNumber() > 0) { + sprintf(to, "%s.%d", fname, 0); + rename(fname, to); + } + /* Close and reopen the log. It may have been renamed "manually" + * before HUP'ing us. 
*/ + file_close(CacheInfo->logfile_fd); + CacheInfo->logfile_fd = file_open(fname, NULL, O_RDWR | O_CREAT | O_APPEND); + if (CacheInfo->logfile_fd == DISK_ERROR) { + debug(0, "rotate_logs: Cannot open logfile: %s\n", fname); + CacheInfo->logfile_status = LOG_DISABLE; + fatal("Cannot open logfile.\n"); + } + CacheInfo->logfile_access = file_write_lock(CacheInfo->logfile_fd); +} diff --git a/src/stmem.cc b/src/stmem.cc new file mode 100644 index 00000000000..93ae5c7236a --- /dev/null +++ b/src/stmem.cc @@ -0,0 +1,488 @@ +static char rcsid[] = "$Id: stmem.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: stmem.c + * Description: Store manager <-> memory manager interface + * Author: Chuck Neerdaels, USC (chuckn@rand.org) + * Created: Tue Apr 5 16:51:26 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. 
Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. 
We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include + +#include "stmem.h" +#include "util.h" + +#define min(x,y) ((x)<(y)? (x) : (y)) + +#ifndef USE_MEMALIGN +#define USE_MEMALIGN 0 +#endif + +extern int getCacheMemMax(); +extern int empty_stack _PARAMS((Stack * stack)); +extern int full_stack _PARAMS((Stack * stack)); +extern void push _PARAMS((Stack * stack, generic_ptr data)); +extern void init_stack _PARAMS((Stack * stack, int size)); +extern void fatal_dump _PARAMS((char *)); + +void memFree(mem) + mem_ptr mem; +{ + mem_node lastp, p = mem->head; + + if (p) { + while (p && (p != mem->tail)) { + lastp = p; + p = p->next; + if (lastp) { + put_free_4k_page(lastp->data); + safe_free(lastp); + } + } + + if (p) { + put_free_4k_page(p->data); + safe_free(p); + } + } + memset(mem, '\0', sizeof(mem_ptr)); /* nuke in case ref'ed again */ + safe_free(mem); +} + +void memFreeData(mem) + mem_ptr mem; +{ + mem_node lastp, p = mem->head; + + while (p != mem->tail) { + lastp = p; + p = p->next; + put_free_4k_page(lastp->data); + safe_free(lastp); + } + + if (p != NULL) { + put_free_4k_page(p->data); + safe_free(p); + p = NULL; + } + mem->head = mem->tail = NULL; /* detach in case ref'd */ + mem->origin_offset = 0; +} + +int memFreeDataUpto(mem, target_offset) + mem_ptr mem; + int target_offset; +{ + int current_offset = mem->origin_offset; + mem_node lastp, p = mem->head; + + while (p && ((current_offset + p->len) <= target_offset)) { + if (p == mem->tail) { + /* keep the last one to avoid change to other part of code */ + mem->head = mem->tail; + mem->origin_offset = current_offset; + return current_offset; + } else { + lastp = 
p; + p = p->next; + current_offset += lastp->len; + put_free_4k_page(lastp->data); + safe_free(lastp); + } + } + + mem->head = p; + mem->origin_offset = current_offset; + if (current_offset < target_offset) { + /* there are still some data left. */ + return current_offset; + } + if (current_offset != target_offset) { + debug(1, "memFreeDataBehind: This shouldn't happen. Some odd condition.\n"); + debug(1, " Current offset: %d Target offset: %d p: %p\n", + current_offset, target_offset, p); + } + return current_offset; + +} + + +/* Append incoming data. */ +int memAppend(mem, data, len) + mem_ptr mem; + char *data; + int len; +{ + mem_node p; + int avail_len; + int len_to_copy; + + debug(6, "memAppend: len %d\n", len); + + /* Does the last block still contain empty space? + * If so, fill out the block before dropping into the + * allocation loop */ + + if (mem->head && mem->tail && (mem->tail->len < SM_PAGE_SIZE)) { + avail_len = SM_PAGE_SIZE - (mem->tail->len); + len_to_copy = min(avail_len, len); + memcpy((mem->tail->data + mem->tail->len), data, len_to_copy); + /* Adjust the ptr and len according to what was deposited in the page */ + data += len_to_copy; + len -= len_to_copy; + mem->tail->len += len_to_copy; + } + while (len > 0) { + len_to_copy = min(len, SM_PAGE_SIZE); + p = (mem_node) xcalloc(1, sizeof(Mem_Node)); + p->next = NULL; + p->len = len_to_copy; + p->data = get_free_4k_page(); + memcpy(p->data, data, len_to_copy); + + if (!mem->head) { + /* The chain is empty */ + mem->head = mem->tail = p; + } else { + /* Append it to existing chain */ + mem->tail->next = p; + mem->tail = p; + } + len -= len_to_copy; + data += len_to_copy; + } + return len; +} + +int memGrep(mem, string, nbytes) + mem_ptr mem; + char *string; + int nbytes; +{ + mem_node p = mem->head; + char *str_i, *mem_i; + int i = 0, blk_idx = 0, state, goal; + + debug(6, "memGrep: looking for %s in less than %d bytes.\n", + string, nbytes); + + if (!p) + return 0; + + if (mem->origin_offset != 
0) { + debug(1, "memGrep: Some lower chunk of data has been erased. Can't do memGrep!\n"); + return 0; + } + str_i = string; + mem_i = p->data; + state = 1; + goal = strlen(string); + + while (i < nbytes) { + if (tolower(*mem_i++) == tolower(*str_i++)) + state++; + else { + state = 1; + str_i = string; + } + + /* Return offset of byte beyond the matching string */ + if (state == goal) + return (i + 1); + + i++; + blk_idx++; + + if (blk_idx >= p->len) { + if (p->next) { + p = p->next; + mem_i = p->data; + blk_idx = 0; + } else + break; + } + } + return 0; +} + +int memCopy(mem, offset, buf, size) + mem_ptr mem; + int offset; + char *buf; + int size; +{ + mem_node p = mem->head; + int t_off = mem->origin_offset; + int bytes_to_go = size; + char *ptr_to_buf; + int bytes_from_this_packet = 0; + int bytes_into_this_packet = 0; + + debug(6, "memCopy: offset %d: size %d\n", offset, size); + + if (size <= 0) + return size; + + /* Seek our way into store */ + while ((t_off + p->len) < offset) { + t_off += p->len; + if (p->next) + p = p->next; + else { + debug(1, "memCopy: Offset: %d is off limit of current object of %d\n", t_off, offset); + return 0; + } + } + + /* Start copying begining with this block until + * we're satiated */ + + bytes_into_this_packet = offset - t_off; + bytes_from_this_packet = min(bytes_to_go, + p->len - bytes_into_this_packet); + + memcpy(buf, p->data + bytes_into_this_packet, bytes_from_this_packet); + bytes_to_go -= bytes_from_this_packet; + ptr_to_buf = buf + bytes_from_this_packet; + p = p->next; + + while (p && bytes_to_go > 0) { + if (bytes_to_go > p->len) { + memcpy(ptr_to_buf, p->data, p->len); + ptr_to_buf += p->len; + bytes_to_go -= p->len; + } else { + memcpy(ptr_to_buf, p->data, bytes_to_go); + bytes_to_go -= bytes_to_go; + } + p = p->next; + } + + return size; +} + + +/* Do whatever is necessary to begin storage of new object */ +mem_ptr memInit() +{ + mem_ptr new = (mem_ptr) xcalloc(1, sizeof(Mem_Hdr)); + + new->tail = new->head = 
NULL; + + new->mem_free = memFree; + new->mem_free_data = memFreeData; + new->mem_free_data_upto = memFreeDataUpto; + new->mem_append = memAppend; + new->mem_copy = memCopy; + new->mem_grep = memGrep; + + return new; +} + + +/* PBD 12/95: Memory allocator routines for saving and reallocating fixed + * size blocks rather than mallocing and freeing them */ +char * + get_free_4k_page() +{ + char *page = NULL; + + if (!empty_stack(&sm_stats.free_page_stack)) { + page = pop(&sm_stats.free_page_stack); + } else { +#if USE_MEMALIGN + page = (char *) memalign(SM_PAGE_SIZE, SM_PAGE_SIZE); + if (!page) + fatal_dump(NULL); +#else + page = (char *) xmalloc(SM_PAGE_SIZE); +#endif + sm_stats.total_pages_allocated++; + } + sm_stats.n_pages_in_use++; + if (page == NULL) { + debug(0, "Null page pointer?"); + fatal_dump(NULL); + } + return (page); +} + +void put_free_4k_page(page) + char *page; +{ + static stack_overflow_warning_toggle; + +#if USE_MEMALIGN + if ((int) page % SM_PAGE_SIZE) { + debug(0, "Someone tossed a string into the 4k page pool\n"); + fatal_dump(NULL); + } +#endif + if (full_stack(&sm_stats.free_page_stack)) { + sm_stats.total_pages_allocated--; + if (!stack_overflow_warning_toggle) { + debug(0, "Stack of free stmem pages overflowed. 
Resize it?"); + stack_overflow_warning_toggle++; + } + } + sm_stats.n_pages_in_use--; + /* Call push regardless if it's full, cause it's just going to release the + * page if stack is full */ + push(&sm_stats.free_page_stack, page); +} + +char * + get_free_8k_page() +{ + char *page = NULL; + + if (!empty_stack(&disk_stats.free_page_stack)) { + page = pop(&disk_stats.free_page_stack); + } else { +#if USE_MEMALIGN + page = (char *) memalign(DISK_PAGE_SIZE, DISK_PAGE_SIZE); + if (!page) + fatal_dump(NULL); +#else + page = (char *) xmalloc(DISK_PAGE_SIZE); +#endif + disk_stats.total_pages_allocated++; + } + disk_stats.n_pages_in_use++; + if (page == NULL) { + debug(0, "Null page pointer?"); + fatal_dump(NULL); + } + return (page); +} + +void put_free_8k_page(page) + char *page; +{ + static stack_overflow_warning_toggle; + +#if USE_MEMALIGN + if ((int) page % DISK_PAGE_SIZE) { + debug(0, "Someone tossed a string into the 8k page pool\n"); + fatal_dump(NULL); + } +#endif + + if (full_stack(&disk_stats.free_page_stack)) { + disk_stats.total_pages_allocated--; + if (!stack_overflow_warning_toggle) { + debug(0, "Stack of free disk pages overflowed. 
Resize it?"); + stack_overflow_warning_toggle++; + } + } + disk_stats.n_pages_in_use--; + /* Call push regardless if it's full, cause it's just going to release the + * page if stack is full */ + push(&disk_stats.free_page_stack, page); +} + +void stmemInit() +{ + sm_stats.page_size = SM_PAGE_SIZE; + sm_stats.total_pages_allocated = 0; + sm_stats.n_pages_free = 0; + sm_stats.n_pages_in_use = 0; + + disk_stats.page_size = DISK_PAGE_SIZE; + disk_stats.total_pages_allocated = 0; + disk_stats.n_pages_free = 0; + disk_stats.n_pages_in_use = 0; + +/* use -DPURIFY=1 on the compile line to enable Purify checks */ + +#if !PURIFY + /* 4096 * 10000 pages = 40MB + CacheMemMax in pages */ + init_stack(&sm_stats.free_page_stack, 10000 + (getCacheMemMax() / SM_PAGE_SIZE)); + /* 8096 * 1000 pages = 8MB */ + init_stack(&disk_stats.free_page_stack, 1000); +#else + /* Declare a zero size page stack so that purify checks for + * FMRs/UMRs etc. + */ + init_stack(&sm_stats.free_page_stack, 0); + init_stack(&disk_stats.free_page_stack, 0); +#endif +} diff --git a/src/store.cc b/src/store.cc new file mode 100644 index 00000000000..27871692609 --- /dev/null +++ b/src/store.cc @@ -0,0 +1,2662 @@ +static char rcsid[] = "$Id: store.cc,v 1.1 1996/02/22 06:23:55 wessels Exp $"; +/* + * File: store.c + * Description: Storage manager for the Cache + * Author: John Noll, Anawat Chankhunthod, USC + * Created: Sun Apr 3 16:51:26 1994 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. 
+ * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. 
+ * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ + +/* + * Here is a summary of the routines which change mem_status and swap_status: + * Added 11/18/95 + * + * Routine mem_status swap_status status + * --------------------------------------------------------------------------- + * storeAdd NOT_IN_MEMORY NO_SWAP + * storeComplete IN_MEMORY NO_SWAP + * storeSwapOutStart SWAPPING_OUT + * storeSwapOutHandle(fail) NO_SWAP + * storeSwapOutHandle(ok) SWAP_OK + * --------------------------------------------------------------------------- + * storeAddDiskRestore NOT_IN_MEMORY SWAP_OK + * storeSwapInStart SWAPPING_IN + * storeSwapInHandle(fail) NOT_IN_MEMORY + * storeSwapInHandle(ok) IN_MEMORY + * --------------------------------------------------------------------------- + * storeAbort IN_MEMORY NO_SWAP + * storePurgeMem NOT_IN_MEMORY + * --------------------------------------------------------------------------- + * You can reclaim an object's space if it's: + * storeGetSwapSpace !SWAPPING_IN !SWAPPING_OUT !STORE_PENDING + * + */ + +#include "config.h" /* goes first */ +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include + +#include "ansihelp.h" /* goes secound */ +#include "comm.h" +#include "proto.h" +#include "url.h" +#include "stat.h" +#include "disk.h" +#include "store.h" +#include "cache_cf.h" +#include "hash.h" +#include "debug.h" +#include "dynamic_array.h" +#include "util.h" +#include "filemap.h" +#include "stmem.h" +#include "mime.h" + +extern time_t cached_curtime; +extern char *storeToString _PARAMS((StoreEntry * e)); +extern int proto_url_to_id _PARAMS((char *url)); +extern int file_write_lock _PARAMS((int)); +extern void fatal_dump _PARAMS((char *)); +extern void fatal _PARAMS((char *)); +extern void death(); +char *tmp_error_buf; + +#define REBUILD_TIMESTAMP_DELTA_MAX 2 +#define MAX_SWAP_FILE (1<<21) +#define SWAP_BUF DISK_PAGE_SIZE +#define FATAL_BUF_SIZE 1024 +#define SWAP_DIRECTORIES 100 +#ifndef DEFAULT_SWAP_DIR +#define DEFAULT_SWAP_DIR "/tmp/cache" +#endif + +/* rate of checking expired objects in main loop */ +#define STORE_MAINTAIN_RATE (20) + +#define STORE_BUCKETS (7921) +#define STORE_IN_MEM_BUCKETS (143) + +int urlcmp _PARAMS((char *s1, char *s2)); +int safeunlink _PARAMS((char *, int)); +int swapInError _PARAMS((int fd_unused, StoreEntry * entry)); +int storeSwapInStart _PARAMS((StoreEntry * e)); +int storeCopy _PARAMS((StoreEntry * e, int stateoffset, int maxSize, char *buf, int *size)); + +/* Now, this table is inaccessible to outsider. They have to use a method + * to access a value in internal storage data structure. 
*/ +HashID table = 0; +/* hash table for in-memory-only objects */ +HashID in_mem_table = 0; + +/* initializtion flag */ +static int ok_write_clean_log = 0; + +/* current memory storage size */ +static unsigned long store_mem_size = 0; +static unsigned long store_mem_high = 0; +static unsigned long store_mem_low = 0; + +/* current hotvm object */ +/* defaults for 16M cache and 12.5 cache_hot_vm_factor */ +static int store_hotobj_high = 180; +static int store_hotobj_low = 120; + + +/* current file name, swap file, use number as a filename */ +static unsigned long swapfileno = 0; +static int store_swap_size = 0; /* kilobytes !! */ +static unsigned long store_swap_high = 0; +static unsigned long store_swap_low = 0; +static int swaplog_fd = 0; +static int swaplog_lock; +static int swaplog_tmp_fd = 0; +FILE *swaplog_stream = NULL; +FILE *swaplog_tmp_stream = NULL; + +/* counter for uncachable objects */ +static int uncache_count = 0; +static int keychange_count = 0; + +/* key temp buffer */ +static char key_temp_buffer[MAX_URL]; + +/* patch cache_dir to accomodate multiple disk storage */ +dynamic_array *cache_dirs = NULL; +int ncache_dirs = 0; + +/* Allocate memory for a new store structure */ +StoreEntry *create_StoreEntry() +{ + StoreEntry *e = NULL; + + e = (StoreEntry *) xcalloc(1, sizeof(StoreEntry)); + e->mem_obj = (MemObject *) xcalloc(1, sizeof(MemObject)); + meta_data.store_in_mem_objects++; + + return (e); +} + +StoreEntry *create_StoreEntry_only() +{ + return ((StoreEntry *) xcalloc(1, sizeof(StoreEntry))); +} + +/* Free memory of a store structure */ +/* free a StoreEntry */ +void destroy_StoreEntry(e) + StoreEntry *e; +{ + if (e) { + if (e->mem_obj) { + meta_data.store_in_mem_objects--; + xfree(e->mem_obj); + } + safe_free(e); + } +} + + +/* free unused memory while object is not in memory */ +void destroy_store_mem_obj(e) + StoreEntry *e; +{ + if (e && e->mem_obj) { + safe_free(store_mem_obj(e, mime_hdr)); + safe_free(e->mem_obj); + 
meta_data.store_in_mem_objects--; + } +} + +/* Check if there is memory allocated for object in memory */ +int has_mem_obj(e) + StoreEntry *e; +{ + if (e && e->mem_obj) + return (TRUE); + return (FALSE); +} + +/* allocate memory for swapping object in memory */ +void create_store_mem_obj(e) + StoreEntry *e; +{ + if (e) { + if (has_mem_obj(e)) + debug(1, "create_store_mem_obj: old memory not released\n"); + e->mem_obj = (MemObject *) xcalloc(1, sizeof(MemObject)); + meta_data.store_in_mem_objects++; + } +} + +/* ----- INTERFACE BETWEEN STORAGE MANAGER AND HASH TABLE FUNCTIONS --------- */ + +/* + * Create 2 hash tables, "table" has all objects, "in_mem_table" has only + * objects in the memory. + */ + +HashID storeCreateHashTable(cmp_func) + int (*cmp_func) (char *, char *); +{ + table = hash_create(cmp_func, STORE_BUCKETS); + in_mem_table = hash_create(cmp_func, STORE_IN_MEM_BUCKETS); + return (table); +} + +/* + * if object is in memory, also insert into in_mem_table + */ + +int storeHashInsert(e) + StoreEntry *e; +{ + if (e->mem_status == IN_MEMORY) + hash_insert(in_mem_table, e->key, e); + return (hash_join(table, (hash_link *) e)); +} + +/* + * if object in memory, also remove from in_mem_table + */ + +int storeHashDelete(hash_ptr) + hash_link *hash_ptr; +{ + hash_link *hptr = NULL; + StoreEntry *e = NULL; + + e = (StoreEntry *) hash_ptr; + if (e->mem_status == IN_MEMORY && e->key) { + if ((hptr = hash_lookup(in_mem_table, e->key))) + hash_delete_link(in_mem_table, hptr); + } + return (hash_remove_link(table, hash_ptr)); +} + +/* + * maintain the in-mem hash table according to the changes of mem_status + * This routine replaces the instruction "e->status = status;" + */ + +void storeSetMemStatus(e, status) + StoreEntry *e; + int status; +{ + hash_link *ptr = NULL; + + if (e->mem_status == IN_MEMORY && status != IN_MEMORY) { + if (e->key == NULL) { + debug(0, "storeSetMemStatus: NULL key for %s\n", e->url); + return; + } + if ((ptr = hash_lookup(in_mem_table, 
e->key))) + hash_delete_link(in_mem_table, ptr); + } else if (status == IN_MEMORY && e->mem_status != IN_MEMORY) { + hash_insert(in_mem_table, e->key, e); + } + e->mem_status = status; +} + +/* -------------------------------------------------------------------------- */ + +/* free whole entry */ +void storeFreeEntry(e) + StoreEntry *e; +{ + meta_data.store_entries--; + + if (e == (StoreEntry *) NULL) + return; + + debug(5, "storeFreeEntry: Freeing %s\n", e->url); + + if (has_mem_obj(e)) { + store_mem_size -= store_mem_obj(e, e_current_len) - store_mem_obj(e, e_lowest_offset); + debug(8, "storeFreeEntry: Freeing %d in-memory bytes\n", + store_mem_obj(e, e_current_len)); + debug(8, "storeFreeEntry: store_mem_size = %d\n", store_mem_size); + if (store_mem_obj(e, data)) { + store_mem_obj(e, data)->mem_free(store_mem_obj(e, data)); + store_mem_obj(e, data) = NULL; + --meta_data.hot_vm; + } + } + meta_data.url_strings -= strlen(e->url); + safe_free(e->url); + if (!(e->flag & KEY_URL)) + safe_free(e->key); + if (has_mem_obj(e)) { + safe_free(store_mem_obj(e, mime_hdr)); + /* Leave an unzeroed pointer to the abort msg for posterity */ + if (store_mem_obj(e, e_abort_msg)) + free(store_mem_obj(e, e_abort_msg)); + safe_free(store_mem_obj(e, pending)); + /* look up to free client_list */ + if (store_mem_obj(e, client_list)) { + int i; + for (i = 0; i < store_mem_obj(e, client_list_size); ++i) { + if (store_mem_obj(e, client_list[i])) + safe_free(store_mem_obj(e, client_list[i])); + } + safe_free(store_mem_obj(e, client_list)); + } + } + destroy_StoreEntry(e); +} + +/* free only data buffer, let the rest of structure stay. + * For Negative cache purpose. 
*/ +void storeFreeEntryData(e) + StoreEntry *e; +{ + debug(5, "storeFreeEntryData: Freeing data-buffer only %s\n", e->url); + store_mem_size -= store_mem_obj(e, e_current_len) - store_mem_obj(e, e_lowest_offset); + debug(8, "storeFreeEntryData: Freeing %d in-memory bytes\n", + store_mem_obj(e, e_current_len)); + debug(8, "storeFreeEntryData: store_mem_size = %d\n", store_mem_size); + e->object_len = 0; + store_mem_obj(e, e_current_len) = 0; + if (store_mem_obj(e, data)) { + store_mem_obj(e, data)->mem_free(store_mem_obj(e, data)); + store_mem_obj(e, data) = NULL; + } +} + + +/* get rid of memory copy of the object */ +void storePurgeMem(e) + StoreEntry *e; +{ + debug(5, "storePurgeMem: Freeing memory-copy of %s\n", e->url); + if (!has_mem_obj(e)) + return; + + if (storeEntryLocked(e)) { + debug(0, "storePurgeMem: someone (storeGetMemSpace?) is purging a locked object?\n"); + debug(0, "%s", storeToString(e)); + fatal_dump(NULL); + } + /* free up memory data */ + if (store_mem_obj(e, data)) { + store_mem_obj(e, data)->mem_free(store_mem_obj(e, data)); + store_mem_obj(e, data) = NULL; + --meta_data.hot_vm; + } + store_mem_size -= e->object_len - store_mem_obj(e, e_lowest_offset); + debug(8, "storePurgeMem: Freeing %d in-memory bytes\n", + e->object_len); + debug(8, "storePurgeMem: store_mem_size = %d\n", store_mem_size); + storeSetMemStatus(e, NOT_IN_MEMORY); + store_mem_obj(e, e_current_len) = 0; + /* free up pending list table */ + safe_free(store_mem_obj(e, pending)); + store_mem_obj(e, pending_list_size) = 0; + /* free up client list table and entries */ + if (store_mem_obj(e, client_list)) { + int i; + for (i = 0; i < store_mem_obj(e, client_list_size); ++i) { + if (store_mem_obj(e, client_list[i])) + safe_free(store_mem_obj(e, client_list[i])); + } + safe_free(store_mem_obj(e, client_list)); + } + destroy_store_mem_obj(e); +} + +/* lock the object for reading, start swapping in if necessary */ +int storeLockObject(e) + StoreEntry *e; +{ + int swap_in_stat = 0; + 
int status = 0; + + e->lock_count++; + debug(5, "storeLockObject: locks %d: \n", e->lock_count, e->url); + + if ((e->mem_status == NOT_IN_MEMORY) && /* Not in memory */ + (e->swap_status != SWAP_OK) && /* Not on disk */ + (e->status != STORE_PENDING) /* Not being fetched */ + ) { + debug(0, "storeLockObject: NOT_IN_MEMORY && !SWAP_OK && !STORE_PENDING conflict: . aborting...\n", e->url); + /* If this sanity check fails, we should just ... */ + fatal_dump(NULL); + } + e->lastref = cached_curtime; + + /* StoreLockObject() is called during icp_hit_or_miss and once by storeAbort + * If the object is NOT_IN_MEMORY, fault it in. */ + if ((e->mem_status == NOT_IN_MEMORY) && (e->swap_status == SWAP_OK)) { + /* object is in disk and no swapping daemon running. Bring it in. */ + if ((swap_in_stat = storeSwapInStart(e)) < 0) { + /* + * We couldn't find or couldn't open object's swapfile. + * So, return a -1 here, indicating that we will treat + * the reference like a MISS_TTL, force a keychange and + storeRelease. 
*/ + e->lock_count--; + } + status = swap_in_stat; + } + return status; +} + +/* unlock object, return -1 if object get released after unlock + * otherwise lock_count */ + +int storeUnlockObject(e) + StoreEntry *e; +{ + int e_lock_count; + + if ((int) e->lock_count > 0) + e->lock_count--; + else if (e->lock_count == 0) { + debug(0, "Entry lock count %d is out-of-whack\n", e->lock_count); + } + /* Prevent UMR if we end up freeing the entry */ + e_lock_count = (int) e->lock_count; + + if (e->lock_count == 0) { + + if (e->flag & RELEASE_REQUEST) { + storeRelease(e); + } else if (e->flag & ABORT_MSG_PENDING) { + /* This is where the negative cache gets storeAppended */ + /* Briefly lock to replace content with abort message */ + e->lock_count++; + storeFreeEntryData(e); + store_mem_obj(e, data) = memInit(); + storeAppend(e, store_mem_obj(e, e_abort_msg), strlen(store_mem_obj(e, e_abort_msg))); + e->object_len = store_mem_obj(e, e_current_len) + = strlen(store_mem_obj(e, e_abort_msg)); + BIT_RESET(e->flag, ABORT_MSG_PENDING); + e->lock_count--; + } + } + return e_lock_count; + +} + +/* Lookup an object in the cache. + * return just a reference to object, don't start swapping in yet. */ +StoreEntry *storeGet(url) + char *url; +{ + hash_link *hptr = NULL; + + debug(5, "storeGet: looking up %s\n", url); + + if (table != (HashID) 0) { + if ((hptr = hash_lookup(table, url)) != NULL) + return (StoreEntry *) hptr; + } + return NULL; +} + +char *storeGenerateKey(url, request_type_id) + char *url; + int request_type_id; +{ + debug(5, "storeGenerateKey: type=%d %s\n", request_type_id, url); + if (request_type_id == REQUEST_OP_POST) { + sprintf(key_temp_buffer, "/post/%s", url); + return key_temp_buffer; + } + if (request_type_id == REQUEST_OP_HEAD) { + sprintf(key_temp_buffer, "/head/%s", url); + return key_temp_buffer; + } + return url; +} + +/* Add a new object to the cache. 
*/ +StoreEntry *storeAdd(url, type_notused, mime_hdr, cachable, html_request, request_type_id) + char *url; + char *type_notused; + char *mime_hdr; + int cachable; + int html_request; + int request_type_id; +{ + char key[MAX_URL + 16]; + StoreEntry *e = NULL; + + debug(5, "storeAdd: %s\n", url); + + meta_data.store_entries++; + meta_data.url_strings += strlen(url); + + if (meta_data.hot_vm > store_hotobj_high) + storeGetMemSpace(0, 1); + e = create_StoreEntry(); + e->url = xstrdup(url); + e->key = NULL; + e->flag = 0; + e->type_id = request_type_id; + if (mime_hdr) { + store_mem_obj(e, mime_hdr) = xstrdup(mime_hdr); + if (mime_refresh_request(mime_hdr)) + BIT_SET(e->flag, REFRESH_REQUEST); + else + BIT_RESET(e->flag, REFRESH_REQUEST); + } + if (cachable) { + BIT_SET(e->flag, CACHABLE); + BIT_RESET(e->flag, RELEASE_REQUEST); + } else { + BIT_RESET(e->flag, CACHABLE); + /*after a lock is release, it will be released by storeUnlock */ + BIT_SET(e->flag, RELEASE_REQUEST); + } + + if (html_request) + BIT_SET(e->flag, REQ_HTML); + else + BIT_RESET(e->flag, REQ_HTML); + + e->status = STORE_PENDING; + storeSetMemStatus(e, NOT_IN_MEMORY); + e->swap_status = NO_SWAP; + e->swap_file_number = -1; + e->lock_count = 0; + BIT_RESET(e->flag, KEY_CHANGE); + BIT_RESET(e->flag, CLIENT_ABORT_REQUEST); + store_mem_obj(e, data) = memInit(); + meta_data.hot_vm++; + e->refcount = 0; + e->lastref = cached_curtime; + e->timestamp = 0; /* set in storeSwapOutHandle() */ + e->ping_status = NOPING; + if (e->flag & CACHABLE) { + if (request_type_id == REQUEST_OP_GET) { + e->key = e->url; + BIT_SET(e->flag, KEY_URL); + } else { + e->key = xstrdup(storeGenerateKey(e->url, request_type_id)); + BIT_RESET(e->flag, KEY_URL); + } + } else { + /* prepend a uncache count number to url for a key */ + key[0] = '\0'; + sprintf(key, "/%d/%s", uncache_count, url); + uncache_count++; + e->key = xstrdup(key); + BIT_RESET(e->flag, KEY_URL); + } + + /* allocate pending list */ + store_mem_obj(e, 
pending_list_size) = MIN_PENDING; + store_mem_obj(e, pending) = (struct pentry **) + xcalloc(store_mem_obj(e, pending_list_size), sizeof(struct pentry *)); + + /* allocate client list */ + store_mem_obj(e, client_list_size) = MIN_CLIENT; + store_mem_obj(e, client_list) = (ClientStatusEntry **) + xcalloc(store_mem_obj(e, client_list_size), sizeof(ClientStatusEntry *)); + + if (table == (HashID) 0) { + storeCreateHashTable(urlcmp); + } + storeHashInsert(e); + return e; +} + +/* Add a new object to the cache with empty memory copy and pointer to disk + * use to rebuild store from disk. */ +StoreEntry *storeAddDiskRestore(url, file_number, size, expires, timestamp) + char *url; + int file_number; + int size; + time_t expires; + time_t timestamp; +{ + StoreEntry *e = NULL; + + debug(5, "StoreAddDiskRestore: : size %d: expires %d: file_number %d\n", + url, size, expires, file_number); + + if (file_map_bit_test(file_number)) { + debug(0, "This file number is already allocated!\n"); + debug(0, " --> file_number %d\n", file_number); + debug(0, " --> \n", url); + return (NULL); + } + meta_data.store_entries++; + meta_data.url_strings += strlen(url); + + e = create_StoreEntry_only(); + e->url = xstrdup(url); + e->key = NULL; + e->flag = 0; + e->type_id = REQUEST_OP_GET; + BIT_SET(e->flag, CACHABLE); + BIT_RESET(e->flag, RELEASE_REQUEST); + BIT_SET(e->flag, REQ_HTML); + e->status = STORE_OK; + storeSetMemStatus(e, NOT_IN_MEMORY); + e->swap_status = SWAP_OK; + e->swap_file_number = file_number; + file_map_bit_set(file_number); + e->object_len = size; + e->lock_count = 0; + BIT_RESET(e->flag, KEY_CHANGE); + BIT_RESET(e->flag, CLIENT_ABORT_REQUEST); + e->refcount = 0; + e->lastref = cached_curtime; + e->timestamp = (u_num32) timestamp; + e->expires = (u_num32) expires; + e->ping_status = NOPING; + + e->key = e->url; + BIT_SET(e->flag, KEY_URL); + + if (!table) { + storeCreateHashTable(urlcmp); + } + storeHashInsert(e); + return e; +} + +/* Register interest in an object currently 
being retrieved. */ +int storeRegister(e, fd, handler, data) + StoreEntry *e; + int fd; + PIF handler; + caddr_t data; +{ + PendingEntry *pe = (PendingEntry *) xmalloc(sizeof(PendingEntry)); + int old_size, i, j; + + debug(5, "storeRegister: FD %d \n", fd, e->url); + + memset(pe, '\0', sizeof(PendingEntry)); + pe->fd = fd; + pe->handler = handler; + pe->data = data; + + /* + * I've rewritten all this pendings stuff so that num_pending goes + * away, and to fix all of the 'array bounds' problems we were having. + * It's now a very simple array, with any NULL slot empty/avail. + * If something needs to be added and there are no empty slots, + * it'll grow the array. + */ + /* find an empty slot */ + for (i = 0; i < (int) store_mem_obj(e, pending_list_size); i++) + if (store_mem_obj(e, pending[i]) == NULL) + break; + + if (i == store_mem_obj(e, pending_list_size)) { + /* grow the array */ + struct pentry **tmp = NULL; + + old_size = store_mem_obj(e, pending_list_size); + + /* set list_size to an appropriate amount */ + store_mem_obj(e, pending_list_size) += MIN_PENDING; + + /* allocate, and copy old pending list over to the new one */ + tmp = (struct pentry **) xcalloc(store_mem_obj(e, pending_list_size), + sizeof(struct pentry *)); + for (j = 0; j < old_size; j++) + tmp[j] = store_mem_obj(e, pending[j]); + + /* free the old list and set the new one */ + safe_free(store_mem_obj(e, pending)); + store_mem_obj(e, pending) = tmp; + + debug(10, "storeRegister: grew pending list to %d for slot %d.\n", + store_mem_obj(e, pending_list_size), i); + + } + store_mem_obj(e, pending[i]) = pe; + return 0; +} + +/* remove handler assoicate to that fd from store pending list */ +/* Also remove entry from client_list if exist. 
*/ +/* return number of successfully free pending entries */ +int storeUnregister(e, fd) + StoreEntry *e; + int fd; +{ + int i; + int freed = 0; + + debug(10, "storeUnregister: called for FD %d \n", fd, e->url); + + /* look for entry in client_list */ + if (store_mem_obj(e, client_list)) { + for (i = 0; i < store_mem_obj(e, client_list_size); ++i) { + if (store_mem_obj(e, client_list[i]) && (store_mem_obj(e, client_list[i]->fd) == fd)) { + /* reset fd to zero as a mark for empty slot */ + safe_free(store_mem_obj(e, client_list[i])); + store_mem_obj(e, client_list[i]) = NULL; + } + } + } + /* walk the entire list looking for matched fd */ + for (i = 0; i < (int) store_mem_obj(e, pending_list_size); i++) { + if (store_mem_obj(e, pending[i]) && (store_mem_obj(e, pending[i])->fd == fd)) { + /* found the match fd */ + safe_free(store_mem_obj(e, pending[i])); + store_mem_obj(e, pending[i]) = NULL; + freed++; + } + } + + debug(10, "storeUnregister: returning %d\n", freed); + return freed; +} + +/* Call to delete behind upto "target lowest offset" + * also, it update e_lowest_offset. 
+ */ +void storeDeleteBehind(e) + StoreEntry *e; +{ + int free_up_to; + int target_offset; + int n_client = 0; + int i; + + debug(3, "storeDeleteBehind: Object: %s\n", e->key); + debug(3, "storeDeleteBehind:\tOriginal Lowest Offset: %d \n", store_mem_obj(e, e_lowest_offset)); + + free_up_to = store_mem_obj(e, e_lowest_offset); + target_offset = 0; + + for (i = 0; i < store_mem_obj(e, client_list_size); ++i) { + if (store_mem_obj(e, client_list[i]) == NULL) + continue; + if (((store_mem_obj(e, client_list[i]->last_offset) < target_offset) || + (target_offset == 0))) { + n_client++; + target_offset = store_mem_obj(e, client_list[i]->last_offset); + } + } + + if (n_client == 0) { + debug(3, "storeDeleteBehind:\tThere is no client in the list.\n"); + debug(3, "\t\tTry to delete as fast as possible.\n"); + target_offset = store_mem_obj(e, e_current_len); + } + debug(3, "storeDeleteBehind:\tThe target offset is : %d\n", target_offset); + if (target_offset) { + free_up_to = (int) store_mem_obj(e, data)->mem_free_data_upto(store_mem_obj(e, data), + target_offset); + debug(3, " Object is freed upto : %d\n", free_up_to); + store_mem_size -= free_up_to - store_mem_obj(e, e_lowest_offset); + } + debug(3, "storeDeleteBehind:\tOutgoing Lowest Offset : %d\n", free_up_to); + store_mem_obj(e, e_lowest_offset) = free_up_to; +} + +/* Call handlers waiting for data to be appended to E. */ +static void InvokeHandlers(e) + StoreEntry *e; +{ + int i; + + /* walk the entire list looking for valid handlers */ + for (i = 0; i < (int) store_mem_obj(e, pending_list_size); i++) { + if (store_mem_obj(e, pending[i]) && store_mem_obj(e, pending[i])->handler) { + /* + * Once we call the handler, it is no longer needed + * until the write process sends all available data + * from the object entry. 
+ */ + (store_mem_obj(e, pending[i])->handler) + (store_mem_obj(e, pending[i])->fd, e, store_mem_obj(e, pending[i])->data); + safe_free(store_mem_obj(e, pending[i])); + store_mem_obj(e, pending[i]) = NULL; + } + } + +} + +/* switch object to deleting behind mode + * call by retrieval module when object gets too big. + */ +void storeStartDeleteBehind(e) + StoreEntry *e; +{ + debug(2, "storeStartDeleteBehind: Object: %s\n", e->key); + if (e->flag & DELETE_BEHIND) { + debug(2, "storeStartDeleteBehind:\tis already in delete behind mode.\n"); + return; + } + debug(2, "storeStartDeleteBehind:\tis now in delete behind mode.\n"); + /* change its key, so it couldn't be found by other client */ + storeChangeKey(e); + BIT_SET(e->flag, DELETE_BEHIND); + BIT_SET(e->flag, RELEASE_REQUEST); + BIT_RESET(e->flag, CACHABLE); + e->expires = cached_curtime; +} + +/* Append incoming data from a primary server to an entry. */ +int storeAppend(e, data, len) + StoreEntry *e; + char *data; + int len; +{ + /* validity check -- sometimes it's called with bogus values */ + if (e == NULL || !has_mem_obj(e) || store_mem_obj(e, data) == NULL) { + debug(0, "storeAppend (len = %d): Invalid StoreEntry, aborting...\n", + len); + if (len < 512) + fwrite(data, len, 1, debug_log); + debug(0, "%s", storeToString(e)); + fatal_dump(NULL); + } + if (len) { + debug(5, "storeAppend: appending %d bytes for %s\n", len, e->url); + + /* get some extra storage if needed */ + (void) storeGetMemSpace(len, 0); + store_mem_size += len; + debug(8, "storeAppend: growing store_mem_size by %d\n", len); + debug(8, "storeAppend: store_mem_size = %d\n", store_mem_size); + + (void) store_mem_obj(e, data)->mem_append(store_mem_obj(e, data), + data, len); + store_mem_obj(e, e_current_len) += len; + debug(8, "storeAppend: e_current_len = %d\n", + store_mem_obj(e, e_current_len)); + } + if ((e->status != STORE_ABORTED) && !(e->flag & DELAY_SENDING)) + InvokeHandlers(e); + + return 0; +} + +/* add directory to swap disk */ +int 
storeAddSwapDisk(path) + char *path; +{ + if (cache_dirs == NULL) + cache_dirs = create_dynamic_array(5, 5); + insert_dynamic_array(cache_dirs, path); + return ++ncache_dirs; +} + +/* return the nth swap directory */ +char *swappath(n) + int n; +{ + return cache_dirs->collection[n % ncache_dirs]; +} + + +/* return full name to swapfile */ +char *storeSwapFullPath(fn, fullpath) + int fn; + char *fullpath; +{ + static char fullfilename[MAX_FILE_NAME_LEN]; + + if (fullpath) { + sprintf(fullpath, "%s/%02d/%d", + swappath(fn), + (fn / ncache_dirs) % SWAP_DIRECTORIES, + fn); + return fullpath; + } + fullfilename[0] = '\0'; + sprintf(fullfilename, "%s/%02d/%d", + swappath(fn), + (fn / ncache_dirs) % SWAP_DIRECTORIES, + fn); + return fullfilename; +} + +/* swapping in handle */ +int storeSwapInHandle(fd_notused, buf, len, flag, e, offset_notused) + int fd_notused; + char *buf; + int len; + int flag; + StoreEntry *e; + int offset_notused; +{ + debug(2, "storeSwapInHandle: \n", e->url); + + if ((flag < 0) && (flag != DISK_EOF)) { + debug(0, "storeSwapInHandle: SwapIn failure (err code = %d).\n", flag); + put_free_8k_page(store_mem_obj(e, e_swap_buf)); + storeSetMemStatus(e, NOT_IN_MEMORY); + file_close(store_mem_obj(e, swap_fd)); + swapInError(-1, e); /* Invokes storeAbort() and completes the I/O */ + return -1; + } + debug(5, "storeSwapInHandle: e->swap_offset = %d\n", + store_mem_obj(e, swap_offset)); + debug(5, "storeSwapInHandle: len = %d\n", + len); + debug(5, "storeSwapInHandle: e->e_current_len = %d\n", + store_mem_obj(e, e_current_len)); + debug(5, "storeSwapInHandle: e->object_len = %d\n", + e->object_len); + + /* always call these, even if len == 0 */ + store_mem_obj(e, swap_offset) += len; + storeAppend(e, buf, len); + + if (store_mem_obj(e, e_current_len) < e->object_len && flag != DISK_EOF) { + /* some more data to swap in, reschedule */ + file_read(store_mem_obj(e, swap_fd), + store_mem_obj(e, e_swap_buf), + SWAP_BUF, + store_mem_obj(e, swap_offset), + 
(FILE_READ_HD) storeSwapInHandle, + (caddr_t) e); + } else { + /* complete swapping in */ + storeSetMemStatus(e, IN_MEMORY); + put_free_8k_page(store_mem_obj(e, e_swap_buf)); + file_close(store_mem_obj(e, swap_fd)); + debug(5, "storeSwapInHandle: SwapIn complete: from %s.\n", + e->url, storeSwapFullPath(e->swap_file_number, NULL)); + if (store_mem_obj(e, e_current_len) != e->object_len) { + debug(0, "storeSwapInHandle: WARNING! Object size mismatch.\n"); + debug(0, " --> \n", e->url); + debug(0, " --> Expecting %d bytes from file: %s\n", e->object_len, + storeSwapFullPath(e->swap_file_number, NULL)); + debug(0, " --> Only read %d bytes\n", + store_mem_obj(e, e_current_len)); + } + if (e->flag & RELEASE_REQUEST) + storeRelease(e); + } + return 0; +} + +/* start swapping in */ +int storeSwapInStart(e) + StoreEntry *e; +{ + int fd; + + /* sanity check! */ + if ((e->swap_status != SWAP_OK) || (e->swap_file_number < 0)) { + debug(0, "storeSwapInStart: ? \n", e->swap_file_number, e->url); + if (has_mem_obj(e)) + store_mem_obj(e, swap_fd) = -1; + return -1; + } + /* create additional structure for object in memory */ + create_store_mem_obj(e); + + store_mem_obj(e, swap_fd) = fd = + file_open(storeSwapFullPath(e->swap_file_number, NULL), NULL, O_RDONLY); + if (fd < 0) { + debug(0, "storeSwapInStart: Unable to open swapfile: %s for\n\t\n", + storeSwapFullPath(e->swap_file_number, NULL), e->url); + storeSetMemStatus(e, NOT_IN_MEMORY); + /* Invoke a store abort that should free the destroy_store_mem_obj(e); */ + return -1; + } + debug(5, "storeSwapInStart: initialized swap file '%s' for \n", + storeSwapFullPath(e->swap_file_number, NULL), e->url); + + store_mem_obj(e, data) = memInit(); + ++meta_data.hot_vm; + + storeSetMemStatus(e, SWAPPING_IN); + store_mem_obj(e, swap_offset) = 0; + + store_mem_obj(e, e_swap_buf) = get_free_8k_page(); + + /* start swapping daemon */ + file_read(store_mem_obj(e, swap_fd), + store_mem_obj(e, e_swap_buf), + SWAP_BUF, + store_mem_obj(e, 
swap_offset), + (FILE_READ_HD) storeSwapInHandle, + (caddr_t) e); + return 0; +} + +void storeSwapOutHandle(fd, flag, e) + int fd; + int flag; + StoreEntry *e; +{ + static char filename[MAX_FILE_NAME_LEN]; + static char logmsg[6000]; + char *page_ptr = NULL; + + debug(5, "storeSwapOutHandle: \n", e->url); + + e->timestamp = cached_curtime; + storeSwapFullPath(e->swap_file_number, filename); + page_ptr = store_mem_obj(e, e_swap_buf); + + if (flag < 0) { + debug(1, "storeSwapOutHandle: SwapOut failure (err code = %d).\n", + flag); + e->swap_status = NO_SWAP; + put_free_8k_page(page_ptr); + file_close(fd); + BIT_SET(e->flag, RELEASE_REQUEST); + if (e->swap_file_number != -1) { + file_map_bit_reset(e->swap_file_number); + safeunlink(filename, 0); /* remove it */ + e->swap_file_number = -1; + } + if (flag == DISK_NO_SPACE_LEFT) { + /* reduce the swap_size limit to the current size. */ + setCacheSwapMax(store_swap_size); + store_swap_high = (long) (((float) getCacheSwapMax() * + (float) getCacheSwapHighWaterMark()) / (float) 100); + store_swap_low = (long) (((float) getCacheSwapMax() * + (float) getCacheSwapLowWaterMark()) / (float) 100); + } + return; + } + debug(6, "storeSwapOutHandle: e->swap_offset = %d\n", + store_mem_obj(e, swap_offset)); + debug(6, "storeSwapOutHandle: e->e_swap_buf_len = %d\n", + store_mem_obj(e, e_swap_buf_len)); + debug(6, "storeSwapOutHandle: e->object_len = %d\n", + e->object_len); + debug(6, "storeSwapOutHandle: store_swap_size = %dk\n", + store_swap_size); + + store_mem_obj(e, swap_offset) += store_mem_obj(e, e_swap_buf_len); + /* round up */ + store_swap_size += ((store_mem_obj(e, e_swap_buf_len) + 1023) >> 10); + if (store_mem_obj(e, swap_offset) >= e->object_len) { + /* swapping complete */ + e->swap_status = SWAP_OK; + file_close(store_mem_obj(e, swap_fd)); + debug(5, "storeSwapOutHandle: SwapOut complete: to %s.\n", + e->url, storeSwapFullPath(e->swap_file_number, NULL)); + put_free_8k_page(page_ptr); + sprintf(logmsg, "FILE: %s URL: 
%s %d %d %d\n", + filename, + e->url, + (int) e->expires, + (int) e->timestamp, + e->object_len); + /* Automatically freed by file_write because no-handlers */ + file_write(swaplog_fd, + xstrdup(logmsg), + strlen(logmsg), + swaplog_lock, + NULL, + NULL); + /* check if it's request to be released. */ + if (e->flag & RELEASE_REQUEST) + storeRelease(e); + return; + } + /* write some more data, reschedule itself. */ + storeCopy(e, store_mem_obj(e, swap_offset), SWAP_BUF, + store_mem_obj(e, e_swap_buf), &(store_mem_obj(e, e_swap_buf_len))); + file_write(store_mem_obj(e, swap_fd), store_mem_obj(e, e_swap_buf), + store_mem_obj(e, e_swap_buf_len), store_mem_obj(e, e_swap_access), + storeSwapOutHandle, e); + return; + +} + + +/* start swapping object to disk */ +int storeSwapOutStart(e) + StoreEntry *e; +{ + int fd; + static char swapfilename[MAX_FILE_NAME_LEN]; + + /* Suggest a new swap file number */ + swapfileno = (swapfileno + 1) % (MAX_SWAP_FILE); + /* Record the number returned */ + swapfileno = file_map_allocate(swapfileno); + storeSwapFullPath(swapfileno, swapfilename); + + fd = file_open(swapfilename, NULL, O_RDWR | O_CREAT | O_TRUNC); + if (fd < 0) { + debug(0, "storeSwapOutStart: Unable to open swapfile: %s\n", + swapfilename); + file_map_bit_reset(swapfileno); + e->swap_file_number = -1; + return -1; + } + store_mem_obj(e, swap_fd) = fd; + debug(5, "storeSwapOutStart: Begin SwapOut to FD %d FILE %s.\n", + e->url, fd, swapfilename); + + e->swap_file_number = swapfileno; + if ((store_mem_obj(e, e_swap_access) = file_write_lock(store_mem_obj(e, swap_fd))) < 0) { + debug(0, "storeSwapOutStart: Unable to lock swapfile: %s\n", + swapfilename); + file_map_bit_reset(e->swap_file_number); + e->swap_file_number = -1; + return -1; + } + e->swap_status = SWAPPING_OUT; + store_mem_obj(e, swap_offset) = 0; + store_mem_obj(e, e_swap_buf) = get_free_8k_page(); + store_mem_obj(e, e_swap_buf_len) = 0; + + storeCopy(e, 0, SWAP_BUF, store_mem_obj(e, e_swap_buf), + 
&(store_mem_obj(e, e_swap_buf_len))); + + /* start swapping daemon */ + if (file_write(store_mem_obj(e, swap_fd), + store_mem_obj(e, e_swap_buf), + store_mem_obj(e, e_swap_buf_len), + store_mem_obj(e, e_swap_access), + storeSwapOutHandle, + e) != DISK_OK) { + /* This shouldn't happen */ + fatal_dump(NULL); + } + return 0; +} + +/* recreate meta data from disk image in swap directory */ +void storeRebuildFromDisk() +{ + int objcount = 0; /* # objects successfully reloaded */ + int expcount = 0; /* # objects expired */ + int linecount = 0; /* # lines parsed from cache logfile */ + int clashcount = 0; /* # swapfile clashes avoided */ + int dupcount = 0; /* # duplicates purged */ + static char line_in[4096]; + static char log_swapfile[1024]; + static char swapfile[1024]; + static char url[MAX_URL]; + char *t = NULL; + StoreEntry *e = NULL; + struct stat sb; + time_t start, stop, r; + time_t expires; + time_t timestamp; + time_t last_clean; + int scan1, scan2, scan3; + int delta; + int i; + int sfileno = 0; + off_t size; + int fast_mode = 0; + + for (i = 0; i < ncache_dirs; ++i) + debug(1, "Rebuilding storage from disk image in %s\n", swappath(i)); + start = cached_curtime = time(NULL); + + sprintf(line_in, "%s/log-last-clean", swappath(0)); + if (stat(line_in, &sb) >= 0) { + last_clean = sb.st_mtime; + sprintf(line_in, "%s/log", swappath(0)); + if (stat(line_in, &sb) >= 0) { + fast_mode = (sb.st_mtime <= last_clean) ? 
1 : 0; + } + } + if (fast_mode) + debug(1, "Rebuilding in FAST MODE.\n"); + + /* go to top of the file */ + (void) lseek(swaplog_fd, 0L, SEEK_SET); + + memset(line_in, '\0', 4096); + while (fgets(line_in, 4096, swaplog_stream)) { + + if ((linecount++ & 0x7F) == 0) /* update current time */ + cached_curtime = time(NULL); + + if ((linecount & 0xFFF) == 0) + debug(1, " %7d Lines read so far.\n", linecount); + + debug(10, "line_in: %s", line_in); + if ((line_in[0] == '\0') || (line_in[0] == '\n') || + (line_in[0] == '#')) + continue; /* skip bad lines */ + + url[0] = log_swapfile[0] = '\0'; + expires = cached_curtime; + + scan3 = 0; + size = 0; + if (sscanf(line_in, "FILE: %[^ ] URL: %[^ ] %d %d %d", + log_swapfile, url, &scan1, &scan2, &scan3) < 4) { +#ifdef UNLINK_ON_RELOAD + if (log_swapfile[0]) + safeunlink(log_swapfile, 0); +#endif + continue; + } + expires = (time_t) scan1; + timestamp = (time_t) scan2; + size = (off_t) scan3; + if ((t = strrchr(log_swapfile, '/'))) + sfileno = atoi(t + 1); + else + sfileno = atoi(log_swapfile); + storeSwapFullPath(sfileno, swapfile); + + /* + * Note that swapfile may be different than log_swapfile if + * another cache_dir is added. 
+ */ + + if (!scan3 || !fast_mode) { + + if (stat(swapfile, &sb) < 0) { + if (expires < cached_curtime) { + debug(3, "storeRebuildFromDisk: Expired: \n", url); +#ifdef UNLINK_ON_RELOAD + safeunlink(swapfile, 1); +#endif + expcount++; + } else { + debug(3, "storeRebuildFromDisk: Swap file missing: : %s: %s.\n", url, swapfile, xstrerror()); +#ifdef UNLINK_ON_RELOAD + safeunlink(log_swapfile, 1); +#endif + } + continue; + } + if ((size = sb.st_size) == 0) { +#ifdef UNLINK_ON_RELOAD + safeunlink(log_swapfile, 1); +#endif + continue; + } + /* timestamp might be a little bigger than sb.st_mtime */ + delta = abs((int) (timestamp - sb.st_mtime)); + if (delta > REBUILD_TIMESTAMP_DELTA_MAX) { + /* this log entry doesn't correspond to this file */ + clashcount++; + continue; + } + timestamp = sb.st_mtime; + debug(10, "storeRebuildFromDisk: Cached file exists: : %s\n", + url, swapfile); + } + if ((e = storeGet(url))) { + debug(6, "storeRebuildFromDisk: Duplicate: \n", url); + storeRelease(e); + objcount--; + dupcount++; + } + if (expires < cached_curtime) { + debug(3, "storeRebuildFromDisk: Expired: \n", url); +#ifdef UNLINK_ON_RELOAD + safeunlink(swapfile, 1); +#endif + expcount++; + continue; + } + /* update store_swap_size */ + store_swap_size += (int) ((size + 1023) >> 10); + objcount++; + + fprintf(swaplog_tmp_stream, "FILE: %s URL: %s %d %d %d\n", + swapfile, url, (int) expires, (int) timestamp, (int) size); + storeAddDiskRestore(url, sfileno, (int) size, expires, timestamp); + CacheInfo->proto_newobject(CacheInfo, + CacheInfo->proto_id(url), + (int) size, TRUE); + } + + fflush(swaplog_tmp_stream); + stop = cached_curtime = time(NULL); + r = stop - start; + /* swapfileno = sfileno; */ + /* PBD: Start swapfileno at zero, so that old swap files are overwritten */ + debug(1, "Finished rebuilding storage from disk image.\n"); + debug(1, " %7d Lines read from previous logfile.\n", linecount); + debug(1, " %7d Objects loaded.\n", objcount); + debug(1, " %7d Objects 
expired.\n", expcount); + debug(1, " %7d Duplicate URLs purged.\n", dupcount); + debug(1, " %7d Swapfile clashes avoided.\n", clashcount); + debug(1, " Took %d seconds (%6.1lf objects/sec).\n", + r > 0 ? r : 0, (double) objcount / (r > 0 ? r : 1)); + debug(1, " store_swap_size = %dk\n", store_swap_size); + + /* touch a timestamp file */ + sprintf(line_in, "%s/log-last-clean", swappath(0)); + file_close(file_open(line_in, NULL, O_WRONLY | O_CREAT | O_TRUNC)); +} + + +/* return current swap size in kilo-bytes */ +int storeGetSwapSize() +{ + return store_swap_size; +} + +/* return current swap size in bytes */ +int storeGetMemSize() +{ + return store_mem_size; +} + + +/* Complete transfer into the local cache. */ +void storeComplete(e) + StoreEntry *e; +{ + debug(5, "storeComplete: \n", e->url); + + e->object_len = store_mem_obj(e, e_current_len); + InvokeHandlers(e); + e->lastref = cached_curtime; + e->status = STORE_OK; + storeSetMemStatus(e, IN_MEMORY); + e->swap_status = NO_SWAP; + /* start writing it to disk, exclude cache_object */ + if ((strncmp("cache_obj", e->url, 9) != 0) && + (e->flag & CACHABLE) && + !(e->flag & RELEASE_REQUEST) && + (e->type_id == REQ_GET)) { + storeSwapOutStart(e); + } + /* free up incoming MIME */ + safe_free(store_mem_obj(e, mime_hdr)); + CacheInfo->proto_newobject(CacheInfo, CacheInfo->proto_id(e->url), + e->object_len, FALSE); + if (e->flag & RELEASE_REQUEST) + storeRelease(e); +} + +/* + * Fetch aborted. Tell all clients to go home. 
Negatively cache + * abort message, freeing the data for this object + */ +int storeAbort(e, msg) + StoreEntry *e; + char *msg; +{ + static char mime_hdr[300]; + static char abort_msg[2000]; + + debug(6, "storeAbort: \n", e->url); + e->expires = cached_curtime + getNegativeTTL(); + e->status = STORE_ABORTED; + storeSetMemStatus(e, IN_MEMORY); + /* No DISK swap for negative cached object */ + e->swap_status = NO_SWAP; + e->lastref = cached_curtime; + /* In case some parent responds late and + * tries to restart the fetch, say that it's been + * dispatched already. + */ + BIT_SET(e->flag, REQ_DISPATCHED); + + storeLockObject(e); + + /* Count bytes faulted through cache but not moved to disk */ + CacheInfo->proto_touchobject(CacheInfo, CacheInfo->proto_id(e->url), + store_mem_obj(e, e_current_len)); + CacheInfo->proto_touchobject(CacheInfo, CacheInfo->proto_id("abort:"), + store_mem_obj(e, e_current_len)); + + mk_mime_hdr(mime_hdr, + (time_t) getNegativeTTL(), + 6 + strlen(msg), + cached_curtime, + "text/html"); + if (msg) { + /* This can run off the end here. Be careful */ + if ((int) (strlen(msg) + strlen(mime_hdr) + 50) < 2000) { + sprintf(abort_msg, "HTTP/1.0 400 Cache Detected Error\r\n%s\r\n\r\n%s", mime_hdr, msg); + } else { + debug(0, "storeAbort: WARNING: Must increase msg length!"); + } + storeAppend(e, abort_msg, strlen(abort_msg)); + store_mem_obj(e, e_abort_msg) = xstrdup(abort_msg); + /* Set up object for negative caching */ + BIT_SET(e->flag, ABORT_MSG_PENDING); + } + /* We assign an object length here--The only other place we assign the + * object length is in storeComplete() */ + e->object_len = store_mem_obj(e, e_current_len); + + /* Call handlers so they can report error. 
*/ + InvokeHandlers(e); + + storeUnlockObject(e); + return 0; +} + +/* get the first in memory object entry in the storage */ +hash_link *storeFindFirst(id) + HashID id; +{ + if (id == (HashID) 0) + return NULL; + return (hash_first(id)); +} + +/* get the next in memory object entry in the storage for a given + * search pointer */ +hash_link *storeFindNext(id) + HashID id; +{ + if (id == (HashID) 0) + return NULL; + return (hash_next(id)); +} + +/* get the first in memory object entry in the storage */ +StoreEntry *storeGetInMemFirst() +{ + hash_link *first = NULL; + first = storeFindFirst(in_mem_table); + return (first ? ((StoreEntry *) first->item) : NULL); +} + + +/* get the next in memory object entry in the storage for a given + * search pointer */ +StoreEntry *storeGetInMemNext() +{ + hash_link *next = NULL; + next = storeFindNext(in_mem_table); + return (next ? ((StoreEntry *) next->item) : NULL); +} + +/* get the first entry in the storage */ +StoreEntry *storeGetFirst() +{ + return ((StoreEntry *) storeFindFirst(table)); +} + + +/* get the next entry in the storage for a given search pointer */ +StoreEntry *storeGetNext() +{ + return ((StoreEntry *) storeFindNext(table)); +} + + + +/* walk through every single entry in the storage and invoke a given routine */ +int storeWalkThrough(proc, data) + int (*proc) _PARAMS((StoreEntry * e, caddr_t data)); + caddr_t data; +{ + StoreEntry *e = NULL; + int count = 0; + int n = 0; + + for (e = storeGetFirst(); e; e = storeGetNext()) { + if ((++n & 0xFF) == 0) + cached_curtime = time(NULL); + if ((n & 0xFFF) == 0) + debug(2, "storeWalkThrough: %7d objects so far.\n", n); + count += proc(e, data); + } + return count; +} + + +/* compare an object timestamp and see if ttl is expired. Free it if so. 
*/ +/* return 1 if it expired, 0 if not */ +int removeOldEntry(e, data) + StoreEntry *e; + caddr_t data; +{ + time_t curtime = *((time_t *) data); + + debug(5, "removeOldEntry: Checking: %s\n", e->url); + debug(6, "removeOldEntry: * curtime: %8ld\n", curtime); + debug(6, "removeOldEntry: * e->timestamp: %8ld\n", e->timestamp); + debug(6, "removeOldEntry: * time in cache: %8ld\n", + curtime - e->timestamp); + debug(6, "removeOldEntry: * time-to-live: %8ld\n", + e->expires - cached_curtime); + + if ((cached_curtime > e->expires) && (e->status != STORE_PENDING)) { + return (storeRelease(e) == 0 ? 1 : 0); + } + return 0; +} + + +/* free up all ttl-expired objects */ +int storePurgeOld() +{ + int n; + + debug(3, "storePurgeOld: Begin purging TTL-expired objects\n"); + n = storeWalkThrough(removeOldEntry, (caddr_t) & cached_curtime); + debug(3, "storePurgeOld: Done purging TTL-expired objects.\n"); + debug(3, "storePurgeOld: %d objects expired\n", n); + return n; +} + + +#define MEM_LRUSCAN_BLOCK 16 +#define MEM_MAX_HELP 5 +/* Clear Memory storage to accommodate the given object len */ +int storeGetMemSpace(size, check_vm_number) + int size; + int check_vm_number; +{ + static int over_highwater = 0; + static int over_max = 0; + StoreEntry *LRU = NULL, *e = NULL; + dynamic_array *LRU_list = NULL; + dynamic_array *pending_entry_list = NULL; + int entry_to_delete_behind = 0; + int n_deleted_behind = 0; + int n_scanned = 0; + int n_expired = 0; + int n_aborted = 0; + int n_purged = 0; + int n_released = 0; + int i; + int n_inmem = 0; /* extra debugging */ + int n_cantpurge = 0; /* extra debugging */ + int mem_cantpurge = 0; /* extra debugging */ + int compareLastRef(); + int compareSize(); + + + if (!check_vm_number && ((store_mem_size + size) < store_mem_high)) + return 0; + + debug(2, "storeGetMemSpace: Starting...\n"); + + LRU_list = create_dynamic_array(meta_data.store_in_mem_objects, MEM_LRUSCAN_BLOCK); + pending_entry_list = 
create_dynamic_array(meta_data.store_in_mem_objects, MEM_LRUSCAN_BLOCK); + + for (e = storeGetInMemFirst(); e; e = storeGetInMemNext()) { + n_scanned++; + + n_inmem++; + + if (e->status == STORE_PENDING) { + if (!(e->flag & DELETE_BEHIND)) { + /* it's not deleting behind, we can do something about it. */ + insert_dynamic_array(pending_entry_list, e); + } + continue; + } + if (cached_curtime > e->expires) { + debug(2, "storeGetMemSpace: Expired: %s\n", e->url); + n_expired++; + /* Delayed release */ + storeRelease(e); + continue; + } + if ((e->swap_status == SWAP_OK) && (e->mem_status != SWAPPING_IN) && + (e->lock_count == 0)) { + insert_dynamic_array(LRU_list, e); + } else if (((e->status == STORE_ABORTED) || + (e->swap_status == NO_SWAP)) && + (e->lock_count == 0)) { + n_aborted++; + insert_dynamic_array(LRU_list, e); + } else { + n_cantpurge++; + mem_cantpurge += store_mem_obj(e, e_current_len); + debug(5, "storeGetMemSpace: Can't purge %7d bytes: %s\n", + store_mem_obj(e, e_current_len), e->url); + if (e->swap_status != SWAP_OK) + debug(5, "storeGetMemSpace: --> e->swap_status != SWAP_OK\n"); + if (e->lock_count) + debug(5, "storeGetMemSpace: --> e->lock_count %d\n", e->lock_count); + } + } + debug(2, "storeGetMemSpace: Current size: %7d bytes\n", store_mem_size); + debug(2, "storeGetMemSpace: High W Mark: %7d bytes\n", store_mem_high); + debug(2, "storeGetMemSpace: Low W Mark: %7d bytes\n", store_mem_low); + debug(2, "storeGetMemSpace: Entry count: %7d items\n", meta_data.store_entries); + debug(2, "storeGetMemSpace: Scanned: %7d items\n", n_scanned); + debug(2, "storeGetMemSpace: In memory: %7d items\n", n_inmem); + debug(2, "storeGetMemSpace: Hot vm count: %7d items\n", meta_data.hot_vm); + debug(2, "storeGetMemSpace: Expired: %7d items\n", n_expired); + debug(2, "storeGetMemSpace: Negative Cached: %7d items\n", n_aborted); + debug(2, "storeGetMemSpace: Can't purge: %7d items\n", n_cantpurge); + debug(2, "storeGetMemSpace: Can't purge size: %7d bytes\n", 
mem_cantpurge); + debug(2, "storeGetMemSpace: Sorting LRU_list: %7d items\n", LRU_list->index); + qsort((char *) LRU_list->collection, LRU_list->index, sizeof(e), (int (*)(const void *, const void *)) compareLastRef); + + /* Kick LRU out until we have enough memory space */ + + if (check_vm_number) { + /* look for vm slot */ + for (i = 0; (i < LRU_list->index) && (meta_data.hot_vm > store_hotobj_low); ++i) { + if ((LRU = (StoreEntry *) LRU_list->collection[i])) + if ((LRU->status != STORE_PENDING) && (LRU->swap_status == NO_SWAP)) { + n_released++; + storeRelease(LRU); + } else { + n_purged++; + storePurgeMem(LRU); + } + } + } else { + /* look for space */ + for (i = 0; (i < LRU_list->index) && ((store_mem_size + size) > store_mem_low); ++i) { + if ((LRU = (StoreEntry *) LRU_list->collection[i])) + if ((LRU->status != STORE_PENDING) && (LRU->swap_status == NO_SWAP)) { + n_released++; + storeRelease(LRU); + } else { + n_purged++; + storePurgeMem(LRU); + } + } + } + + destroy_dynamic_array(LRU_list); + + debug(2, "storeGetMemSpace: After freeing size: %7d bytes\n", store_mem_size); + debug(2, "storeGetMemSpace: Purged: %7d items\n", n_purged); + debug(2, "storeGetMemSpace: Released: %7d items\n", n_released); + + + if (check_vm_number) { + /* don't check for size */ + destroy_dynamic_array(pending_entry_list); + debug(2, "storeGetMemSpace: Done.\n"); + return 0; + } + if ((store_mem_size + size) < store_mem_high) { + /* we don't care for hot_vm count here, just the storage size. */ + over_highwater = over_max = 0; + destroy_dynamic_array(pending_entry_list); + debug(2, "storeGetMemSpace: Done.\n"); + return 0; + } + if ((store_mem_size + size) < getCacheMemMax()) { + /* We're over high water mark here, but still under absolute max */ + if (!over_highwater) { + /* print only once when the condition occur until it clears. 
*/ + debug(1, "storeGetMemSpace: Allocating beyond the high water mark with total size of %d\n", + store_mem_size + size); + over_highwater = 1; + } + /* we can delete more than one if we want to be more aggressive. */ + entry_to_delete_behind = 1; + } else { + /* We're over absolute max */ + if (!over_max) { + /* print only once when the condition occur until it clears. */ + debug(1, "storeGetMemSpace: Allocating beyond the MAX Store with total size of %d\n", + store_mem_size + size); + debug(1, " Start Deleting Behind for every pending objects\n:"); + debug(1, " You should really adjust your cache_mem, high/low water mark,\n"); + debug(1, " max object size to suit your need.\n"); + over_max = 1; + } + /* delete all of them, we desperate for a space. */ + entry_to_delete_behind = pending_entry_list->index; + } + + /* sort the stuff by size */ + qsort((char *) pending_entry_list->collection, pending_entry_list->index, sizeof(e), (int (*)(const void *, const void *)) compareSize); + for (i = 0; (i < pending_entry_list->index) && (i < entry_to_delete_behind); ++i) + if (pending_entry_list->collection[i]) { + n_deleted_behind++; + storeStartDeleteBehind(pending_entry_list->collection[i]); + } + if (n_deleted_behind) { + debug(1, "storeGetMemSpace: Due to memory flucuation, put %d objects to DELETE_BEHIND MODE.\n", + n_deleted_behind); + } + destroy_dynamic_array(pending_entry_list); + debug(2, "storeGetMemSpace: Done.\n"); + return 0; +} + +int compareSize(e1, e2) + StoreEntry **e1, **e2; +{ + if (!e1 || !e2) { + debug(1, "compareSize: Called with at least one null argument, shouldn't happen.\n"); + return 0; + } + if (store_mem_obj(*e1, e_current_len) > store_mem_obj(*e2, e_current_len)) + return (1); + + if (store_mem_obj(*e1, e_current_len) < store_mem_obj(*e2, e_current_len)) + return (-1); + + return (0); +} + +int compareLastRef(e1, e2) + StoreEntry **e1, **e2; +{ + if (!e1 || !e2) + fatal_dump(NULL); + + if ((*e1)->lastref > (*e2)->lastref) + return (1); + + if 
((*e1)->lastref < (*e2)->lastref) + return (-1); + + return (0); +} + +/* returns the bucket number to work on, + * pointer to next bucket after each calling + */ +unsigned int storeGetBucketNum() +{ + static unsigned int bucket = 0; + + if (bucket >= STORE_BUCKETS) + bucket = 0; + return (bucket++); +} + +#define SWAP_LRUSCAN_BLOCK 16 +#define SWAP_MAX_HELP STORE_BUCKETS/2 + +/* The maximum objects to scan for maintain storage space */ +#define SWAP_LRUSCAN_COUNT (256) + +/* Removes at most 30 LRU objects for one loop */ +#define SWAP_LRU_REMOVE_COUNT (8) + +/* Clear Swap storage to accommodate the given object len */ +int storeGetSwapSpace(size) + int size; +{ + static int fReduceSwap = 0; + static int swap_help = 0; + StoreEntry *LRU = NULL, *e = NULL; + int scanned = 0; + int removed = 0; + int expired = 0; + int locked = 0; + int locked_size = 0; + int scan_in_objs = 0; + int i; + int LRU_cur_size = meta_data.store_entries; + dynamic_array *LRU_list; + hash_link *link_ptr = NULL, *next = NULL; + unsigned int kb_size = ((size + 1023) >> 10); + + if (store_swap_size + kb_size <= store_swap_low) + fReduceSwap = 0; + if (!fReduceSwap && (store_swap_size + kb_size <= store_swap_high)) { + return 0; + } + debug(2, "storeGetSwapSpace: Starting...\n"); + + /* Set flag if swap size over high-water-mark */ + if (store_swap_size + kb_size > store_swap_high) + fReduceSwap = 1; + + debug(2, "storeGetSwapSpace: Need %d bytes...\n", size); + + LRU_list = create_dynamic_array(LRU_cur_size, LRU_cur_size); + /* remove expired objects until recover enough space or no expired objects */ + for (i = 0; i < STORE_BUCKETS; ++i) { + int expired_in_one_bucket = 0; + + link_ptr = hash_get_bucket(table, storeGetBucketNum()); + if (link_ptr == NULL) + continue; + /* this while loop handles one bucket of hash table */ + expired_in_one_bucket = 0; + while (link_ptr) { + scanned++; + next = link_ptr->next; + e = (StoreEntry *) link_ptr; + + /* Identify objects that aren't locked, for 
replacement */ + if ((e->status != STORE_PENDING) && /* We're still fetching the object */ + (e->swap_status == SWAP_OK) && /* Only release it if it is on disk */ + (e->lock_count == 0) && /* Be overly cautious */ + (e->mem_status != SWAPPING_IN)) { /* Not if it's being faulted into memory */ + if (cached_curtime > e->expires) { + debug(2, "storeRemoveExpiredObj: Expired: \n", e->url); + /* just call release. don't have to check for lock status. + * storeRelease will take care of that and set a pending flag + * if it's still locked. */ + ++expired_in_one_bucket; + storeRelease(e); + } else { + /* Prepare to do LRU replacement */ + insert_dynamic_array(LRU_list, e); + ++scan_in_objs; + } + } else { + debug(2, "storeGetSwapSpace: Can't purge %7d bytes: \n", + e->object_len, e->url); + if (e->lock_count) { + debug(2, "\t\te->lock_count %d\n", e->lock_count); + } + if (e->swap_status == SWAPPING_OUT) { + debug(2, "\t\te->swap_status == SWAPPING_OUT\n"); + } + locked++; + locked_size += store_mem_obj(e, e_current_len); + } + link_ptr = next; + } /* while, end of one bucket of hash table */ + + expired += expired_in_one_bucket; + if (expired_in_one_bucket && + ((!fReduceSwap && (store_swap_size + kb_size <= store_swap_high)) || + (fReduceSwap && (store_swap_size + kb_size <= store_swap_low))) + ) { + fReduceSwap = 0; + destroy_dynamic_array(LRU_list); + debug(2, "storeGetSwapSpace: Finished, %d objects expired.\n", + expired); + return 0; + } + qsort((char *) LRU_list->collection, LRU_list->index, sizeof(e), (int (*)(const void *, const void *)) compareLastRef); + /* keep the first n LRU objects only */ + cut_dynamic_array(LRU_list, SWAP_LRU_REMOVE_COUNT); + + /* Scan in about SWAP_LRU_COUNT for one call */ + if (scan_in_objs >= SWAP_LRUSCAN_COUNT) + break; + } /* for */ + + /* end of candidate selection */ + debug(2, "storeGetSwapSpace: Current Size: %7d kbytes\n", store_swap_size); + debug(2, "storeGetSwapSpace: High W Mark: %7d kbytes\n", store_swap_high); + debug(2, 
"storeGetSwapSpace: Low W Mark: %7d kbytes\n", store_swap_low); + debug(2, "storeGetSwapSpace: Entry count: %7d items\n", meta_data.store_entries); + debug(2, "storeGetSwapSpace: Scanned: %7d items\n", scanned); + debug(2, "storeGetSwapSpace: Expired: %7d items\n", expired); + debug(2, "storeGetSwapSpace: Locked: %7d items\n", locked); + debug(2, "storeGetSwapSpace: Locked Space: %7d bytes\n", locked_size); + debug(2, "storeGetSwapSpace: Scan in array: %7d bytes\n", scan_in_objs); + debug(2, "storeGetSwapSpace: LRU candidate: %7d items\n", LRU_list->index); + + /* Although all expired objects removed, still didn't recover enough */ + /* space. Kick LRU out until we have enough swap space */ + for (i = 0; i < LRU_list->index; ++i) { + if (store_swap_size + kb_size <= store_swap_low) { + fReduceSwap = 0; + break; + } + if ((LRU = LRU_list->collection[i]) != NULL) { + if (storeRelease(LRU) == 0) { + removed++; + } else { + debug(2, "storeGetSwapSpace: Help! Can't remove objects. <%s>\n", + LRU->url); + } + } + } + debug(2, "storeGetSwapSpace: After Freeing Size: %7d kbytes\n", store_swap_size); + + /* free the list */ + destroy_dynamic_array(LRU_list); + + if ((store_swap_size + kb_size > store_swap_high)) { + if (++swap_help > SWAP_MAX_HELP) { + debug(0, "storeGetSwapSpace: Nothing to free with %d Kbytes in use.\n", + store_swap_size); + debug(0, "--> Asking for %d bytes\n", size); + debug(0, "WARNING! Repeated failures to allocate swap space!\n"); + debug(0, "WARNING! Please check your disk space.\n"); + swap_help = 0; + } else { + debug(2, "storeGetSwapSpace: Nothing to free with %d Kbytes in use.\n", + store_swap_size); + debug(2, "--> Asking for %d bytes\n", size); + } + } else { + swap_help = 0; + } + + debug(2, "storeGetSwapSpace: Finished, %d objects removed.\n", removed); + return 0; +} + + +/* release an object from a cache */ +/* return 0 when success. 
*/ +int storeRelease(e) + StoreEntry *e; +{ + StoreEntry *result = NULL; + StoreEntry *head_result = NULL; + hash_link *hptr = NULL; + + /* If, for any reason we can't discard this object because of an + * outstanding request, mark it for pending release */ + if (storeEntryLocked(e)) { + BIT_SET(e->flag, RELEASE_REQUEST); + return -1; + } + debug(5, "storeRelease: Releasing: %s\n", e->url); + + if (table == (HashID) 0) + return -1; + + if (e->key == NULL) { + debug(0, "storeRelease: NULL key for %s\n", e->url); + debug(0, "Dump of Entry 'e':\n %s\n", storeToString(e)); + fatal_dump(NULL); + } + if ((hptr = hash_lookup(table, e->key)) == NULL) { + debug(0, "storeRelease: Not Found: %s\n", e->url); + debug(0, "Dump of Entry 'e':\n %s\n", storeToString(e)); + fatal_dump(NULL); + } + result = (StoreEntry *) hptr; + + if (result != e) { + debug(0, "storeRelease: Duplicated entry? \n", + result->url ? result->url : "NULL"); + debug(0, "Dump of Entry 'e':\n%s", storeToString(e)); + debug(0, "Dump of Entry 'result':\n%s", storeToString(result)); + fatal_dump(NULL); + } + if (e->type_id == REQUEST_OP_GET) { + /* check if coresponding HEAD object exists. 
*/ + hash_link *head_table_entry = NULL; + head_table_entry = hash_lookup(table, storeGenerateKey(e->url, REQUEST_OP_HEAD)); + if (head_table_entry) { + head_result = (StoreEntry *) head_table_entry; + if (head_result) { + /* recursive call here to free up /head/ */ + storeRelease(head_result); + } + } + } + debug(3, "storeRelease: Release object key: %s \n", e->key); + + if (e->swap_status == SWAP_OK && (e->swap_file_number > -1)) { + (void) safeunlink(storeSwapFullPath(e->swap_file_number, NULL), 0); + file_map_bit_reset(e->swap_file_number); + e->swap_file_number = -1; + store_swap_size -= (e->object_len + 1023) >> 10; + } + /* Discard byte count */ + CacheInfo->proto_purgeobject(CacheInfo, + CacheInfo->proto_id(e->url), + e->object_len); + storeHashDelete(hptr); + storeFreeEntry(e); + return 0; +} + + +/* store change key */ +void storeChangeKey(e) + StoreEntry *e; +{ + StoreEntry *result = NULL; + static char key[MAX_URL + 32]; + + if (!e) + return; + + if (e->key == NULL) { + debug(0, "storeChangeKey: NULL key for %s\n", e->url); + return; + } + if (table != (HashID) 0) { + hash_link *table_entry = hash_lookup(table, e->key); + if (table_entry) + result = (StoreEntry *) table_entry; + if (result == e) { + storeHashDelete(table_entry); + + key[0] = '\0'; + sprintf(key, "/x%d/%s", keychange_count++, e->key); + if (!(result->flag & KEY_URL)) + safe_free(result->key); + result->key = xstrdup(key); + + storeHashInsert(e); + BIT_SET(result->flag, KEY_CHANGE); + BIT_RESET(result->flag, KEY_URL); + } else { + debug(1, "storeChangeKey: Key is not unique for key: %s\n", e->key); + } + } +} + +/* return if the current key is the original one. 
*/ +int storeOriginalKey(e) + StoreEntry *e; +{ + if (!e) + return 1; + + return !(e->flag & KEY_CHANGE); +} + +/* return 1 if a store entry is locked */ +int storeEntryLocked(e) + StoreEntry *e; +{ + if (!e) { + debug(0, "This entry should be valid.\n"); + debug(0, "%s", storeToString(e)); + fatal_dump(NULL); + } + return ((e->lock_count) || + (e->status == STORE_PENDING) || + (e->swap_status == SWAPPING_OUT) || + (e->mem_status == SWAPPING_IN) + ); +} + +/* use this for internal call only */ +int storeCopy(e, stateoffset, maxSize, buf, size) + StoreEntry *e; + int stateoffset; + int maxSize; + char *buf; + int *size; +{ + int available_to_write = 0; + + available_to_write = store_mem_obj(e, e_current_len) - stateoffset; + + if (stateoffset < store_mem_obj(e, e_lowest_offset)) { + /* this should not happen. Logic race !!! */ + debug(1, "storeCopy: Client Request a chunk of data in area lower than the lowest_offset\n"); + debug(1, " Current Lowest offset : %d\n", store_mem_obj(e, e_lowest_offset)); + debug(1, " Requested offset : %d\n", stateoffset); + /* can't really do anything here. Client may hang until lifetime runout. */ + return 0; + } + *size = (available_to_write >= maxSize) ? 
+ maxSize : available_to_write; + + debug(6, "storeCopy: avail_to_write=%d, store_offset=%d\n", + *size, stateoffset); + + if (*size > 0) + (void) store_mem_obj(e, data)->mem_copy(store_mem_obj(e, data), stateoffset, buf, *size); + + return *size; +} + +/* check if there is any client waiting for this object at all */ +/* return 1 if there is at least one client */ +int storeClientWaiting(e) + StoreEntry *e; +{ + int i; + + if (store_mem_obj(e, client_list)) { + for (i = 0; i < store_mem_obj(e, client_list_size); ++i) { + if (store_mem_obj(e, client_list[i])) + return 1; + } + } + if (store_mem_obj(e, pending)) { + for (i = 0; i < (int) store_mem_obj(e, pending_list_size); ++i) { + if (store_mem_obj(e, pending[i])) + return 1; + } + } + return 0; +} + +/* return index to matched clientstatus in client_list, -1 on NOT_FOUND */ +int storeClientListSearch(e, fd) + StoreEntry *e; + int fd; +{ + int i; + + if (!store_mem_obj(e, client_list)) + return -1; + for (i = 0; i < store_mem_obj(e, client_list_size); ++i) { + if (store_mem_obj(e, client_list[i]) && + (fd == store_mem_obj(e, client_list[i]->fd))) + return i; + } + return -1; +} + +/* add client with fd to client list */ +void storeClientListAdd(e, fd, last_offset) + StoreEntry *e; + int fd; + int last_offset; +{ + int i; + /* look for empty slot */ + + if (store_mem_obj(e, client_list)) { + for (i = 0; (i < store_mem_obj(e, client_list_size)) + && (store_mem_obj(e, client_list[i]) != NULL); ++i); + + if (i == store_mem_obj(e, client_list_size)) { + /* have to expand client_list */ + store_mem_obj(e, client_list_size) += MIN_CLIENT; + store_mem_obj(e, client_list) = (ClientStatusEntry **) xrealloc(store_mem_obj(e, client_list), store_mem_obj(e, client_list_size) * sizeof(ClientStatusEntry *)); + } + } else { + store_mem_obj(e, client_list_size) += MIN_CLIENT; + store_mem_obj(e, client_list) = (ClientStatusEntry **) xcalloc(store_mem_obj(e, client_list_size), sizeof(ClientStatusEntry *)); + i = 0; + } + + 
store_mem_obj(e, client_list[i]) = (ClientStatusEntry *) xcalloc(1, sizeof(ClientStatusEntry)); + store_mem_obj(e, client_list[i]->fd) = fd; + store_mem_obj(e, client_list[i]->last_offset) = last_offset; +} + +/* same to storeCopy but also register client fd and last requested offset + * for each client */ +int storeClientCopy(e, stateoffset, maxSize, buf, size, fd) + StoreEntry *e; + int stateoffset; + int maxSize; + char *buf; + int *size; + int fd; +{ + int next_offset; + int client_list_index; + int available_to_write = store_mem_obj(e, e_current_len) - stateoffset; + + if (stateoffset < store_mem_obj(e, e_lowest_offset)) { + /* this should not happen. Logic race !!! */ + debug(1, "storeClientCopy: Client Request a chunk of data in area lower than the lowest_offset\n"); + debug(1, " fd : %d\n", fd); + debug(1, " Current Lowest offset : %d\n", store_mem_obj(e, e_lowest_offset)); + debug(1, " Requested offset : %d\n", stateoffset); + /* can't really do anything here. Client may hang until lifetime runout. */ + return 0; + } + *size = (available_to_write >= maxSize) ? + maxSize : available_to_write; + + debug(6, "storeCopy: avail_to_write=%d, store_offset=%d\n", + *size, stateoffset); + + /* update the lowest requested offset */ + next_offset = (stateoffset + *size); + if ((client_list_index = storeClientListSearch(e, fd)) >= 0) { + store_mem_obj(e, client_list[client_list_index]->last_offset) = next_offset; + } else { + storeClientListAdd(e, fd, next_offset); + } + + if (*size > 0) + (void) store_mem_obj(e, data)->mem_copy(store_mem_obj(e, data), stateoffset, buf, *size); + + /* see if we can get rid of some data if we are in "delete behind" mode . */ + if (e->flag & DELETE_BEHIND) { + /* call the handler to delete behind the lowest offset */ + storeDeleteBehind(e); + } + return *size; +} + + + + +/* + * Go through the first 300 bytes of MIME header of a cached object, returning + * fields that match. 
+ */ +char *storeMatchMime(e, mimehdr, buf, buf_len) + StoreEntry *e; + char *mimehdr; + char *buf; + int buf_len; +{ + int i; + int offset = 0; + + offset = storeGrep(e, mimehdr, 300); + + buf[0] = '\0'; + + if (offset) { + storeCopy(e, offset, buf_len, buf, &buf_len); + for (i = 0; i < buf_len; ++i) { + if (buf[i] == '\r') { + buf[i] = ' '; + } /* strip \r */ + if (buf[i] == '\n') { + buf[i] = '\0'; + break; + } + } + buf[buf_len - 1] = '\0'; /* always terminate at the end */ + } + return (buf); +} + +int storeGrep(e, string, nbytes) + StoreEntry *e; + char *string; + int nbytes; +{ + if (e && has_mem_obj(e) && store_mem_obj(e, data) && (nbytes > 0)) + return store_mem_obj(e, data)->mem_grep(store_mem_obj(e, data), string, nbytes); + + return 0; +} + + +int storeEntryValidToSend(e) + StoreEntry *e; +{ + if ((cached_curtime < e->expires) || (e->status == STORE_PENDING)) + return 1; + return 0; +} + +int storeInit() +{ + static char tmpbuf[FATAL_BUF_SIZE]; + static char swaplog_file[MAX_FILE_NAME_LEN]; + static char swaplog_tmp_file[MAX_FILE_NAME_LEN]; + int directory_created = 0; + struct stat sb; + int inx; + char *path = NULL; + + (void) file_map_create(MAX_SWAP_FILE); + + for (inx = 0; inx < ncache_dirs; ++inx) { + path = swappath(inx); + debug(10, "storeInit: Creating swap space in %s\n", path); + if (stat(path, &sb) < 0) { + /* we need to create a directory for swap file here. */ + if (mkdir(path, 0777) < 0) { + sprintf(tmpbuf, + "Failed to create swap directory %s: %s\n", + path, xstrerror()); + if (errno != EEXIST) + fatal(tmpbuf); + } + if (stat(path, &sb) < 0) { + sprintf(tmpbuf, + "Failed to verify swap directory %s: %s\n", + path, xstrerror()); + fatal(tmpbuf); + } + debug(1, "storeInit: Created swap directory %s\n", path); + directory_created = 1; + } + if (zap_disk_store) { + debug(1, "storeInit: Zapping all objects on disk storage.\n"); + /* This could be dangerous, second copy of cache can destroy the existing + * swap files of the previous cache. 
We may use rc file do it. */ + tmpbuf[0] = '\0'; + sprintf(tmpbuf, "cd %s; rm -rf log [0-9][0-9]", path); + debug(1, "storeInit: Running '%s'\n", tmpbuf); + system(tmpbuf); + } + } + sprintf(swaplog_file, "%s/log", swappath(0)); + + swaplog_fd = file_open(swaplog_file, NULL, O_RDWR | O_CREAT); + if (swaplog_fd < 0) { + sprintf(tmpbuf, "Cannot open swap logfile: %s\n", swaplog_file); + fatal(tmpbuf); + } + swaplog_stream = fdopen(swaplog_fd, "a+"); + if (!swaplog_stream) { + sprintf(tmpbuf, "Cannot open a stream for swap logfile: %s\n", swaplog_file); + fatal(tmpbuf); + } + swaplog_lock = file_write_lock(swaplog_fd); + + sprintf(swaplog_tmp_file, "%s/log_tmp", swappath(0)); + + swaplog_tmp_fd = file_open(swaplog_tmp_file, NULL, O_RDWR | O_TRUNC | O_CREAT); + if (swaplog_tmp_fd < 0) { + sprintf(tmpbuf, "Cannot open swap tmp logfile: %s\n", swaplog_tmp_file); + fatal(tmpbuf); + } + swaplog_tmp_stream = fdopen(swaplog_tmp_fd, "a+"); + if (!swaplog_tmp_stream) { + sprintf(tmpbuf, "Cannot open a stream for swap tmp logfile: %s\n", swaplog_tmp_file); + fatal(tmpbuf); + } + if (!zap_disk_store) { + ok_write_clean_log = 0; + storeRebuildFromDisk(); + + /* rename tmp_log to log */ + file_write_unlock(swaplog_fd, swaplog_lock); + file_close(swaplog_fd); + safeunlink(swaplog_file, 0); + link(swaplog_tmp_file, swaplog_file); + + swaplog_fd = file_open(swaplog_file, NULL, O_RDWR | O_CREAT); + if (swaplog_fd < 0) { + sprintf(tmpbuf, "Cannot reopen swap logfile: %s\n", swaplog_file); + fatal(tmpbuf); + } + swaplog_stream = fdopen(swaplog_fd, "a+"); + if (!swaplog_stream) { + sprintf(tmpbuf, "Cannot reopen a stream for swap logfile: %s\n", swaplog_file); + fatal(tmpbuf); + } + swaplog_lock = file_write_lock(swaplog_fd); + ok_write_clean_log = 1; + } + /* close tmp_log file here. Possibly remove if it is not linked to + * the real one. 
*/ + file_close(swaplog_tmp_fd); + safeunlink(swaplog_tmp_file, 0); + + /* Create Small directories in swap */ + if (directory_created || zap_disk_store) { + int i, j; + char name[1024]; + for (j = 0; j < ncache_dirs; j++) { + for (i = 0; i < SWAP_DIRECTORIES; i++) { + sprintf(name, "%s/%02d", swappath(j), i); + if (mkdir(name, 0755) < 0) { + if (errno != EEXIST) { + sprintf(tmpbuf, + "Failed to make swap directory %s: %s\n", + name, xstrerror()); + fatal(tmpbuf); + } + } + } + } + } + store_mem_high = (long) (getCacheMemMax() / 100) * + getCacheMemHighWaterMark(); + store_mem_low = (long) (getCacheMemMax() / 100) * + getCacheMemLowWaterMark(); + + store_hotobj_high = (int) (getCacheHotVmFactor() * + store_mem_high / (1 << 20)); + store_hotobj_low = (int) (getCacheHotVmFactor() * + store_mem_low / (1 << 20)); + + /* check for validity */ + if (store_hotobj_low > store_hotobj_high) + store_hotobj_low = store_hotobj_high; + + store_swap_high = (long) (getCacheSwapMax() / 100) * + getCacheSwapHighWaterMark(); + store_swap_low = (long) (getCacheSwapMax() / 100) * + getCacheSwapLowWaterMark(); + + return 0; +} + +/* + * storeSanityCheck - verify that all swap storage areas exist, and + * are writable; otherwise, force -z. + */ +void storeSanityCheck() +{ + static char name[4096]; + static char tmpbuf[4096]; + int i; + + if (ncache_dirs < 1) + storeAddSwapDisk(DEFAULT_SWAP_DIR); + + for (i = 0; i < SWAP_DIRECTORIES; i++) { + sprintf(name, "%s/%02d", swappath(i), i); + errno = 0; + if (access(name, W_OK)) { + /* A very annoying problem occurs when access() fails because + * the system file table is full. 
To prevent cached from + * deleting your entire disk cache on a whim, insist that the + * errno indicates that the directory doesn't exist */ + if (errno != ENOENT) + continue; + sprintf(tmpbuf, "WARNING: Cannot write to %s for storage swap area.\nForcing a *full restart* (e.g., cached -z)...", name); + if (syslog_enable) + syslog(LOG_ALERT, tmpbuf); + fprintf(stderr, "cached: %s\n", tmpbuf); + zap_disk_store = 1; + return; + } + } +} + +int urlcmp(url1, url2) + char *url1, *url2; +{ + if (!url1 || !url2) + death(); + return (strcmp(url1, url2)); +} + +int parse_file_number(s) + char *s; +{ + int len; + + + for (len = strlen(s); (len >= 0); --len) { + if (s[len] == '/') { + return (atoi(&s[len + 1])); + } + } + debug(1, "parse_file_number: Could not determine the swap file number from %s.\n", s); + return (0); +} + +/* + * This routine is to be called by main loop in main.c. + * It removes expired objects on only one bucket for each time called. + * returns the number of objects removed + * + * This should get called 1/s from main(). + */ +int storeMaintainSwapSpace() +{ + static int loop_count = 0; + static unsigned int bucket = 0; + hash_link *link_ptr = NULL, *next = NULL; + StoreEntry *e = NULL; + int rm_obj = 0; + + if (table == (HashID) 0) + return 0; + + /* Scan row of hash table each second and free storage if we're + * over the high-water mark */ + storeGetSwapSpace(0); + + /* Purges expired objects, check one bucket on each calling */ + if (loop_count++ >= STORE_MAINTAIN_RATE) { + loop_count = 0; + if (bucket >= STORE_BUCKETS) + bucket = 0; + link_ptr = hash_get_bucket(table, bucket++); + while (link_ptr) { + next = link_ptr->next; + e = (StoreEntry *) link_ptr; + if ((cached_curtime > e->expires) && + (e->swap_status == SWAP_OK)) { + debug(2, "storeMaintainSwapSpace: Expired: \n", + e->expires - cached_curtime, e->url); + /* just call release. don't have to check for lock status. 
+ * storeRelease will take care of that and set a pending flag + * if it's still locked. */ + storeRelease(e); + ++rm_obj; + } + link_ptr = next; + } + } + return rm_obj; +} + +int safeunlink(s, quiet) + char *s; + int quiet; +{ + int err; + + if ((err = unlink(s)) < 0) + if (!quiet) + debug(1, "safeunlink: Couldn't delete %s. %s\n", s, xstrerror()); + return (err); +} + + +/* + * storeWriteCleanLog + * + * Writes a "clean" swap log file from in-memory metadata. + */ +int storeWriteCleanLog() +{ + StoreEntry *e = NULL; + static char swapfilename[MAX_FILE_NAME_LEN]; + static char clean_log[MAX_FILE_NAME_LEN]; + static char swaplog_file[MAX_FILE_NAME_LEN]; + static char tmpbuf[FATAL_BUF_SIZE]; + FILE *fp = NULL; + int n = 0; + time_t start, stop, r; + + if (!ok_write_clean_log) { + debug(1, "storeWriteCleanLog: Not currently OK to rewrite swap log.\n"); + debug(1, "storeWriteCleanLog: Operation aborted.\n"); + return 0; + } + debug(1, "storeWriteCleanLog: Starting...\n"); + start = cached_curtime = time(NULL); + sprintf(clean_log, "%s/log_clean", swappath(0)); + sprintf(swaplog_file, "%s/log", swappath(0)); + if ((fp = fopen(clean_log, "a+")) == NULL) { + debug(0, "storeWriteCleanLog: %s: %s", clean_log, xstrerror()); + return 0; + } + for (e = storeGetFirst(); e; e = storeGetNext()) { + debug(5, "storeWriteCleanLog: \n", e->url); + if (e->swap_file_number < 0) + continue; + if (e->swap_status != SWAP_OK) + continue; + if (e->object_len <= 0) + continue; + storeSwapFullPath(e->swap_file_number, swapfilename); + fprintf(fp, "FILE: %s URL: %s %d %d %d\n", + swapfilename, e->url, (int) e->expires, (int) e->timestamp, + e->object_len); + if ((++n & 0xFFF) == 0) { + cached_curtime = time(NULL); + debug(1, " %7d lines written so far.\n", n); + } + } + fclose(fp); + + if (file_write_unlock(swaplog_fd, swaplog_lock) != DISK_OK) { + debug(0, "storeWriteCleanLog: Failed to unlock swaplog!\n"); + debug(0, "storeWriteCleanLog: Current swap logfile not replaced.\n"); + return 0; 
+ } + if (rename(clean_log, swaplog_file) < 0) { + debug(0, "storeWriteCleanLog: rename failed: %s\n", + xstrerror()); + return 0; + } + file_close(swaplog_fd); + swaplog_fd = file_open(swaplog_file, NULL, O_RDWR | O_CREAT); + if (swaplog_fd < 0) { + sprintf(tmpbuf, "Cannot open swap logfile: %s\n", swaplog_file); + fatal(tmpbuf); + } + swaplog_stream = fdopen(swaplog_fd, "a+"); + if (!swaplog_stream) { + sprintf(tmpbuf, "Cannot open a stream for swap logfile: %s\n", + swaplog_file); + fatal(tmpbuf); + } + swaplog_lock = file_write_lock(swaplog_fd); + + stop = cached_curtime = time(NULL); + r = stop - start; + debug(1, " Finished. Wrote %d lines.\n", n); + debug(1, " Took %d seconds (%6.1lf lines/sec).\n", + r > 0 ? r : 0, (double) n / (r > 0 ? r : 1)); + + /* touch a timestamp file */ + sprintf(swaplog_file, "%s/log-last-clean", swappath(0)); + file_close(file_open(swaplog_file, NULL, O_WRONLY | O_CREAT | O_TRUNC)); + return n; +} + +int swapInError(fd_unused, entry) + int fd_unused; + StoreEntry *entry; +{ + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "DISK I/O", + 102, + "Cache Disk I/O Failure", + "", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + return 0; +} + +int storePendingNClients(e) + StoreEntry *e; +{ + int npend = 0; + int i; + + if (!e->mem_obj) + return 0; + for (npend = i = 0; i < (int) store_mem_obj(e, pending_list_size); i++) { + if (store_mem_obj(e, pending[i])) + npend++; + } + return npend; +} diff --git a/src/tools.cc b/src/tools.cc new file mode 100644 index 00000000000..142088b5f2a --- /dev/null +++ b/src/tools.cc @@ -0,0 +1,392 @@ +static char rcsid[] = "$Id: tools.cc,v 1.1 1996/02/22 06:23:56 wessels Exp $"; +/* + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. 
+ * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. 
+ * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. 
+ *
+ *
+ */
+#include "config.h"
+#include
+#include
+#include
+#include
+#include /* for sysconf() stuff */
+#include
+#include
+#include
+#include
+#include
+#include /* has NOFILE */
+#include
+
+#include "debug.h"
+#include "cache_cf.h"
+#include "autoconf.h"
+#include "ftp.h" /* sig_child() needs to know FTP threads */
+
+
+void death(), deathb(), neighbors_rotate_log(), stat_rotate_log();
+void mail_warranty(), print_warranty(), _db_rotate_log();
+int do_mallinfo = 0; /* don't do mallinfo() unless this gets set */
+int PrintRusage _PARAMS((void (*)(), FILE *));
+
+extern ftpget_thread *FtpgetThread;
+extern int catch_signals; /* main.c */
+extern int storeWriteCleanLog _PARAMS((void));
+
+/*-------------------------------------------------------------------------
+--
+-- death, deathb
+--
+-- Function: These functions catch and report fatal system violations.
+--
+-- Inputs: None.
+--
+-- Output: None.
+--
+-- Comments: Both restore the default SIGSEGV and SIGBUS dispositions
+--           before doing any further work, then write the clean swap
+--           log, print resource usage and the warranty notice, and
+--           finish with abort().
+--
+--------------------------------------------------------------------------*/
+void death()
+{
+    fprintf(stderr, "FATAL: Received Segment Violation...dying.\n");
+    /* back to default handling so a second fault below is not re-caught */
+    signal(SIGSEGV, SIG_DFL);
+    signal(SIGBUS, SIG_DFL);
+    storeWriteCleanLog();
+    PrintRusage(NULL, stderr);
+    print_warranty();
+    abort();
+}
+
+
+void deathb()
+{
+    fprintf(stderr, "FATAL: Received bus error...dying.\n");
+    /* back to default handling so a second fault below is not re-caught */
+    signal(SIGSEGV, SIG_DFL);
+    signal(SIGBUS, SIG_DFL);
+    storeWriteCleanLog();
+    PrintRusage(NULL, stderr);
+    print_warranty();
+    abort();
+}
+
+/* printf-style template: both %s conversions receive HARVEST_VERSION.
+ * The first line, the blank line after it and the remaining text are
+ * written by mail_warranty() directly after "Subject: ". */
+#define DEAD_MSG "\
+The Harvest Cache (version %s) died.\n\
+\n\
+You've encountered a fatal error in the Harvest Cache version %s.\n\
+If a core file was created (possibly in the swap directory),\n\
+please execute 'gdb cached core' or 'dbx cached core', then type 'where',\n\
+and report the trace back to harvest-dvl@cs.colorado.edu.\n\
+\n\
+Thanks!\n"
+
+/* Format DEAD_MSG into a static buffer and return it.  The buffer is
+ * overwritten by every call. */
+static char *dead_msg()
+{
+    static char msg[1024];
+    sprintf(msg, DEAD_MSG, HARVEST_VERSION, HARVEST_VERSION);
+    return msg;
+}
+
+void mail_warranty() +{ + FILE *fp; + static char filename[256]; + static char command[256]; + + sprintf(filename, "/tmp/mailin%d", (int) getpid()); + fp = fopen(filename, "w"); + if (fp != NULL) { + fprintf(fp, "From: cached\n"); + fprintf(fp, "To: %s\n", getAdminEmail()); + fprintf(fp, "Subject: %s\n", dead_msg()); + fclose(fp); + + sprintf(command, "mail %s < %s", getAdminEmail(), filename); + + system(command); + unlink(filename); + } +} + +void print_warranty() +{ + if (getAdminEmail()) + mail_warranty(); + else + puts(dead_msg()); +} + +void rotate_logs(sig) + int sig; +{ + debug(1, "rotate_logs: SIGHUP received.\n"); + + storeWriteCleanLog(); + neighbors_rotate_log(); + stat_rotate_log(); + _db_rotate_log(); +#if defined(_HARVEST_SYSV_SIGNALS_) + signal(sig, rotate_logs); +#endif +} + +void shut_down(sig) + int sig; +{ + debug(1, "Shutting down...\n"); + storeWriteCleanLog(); + PrintRusage(NULL, stderr); + debug(0, "Harvest Cache (Version %s): Exiting due to signal %d.\n", + HARVEST_VERSION, sig); + exit(1); +} + +void fatal_common(message) + char *message; +{ + if (syslog_enable) + syslog(LOG_ALERT, message); + fprintf(stderr, "FATAL: %s\n", message); + fprintf(stderr, "Harvest Cache (Version %s): Terminated abnormally.\n", + HARVEST_VERSION); + fflush(stderr); + PrintRusage(NULL, stderr); + if (debug_log != stderr) { + debug(0, "FATAL: %s\n", message); + debug(0, "Harvest Cache (Version %s): Terminated abnormally.\n", + HARVEST_VERSION); + } +} + +/* fatal */ +void fatal(message) + char *message; +{ + fatal_common(message); + exit(1); +} + +/* fatal with dumping core */ +void fatal_dump(message) + char *message; +{ + if (message) + fatal_common(message); + if (catch_signals) + storeWriteCleanLog(); + abort(); +} + + +void dumpMallocStats(f) + FILE *f; +{ +#if USE_MALLINFO + struct mallinfo mp; + + if (!do_mallinfo) + return; + + mp = mallinfo(); + + fprintf(f, "Malloc Instrumentation via mallinfo(): \n"); + fprintf(f, " total space in arena %d\n", 
mp.arena); + fprintf(f, " number of ordinary blocks %d\n", mp.ordblks); + fprintf(f, " number of small blocks %d\n", mp.smblks); + fprintf(f, " number of holding blocks %d\n", mp.hblks); + fprintf(f, " space in holding block headers %d\n", mp.hblkhd); + fprintf(f, " space in small blocks in use %d\n", mp.usmblks); + fprintf(f, " space in free blocks %d\n", mp.fsmblks); + fprintf(f, " space in ordinary blocks in use %d\n", mp.uordblks); + fprintf(f, " space in free ordinary blocks %d\n", mp.fordblks); + fprintf(f, " cost of enabling keep option %d\n", mp.keepcost); +#if LNG_MALLINFO + fprintf(f, " max size of small blocks %d\n", mp.mxfast); + fprintf(f, " number of small blocks in a holding block %d\n", + mp.nlblks); + fprintf(f, " small block rounding factor %d\n", mp.grain); + fprintf(f, " space (including overhead) allocated in ord. blks %d\n", + mp.uordbytes); + fprintf(f, " number of ordinary blocks allocated %d\n", + mp.allocated); + fprintf(f, " bytes used in maintaining the free tree %d\n", + mp.treeoverhead); +#endif /* LNG_MALLINFO */ + +#if PRINT_MMAP + mallocmap(); +#endif /* PRINT_MMAP */ +#endif /* USE_MALLINFO */ +} + +int PrintRusage(f, lf) + void (*f) (); + FILE *lf; +{ +#if defined(HAVE_RUSAGE) && defined(RUSAGE_SELF) + struct rusage rusage; + + getrusage(RUSAGE_SELF, &rusage); + fprintf(lf, "CPU Usage: user %d sys %d\nMemory Usage: rss %d KB\n", + rusage.ru_utime.tv_sec, rusage.ru_stime.tv_sec, + rusage.ru_maxrss * getpagesize() / 1000); + fprintf(lf, "Page faults with physical i/o: %d\n", + rusage.ru_majflt); + +#endif + dumpMallocStats(lf); + if (f) + f(0); + return 0; +} + +int getHeapSize() +{ +#if USE_MALLINFO + struct mallinfo mp; + + mp = mallinfo(); + + return (mp.arena); +#else + return (0); +#endif +} + +void sig_child(sig) + int sig; +{ + int status; + int pid; + ftpget_thread *t = NULL; + + if ((pid = waitpid(0, &status, WNOHANG)) > 0) { + debug(3, "sig_child: Ate pid %d\n", pid); + for (t = FtpgetThread; t; t = t->next) { + debug(5, 
"sig_child: checking pid=%d state=%d\n", + t->pid, t->state); + if (t->pid == pid && t->state == FTPGET_THREAD_RUNNING) { + debug(5, "sig_child: GOT IT!\n"); + t->state = FTPGET_THREAD_WAITED; + t->status = status; + t->wait_retval = pid; + break; + } + } + } +#if defined(_HARVEST_SYSV_SIGNALS_) + signal(sig, sig_child); +#endif +} + +#define MAX_ZOMBIES_TO_KILL 20 +void kill_zombie() +{ + int status; + int i = 0; + int pid; + + while ((pid = waitpid(-1, &status, WNOHANG | WUNTRACED)) > 0) { + debug(3, "kill_zombie: Ate pid %d\n", pid); + if (++i > MAX_ZOMBIES_TO_KILL) + break; + } +} + +/* + * getMaxFD - returns the file descriptor table size + */ +int getMaxFD() +{ + static int i = -1; + + if (i == -1) { +#if defined(HAVE_SYSCONF) && defined(_SC_OPEN_MAX) + i = sysconf(_SC_OPEN_MAX); /* prefered method */ +#elif defined(HAVE_GETDTABLESIZE) + i = getdtablesize(); /* the BSD way */ +#elif defined(OPEN_MAX) + i = OPEN_MAX; +#elif defined(NOFILE) + i = NOFILE; +#elif defined(_NFILE) + i = _NFILE; +#else + i = 64; /* 64 is a safe default */ +#endif + debug(10, "getMaxFD set MaxFD at %d\n", i); + } + return (i); +} diff --git a/src/url.cc b/src/url.cc new file mode 100644 index 00000000000..4ae95a8bb8e --- /dev/null +++ b/src/url.cc @@ -0,0 +1,223 @@ +static char rcsid[] = "$Id: url.cc,v 1.1 1996/02/22 06:23:56 wessels Exp $"; +/* + * File: url.c + * Description: General Routine for url processing + * Author: Anawat Chankhunthod, USC + * Created: Tue May 24 + * Language: C + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. 
Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. 
+ * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#include +#include +#include + +#include "debug.h" +#include "comm.h" +#include "proto.h" +#include "store.h" /* for the_url() */ +#include "url.h" +#include "util.h" + + +int url_acceptable[256]; +int url_acceptable_init = 0; +char hex[17] = "0123456789abcdef"; + +/* convert %xx in url string to a character + * Allocate a new string and return a pointer to converted string */ + +char *url_convert_hex(org_url) + char *org_url; +{ + int i; + char temp[MAX_URL], hexstr[MAX_URL]; + static char *url; + + url = (char *) xcalloc(1, MAX_URL); + strncpy(url, org_url, MAX_URL); + + i = 0; + while (i < (int) (strlen(url) - 2)) { + if (url[i] == '%') { + /* found %xx, convert it to char */ + strncpy(temp, url, i); + strncpy(hexstr, url + i + 1, 2); + hexstr[2] = '\0'; + temp[i] = (char) ((int) strtol(hexstr, (char **) NULL, 16)); + temp[i + 1] = '\0'; + strncat(temp, url + i + 3, MAX_URL); + strcpy(url, temp); + } + i++; + } + + return url; +} + + +/* INIT Acceptable table. 
+ * Borrow from libwww2 with Mosaic2.4 Distribution */ +void init_url_acceptable() +{ + unsigned int i; + char *good = + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789./-_$"; + for (i = 0; i < 256; i++) + url_acceptable[i] = 0; + for (; *good; good++) + url_acceptable[(unsigned int) *good] = 1; + url_acceptable_init = 1; +} + + +/* Encode prohibited char in string */ +/* return the pointer to new (allocated) string */ +char *url_escape(url) + char *url; +{ + char *p, *q; + char *tmpline = xcalloc(1, MAX_URL); + + if (!url_acceptable_init) + init_url_acceptable(); + + q = tmpline; + for (p = url; *p; p++) { + if (url_acceptable[(int) (*p)]) + *q++ = *p; + else { + *q++ = '%'; /* Means hex coming */ + *q++ = hex[(int) ((*p) >> 4)]; + *q++ = hex[(int) ((*p) & 15)]; + } + } + *q++ = '\0'; + return tmpline; +} + + +/* + * Strip the url from e->key, return a pointer to a static copy of it. + * Planning ahead for removing e->url from meta-data + */ +char *the_url(e) + StoreEntry *e; +{ + char *URL; + char *token; + static char line_in[MAX_URL + 1]; + static char delim[] = "/"; + int i; + + strcpy(line_in, e->key); + token = strtok(line_in, delim); + + if (!(e->flag & CACHABLE) && (sscanf(token, "%d", &i))) { + URL = strtok(NULL, "~"); /* Non_CACHABLE, key = /%d/url */ + return URL; + } + if ((e->flag & KEY_CHANGE) && (sscanf(token, "x%d", &i))) { + /* key is changed, key = /x%d/url or /x%d/head/url or /x%d/post/url */ + /* Now key is url or head/url or post/url */ + token = strtok(NULL, "~"); + } else { + /* key is url or /head/url or /post/url */ + strcpy(token, e->key); + } + + if (e->type_id == REQUEST_OP_GET) { + /* key is url */ + return token; + } else if ((e->type_id == REQUEST_OP_POST) && + (!(strncmp(token, "post/", 5)) || !(strncmp(token, "/post/", 6)))) { + URL = strtok(token, delim); + URL = strtok(NULL, "~"); + /* discard "/post/" or "post/" from the key and get url */ + return URL; + } else if ((e->type_id == REQUEST_OP_HEAD) && + 
(!(strncmp(token, "head/", 5)) || !(strncmp(token, "/head/", 6)))) { + URL = strtok(token, delim); + URL = strtok(NULL, "~"); + /* discard "/head/" or "head/" from the key and get url */ + return URL; + } else { + debug(0, "Should not be here. Unknown format of the key: %s\n", + e->key); + return (NULL); + } +} diff --git a/src/wais.cc b/src/wais.cc new file mode 100644 index 00000000000..dabd9888f41 --- /dev/null +++ b/src/wais.cc @@ -0,0 +1,515 @@ +static char rcsid[] = "$Id: wais.cc,v 1.1 1996/02/22 06:23:56 wessels Exp $"; +/* + * File: wais.c + * Description: state machine for wais retrieval protocol (just open a + * connection to a wais gateway, like the CERN waisd). + * Based on John's gopher retrieval module. + * Author: Edward Moy, Xerox PARC + * Created: Tue Jun 20 13:07:42 PDT 1995 + * Language: C + * + ********************************************************************** + * Copyright (c) 1994, 1995. All rights reserved. + * + * The Harvest software was developed by the Internet Research Task + * Force Research Group on Resource Discovery (IRTF-RD): + * + * Mic Bowman of Transarc Corporation. + * Peter Danzig of the University of Southern California. + * Darren R. Hardy of the University of Colorado at Boulder. + * Udi Manber of the University of Arizona. + * Michael F. Schwartz of the University of Colorado at Boulder. + * Duane Wessels of the University of Colorado at Boulder. + * + * This copyright notice applies to software in the Harvest + * ``src/'' directory only. Users should consult the individual + * copyright notices in the ``components/'' subdirectories for + * copyright information about other software bundled with the + * Harvest source code distribution. + * + * TERMS OF USE + * + * The Harvest software may be used and re-distributed without + * charge, provided that the software origin and research team are + * cited in any use of the system. 
Most commonly this is + * accomplished by including a link to the Harvest Home Page + * (http://harvest.cs.colorado.edu/) from the query page of any + * Broker you deploy, as well as in the query result pages. These + * links are generated automatically by the standard Broker + * software distribution. + * + * The Harvest software is provided ``as is'', without express or + * implied warranty, and with no support nor obligation to assist + * in its use, correction, modification or enhancement. We assume + * no liability with respect to the infringement of copyrights, + * trade secrets, or any patents, and are not responsible for + * consequential damages. Proper use of the Harvest software is + * entirely the responsibility of the user. + * + * DERIVATIVE WORKS + * + * Users may make derivative works from the Harvest software, subject + * to the following constraints: + * + * - You must include the above copyright notice and these + * accompanying paragraphs in all forms of derivative works, + * and any documentation and other materials related to such + * distribution and use acknowledge that the software was + * developed at the above institutions. + * + * - You must notify IRTF-RD regarding your distribution of + * the derivative work. + * + * - You must clearly notify users that your are distributing + * a modified version and not the original Harvest software. + * + * - Any derivative product is also subject to these copyright + * and use restrictions. + * + * Note that the Harvest software is NOT in the public domain. We + * retain copyright, as specified above. + * + * HISTORY OF FREE SOFTWARE STATUS + * + * Originally we required sites to license the software in cases + * where they were going to build commercial products/services + * around Harvest. In June 1995 we changed this policy. We now + * allow people to use the core Harvest software (the code found in + * the Harvest ``src/'' directory) for free. 
We made this change + * in the interest of encouraging the widest possible deployment of + * the technology. The Harvest software is really a reference + * implementation of a set of protocols and formats, some of which + * we intend to standardize. We encourage commercial + * re-implementations of code complying to this set of standards. + * + * + */ +#include "config.h" +#if USE_WAIS_RELAY +#include +#include +#include + +#include "ansihelp.h" +#include "comm.h" +#include "store.h" +#include "stat.h" +#include "neighbors.h" +#include "url.h" +#include "ipcache.h" +#include "cache_cf.h" +#include "util.h" + +#define WAIS_DELETE_GAP (64*1024) + +typedef struct _waisdata { + StoreEntry *entry; + char host[HARVESTHOSTNAMELEN + 1]; + int port; + char *type; + char *mime_hdr; + char type_id; + char request[MAX_URL]; +} WAISData; + +extern char *tmp_error_buf; +extern char *dns_error_message; +extern time_t cached_curtime; + +int wais_url_parser(url, host, port, request) + char *url; + char *host; + int *port; + char *request; +{ + strcpy(host, getWaisRelayHost()); + *port = getWaisRelayPort(); + strcpy(request, url); + + return 0; +} + +/* This will be called when timeout on read. */ +void waisReadReplyTimeout(fd, data) + int fd; + WAISData *data; +{ + StoreEntry *entry = NULL; + + entry = data->entry; + debug(4, "waisReadReplyTimeout: Timeout on %d\n url: %s\n", fd, entry->url); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 403, + "Read timeout", + "The Network/Remote site may be down. Try again later.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + comm_set_select_handler(fd, COMM_SELECT_READ, 0, 0); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_403", /* WAIS READ TIMEOUT */ + "GET"); +#endif + safe_free(data); +} + +/* This will be called when socket lifetime is expired. 
*/ +void waisLifetimeExpire(fd, data) + int fd; + WAISData *data; +{ + StoreEntry *entry = NULL; + + entry = data->entry; + debug(4, "waisLifeTimeExpire: FD %d: \n", fd, entry->url); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 410, + "Transaction Timeout", + "The Network/Remote site may be down or too slow. Try again later.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + comm_set_select_handler(fd, COMM_SELECT_READ | COMM_SELECT_WRITE, 0, 0); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_410", /* WAIS LIFETIME EXPIRE */ + "GET"); +#endif + safe_free(data); +} + + + + +/* This will be called when data is ready to be read from fd. Read until + * error or connection closed. */ +void waisReadReply(fd, data) + int fd; + WAISData *data; +{ + static char buf[4096]; + int len; + StoreEntry *entry = NULL; + + entry = data->entry; + if (entry->flag & DELETE_BEHIND) { + if (storeClientWaiting(entry)) { + /* check if we want to defer reading */ + if ((store_mem_obj(entry, e_current_len) - + store_mem_obj(entry, e_lowest_offset)) > WAIS_DELETE_GAP) { + debug(3, "waisReadReply: Read deferred for Object: %s\n", entry->key); + debug(3, " Current Gap: %d bytes\n", + store_mem_obj(entry, e_current_len) - + store_mem_obj(entry, e_lowest_offset)); + + /* reschedule, so it will automatically reactivated when Gap is big enough. 
*/ + comm_set_select_handler(fd, COMM_SELECT_READ, (PF) waisReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, (PF) waisReadReplyTimeout, + (caddr_t) data, getReadTimeout()); + return; + } + } else { + /* we can terminate connection right now */ + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 419, + "No Client", + "All Clients went away before tranmission is complete and object is too big to cache.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_419", /* WAIS NO CLIENTS, BIG OBJECT */ + "GET"); +#endif + safe_free(data); + return; + } + } + len = read(fd, buf, 4096); + debug(5, "waisReadReply - fd: %d read len:%d\n", fd, len); + + if (len < 0 || ((len == 0) && (store_mem_obj(entry, e_current_len) == 0))) { + debug(1, "waisReadReply - error reading errno %d: %s\n", + errno, xstrerror()); + if (errno == ECONNRESET) { + /* Connection reset by peer */ + /* consider it as a EOF */ + entry->expires = cached_curtime; + + sprintf(tmp_error_buf, "\n

    Warning: The Remote Server sent RESET at the end of transmission.\n"); + storeAppend(entry, tmp_error_buf, strlen(tmp_error_buf)); + storeComplete(entry); + } else { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 405, + "Read error", + "Network/Remote site is down. Try again later.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + } + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_405", /* WAIS READ ERROR */ + "GET"); +#endif + safe_free(data); + } else if (len == 0) { + /* Connection closed; retrieval done. */ + entry->expires = cached_curtime; + storeComplete(entry); + comm_close(fd); + safe_free(data); + } else if (((store_mem_obj(entry, e_current_len) + len) > getWAISMax()) && + !(entry->flag & DELETE_BEHIND)) { + /* accept data, but start to delete behind it */ + storeStartDeleteBehind(entry); + + storeAppend(entry, buf, len); + comm_set_select_handler(fd, COMM_SELECT_READ, (PF) waisReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, (PF) waisReadReplyTimeout, + (caddr_t) data, getReadTimeout()); + + } else { + storeAppend(entry, buf, len); + comm_set_select_handler(fd, COMM_SELECT_READ, (PF) waisReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, (PF) waisReadReplyTimeout, + (caddr_t) data, getReadTimeout()); + } +} + +/* This will be called when request write is complete. Schedule read of + * reply. 
*/ +void waisSendComplete(fd, buf, size, errflag, data) + int fd; + char *buf; + int size; + int errflag; + WAISData *data; +{ + StoreEntry *entry = NULL; + entry = data->entry; + debug(5, "waisSendComplete - fd: %d size: %d errflag: %d\n", + fd, size, errflag); + if (errflag) { + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 401, + "Cannot connect to the original site", + "The remote site may be down.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); + comm_close(fd); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_401", /* WAIS CONNECT FAILURE */ + "GET"); +#endif + safe_free(data); + } else { + /* Schedule read reply. */ + comm_set_select_handler(fd, COMM_SELECT_READ, (PF) waisReadReply, (caddr_t) data); + comm_set_select_handler_plus_timeout(fd, COMM_SELECT_TIMEOUT, (PF) waisReadReplyTimeout, + (caddr_t) data, getReadTimeout()); + } + safe_free(buf); /* Allocated by waisSendRequest. */ +} + +/* This will be called when connect completes. Write request. */ +void waisSendRequest(fd, data) + int fd; + WAISData *data; +{ +#define CR '\015' +#define LF '\012' + int len = strlen(data->request) + 4; + char *buf; + + debug(5, "waisSendRequest - fd: %d\n", fd); + + if (data->type) + len += strlen(data->type); + if (data->mime_hdr) + len += strlen(data->mime_hdr); + + buf = (char *) xcalloc(1, len + 1); + + if (data->mime_hdr) + sprintf(buf, "%s %s %s%c%c", data->type, data->request, + data->mime_hdr, CR, LF); + else + sprintf(buf, "%s %s%c%c", data->type, data->request, CR, LF); + debug(6, "waisSendRequest - buf:%s\n", buf); + icpWrite(fd, buf, len, 30, waisSendComplete, data); +} + +int waisStart(unusedfd, url, type, mime_hdr, entry) + int unusedfd; + char *url; + char *type; + char *mime_hdr; + StoreEntry *entry; +{ + /* Create state structure. 
*/ + int sock, status; + WAISData *data = NULL; + + debug(3, "waisStart - url:%s, type:%s\n", url, type); + debug(4, " header: %s\n", mime_hdr); + + data = (WAISData *) xcalloc(1, sizeof(WAISData)); + data->entry = entry; + + if (!getWaisRelayHost()) { + debug(0, "waisStart: Failed because no relay host defined!\n"); + sprintf(tmp_error_buf, + CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 412, + "Configuration error. No WAIS relay host is defined.", + "", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_412", /* WAIS NO RELAY */ + "GET"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* Parse url. */ + (void) wais_url_parser(url, data->host, &data->port, data->request); + data->type = type; + data->mime_hdr = mime_hdr; + + /* Create socket. */ + sock = comm_open(COMM_NONBLOCKING, 0, 0, url); + if (sock == COMM_ERROR) { + debug(4, "waisStart: Failed because we're out of sockets.\n"); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 411, + "Cached short of file-descriptors, sorry", + "", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_411", /* WAIS NO FD'S */ + "GET"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* check if IP is already in cache. It must be. + * It should be done before this route is called. + * Otherwise, we cannot check return code for connect. */ + if (!ipcache_gethostbyname(data->host)) { + debug(4, "waisstart: Called without IP entry in ipcache. 
OR lookup failed.\n"); + comm_close(sock); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 402, + "DNS name lookup failure", + dns_error_message, + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_402", /* WAIS DNS FAILURE */ + "GET"); +#endif + safe_free(data); + return COMM_ERROR; + } + /* Open connection. */ + if ((status = comm_connect(sock, data->host, data->port))) { + if (status != EINPROGRESS) { + comm_close(sock); + sprintf(tmp_error_buf, CACHED_RETRIEVE_ERROR_MSG, + entry->url, + entry->url, + "WAIS", + 401, + "Cannot connect to the original site", + "The remote site may be down.", + HARVEST_VERSION, + comm_hostname()); + storeAbort(entry, tmp_error_buf); +#ifdef LOG_ERRORS + CacheInfo->log_append(CacheInfo, + entry->url, + "0.0.0.0", + store_mem_obj(entry, e_current_len), + "ERR_401", /* WAIS CONNECT FAIL */ + "GET"); +#endif + safe_free(data); + return COMM_ERROR; + } else { + debug(5, "waisStart - conn %d EINPROGRESS\n", sock); + } + } + /* Install connection complete handler. */ + comm_set_select_handler(sock, COMM_SELECT_LIFETIME, + (PF) waisLifetimeExpire, (caddr_t) data); + comm_set_select_handler(sock, COMM_SELECT_WRITE, + (PF) waisSendRequest, (caddr_t) data); + return COMM_OK; +} +#endif