diff --git a/ChangeLog b/ChangeLog index 7e9ee660524f79..1d41a83420dcc4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Thu Mar 14 00:29:12 2002 Yukihiro Matsumoto + + * re.c (rb_reg_match): should clear $~ if operand is nil. + + * re.c (rb_reg_match2): ditto. + Thu Mar 14 12:32:59 2002 Nobuyoshi Nakada * ext/stringio/stringio.c: fixed frozen string bug. ungetc no @@ -25,6 +31,16 @@ Wed Mar 13 18:36:55 2002 Akinori MUSHA * lib/getopts.rb: single_options can be nil[*], and is not not optional. ([*]Pointed out by gotoken) +Wed Mar 13 17:23:46 2002 Yukihiro Matsumoto + + * configure: merge Jonathan Baker's large file support patch + [ruby-talk:35316], with read_all patch in [ruby-talk:35470]. + +Wed Mar 13 04:06:48 2002 Yukihiro Matsumoto + + * eval.c (rb_f_abort): optional message argument that be printed + on termination. + Tue Mar 12 17:12:06 2002 Tanaka Akira * lib/resolv.rb: don't complete domains for absolute FQNs. diff --git a/MANIFEST b/MANIFEST index 238142acfcedbe..2b3c7ea1607587 100644 --- a/MANIFEST +++ b/MANIFEST @@ -103,6 +103,7 @@ lib/English.rb lib/Env.rb lib/README lib/base64.rb +lib/benchmark.rb lib/cgi.rb lib/cgi/session.rb lib/cgi-lib.rb diff --git a/bignum.c b/bignum.c index fb3afdc3c0780a..8aa242bc2461af 100644 --- a/bignum.c +++ b/bignum.c @@ -38,6 +38,9 @@ typedef long BDIGIT_DBL_SIGNED; #define BITSPERDIG (sizeof(BDIGIT)*CHAR_BIT) #define BIGRAD ((BDIGIT_DBL)1 << BITSPERDIG) #define DIGSPERLONG ((unsigned int)(sizeof(long)/sizeof(BDIGIT))) +#if HAVE_LONG_LONG +# define DIGSPERLL ((unsigned int)(sizeof(long long)/sizeof(BDIGIT))) +#endif #define BIGUP(x) ((BDIGIT_DBL)(x) << BITSPERDIG) #define BIGDN(x) RSHIFT(x,BITSPERDIG) #define BIGLO(x) ((BDIGIT)((x) & (BIGRAD-1))) @@ -507,6 +510,67 @@ rb_str_to_inum(str, base, badcheck) return rb_cstr_to_inum(s, base, badcheck); } +#if HAVE_LONG_LONG + +VALUE +rb_ull2big(n) + unsigned long long n; +{ + BDIGIT_DBL num = n; + long i = 0; + BDIGIT *digits; + VALUE big; + + i = 0; + big = 
bignew(DIGSPERLL, 1); + digits = BDIGITS(big); + while (i < DIGSPERLL) { + digits[i++] = BIGLO(num); + num = BIGDN(num); + } + + i = DIGSPERLL; + while (i-- && !digits[i]) ; + RBIGNUM(big)->len = i+1; + return big; +} + +VALUE +rb_ll2big(n) + long long n; +{ + long neg = 0; + VALUE big; + + if (n < 0) { + n = -n; + neg = 1; + } + big = rb_ull2big(n); + if (neg) { + RBIGNUM(big)->sign = 0; + } + return big; +} + +VALUE +rb_ull2inum(n) + unsigned long long n; +{ + if (POSFIXABLE(n)) return INT2FIX(n); + return rb_ull2big(n); +} + +VALUE +rb_ll2inum(n) + long long n; +{ + if (FIXABLE(n)) return INT2FIX(n); + return rb_ll2big(n); +} + +#endif /* HAVE_LONG_LONG */ + VALUE rb_cstr2inum(str, base) const char *str; @@ -650,6 +714,54 @@ rb_big2long(x) return num; } +#if HAVE_LONG_LONG + +static unsigned long long +big2ull(x, type) + VALUE x; + char *type; +{ + long len = RBIGNUM(x)->len; + BDIGIT_DBL num; + BDIGIT *ds; + + if (len > sizeof(long long)/sizeof(BDIGIT)) + rb_raise(rb_eRangeError, "bignum too big to convert into `%s'", type); + ds = BDIGITS(x); + num = 0; + while (len--) { + num = BIGUP(num); + num += ds[len]; + } + return num; +} + +unsigned long long +rb_big2ull(x) + VALUE x; +{ + unsigned long long num = big2ull(x, "unsigned long long"); + + if (!RBIGNUM(x)->sign) return -num; + return num; +} + +long long +rb_big2ll(x) + VALUE x; +{ + unsigned long long num = big2ull(x, "long long"); + + if ((long long)num < 0 && (RBIGNUM(x)->sign + || (long long)num != LLONG_MIN)) { + rb_raise(rb_eRangeError, "bignum too big to convert into `long long'"); + } + if (!RBIGNUM(x)->sign) return -(long long)num; + return num; +} + +#endif /* HAVE_LONG_LONG */ + static VALUE dbl2big(d) double d; diff --git a/configure.in b/configure.in index 31a007a9413f40..3785b65f460bff 100644 --- a/configure.in +++ b/configure.in @@ -1,6 +1,8 @@ dnl Process this file with autoconf to produce a configure script. 
AC_INIT(ruby.h) +AC_PREREQ(2.50) + rb_version=`grep RUBY_VERSION $srcdir/version.h` MAJOR=`expr "$rb_version" : '#define RUBY_VERSION "\([0-9][0-9]*\)\.[0-9][0-9]*\.[0-9][0-9]*"'` MINOR=`expr "$rb_version" : '#define RUBY_VERSION "[0-9][0-9]*\.\([0-9][0-9]*\)\.[0-9][0-9]*"'` @@ -124,11 +126,16 @@ AC_MINIX AC_EXEEXT AC_OBJEXT +dnl check for large file stuff +AC_SYS_LARGEFILE + +AC_CHECK_TYPES([long long, off_t]) AC_CHECK_SIZEOF(int, 4) AC_CHECK_SIZEOF(short, 2) AC_CHECK_SIZEOF(long, 4) AC_CHECK_SIZEOF(long long, 0) AC_CHECK_SIZEOF(__int64, 0) +AC_CHECK_SIZEOF(off_t, 0) AC_CHECK_SIZEOF(void*, 4) AC_CHECK_SIZEOF(float, 4) AC_CHECK_SIZEOF(double, 8) @@ -292,6 +299,8 @@ AC_TYPE_GETGROUPS AC_TYPE_SIGNAL AC_FUNC_ALLOCA AC_FUNC_MEMCMP +AC_FUNC_FSEEKO +AC_CHECK_FUNCS(ftello) AC_REPLACE_FUNCS(dup2 memmove mkdir strcasecmp strncasecmp strerror strftime\ strchr strstr strtoul crypt flock vsnprintf\ isinf isnan finite hypot) diff --git a/defines.h b/defines.h index c2b3dce685d698..d8b7242daad510 100644 --- a/defines.h +++ b/defines.h @@ -56,10 +56,9 @@ void *xrealloc _((void*,long)); void xfree _((void*)); #if SIZEOF_LONG_LONG > 0 -# define HAVE_LONG_LONG # define LONG_LONG long long #elif SIZEOF___INT64 > 0 -# define HAVE_LONG_LONG +# define HAVE_LONG_LONG 1 # define LONG_LONG __int64 # undef SIZEOF_LONG_LONG # define SIZEOF_LONG_LONG SIZEOF___INT64 diff --git a/eval.c b/eval.c index 2bd416fb938967..5a18ee3b3aa72d 100644 --- a/eval.c +++ b/eval.c @@ -3491,10 +3491,21 @@ rb_abort() } static VALUE -rb_f_abort() +rb_f_abort(argc, argv) + int argc; + VALUE *argv; { rb_secure(4); - rb_abort(); + if (argc == 0) { + rb_abort(); + } + else { + VALUE mesg; + + rb_scan_args(argc, argv, "01", &mesg); + rb_io_puts(argc, argv, rb_stderr); + exit(1); + } return Qnil; /* not reached */ } @@ -6056,7 +6067,7 @@ Init_eval() rb_define_global_function("caller", rb_f_caller, -1); rb_define_global_function("exit", rb_f_exit, -1); - rb_define_global_function("abort", rb_f_abort, 0); + 
rb_define_global_function("abort", rb_f_abort, -1); rb_define_global_function("at_exit", rb_f_at_exit, 0); diff --git a/file.c b/file.c index d797a786da7cd9..5a79c13fbd2493 100644 --- a/file.c +++ b/file.c @@ -242,7 +242,7 @@ static VALUE rb_stat_size(self) VALUE self; { - return LONG2NUM(get_stat(self)->st_size); + return OFFT2NUM(get_stat(self)->st_size); } static VALUE @@ -730,7 +730,7 @@ test_s(obj, fname) if (rb_stat(fname, &st) < 0) return Qnil; if (st.st_size == 0) return Qnil; - return rb_int2inum(st.st_size); + return OFFT2NUM(st.st_size); } static VALUE diff --git a/intern.h b/intern.h index 5310f1c03d1c69..d05c3af9459007 100644 --- a/intern.h +++ b/intern.h @@ -70,6 +70,12 @@ long rb_big2long _((VALUE)); #define rb_big2int(x) rb_big2long(x) unsigned long rb_big2ulong _((VALUE)); #define rb_big2uint(x) rb_big2ulong(x) +#if HAVE_LONG_LONG +VALUE rb_ll2inum _((long long)); +VALUE rb_ull2inum _((unsigned long long)); +long long rb_big2ll _((VALUE)); +unsigned long long rb_big2ull _((VALUE)); +#endif /* HAVE_LONG_LONG */ void rb_quad_pack _((char*,VALUE)); VALUE rb_quad_unpack _((const char*,int)); VALUE rb_dbl2big _((double)); diff --git a/io.c b/io.c index 47243563a6b5f8..9d3881110dee7a 100644 --- a/io.c +++ b/io.c @@ -45,6 +45,16 @@ #include #endif +#if !HAVE_OFF_T && !defined(off_t) +# define off_t long +#endif +#if !HAVE_FSEEKO && !defined(fseeko) +# define fseeko fseek +#endif +#if !HAVE_FTELLO && !defined(ftello) +# define ftello ftell +#endif + #ifdef HAVE_SYS_TIME_H # include #else @@ -316,13 +326,20 @@ rb_io_tell(io) VALUE io; { OpenFile *fptr; - long pos; + off_t pos; GetOpenFile(io, fptr); - pos = ftell(fptr->f); + pos = ftello(fptr->f); if (ferror(fptr->f)) rb_sys_fail(fptr->path); +#if SIZEOF_OFF_T > SIZEOF_LONG +# if !HAVE_LONG_LONG +# error off_t is bigger than long, but you have no long long... 
+# endif + return rb_ll2inum(pos); +#else return rb_int2inum(pos); +#endif } #ifndef SEEK_CUR @@ -340,7 +357,7 @@ rb_io_seek(io, offset, whence) long pos; GetOpenFile(io, fptr); - pos = fseek(fptr->f, NUM2LONG(offset), whence); + pos = fseeko(fptr->f, NUM2OFFT(offset), whence); if (pos != 0) rb_sys_fail(fptr->path); clearerr(fptr->f); @@ -371,7 +388,7 @@ rb_io_set_pos(io, offset) long pos; GetOpenFile(io, fptr); - pos = fseek(fptr->f, NUM2LONG(offset), SEEK_SET); + pos = fseeko(fptr->f, NUM2OFFT(offset), SEEK_SET); if (pos != 0) rb_sys_fail(fptr->path); clearerr(fptr->f); @@ -380,12 +397,12 @@ rb_io_set_pos(io, offset) static VALUE rb_io_rewind(io) - VALUE io; + VALUE io; { OpenFile *fptr; GetOpenFile(io, fptr); - if (fseek(fptr->f, 0L, 0) != 0) rb_sys_fail(fptr->path); + if (fseeko(fptr->f, 0L, 0) != 0) rb_sys_fail(fptr->path); clearerr(fptr->f); if (io == current_file) { gets_lineno -= fptr->lineno; @@ -588,7 +605,7 @@ read_all(port) OpenFile *fptr; VALUE str = Qnil; struct stat st; - long siz = BUFSIZ; + off_t siz = BUFSIZ; long bytes = 0; int n; @@ -607,16 +624,19 @@ read_all(port) return rb_str_new(0, 0); } else { - long pos = ftell(fptr->f); + off_t pos = ftello(fptr->f); if (st.st_size > pos && pos >= 0) { siz = st.st_size - pos + 1; + if (siz > LONG_MAX) { + rb_raise(rb_eIOError, "file too big for single read"); + } } } } - str = rb_tainted_str_new(0, siz); + str = rb_tainted_str_new(0, (long)siz); READ_CHECK(fptr->f); for (;;) { - n = io_fread(RSTRING(str)->ptr+bytes, siz-bytes, fptr->f); + n = io_fread(RSTRING(str)->ptr+bytes, (long)siz-bytes, fptr->f); if (n == 0 && bytes == 0) { if (feof(fptr->f)) return Qnil; rb_sys_fail(fptr->path); @@ -1986,7 +2006,7 @@ io_reopen(io, nfile) OpenFile *fptr, *orig; char *mode; int fd; - long pos; + off_t pos; nfile = rb_io_get_io(nfile); if (rb_safe_level() >= 4 && (!OBJ_TAINTED(io) || !OBJ_TAINTED(nfile))) { @@ -1997,7 +2017,7 @@ io_reopen(io, nfile) if (fptr == orig) return io; if (orig->mode & FMODE_READABLE) { - 
pos = ftell(orig->f); + pos = ftello(orig->f); } if (orig->f2) { io_fflush(orig->f2, orig->path); @@ -2032,8 +2052,8 @@ io_reopen(io, nfile) fptr->f = rb_fdopen(fd, mode); } if ((orig->mode & FMODE_READABLE) && pos >= 0) { - fseek(fptr->f, pos, SEEK_SET); - fseek(orig->f, pos, SEEK_SET); + fseeko(fptr->f, pos, SEEK_SET); + fseeko(orig->f, pos, SEEK_SET); } if (fptr->f2) { diff --git a/lib/README b/lib/README index c45e019b73440e..d05964fa67506c 100644 --- a/lib/README +++ b/lib/README @@ -2,6 +2,7 @@ English.rb lets Perl'ish global variables have English names Env.rb loads importenv.rb README this file base64.rb encodes/decodes base64 (obsolete) +benchmark.rb a benchmark utility cgi-lib.rb simple CGI support library (old style) cgi.rb CGI support library cgi/session CGI session class diff --git a/lib/benchmark.rb b/lib/benchmark.rb new file mode 100644 index 00000000000000..e6c63a7c1467ff --- /dev/null +++ b/lib/benchmark.rb @@ -0,0 +1,651 @@ +# +# benchmark.rb +# +=begin + 2001-11-26: Time.times renamed Process.times for ruby17 (gotoken#notwork.org) + 2001-01-12: made bmbm module func. bmbm return Tms array. + 2001-01-10: added bmbm, Job and INSTALL.rb (gotoken#notwork.org) + 2000-04-00: report() prints tag before eval block (gotoken#notwork.org) + 2000-02-22: report(): measure -> Benchmark::measure (nakahiro#sarion.co.jp) + 2000-01-02: bug fix, documentation (gotoken#notwork.org) + 2000-01-01: measure can take a tag as opt. (nobu.nakada#nifty.ne.jp) + 2000-01-01: first release (gotoken#notwork.org) +=end + +=begin += benchmark.rb + +== NAME +((*benchmark.rb*)) - a benchmark utility + +== SYNOPSIS + ---------- + require "benchmark" + include Benchmark + ---------- + +== DESCRIPTION + +benchmark.rb provides some utilities to measure and report the +times used and passed to execute. 
+ +== SIMPLE EXAMPLE + +=== EXAMPLE 0 +To ((<measure>)) the times to make (({"a"*1_000_000})): + + ---------- + puts measure{ "a"*1_000_000 } + ---------- + +On my machine (FreeBSD 3.2 on P5100MHz) this reported as follows: + + ---------- + 1.166667 0.050000 1.216667 ( 0.571355) + ---------- + +The above shows user time, system time, user+system, and elapsed real +time. The unit of time is the second. + +=== EXAMPLE 1 +To do some experiments sequentially, ((<bm>)) is useful: + + ---------- + n = 50000 + bm do |x| + x.report{for i in 1..n; a = "1"; end} + x.report{n.times do ; a = "1"; end} + x.report{1.upto(n) do ; a = "1"; end} + end + ---------- + +The result: + ---------- + user system total real + 1.033333 0.016667 1.016667 ( 0.492106) + 1.483333 0.000000 1.483333 ( 0.694605) + 1.516667 0.000000 1.516667 ( 0.711077) + ---------- + +=== EXAMPLE 2 +To put a label in each ((<report>)): + + ---------- + n = 50000 + bm(7) do |x| + x.report("for:") {for i in 1..n; a = "1"; end} + x.report("times:") {n.times do ; a = "1"; end} + x.report("upto:") {1.upto(n) do ; a = "1"; end} + end + ---------- + +The option (({7})) specifies the offset of each report according to the +longest label. + +This reports as follows: + + ---------- + user system total real + for: 1.050000 0.000000 1.050000 ( 0.503462) + times: 1.533333 0.016667 1.550000 ( 0.735473) + upto: 1.500000 0.016667 1.516667 ( 0.711239) + ---------- + +=== EXAMPLE 3 + +By the way, benchmarks might seem to depend on the order of items. This +is caused by the cost of memory allocation and garbage collection. +To avoid this nuisance, Benchmark::((<bmbm>)) is provided, e.g., to +compare ways to sort an array of strings: + + ---------- + require "rbconfig" + include Config + def file + open("%s/lib/ruby/%s.%s/tk.rb" % + [CONFIG['prefix'],CONFIG['MAJOR'],CONFIG['MINOR']]).read + end + + n = 10 + bmbm do |x| + x.report("destructive!"){ + t = (file*n).to_a; t.each{|line| line.upcase!}; t.sort! 
+ } + x.report("method chain"){ + t = (file*n).to_a.collect{|line| line.upcase}.sort + } + end + ---------- + +This reports: + + ---------- + Rehearsal ------------------------------------------------ + destructive! 2.664062 0.070312 2.734375 ( 2.783401) + method chain 5.257812 0.156250 5.414062 ( 5.736088) + --------------------------------------- total: 8.148438sec + + user system total real + destructive! 2.359375 0.007812 2.367188 ( 2.381015) + method chain 3.046875 0.023438 3.070312 ( 3.085816) + ---------- + +=== EXAMPLE 4 +To report statistics of sequential experiments with unique labels, +((<benchmark>)) is available: + + ---------- + n = 50000 + benchmark(" "*7 + CAPTION, 7, FMTSTR, ">total:", ">avg:") do |x| + tf = x.report("for:") {for i in 1..n; a = "1"; end} + tt = x.report("times:"){n.times do ; a = "1"; end} + tu = x.report("upto:") {1.upto(n) do ; a = "1"; end} + [tf+tt+tu, (tf+tt+tu)/3] + end + ---------- + +The result: + + ---------- + user system total real + for: 1.016667 0.016667 1.033333 ( 0.485749) + times: 1.450000 0.016667 1.466667 ( 0.681367) + upto: 1.533333 0.000000 1.533333 ( 0.722166) + >total: 4.000000 0.033333 4.033333 ( 1.889282) + >avg: 1.333333 0.011111 1.344444 ( 0.629761) + ---------- + +== Benchmark module + +=== CONSTANT +:CAPTION + CAPTION is a caption string which is used in Benchmark::((<benchmark>)) and + Benchmark::Report#((<report>)). +:FMTSTR + FMTSTR is a format string which is used in Benchmark::((<benchmark>)) and + Benchmark::Report#((<report>)). See also Benchmark::Tms#((<format>)). +:BENCHMARK_VERSION + BENCHMARK_VERSION is a version string which stands for the last modification + date (YYYY-MM-DD). + +=== INNER CLASS +* ((<Benchmark::Job>)) +* ((<Benchmark::Report>)) +* ((<Benchmark::Tms>)) + +=== MODULE FUNCTION +==== benchmark + ---------- + benchmark([caption [, label_width [, fmtstr]]]) do |x| ... end + benchmark([caption [, label_width [, fmtstr]]]) do array_of_Tms end + benchmark([caption [, label_width [, fmtstr [, labels...]]]]) do + ... + array_of_Tms + end + ---------- + +(({benchmark})) reports the times. 
In the first form the block variable x is +treated as a ((<Benchmark::Report>)) object, which has a ((<report>)) method. +In the second form, each member of array_of_Tms is reported in the +specified form if the member is a ((<Benchmark::Tms>)) object. The +last form combines the above two forms (See ((<EXAMPLE 4>))). + +The following lists the meaning of each option. + +:caption + A string ((|caption|)) is printed once before execution of the given block. + +:label_width + An integer ((|label_width|)) is used as an offset in each report. + +:fmtstr + A string ((|fmtstr|)) is used to format each measurement. + See ((<format>)) + +:labels + The remaining parameters, labels, are used as prefixes of the format for the + value of the block, that is, array_of_Tms. + +==== bm + ---------- + bm([label_width [, labels ...]]) do ... end + ---------- + +(({bm})) is a simpler interface to ((<benchmark>)). +(({bm})) acts the same as the following: + + benchmark(" "*label_width + CAPTION, label_width, FMTSTR, *labels) do + ... + end + +==== bmbm + ---------- + bmbm([label_width]) do |x| + x.item("label1") { .... } + .... + end + ---------- + +(({bmbm})) is yet another ((<benchmark>)). This utility function is +provided to prevent a kind of job order dependency, which is caused +by memory allocation and object creation. The usage is similar to +((<bm>)) but has fewer options and does three extra things: + + (1) ((*Rehearsal*)): runs all items in the job ((<list>)) to allocate + enough memory. + (2) ((*GC*)): before each ((<measure>))ment, invokes (({GC.start})) to + prevent the influence of previous job. + (3) If given ((|label_width|)) is less than the maximal width of labels + given as ((|item|))'s argument, the latter is used. + Because (({bmbm})) is a 2-pass procedure, this is possible. + +(({bmbm})) returns an array which consists of Tms corresponding to each +(({item})). +==== measure + ---------- + measure([label]) do ... end + ---------- + +measure returns the times used and passed to execute the given block as a +Benchmark::Tms object. + +==== realtime + ---------- + realtime do ... 
end + ---------- + +realtime returns the elapsed real time taken to execute the given block. + +== Benchmark::Report + +=== CLASS METHOD + +==== Benchmark::Report::new(width) + ---------- + Benchmark::Report::new([width [, fmtstr]]) + ---------- + +Usually, one doesn't have to use this method directly; +(({Benchmark::Report::new})) is called by ((<benchmark>)) or ((<bm>)). +((|width|)) and ((|fmtstr|)) are the offset of ((|label|)) and +format string respectively; both of them are used in ((<report>)). + +=== METHOD + +==== report + + ---------- + report(fmt, *args) + ---------- + +This method reports label and time formatted by ((|fmt|)). See +((<format>)) of Benchmark::Tms for the formatting rules. + +== Benchmark::Tms + +=== CLASS METHOD + +== Benchmark::Job + +=== CLASS METHOD + +==== Benchmark::Job::new + ---------- + Benchmark::Job::new(width) + ---------- + +Usually, one doesn't have to use this method directly; +(({Benchmark::Job::new})) is called by ((<bmbm>)). +((|width|)) is an initial value for the offset ((|label|)) for formatting. +((<bmbm>)) passes its argument ((|width|)) to this constructor. + +=== METHOD + +==== item + ---------- + item(((|label|))){ .... } + ---------- + +(({item})) registers a pair of (((|label|))) and given block as job ((<list>)). +==== width + +Maximum length of labels in ((<list>)) plus one. + +==== list + +array of arrays, each of which consists of a label and a job proc. + +==== report + +alias for ((<item>)). + +==== Benchmark::Tms::new + ---------- + Benchmark::Tms::new([u [, s [, cu [, cs [, re [, l]]]]]]) + ---------- + +returns new Benchmark::Tms object which has +((|u|)) as (()), +((|s|)) as (()), +((|cu|)) as (()) +((|cs|)) as (()), +((|re|)) as (()) and +((|l|)) as ((