diff --git a/lib/Statistics/Descriptive/PDL.pm b/lib/Statistics/Descriptive/PDL.pm index 88f81c3..a76c374 100644 --- a/lib/Statistics/Descriptive/PDL.pm +++ b/lib/Statistics/Descriptive/PDL.pm @@ -29,6 +29,7 @@ my @cache_methods = qw / geometric_mean harmonic_mean max min sample_range iqr + sum_sqr_sample_weights /; __PACKAGE__->_make_caching_accessors( \@cache_methods ); @@ -62,7 +63,7 @@ sub _make_caching_accessors { }; }; } - + return; } @@ -179,6 +180,11 @@ sub sum_sqr_weights { return $self->count; } +sub _sum_sqr_sample_weights { + my $self = shift; + return $self->sum_sqr_weights; +} + sub _min { my $self = shift; return $self->_get_piddle->min; diff --git a/lib/Statistics/Descriptive/PDL/SampleWeighted.pm b/lib/Statistics/Descriptive/PDL/SampleWeighted.pm index 7ca3d6b..505616e 100644 --- a/lib/Statistics/Descriptive/PDL/SampleWeighted.pm +++ b/lib/Statistics/Descriptive/PDL/SampleWeighted.pm @@ -163,6 +163,12 @@ sub _percentile { return $piddle->at($idx); } +# weight for each sample is 1 +sub _sum_sqr_sample_weights { + my $self = shift; + return $self->sum_weights; +} + 1; @@ -227,6 +233,12 @@ Add data to the stats object. Appends to any existing data. Same as L except that non-integer weights will be converted to integer using PDL's rules. +=item sum_sqr_sample_weights + +Same as the C< sum_weights > method. This is because one can consider each +value as weighted by the number of samples, where each individual sample has +a weight of 1. + =back =head1 AUTHOR diff --git a/lib/Statistics/Descriptive/PDL/Weighted.pm b/lib/Statistics/Descriptive/PDL/Weighted.pm index 3d935a1..ca22eee 100644 --- a/lib/Statistics/Descriptive/PDL/Weighted.pm +++ b/lib/Statistics/Descriptive/PDL/Weighted.pm @@ -510,6 +510,10 @@ Sum of the weights vector. Sum of the squared weights vector. Each weight is squared and the sum of these values then calculated. +=item sum_sqr_sample_weights + +Same as the C< sum_sqr_weights > method. 
+ =item Statistical methods Most of the methods should need no explanation here, diff --git a/t/descr_weighted.t b/t/descr_weighted.t index 831198d..a67ceab 100644 --- a/t/descr_weighted.t +++ b/t/descr_weighted.t @@ -536,6 +536,7 @@ sub test_data_with_samples { is ($stats->sum_weights, 150, 'sum of weights correct'); is ($stats->sum_sqr_weights, 100+400+900+1600+2500, 'sum of weights correct'); + is ($stats->sum_sqr_sample_weights, 100+400+900+1600+2500, 'sum of squared sample weights correct'); } diff --git a/t/sample_weighted.t b/t/sample_weighted.t index 61f7987..7cf7495 100644 --- a/t/sample_weighted.t +++ b/t/sample_weighted.t @@ -102,6 +102,10 @@ sub test_wikipedia_percentile_example { is $weighted->percentile(50), $weighted->median, "median same as 50th percentile, " . join ' ', @data; is $weighted->percentile(50), $unweighted->median, 'weighted and unweighted median'; + is $weighted->sum_sqr_sample_weights, + $unweighted->sum_sqr_sample_weights, + 'weighted and unweighted sum of squared sample weights'; + # no longer guaranteed unless dedup is inplace # ok $weighted->values_are_unique, "unique flag set to true value after calculating percentiles";