Skip to content

Commit

Permalink
Add sum_sqr_sample_weights methods
Browse files Browse the repository at this point in the history
The SampleWeighted data can be considered as
run length encoded individual samples, for which
each individual sample has a weight of 1.
  • Loading branch information
shawnlaffan committed Jul 2, 2024
1 parent 9ac05d6 commit 394d8f8
Show file tree
Hide file tree
Showing 5 changed files with 28 additions and 1 deletion.
8 changes: 7 additions & 1 deletion lib/Statistics/Descriptive/PDL.pm
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ my @cache_methods = qw /
geometric_mean harmonic_mean
max min sample_range
iqr
sum_sqr_sample_weights
/;
__PACKAGE__->_make_caching_accessors( \@cache_methods );

Expand Down Expand Up @@ -62,7 +63,7 @@ sub _make_caching_accessors {
};
};
}

return;
}

Expand Down Expand Up @@ -179,6 +180,11 @@ sub sum_sqr_weights {
return $self->count;
}

# Sum of the squared sample weights for this stats object.
# This class simply delegates to sum_sqr_weights, so both methods
# report the same value here.
# NOTE(review): the public sum_sqr_sample_weights accessor is presumably
# generated from this underscore-prefixed sub via @cache_methods - confirm.
sub _sum_sqr_sample_weights {
    my ($self) = @_;

    return $self->sum_sqr_weights;
}

sub _min {
my $self = shift;
return $self->_get_piddle->min;
Expand Down
12 changes: 12 additions & 0 deletions lib/Statistics/Descriptive/PDL/SampleWeighted.pm
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,12 @@ sub _percentile {
return $piddle->at($idx);
}

# Sum of the squared sample weights.
# Each individual sample carries a weight of 1, so squaring changes
# nothing and the result is the same as the sum of the weights.
sub _sum_sqr_sample_weights {
    my ($self) = @_;

    return $self->sum_weights;
}



1;
Expand Down Expand Up @@ -227,6 +233,12 @@ Add data to the stats object. Appends to any existing data.
Same as L<Statistics::Descriptive::PDL::Weighted> except that non-integer weights
will be converted to integer using PDL's rules.
=item sum_sqr_sample_weights
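Same as the C< sum_weights > method. This is because the data can be considered
as run length encoded individual samples, where each individual sample has
a weight of 1. The square of 1 is 1, so the sum of the squared sample weights
equals the total number of samples, which is the sum of the weights.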
=back
=head1 AUTHOR
Expand Down
4 changes: 4 additions & 0 deletions lib/Statistics/Descriptive/PDL/Weighted.pm
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,10 @@ Sum of the weights vector.
Sum of the squared weights vector. Each weight is squared and the sum of these values then calculated.
=item sum_sqr_sample_weights
Same as the C< sum_sqr_weights > method.
=item Statistical methods
Most of the methods should need no explanation here,
Expand Down
1 change: 1 addition & 0 deletions t/descr_weighted.t
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,7 @@ sub test_data_with_samples {

# Each assertion gets its own description so a failure can be traced
# to the specific method in the TAP output.
is ($stats->sum_weights, 150, 'sum of weights correct');
is ($stats->sum_sqr_weights, 100+400+900+1600+2500, 'sum of squared weights correct');
# For the plain weighted class the sample-weight variant is expected
# to match sum_sqr_weights.
is ($stats->sum_sqr_sample_weights, 100+400+900+1600+2500, 'sum of squared sample weights correct');

}

Expand Down
4 changes: 4 additions & 0 deletions t/sample_weighted.t
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ sub test_wikipedia_percentile_example {
is $weighted->percentile(50), $weighted->median, "median same as 50th percentile, " . join ' ', @data;
is $weighted->percentile(50), $unweighted->median, 'weighted and unweighted median';

is $weighted->sum_sqr_sample_weights,
$unweighted->sum_sqr_sample_weights,
'weighted and unweighted sum of squared sample weights';

# no longer guaranteed unless dedup is inplace
# ok $weighted->values_are_unique, "unique flag set to true value after calculating percentiles";

Expand Down

0 comments on commit 394d8f8

Please sign in to comment.