diff --git a/lib/Biodiverse/Randomise.pm b/lib/Biodiverse/Randomise.pm index 25b209368..9164eefde 100644 --- a/lib/Biodiverse/Randomise.pm +++ b/lib/Biodiverse/Randomise.pm @@ -1387,6 +1387,7 @@ sub get_spatial_output_for_label_allocation { my ($self, %args) = @_; my $sp_conditions = $args{spatial_conditions_for_label_allocation}; + my $param_name = $args{param_name} // 'SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION'; return if !defined $sp_conditions; @@ -1409,7 +1410,7 @@ sub get_spatial_output_for_label_allocation { return if !length $sp_check_text; # all we had was whitespace and comments - my $sp = $self->get_param('SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION'); + my $sp = $self->get_param($param_name); return $sp if $sp; @@ -1423,6 +1424,7 @@ sub get_spatial_output_for_label_allocation { #definition_query => $def_query, # do we want a def query for this? Prob not. calculations => [], override_valid_analysis_check => 1, + elements_to_calc => $args{elements_to_calc}, calc_only_elements_to_calc => 1, # really need to rename this undocumented arg ); }; @@ -1432,7 +1434,7 @@ sub get_spatial_output_for_label_allocation { croak $e if $e; - $self->set_param(SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION => $sp); + $self->set_param($param_name => $sp); return $sp; } diff --git a/lib/Biodiverse/Randomise/CurveBall.pm b/lib/Biodiverse/Randomise/CurveBall.pm index abc68bcb7..0e95e4f1a 100644 --- a/lib/Biodiverse/Randomise/CurveBall.pm +++ b/lib/Biodiverse/Randomise/CurveBall.pm @@ -6,6 +6,8 @@ use 5.022; our $VERSION = '4.99_002'; +use Carp qw /croak/; + use experimental 'refaliasing'; use experimental 'declared_refs'; no warnings 'experimental::refaliasing'; @@ -145,6 +147,44 @@ END_PROGRESS_TEXT $progress_bar->reset; + my (%sp_swap_list, @gps_with_nbrs); + if (my $sp_conditions = $args{spatial_condition_for_swap_pairs}) { + my $sp_swapper = $self->get_spatial_output_for_label_allocation ( + %args, + spatial_conditions_for_label_allocation => $sp_conditions, + param_name => 'SPATIAL_OUTPUT_FOR_SWAP_CANDIDATES', + elements_to_calc => \@sorted_groups, # excludes empty and full groups + ); + if ($sp_swapper) { + my $spatial_conditions_arr = $sp_swapper->get_spatial_conditions; + my $sp_cond_obj = $spatial_conditions_arr->[0]; + my $result_type = $sp_cond_obj->get_result_type; + if ($result_type eq 'always_true') { + say "[Randomise] spatial condition always_true, reverting to non-spatial allocation"; + } + elsif ($result_type =~ /^always_false|self_only$/) { + croak "Spatial condition means it is impossible for groups to have neighbours, " + . "so cannot swap labels with neighbours" + if !@gps_with_nbrs; + } + else { + foreach my $element ($sp_swapper->get_element_list) { + my $nbrs = $sp_swapper->get_list_ref_aa ($element, '_NBR_SET1') // []; + # prefilter the focal group + my @filtered = sort grep {$_ ne $element} @$nbrs; + next if !@filtered; + $sp_swap_list{$element} = \@filtered; + } + @gps_with_nbrs = sort keys %sp_swap_list; + my $n_gps_w_nbrs = @gps_with_nbrs; + say "[Randomise] $n_gps_w_nbrs of $n_groups groups have swappable neighbours"; + croak "No groups have neighbours, cannot swap labels with neighbours" + if !@gps_with_nbrs; + } + } + } + my $use_spatial_swap = !!%sp_swap_list; + # Basic algorithm: # pick two different groups at random # swap as many labels as possible @@ -169,12 +209,24 @@ END_PROGRESS_TEXT ) { $attempts++; - my $group1 = $sorted_groups[int $rand->rand ($n_groups)]; - my $group2 = $sorted_groups[int $rand->rand ($n_groups)]; - while ($group1 eq $group2) { - # handle pathological case of only one group - last MAIN_ITER if $n_groups == 1; - $group2 = $sorted_groups[int $rand->rand ($n_groups)]; + # handle pathological case of only one group + last MAIN_ITER if $n_groups == 1; + + my $group1; ; + my $group2; + if ($use_spatial_swap) { + $group1 = $gps_with_nbrs[int $rand->rand (scalar @gps_with_nbrs)]; + my $n = scalar @{$sp_swap_list{$group1}}; + next MAIN_ITER if !$n; + # we have already filtered group1 from its list + $group2 = $sp_swap_list{$group1}[int $rand->rand($n)] + } + else { + $group1 = $sorted_groups[int $rand->rand ($n_groups)]; + $group2 = $sorted_groups[int $rand->rand($n_groups)]; + while ($group1 eq $group2) { # keep trying - a bit wasteful but should be rare + $group2 = $sorted_groups[int $rand->rand($n_groups)]; + } } my \%labels1 = $lb_hash{$group1}; diff --git a/t/28-Randomisation.t b/t/28-Randomisation.t index cccb862dc..a3ce28e9c 100644 --- a/t/28-Randomisation.t +++ b/t/28-Randomisation.t @@ -125,15 +125,45 @@ sub test_rand_independent_swaps_modified { sub test_rand_curveball { test_rand_structured_richness_same ( - 'rand_curveball', swap_count => 1000, + 'rand_curveball', ); } +sub test_rand_curveball_sp_cond { + my $rand_bd_array = test_rand_structured_richness_same ( + 'rand_curveball', + spatial_condition_for_swap_pairs => 'sp_circle(radius => 100000)', + resolution => 100000, + log_suffix => ' with spatial condition' + ); + + # The site data have these groups as an isolated set. There should only be swapping among them + # so the total counts will be constant across realisations. + my $expected = { + 'Genus:sp1' => 4, + 'Genus:sp2' => 10, + 'Genus:sp3' => 4, + }; + my $i; + foreach my $bd (@$rand_bd_array) { + $i++; + my %collated_labels; + foreach my $gp (qw /3250000:3050000 3150000:2950000 3250000:2950000 3250000:2850000/) { + my $labels = $bd->get_labels_in_group_as_hash (group => $gp); + foreach my $label (keys %$labels) { + $collated_labels{$label} += $labels->{$label}; + } + } + is \%collated_labels, $expected, "Curveball spatial: labels and counts for isolated subregion, rand bd $i"; + } +} + sub test_rand_structured_richness_same { my ($rand_function, %args) = @_; $rand_function //= 'rand_structured'; + my $log_suffix = delete $args{log_suffix} // ''; - my $c = 100000; + my $c = delete $args{resolution} // 100000; my $bd = get_basedata_object_from_site_data(CELL_SIZES => [$c, $c]); # add some empty groups - need enough to trigger issue #543 @@ -152,6 +182,7 @@ sub test_rand_structured_richness_same { $bd->add_element(group => $gp, label => $label); } } + $bd->build_spatial_index (resolutions => [$c, $c]); # name is short for test_rand_calc_per_node_uses_orig_bd my $sp = $bd->add_spatial_output (name => 'sp'); @@ -191,7 +222,7 @@ sub test_rand_structured_richness_same { $obs_richness{$group} //= $bd->get_richness_aa ($group) // 0; $rand_richness{$group} = $rand_bd->get_richness_aa ($group) // 0; } - is \%rand_richness, \%obs_richness, "Richness scores match, $rand_function"; + is \%rand_richness, \%obs_richness, "Richness scores match, $rand_function $log_suffix"; } foreach my $rand_bd (@$rand_bd_array) { @@ -200,10 +231,10 @@ sub test_rand_structured_richness_same { $obs_range{$label} //= $bd->get_range (element => $label); $rand_range{$label} = $rand_bd->get_range (element => $label); } - is \%obs_range, \%rand_range, "Ranges match, $rand_function"; + is \%obs_range, \%rand_range, "Ranges match, $rand_function $log_suffix"; } - return; + return $rand_bd_array; }