Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Randomisations: Refactor and cache constant labels #952

Merged
merged 3 commits into from
Nov 12, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Rand: Cache constant labels basedatas
No need to reprocess every time to
generate the same data sets.
  • Loading branch information
shawnlaffan committed Nov 12, 2024
commit 80f26596abe60f70e3eaa37a9a009c3240f6560f
66 changes: 40 additions & 26 deletions lib/Biodiverse/Randomise.pm
Original file line number Diff line number Diff line change
Expand Up @@ -822,38 +822,52 @@ sub get_randomised_basedata {
my $bd = $args{basedata_ref} || $self->get_param ('BASEDATA_REF');
my $constant_labels = $self->_parse_labels_not_to_randomise(%args);

my $const_bd = Biodiverse::BaseData->new($bd->get_params_hash);
my $non_const_bd = Biodiverse::BaseData->new($bd->get_params_hash);
$const_bd->rename (new_name => $const_bd->get_name . ' constant label subset');
$non_const_bd->rename (new_name => $non_const_bd->get_name . ' random label subset');
state $cache_key_const_bd = 'CONSTANT_LABELS_CONST_BASEDATA';
state $cache_key_rand_bd = 'CONSTANT_LABELS_RAND_BASEDATA';

my $csv_object = $bd->get_csv_object (
sep_char => $bd->get_param('JOIN_CHAR'),
quote_char => $bd->get_param('QUOTES'),
);
my $const_bd = $self->get_cached_value ($cache_key_const_bd);
my $non_const_bd = $self->get_cached_value ($cache_key_rand_bd);

my %const_label_hash;
@const_label_hash{@$constant_labels} = undef;
for my $label ($bd->get_labels) {
my $groups = $bd->get_groups_with_label_as_hash_aa ($label);
if (!$const_bd && !$non_const_bd) {
$const_bd = Biodiverse::BaseData->new($bd->get_params_hash);
$non_const_bd = Biodiverse::BaseData->new($bd->get_params_hash);

# we should cache the constant BD
my $target_bd = exists $const_label_hash{$label} ? $const_bd : $non_const_bd;
$target_bd->add_elements_collated_by_label (
data => {$label => $groups},
csv_object => $csv_object,
);
}
foreach my $empty_gp ($bd->get_empty_groups) {
$const_bd->add_element (
group => $empty_gp,
count => 0,
allow_empty_groups => 1,
$const_bd->rename(new_name => $const_bd->get_name . ' constant label subset');
$non_const_bd->rename(new_name => $non_const_bd->get_name . ' random label subset');

my $csv_object = $bd->get_csv_object(
sep_char => $bd->get_param('JOIN_CHAR'),
quote_char => $bd->get_param('QUOTES'),
);

my %const_label_hash;
@const_label_hash{@$constant_labels} = undef;
for my $label ($bd->get_labels) {
my $groups = $bd->get_groups_with_label_as_hash_aa($label);

# we should cache the constant BD
my $target_bd = exists $const_label_hash{$label} ? $const_bd : $non_const_bd;
$target_bd->add_elements_collated_by_label(
data => { $label => $groups },
csv_object => $csv_object,
);
}
foreach my $empty_gp ($bd->get_empty_groups) {
$const_bd->add_element(
group => $empty_gp,
count => 0,
allow_empty_groups => 1,
);
}

$const_bd->rebuild_spatial_index;
$non_const_bd->rebuild_spatial_index; # sometimes the non_const basedata is "missing" groups

$self->set_cached_value ($cache_key_const_bd => $const_bd);
$self->set_cached_value ($cache_key_rand_bd => $non_const_bd);
}

$const_bd->rebuild_spatial_index;
$non_const_bd->rebuild_spatial_index; # sometimes the non_const basedata is "missing" groups
# randomise the "randomisable" one
my $new_rand_bd = $self->_get_randomised_basedata (%args, basedata_ref => $non_const_bd);

# add the constant labels
Expand Down
Loading