Skip to content

Commit

Permalink
Initialize volume and behavior from parameter distributions
Browse files Browse the repository at this point in the history
Any behavior (as well as volume) can be set on a cell-by-cell basis drawn from a list of parameter distributions: uniform, loguniform, normal, lognormal, log10normal. See the template project after custom_data for how this can be implemented. Note: those are disabled to not change the base behavior of the template project.

Checks are performed to ensure coherence (lower bounds <= upper bounds) and to make sure that truncated normals warn the user about how long they will take for draws.

The main function, set_parameters_from_distributions() should be called immediately after load_cells_from_pugixml() and it will update the values of the loaded cells.
  • Loading branch information
drbergman committed Jun 3, 2024
1 parent 84d033a commit 5037e5f
Show file tree
Hide file tree
Showing 4 changed files with 346 additions and 2 deletions.
315 changes: 315 additions & 0 deletions modules/PhysiCell_geometry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,321 @@ bool load_cells_from_pugixml( pugi::xml_node root )
bool load_cells_from_pugixml( void )
{ return load_cells_from_pugixml( physicell_config_root ); }


void set_parameters_from_distributions( const pugi::xml_node root )
{
// find the start of cell definitions
pugi::xml_node node = root.child( "cell_definitions" );

// find the first cell definition
node = node.child( "cell_definition" );

std::string celltype;
Cell_Definition *pCD;
while (node)
{
pugi::xml_node node_ipd = node.child("initial_parameter_distributions");
if (node_ipd && (node_ipd.attribute("enabled").empty() || node_ipd.attribute("enabled").as_bool()))
{
celltype = node.attribute("name").as_string();
pCD = find_cell_definition(celltype);
set_distributed_parameters(node_ipd, pCD);
}
node = node.next_sibling("cell_definition");
}
return;
}

void set_distributed_parameters(const pugi::xml_node node_ipd, Cell_Definition *pCD)
{
pugi::xml_node node_dist = node_ipd.child("distribution");
while (node_dist)
{
if (node_dist.attribute("enabled").empty() || node_dist.attribute("enabled").as_bool()) // if enabled attribute is missing or true, set the distribution (I put this here rather than in the function because the logic is clearer this way without negations)
{
set_distributed_parameter(node_dist, pCD);
}
node_dist = node_dist.next_sibling("distribution");
}
return;
}

void set_distributed_parameter(pugi::xml_node node_dist, Cell_Definition *pCD)
{
static std::vector<std::string> supported_distributions = {"Uniform","LogUniform","Normal","LogNormal","Log10Normal"};
std::string type = node_dist.attribute("type").as_string();
std::string behavior = xml_get_string_value(node_dist, "behavior");
if (!is_in(type, supported_distributions))
{
std::cout << "ERROR: Only supporting these distributions:" << std::endl
<< "\t\t" << supported_distributions << std::endl
<< "\tBut " << behavior << " for " << pCD->name << " using " << type << "." << std::endl;
exit(-1);
}

if (!strcmpi(behavior,"volume") && find_behavior_index(behavior) == -1)
{
std::cout << "ERROR: Initial parameter distributions can only be set for volume and cell behaviors." << std::endl
<< "\t" << behavior << " is not among these." << std::endl;
exit(-1);
}
set_distributed_parameter(pCD, behavior, type, node_dist);
return;
}

bool is_in(const std::string x, const std::vector<std::string> A)
{
// checks if x is in A
for (unsigned int i = 0; i < A.size(); i++)
{
if (strcmpi(x, A[i]))
{ return true; }
}
return false;
}

void set_distributed_parameter(Cell_Definition *pCD, std::string behavior, std::string type, pugi::xml_node node_dist)
{
double base_value;
if (strcmpi(behavior, "volume"))
{
base_value = pCD->phenotype.volume.total;
}
else
{
base_value = get_single_base_behavior(pCD, behavior);
}
bool check_base = node_dist.attribute("check_base").empty() || node_dist.attribute("check_base").as_bool(); // if check_base not provided, default to true
if (strcmpi(type,"uniform"))
{
double min = xml_get_double_value(node_dist, "min");
double max = xml_get_double_value(node_dist, "max");
if (check_base && (base_value < min || base_value > max))
{
std::cout << "ERROR: The base value for " << behavior << " in " << pCD->name << " is " << base_value << std::endl
<< "\tThis value is outside the range of the uniform distribution." << std::endl
<< "\tmin = " << min << ", max = " << max << "." << std::endl
<< "\tIf you want to allow the base value to be outside the range, set check_base to false." << std::endl;
exit(-1);
}
double dv = max - min;
if (dv < 0)
{
std::cout << "ERROR: The min and max values for " << behavior << " in " << pCD->name << " do not satisfy min <= max." << std::endl
<< "\tmin = " << min << ", max = " << max << std::endl;
exit(-1);
}
for (unsigned int i = 0; i < (*all_cells).size(); i++)
{
if ((*all_cells)[i]->type_name != pCD->name)
{
continue;
}
double val = min + dv * UniformRandom();
set_distributed_parameter((*all_cells)[i], behavior, val);
}
}
else if (strcmpi(type,"loguniform"))
{
double min = xml_get_double_value(node_dist, "min");
if (min <= 0)
{
std::cout << "ERROR: The log uniform distirbution must be defined on a positive interval." << std::endl
<< "\tThe min value for " << behavior << " in " << pCD->name << " is " << min << std::endl
<< "\tSet the min and max as the bounds on the value you want, not the bounds on the exponent." << std::endl
<< "\tFor example, if you want a value between 0.1 and 10, set min=0.1 and max=10, not min=-1 and max=1." << std::endl;
exit(-1);
}
double max = xml_get_double_value(node_dist, "max");
if (check_base && (base_value < min || base_value > max))
{
std::cout << "ERROR: The base value for " << behavior << " in " << pCD->name << " is " << base_value << std::endl
<< "\tThis value is outside the range of the loguniform distribution." << std::endl
<< "\tmin = " << min << ", max = " << max << "." << std::endl
<< "\tIf you want to allow the base value to be outside the range, set check_base to false." << std::endl;
exit(-1);
}
min = log(min);
max = log(max);
double dv = max - min;
if (dv < 0)
{
std::cout << "ERROR: The min and max values for " << behavior << " in " << pCD->name << " do not satisfy min <= max." << std::endl
<< "\tmin = " << min << ", max = " << max << std::endl;
exit(-1);
}
for (unsigned int i = 0; i < (*all_cells).size(); i++)
{
if ((*all_cells)[i]->type_name != pCD->name)
{
continue;
}
double val = exp(min + dv * UniformRandom());
set_distributed_parameter((*all_cells)[i], behavior, val);
}
}
else if (strcmpi(type,"normal"))
{
double mu = xml_get_double_value(node_dist, "mu");
double sigma = xml_get_double_value(node_dist, "sigma");
double lb = -9e99;
double ub = 9e99;
if (node_dist.child("lower_bound"))
{ lb = xml_get_double_value(node_dist, "lower_bound"); }
if (node_dist.child("upper_bound"))
{ ub = xml_get_double_value(node_dist, "upper_bound"); }
if (lb > ub)
{
std::cout << "ERROR: The lower and upper bounds for " << behavior << " in " << pCD->name << " do not satisfy lb <= ub." << std::endl
<< "\tlb = " << lb << ", ub = " << ub << std::endl;
exit(-1);
}
if (check_base && (base_value < lb || base_value > ub))
{
std::cout << "ERROR: The base value for " << behavior << " in " << pCD->name << " is " << base_value << std::endl
<< "\tThis value is outside the range of the truncated normal distribution." << std::endl
<< "\tExpecting values between " << lb << " and " << ub << "." << std::endl
<< "\tIf you want to allow the base value to be outside the range, set check_base to false." << std::endl;
exit(-1);
}
print_drawing_expectations(mu, sigma, lb, ub, (*all_cells).size());
for (unsigned int i = 0; i < (*all_cells).size(); i++)
{
if ((*all_cells)[i]->type_name != pCD->name)
{
continue;
}
double val=lb;
while (val<=lb || val>=ub)
{ val = NormalRandom(mu, sigma); }
set_distributed_parameter((*all_cells)[i], behavior, val);
}
}
else if (strcmpi(type,"lognormal"))
{
double mu = xml_get_double_value(node_dist, "mu");
double sigma = xml_get_double_value(node_dist, "sigma");
double lb = 0;
double ub = 9e99;
get_log_normal_bounds(node_dist, behavior, pCD, lb, ub, base_value, check_base);
print_drawing_expectations(mu, sigma, log(lb), log(ub), (*all_cells).size());
for (unsigned int i = 0; i < (*all_cells).size(); i++)
{
if ((*all_cells)[i]->type_name != pCD->name)
{
continue;
}
double val=lb;
while (val<=lb || val>=ub)
{ val = exp(NormalRandom(mu, sigma)); }
set_distributed_parameter((*all_cells)[i], behavior, val);
}
}
else if (strcmpi(type,"log10normal"))
{
static double log10_ = log(10.0);
double mu = xml_get_double_value(node_dist, "mu");
double sigma = xml_get_double_value(node_dist, "sigma");
double lb = -9e99;
double ub = 9e99;
get_log_normal_bounds(node_dist, behavior, pCD, lb, ub, base_value, check_base);
print_drawing_expectations(mu, sigma, log(lb), log(ub), (*all_cells).size());
for (unsigned int i = 0; i < (*all_cells).size(); i++)
{
if ((*all_cells)[i]->type_name != pCD->name)
{
continue;
}
double val=lb;
while (val<=lb || val>=ub)
{ val = exp(log10_ * NormalRandom(mu, sigma)); }
set_distributed_parameter((*all_cells)[i], behavior, val);
}
}
return;
}

void get_log_normal_bounds(pugi::xml_node node_dist, std::string behavior, Cell_Definition *pCD, double &lb, double &ub, double base_value, bool check_base)
{
if (node_dist.child("lower_bound"))
{
lb = xml_get_double_value(node_dist, "lower_bound");
if (lb < 0)
{
std::cout << "ERROR: The lower bound for a lognormal/log10normal distribution only matters if it is non-negative." << std::endl
<< "\tThe lower bound for " << behavior << " in " << pCD->name << " is " << lb << "." << std::endl
<< "\tThe lower bound is for the actual assigned value while the mean and standard deviation are for the log/log10 of the value." << std::endl
<< "\tSince this seems to imply (understandable!) confusion about the lognormal/log10normal distribution, I'm going to exit." << std::endl;
exit(-1);
}
}
if (node_dist.child("upper_bound"))
{
ub = xml_get_double_value(node_dist, "upper_bound");
}
if (lb > ub)
{
std::cout << "ERROR: The lower and upper bounds for " << behavior << " in " << pCD->name << " do not satisfy lb <= ub." << std::endl
<< "\tlb = " << lb << ", ub = " << ub << std::endl;
exit(-1);
}
if (check_base && (base_value < lb || base_value > ub))
{
std::cout << "ERROR: The base value for " << behavior << " in " << pCD->name << " is " << base_value << std::endl
<< "\tThis value is outside the range of the lognormal/log10normal distribution." << std::endl
<< "\tExpecting values between " << lb << " and " << ub << "." << std::endl
<< "\tIf you want to allow the base value to be outside the range, set check_base to false." << std::endl;
exit(-1);
}
}

void print_drawing_expectations(double mu, double sigma, double lb, double ub, int n)
{
// calculate the z-scores for lb and ub
double z_lb = (lb - mu) / sigma;
double z_ub = (ub - mu) / sigma;

// calculate the probabilities for lb and ub
double p_lb = 0.5 * (1 + std::erf(z_lb / std::sqrt(2)));
double p_ub = 0.5 * (1 + std::erf(z_ub / std::sqrt(2)));

// the probability of finding a value between lb and ub is the difference between the probabilities for ub and lb
double success_probability = p_ub - p_lb;
double num_expected = n / success_probability;

std::cout << "Drawing up to " << n << " values from a normal distribution with mu=" << mu << " and sigma=" << sigma << std::endl
<< "\tExpecting values between " << lb << " and " << ub << std::endl
<< "\tIt will take about " << num_expected << " draws to get " << n << " values in the range." << std::endl
<< "\tIf one draw takes 1 microsecond, this will take about " << num_expected * 1e-6 / 60 << " minutes." << std::endl;
}

void set_distributed_parameter(Cell* pCell, std::string behavior, double val)
{
if (strcmpi(behavior, "volume"))
{
pCell->set_total_volume(val);
}
else
{
set_single_behavior(pCell, behavior, val);
}
}

bool strcmpi(std::string x, std::string y)
{
// case-Insensitive compare strings
for (auto& a : x) {
a = tolower(a);
}
for (auto& a : y) {
a = tolower(a);
}
return x==y;
}

void set_parameters_from_distributions( void )
{ return set_parameters_from_distributions( physicell_config_root ); }

std::vector<std::string> split_csv_labels( std::string labels_line )
{
std::vector< std::string > label_tokens;
Expand Down
15 changes: 14 additions & 1 deletion modules/PhysiCell_geometry.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,20 @@ void load_cells_mat( std::string filename );
void load_cells_physicell( std::string filename );

bool load_cells_from_pugixml( pugi::xml_node root );
bool load_cells_from_pugixml( void ); // load cells based on default config XML root
bool load_cells_from_pugixml( void ); // load cells based on default config XML root

void set_parameters_from_distributions( const pugi::xml_node root );
void set_parameters_from_distributions(void);
void set_distributed_parameters(pugi::xml_node node, Cell_Definition *pCD);
void set_distributed_parameter(pugi::xml_node node_dist, Cell_Definition *pCD);
void set_distributed_parameter(Cell_Definition *pCD, std::string behavior, std::string type, pugi::xml_node node_dist);

void get_log_normal_bounds(pugi::xml_node node_dist, std::string behavior, Cell_Definition *pCD, double &lb, double &ub, double base_value, bool check_base);
void set_distributed_parameter(Cell* pCell, std::string behavior, double val);
void print_drawing_expectations(double mu, double sigma, double lb, double ub, int n);

bool is_in(std::string x, std::vector<std::string> A);
bool strcmpi(std::string x, std::string y);

//
// 2D functions
Expand Down
15 changes: 15 additions & 0 deletions sample_projects/template/config/PhysiCell_settings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -318,6 +318,21 @@
<custom_data>
<sample units="dimensionless" conserved="false">1.0</sample>
</custom_data>

<initial_parameter_distributions enabled="false">
<distribution enabled="false" type="Log10Normal" check_base="true">
<behavior>Volume</behavior>
<mu>4</mu>
<sigma>2</sigma>
<upper_bound>100000</upper_bound>
</distribution>
<distribution enabled="false" type="LogUniform" check_base="true">
<behavior>apoptosis</behavior>
<min>1e-6</min>
<max>1e-2</max>
</distribution>
</initial_parameter_distributions>

</cell_definition>
</cell_definitions>

Expand Down
3 changes: 2 additions & 1 deletion sample_projects/template/custom_modules/custom.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ void setup_tissue( void )
std::cout << std::endl;

// load cells from your CSV file (if enabled)
load_cells_from_pugixml();
load_cells_from_pugixml();
set_parameters_from_distributions();

return;
}
Expand Down

0 comments on commit 5037e5f

Please sign in to comment.