Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Evaluate objective in C #366

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
65 changes: 65 additions & 0 deletions cgp/cartesian_graph.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import collections
import copy
import math # noqa: F401
import os
import re
from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Set

Expand All @@ -12,6 +13,7 @@
try:
import sympy
from sympy.core import expr as sympy_expr # noqa: F401
from sympy.utilities.codegen import codegen

sympy_available = True
except ModuleNotFoundError:
Expand Down Expand Up @@ -435,3 +437,66 @@ def to_sympy(self, simplify: Optional[bool] = True):
return sympy_exprs[0]
else:
return sympy_exprs

def to_c(self, function_name, filename, path):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it necessary that the user can choose the function_name?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not really, should I just set it to rule?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not always a learning rule, is it? so how about some generic function name, like f? i know this one letter function names should be avoided, but here we may have an exception ;)

"""Create a C module described by the graph.
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved

Writes code and header into files in the given path.
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved
Important: function_name must not be a substring of filename, due to the function declaration consistency checks.
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved
Currently only available for a single output node.
jakobj marked this conversation as resolved.
Show resolved Hide resolved

Returns
----------
None
"""

if not sympy_available:
raise ModuleNotFoundError("No sympy module available. Required for exporting C module")

if not self._n_outputs == 1:
raise ValueError("C module export only available for single output node.")

if function_name in filename:
raise ValueError(
"function_name can not be substring of filename, due to function declaration"
"consistency checks"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"function_name can not be substring of filename, due to function declaration"
"consistency checks"
"function_name can not be substring of filename"

maybe just shorten to this, since it's anyway hard to understand

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at this again, maybe it is easiest to just remove the ability to set function_name and filename? Then we don't have to worry about this check?

)

sympy_expression = self.to_sympy()

[(filename_c, code_c), (filename_header, code_header)] = codegen(
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved
(function_name, sympy_expression), "C99", filename, header=False, empty=False
)

def replace_func_declaration_in_code_and_header_with_full_variable_set(
    code_c, code_header, function_name, n_inputs=None
):
    """Replace the generated function signature with one listing all inputs.

    sympy generates the function signature from the variables that are
    actually used in the expression, whereas callers of the exported
    function assume a fixed signature with one ``double x_<idx>`` argument
    per graph input. The first signature ``function_name(...)`` found in the
    C source and in the header is therefore replaced by the full one.

    Parameters
    ----------
    code_c : str
        Generated C source code.
    code_header : str
        Generated C header code.
    function_name : str
        Name of the generated C function.
    n_inputs : int, optional
        Number of arguments of the full signature. Defaults to the number
        of inputs of the enclosing graph (``self._n_inputs``).

    Returns
    ----------
    Tuple[str, str]
        The C source and the header with the replaced signature.

    Raises
    ----------
    ValueError
        If no signature of ``function_name`` is found in the source or
        in the header.
    """
    if n_inputs is None:
        n_inputs = self._n_inputs

    arguments = ", ".join(f"double x_{idx}" for idx in range(n_inputs))
    full_signature = f"{function_name}({arguments})"

    # Match the identifier as a whole word directly followed by its
    # parenthesised argument list. A plain substring search would also hit
    # e.g. the file name in the `#include` line or the `<name>_result`
    # variable inside the function body.
    signature_pattern = re.compile(rf"\b{re.escape(function_name)}\s*\([^)]*\)")

    def substitute_signature(text, description):
        # A callable replacement keeps backslashes in the signature literal.
        replaced, n_substitutions = signature_pattern.subn(
            lambda _match: full_signature, text, count=1
        )
        if n_substitutions == 0:
            raise ValueError(f"No signature of '{function_name}' found in {description}.")
        return replaced

    code_c = substitute_signature(code_c, "C source")
    code_header = substitute_signature(code_header, "C header")

    return code_c, code_header

# assert function declaration consistency - replace declaration in header and code
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i think maybe a bit more information could be useful here? like "sympy generates function declarations based on the variables used in the expression, but our callers assume a fixed one. hence we need to replace the function declaration"; actually i'm leaning a bit towards "function signature" rather than "function declaration" bc it's the number of arguments we care about

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tried to add a description to make things more clear in the docstring of replace_func_signature_in_source_and_header_with_full_variable_set

Maybe that helps a bit - the whole function is a bit messy, but I wasn't sure how to make it more readable

code_c, code_header = replace_func_declaration_in_code_and_header_with_full_variable_set(
code_c, code_header, function_name
)

if not os.path.exists(path):
os.makedirs(path)
with open("%s/%s" % (path, filename_c), "w") as f:
f.write(f"{code_c}")
with open("%s/%s" % (path, filename_header), "w") as f:
f.write(f"{code_header}")
50 changes: 50 additions & 0 deletions examples/c_code/main.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#include "individual.h"
#include <math.h>
#include <stdio.h>
#include <stdlib.h>


/*
 * Target function the evolved rule is compared against:
 * returns x_0 * x_1 + 1.0.
 */
double target(double x_0, double x_1)
{
    /* Returned directly; a local named like the function would shadow it. */
    return x_0 * x_1 + 1.0;
}

/* generate a random floating point number from min to max */
double rand_from(double min, double max)
{
    const double range = max - min;
    /* Always consume exactly one draw so the random sequence seen by the
     * caller does not depend on whether the range is degenerate. */
    const int draw = rand();

    /* A zero-width range would divide by zero below; the only possible
     * value in that case is min itself. */
    if (range == 0.0) {
        return min;
    }

    /* Scale the draw from [0, RAND_MAX] to [min, max]. */
    return min + (draw / (RAND_MAX / range));
}


/*
 * Mean squared difference between target() and rule() over 100 sample
 * points (x_0, x_1), each coordinate drawn from [-1, 1] with rand_from().
 *
 * The random generator is re-seeded with a fixed seed on every call, so
 * every call evaluates the same sample points.
 */
double l2_norm_rule_target(void)
{
    const int n_samples = 100;
    const double min = -1.0;
    const double max = 1.0;
    double sum_l2_difference = 0.0;

    srand(1234); /* fix seed */

    for (int i = 0; i < n_samples; i++) {
        /* x_0 is drawn before x_1; the order determines the sample points. */
        const double x_0_rand = rand_from(min, max);
        const double x_1_rand = rand_from(min, max);

        const double difference = target(x_0_rand, x_1_rand) - rule(x_0_rand, x_1_rand);

        /* Plain multiplication instead of pow(difference, 2): no libm call. */
        sum_l2_difference += difference * difference;
    }
    return sum_l2_difference / n_samples;
}

/*
 * Entry point: evaluates the rule against the target and prints the result
 * to stdout (without a trailing newline).
 */
int main()
{
    const double error = l2_norm_rule_target();

    printf("%f", error);
    return 0;
}
3 changes: 3 additions & 0 deletions examples/c_code/main.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#ifndef MAIN_H
#define MAIN_H

/* Target function: x_0 * x_1 + 1.0. */
double target(double x_0, double x_1);

/* Random floating point number from min to max. */
double rand_from(double min, double max);

/* Mean squared difference between target() and rule() over random samples. */
double l2_norm_rule_target(void);

#endif /* MAIN_H */
107 changes: 107 additions & 0 deletions examples/example_evaluate_in_c.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
Example for evolutionary regression, with evaluation in C
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved
=========================================================
"""

# The docopt str is added explicitly to ensure compatibility with
# sphinx-gallery.
docopt_str = """
Usage:
example_evaluate_in_c.py

Options:
-h --help
"""

import pathlib
import subprocess
from docopt import docopt

import cgp

# Parse the command line against the usage string above, which only declares
# -h/--help. NOTE(review): `args` is not used in the visible part of this
# script - confirm it is only kept for sphinx-gallery compatibility.
args = docopt(docopt_str)

# %%
# We first define a helper function for compiling the C code. It creates
# object files from the generated file and the main script and links them
# into an executable.


def compile_c_code(filename, scriptname, path):
    """Compile the generated C module and the evaluation script into an executable.

    Both sources are compiled to object files with gcc and then linked into
    the executable ``<path>/<filename>``.

    Parameters
    ----------
    filename : str
        Base name (without extension) of the generated C module;
        ``<path>/<filename>.c`` and ``<path>/<filename>.h`` must exist.
    scriptname : str
        Base name (without extension) of the evaluation script;
        ``<path>/<scriptname>.c`` and ``<path>/<scriptname>.h`` must exist.
    path : str
        Directory containing the sources; the object files and the
        executable are written to the same directory.

    Raises
    ------
    FileNotFoundError
        If one of the required source or header files does not exist.
    subprocess.CalledProcessError
        If one of the gcc invocations exits with a non-zero status.
    """
    # Check explicitly instead of with `assert`, which is stripped under -O.
    required_files = [
        pathlib.Path(f"{path}/{stem}{suffix}")
        for stem in (filename, scriptname)
        for suffix in (".c", ".h")
    ]
    missing_files = [str(file) for file in required_files if not file.is_file()]
    if missing_files:
        raise FileNotFoundError(
            "Missing files required for compilation: " + ", ".join(missing_files)
        )

    # check=True: a failed build must not go unnoticed, otherwise an
    # executable left over from an earlier call could be run instead.
    # compile file with rule
    subprocess.run(
        ["gcc", "-c", "-fPIC", f"{path}/{filename}.c", "-o", f"{path}/{filename}.o"],
        check=True,
    )
    # compile script
    subprocess.run(
        ["gcc", "-c", "-fPIC", f"{path}/{scriptname}.c", "-o", f"{path}/{scriptname}.o"],
        check=True,
    )
    # create executable
    subprocess.run(
        ["gcc", f"{path}/{scriptname}.o", f"{path}/{filename}.o", "-o", f"{path}/{filename}"],
        check=True,
    )


# %%
# We define the objective function for the evolution. It creates a
# C module and header from the computational graph. The file containing the
# rule and the evaluation script are compiled using the above helper function.
# The fitness is the negative of the value printed by the compiled executable.
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved


def objective(individual):
    """Assign a fitness to the individual by evaluating its graph in C.

    The individual's graph is exported as a C module, compiled together with
    the evaluation script, and the resulting executable is run. The fitness
    is the negative of the number the executable prints.

    Parameters
    ----------
    individual
        Individual to evaluate; returned unchanged if it already has a
        fitness.

    Returns
    -------
    The individual, with its ``fitness`` set.

    Raises
    ------
    RuntimeError
        If the executable does not print anything.
    """
    if not individual.fitness_is_None():
        return individual

    function_name = "rule"
    filename = "individual"
    scriptname = "main"
    path = "c_code"

    graph = cgp.CartesianGraph(individual.genome)
    graph.to_c(function_name=function_name, filename=filename, path=path)

    compile_c_code(filename=filename, scriptname=scriptname, path=path)

    # Run the simulation once and reuse its output for both the sanity check
    # and the fitness, instead of executing the binary twice.
    executable = pathlib.Path().absolute() / f"{path}/{filename}"
    output = subprocess.check_output(executable)
    if not output:
        raise RuntimeError(f"Executable {executable} did not print a result.")

    individual.fitness = -1.0 * float(output)

    return individual


# %%
# Next, we set up the evolutionary search. We first define the parameters of the
# genome. We then create a population of individuals with matching genome parameters.


# Two inputs, matching the two arguments (x_0, x_1) that the evaluation
# script in examples/c_code/main.c passes to the generated rule.
genome_params = {"n_inputs": 2, "primitives": (cgp.Add, cgp.Mul, cgp.ConstantFloat)}

pop = cgp.Population(genome_params=genome_params)


# %%
# and finally perform the evolution relying on the library's default
# hyperparameters except that we terminate the evolution as soon as one
# individual has reached fitness zero.

pop = cgp.evolve(objective=objective, pop=pop, termination_fitness=0.0, print_progress=True)

# %%
# After finishing the evolution, we print the final evolved expression.
print(pop.champion.to_sympy())
HenrikMettler marked this conversation as resolved.
Show resolved Hide resolved
98 changes: 98 additions & 0 deletions test/test_cartesian_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,3 +588,101 @@ def test_repr(rng, genome_params):
genome.randomize(rng)
# Assert that the CartesianGraph.__repr__ doesn't raise an error
str(cgp.CartesianGraph(genome))


# def test_to_c():
# sympy = pytest.importorskip("sympy")
#
# # test addition, multiplication, single input, constant: f = 2 * x_0 + 1
# primitives = (cgp.Add, cgp.ConstantFloat)
# genome = cgp.Genome(1, 1, 2, 2, primitives, 1)
#
# genome.dna = [
# ID_INPUT_NODE,
# ID_NON_CODING_GENE,
# ID_NON_CODING_GENE,
# 0,
# 0,
# 0,
# 1,
# 0,
# 0,
# 0,
# 1,
# 2,
# 0,
# 0,
# 1,
# ID_OUTPUT_NODE,
# 3,
# ID_NON_CODING_GENE,
# ]
#
# function_name = 'test_function'
# filename = 'test0'
# graph = cgp.CartesianGraph(genome)
# [(filename_c, code_c), (filename_header, code_header)] =
# graph.to_c(function_name=function_name, filename=filename, path='test_cpp')
#
# filename_c_target = 'test0.c'
# assert filename_c == filename_c_target
#
# # todo: rewrite targets to display more readable cpp code; avoid duplicates
# code_c_target = f'#include "{filename}.h"'\
# f'\n#include <math.h>\ndouble {function_name}(double x_0) ' \
# f'{{\n double {function_name}_result;' \
# f'\n {function_name}_result = 2*x_0 + 1.0;\n
# return {function_name}_result;\n}}\n'
#
# assert code_c_target == code_c
#
# filename_header_target = 'test0.h'
# assert filename_header == filename_header_target
#
# code_header_target = f'#ifndef PROJECT__{filename.upper()}__H'\
# f'\n#define PROJECT__{filename.upper()}__H'\
# f'\ndouble {function_name}(double x_0);\n#endif\n'
#
# assert code_header_target == code_header
#
# # test exponential, subtraction, multiple inputs f = x_0^2 - x_1
# primitives = (cgp.Mul, cgp.Sub)
# genome = cgp.Genome(2, 1, 2, 1, primitives, 1)
#
# genome.dna = [
# ID_INPUT_NODE,
# ID_NON_CODING_GENE,
# ID_NON_CODING_GENE,
# ID_INPUT_NODE,
# ID_NON_CODING_GENE,
# ID_NON_CODING_GENE,
# 0, # cgp.Mul
# 0, # x_0
# 0, # x_0
# 1, # cpg.Sub
# 2, # x_0^2
# 1, # x_1
# ID_OUTPUT_NODE,
# 3,
# ID_NON_CODING_GENE,
# ]
#
# function_name = 'test_function'
# filename = 'test1'
# graph = cgp.CartesianGraph(genome)
# [(filename_c, code_c), (filename_header, code_header)] =
# graph.to_c(function_name=function_name, filename=filename, path='test_cpp')
#
# code_c_target = f'#include "{filename}.h"'\
# f'\n#include <math.h>\ndouble {function_name}(double x_0, double x_1) ' \
# f'{{\n double {function_name}_result;' \
# f'\n {function_name}_result = pow(x_0, 2) - x_1;\n
# return {function_name}_result;\n}}\n'
#
# assert code_c_target == code_c
#
# code_header_target = f'#ifndef PROJECT__{filename.upper()}__H'\
# f'\n#define PROJECT__{filename.upper()}__H'\
# f'\ndouble {function_name}(double x_0, double x_1);\n#endif\n'
#
# assert code_header_target == code_header