3 changes: 2 additions & 1 deletion openevolve/config.py
@@ -217,6 +217,7 @@ class Config:
log_level: str = "INFO"
log_dir: Optional[str] = None
random_seed: Optional[int] = 42
language: Optional[str] = None

# Component configurations
llm: LLMConfig = field(default_factory=LLMConfig)
@@ -361,4 +362,4 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> Config:
# Make the system message available to the individual models, in case it is not provided from the prompt sampler
config.llm.update_model_params({"system_message": config.prompt.system_message})

return config
return config
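
Note on the new language field: when it is left unset in the config, the controller falls back to detecting the language from the initial program (see the extract_code_language import in controller.py below). A minimal sketch of that precedence, assuming the field and helper shown in this diff; the file paths are placeholders, not part of the change:

# Sketch of the language-resolution order introduced here (illustrative,
# not the exact controller code): an explicit config value wins,
# otherwise the language is inferred from the initial program.
from openevolve.config import load_config
from openevolve.utils.code_utils import extract_code_language

config = load_config("config.yaml")  # placeholder path
with open("initial_program.py") as f:  # placeholder path
    initial_code = f.read()

if not config.language:
    # No language pinned in the config -> infer it from the initial program
    config.language = extract_code_language(initial_code)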
263 changes: 102 additions & 161 deletions openevolve/controller.py
@@ -5,25 +5,23 @@
import asyncio
import logging
import os
import shutil
import re
import time
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import traceback
import concurrent.futures

from openevolve.config import Config, load_config
from openevolve.database import Program, ProgramDatabase
from openevolve.evaluator import Evaluator
from openevolve.llm.ensemble import LLMEnsemble
from openevolve.prompt.sampler import PromptSampler
from openevolve.iteration import run_iteration_sync, Result
from openevolve.utils.code_utils import (
apply_diff,
extract_code_language,
extract_diffs,
format_diff_summary,
parse_evolve_blocks,
parse_full_rewrite,
)
from openevolve.utils.format_utils import (
format_metrics_safe,
@@ -129,7 +127,8 @@ def __init__(
# Load initial program
self.initial_program_path = initial_program_path
self.initial_program_code = self._load_initial_program()
self.language = extract_code_language(self.initial_program_code)
if not self.config.language:
self.config.language = extract_code_language(self.initial_program_code)

# Extract file extension from initial program
self.file_extension = os.path.splitext(initial_program_path)[1]
@@ -162,8 +161,9 @@ def __init__(
self.evaluator_prompt_sampler,
database=self.database,
)
self.evaluation_file = evaluation_file

logger.info(f"Initialized OpenEvolve with {initial_program_path} " f"and {evaluation_file}")
logger.info(f"Initialized OpenEvolve with {initial_program_path}")

def _setup_logging(self) -> None:
"""Set up logging"""
@@ -236,7 +236,7 @@ async def run(
initial_program = Program(
id=initial_program_id,
code=self.initial_program_code,
language=self.language,
language=self.config.language,
metrics=initial_metrics,
iteration_found=start_iteration,
)
@@ -263,171 +263,112 @@ async def run(
logger.info(f"Using island-based evolution with {self.config.database.num_islands} islands")
self.database.log_island_status()

for i in range(start_iteration, total_iterations):
iteration_start = time.time()

# Manage island evolution - switch islands periodically
if i > start_iteration and current_island_counter >= programs_per_island:
self.database.next_island()
current_island_counter = 0
logger.debug(f"Switched to island {self.database.current_island}")

current_island_counter += 1

# Sample parent and inspirations from current island
parent, inspirations = self.database.sample()

# Get artifacts for the parent program if available
parent_artifacts = self.database.get_artifacts(parent.id)

# Get actual top programs for prompt context (separate from inspirations)
# This ensures the LLM sees only high-performing programs as examples
actual_top_programs = self.database.get_top_programs(5)

# Build prompt
prompt = self.prompt_sampler.build_prompt(
current_program=parent.code,
parent_program=parent.code, # We don't have the parent's code, use the same
program_metrics=parent.metrics,
previous_programs=[p.to_dict() for p in self.database.get_top_programs(3)],
top_programs=[p.to_dict() for p in actual_top_programs], # Use actual top programs
inspirations=[p.to_dict() for p in inspirations], # Pass inspirations separately
language=self.language,
evolution_round=i,
diff_based_evolution=self.config.diff_based_evolution,
program_artifacts=parent_artifacts if parent_artifacts else None,
)

# Generate code modification
try:
llm_response = await self.llm_ensemble.generate_with_context(
system_message=prompt["system"],
messages=[{"role": "user", "content": prompt["user"]}],
# create temp file to save database snapshots to for process workers to load from
temp_db_path = "tmp/" + str(uuid.uuid4())
self.database.save(temp_db_path, start_iteration)

with concurrent.futures.ProcessPoolExecutor(
max_workers=self.config.evaluator.parallel_evaluations
) as executor:
futures = []
for i in range(start_iteration, total_iterations):
futures.append(
executor.submit(
run_iteration_sync, i, self.config, self.evaluation_file, temp_db_path
)
)

# Parse the response
if self.config.diff_based_evolution:
diff_blocks = extract_diffs(llm_response)

if not diff_blocks:
logger.warning(f"Iteration {i+1}: No valid diffs found in response")
iteration = start_iteration + 1
for future in concurrent.futures.as_completed(futures):
logger.info(f"Completed iteration {iteration}")
try:
result: Result = future.result()
# Skip if the worker returned None instead of a valid Result
if not isinstance(result, Result):
logger.warning("No valid diffs or program length exceeded limit")
continue
# Manage island evolution - switch islands periodically
if (
iteration - 1 > start_iteration
and current_island_counter >= programs_per_island
):
self.database.next_island()
current_island_counter = 0
logger.debug(f"Switched to island {self.database.current_island}")

current_island_counter += 1

# Add to database (will be added to current island)
self.database.add(result.child_program, iteration=iteration)

# Log prompts
self.database.log_prompt(
template_key=(
"full_rewrite_user" if not self.config.diff_based_evolution else "diff_user"
),
program_id=result.child_program.id,
prompt=result.prompt,
responses=[result.llm_response],
)

# Apply the diffs
child_code = apply_diff(parent.code, llm_response)
changes_summary = format_diff_summary(diff_blocks)
else:
# Parse full rewrite
new_code = parse_full_rewrite(llm_response, self.language)

if not new_code:
logger.warning(f"Iteration {i+1}: No valid code found in response")
continue

child_code = new_code
changes_summary = "Full rewrite"

# Check code length
if len(child_code) > self.config.max_code_length:
logger.warning(
f"Iteration {i+1}: Generated code exceeds maximum length "
f"({len(child_code)} > {self.config.max_code_length})"
# Store artifacts if they exist (after program is added to database)
if result.artifacts:
self.database.store_artifacts(result.child_program.id, result.artifacts)

# Log prompts
self.database.log_prompt(
template_key=(
"full_rewrite_user" if not self.config.diff_based_evolution else "diff_user"
),
program_id=result.child_program.id,
prompt=result.prompt,
responses=[result.llm_response],
)
continue

# Evaluate the child program
child_id = str(uuid.uuid4())
child_metrics = await self.evaluator.evaluate_program(child_code, child_id)

# Handle artifacts if they exist
artifacts = self.evaluator.get_pending_artifacts(child_id)

# Create a child program
child_program = Program(
id=child_id,
code=child_code,
language=self.language,
parent_id=parent.id,
generation=parent.generation + 1,
metrics=child_metrics,
metadata={
"changes": changes_summary,
"parent_metrics": parent.metrics,
},
)
# Increment generation for current island
self.database.increment_island_generation()

# Add to database (will be added to current island)
self.database.add(child_program, iteration=i + 1)

# Log prompts
self.database.log_prompt(
template_key=(
"full_rewrite_user" if not self.config.diff_based_evolution else "diff_user"
),
program_id=child_id,
prompt=prompt,
responses=[llm_response],
)
# Check if migration should occur
if self.database.should_migrate():
logger.info(f"Performing migration at iteration {iteration}")
self.database.migrate_programs()
self.database.log_island_status()

# Store artifacts if they exist
if artifacts:
self.database.store_artifacts(child_id, artifacts)

# Log prompts
self.database.log_prompt(
template_key=(
"full_rewrite_user" if not self.config.diff_based_evolution else "diff_user"
),
program_id=child_id,
prompt=prompt,
responses=[llm_response],
)
# Log progress
self._log_iteration(
iteration, result.parent, result.child_program, result.iteration_time
)

# Increment generation for current island
self.database.increment_island_generation()

# Check if migration should occur
if self.database.should_migrate():
logger.info(f"Performing migration at iteration {i+1}")
self.database.migrate_programs()
self.database.log_island_status()

# Log progress
iteration_time = time.time() - iteration_start
self._log_iteration(i, parent, child_program, iteration_time)

# Specifically check if this is the new best program
if self.database.best_program_id == child_program.id:
logger.info(f"🌟 New best solution found at iteration {i+1}: {child_program.id}")
logger.info(f"Metrics: {format_metrics_safe(child_program.metrics)}")

# Save checkpoint
if (i + 1) % self.config.checkpoint_interval == 0:
self._save_checkpoint(i + 1)
# Also log island status at checkpoints
logger.info(f"Island status at checkpoint {i+1}:")
self.database.log_island_status()

# Check if target score reached
if target_score is not None:
# Only consider numeric metrics for target score calculation
numeric_metrics = [
v
for v in child_metrics.values()
if isinstance(v, (int, float)) and not isinstance(v, bool)
]
if numeric_metrics:
avg_score = sum(numeric_metrics) / len(numeric_metrics)
# Specifically check if this is the new best program
if self.database.best_program_id == result.child_program.id:
logger.info(
f"🌟 New best solution found at iteration {iteration}: {result.child_program.id}"
)
logger.info(f"Metrics: {format_metrics_safe(result.child_program.metrics)}")

# Save checkpoint
if (iteration) % self.config.checkpoint_interval == 0:
self._save_checkpoint(iteration)
# Also log island status at checkpoints
logger.info(f"Island status at checkpoint {iteration}:")
self.database.log_island_status()

# Check if target score reached
if target_score is not None:
avg_score = sum(result.child_metrics.values()) / max(
1, len(result.child_metrics)
)
if avg_score >= target_score:
logger.info(
f"Target score {target_score} reached after {i+1} iterations"
f"Target score {target_score} reached after {iteration} iterations"
)
break

except Exception as e:
logger.exception(f"Error in iteration {i+1}: {str(e)}")
continue

self.database.save(temp_db_path, iteration)
iteration += 1
except Exception as e:
logger.error(f"Error in iteration {iteration}: {str(e)}")
continue
shutil.rmtree(temp_db_path)
# Get the best program using our tracking mechanism
best_program = None
if self.database.best_program_id:
@@ -607,4 +548,4 @@ def _save_best_program(self, program: Optional[Program] = None) -> None:
indent=2,
)

logger.info(f"Saved best program to {code_path} with program info to {info_path}")
logger.info(f"Saved best program to {code_path} with program info to {info_path}")