Merged
Commits
39 commits
f9f1d40
add token count, flexible batch size and kwargs to vllm class
mo374z Mar 6, 2025
b20495f
add testing script for implementation
mo374z Mar 6, 2025
e27fa6c
fix batch size calculation
mo374z Mar 6, 2025
01eeb6d
small changes
mo374z Mar 6, 2025
045ffb8
add revision test
mo374z Mar 6, 2025
ad54496
add argument to parser
mo374z Mar 6, 2025
fc8d779
max model len to int
mo374z Mar 6, 2025
469117c
remove script
mo374z Mar 6, 2025
6b543fa
Change version and Release notes
mo374z Mar 6, 2025
619ce65
changed callback behaviour and implemented token count callback
finitearth Mar 7, 2025
2588664
added super inits
finitearth Mar 7, 2025
8c365c7
allow for splits not based on white space (such as new line break etc)
finitearth Mar 8, 2025
7e7d2b5
include task descriptions
finitearth Mar 8, 2025
edcd28d
add tokenizer based token count to vllm class
mo374z Mar 8, 2025
f2d73d4
update test run script
mo374z Mar 8, 2025
a725384
use classifiers accordingly
mo374z Mar 8, 2025
b0f7931
small fix
mo374z Mar 8, 2025
30e1712
add storage path
mo374z Mar 8, 2025
80b19d2
helpers should use classificator
mo374z Mar 8, 2025
ec4861a
use different model
mo374z Mar 8, 2025
bf7f1df
changes in opro test
mo374z Mar 8, 2025
3969e03
change get_predictor function
mo374z Mar 8, 2025
bd05cd8
fix callback calling
mo374z Mar 8, 2025
96e1bf6
change optimizer test run script
mo374z Mar 8, 2025
62c8de7
small alignments
mo374z Mar 8, 2025
1aa5606
small alignments
mo374z Mar 8, 2025
7214658
small alignments
mo374z Mar 8, 2025
0b15410
some changes to match the current optimizer implementation
mo374z Mar 8, 2025
3967978
changes in template and config
finitearth Mar 9, 2025
9f8c0b6
allow for batching of prompt creation
finitearth Mar 9, 2025
d1c9b54
update release notes and version
mo374z Mar 9, 2025
861a30c
extend csvcallback functionality
mo374z Mar 9, 2025
3bfa208
change callback csv export
mo374z Mar 9, 2025
4ef965e
change step time calculation
mo374z Mar 9, 2025
c997391
small changes
mo374z Mar 9, 2025
cde98f0
remove llm_test_run script
mo374z Mar 9, 2025
d1ba099
update release notes
mo374z Mar 9, 2025
b9f3568
fix issues in stepwise token calculation
mo374z Mar 9, 2025
a894538
small fix
mo374z Mar 9, 2025
1 change: 1 addition & 0 deletions .gitignore
@@ -7,4 +7,5 @@ __pycache__/
temp/
dist/
outputs/
results/
poetry.lock
14 changes: 14 additions & 0 deletions docs/release-notes.md
@@ -1,5 +1,19 @@
# Release Notes

## Release v1.3.0
### What's changed
#### Added features
* new features for the VLLM Wrapper (automatic batch size determination, accepting kwargs)
* allow callbacks to terminate optimization run
* add token count functionality
* renamed "Classificator"-Predictor to "FirstOccurenceClassificator"
* introduced "MarkerBasedClassifcator"
* automatic task description creation
* use task description in prompt creation
* implement CSV callbacks

**Full Changelog**: [here](https://github.com/finitearth/promptolution/compare/v1.2.0...v1.3.0)

## Release v1.2.0
### What's changed
#### Added features
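The token-count feature listed under v1.3.0 can be exercised roughly as follows. This is a minimal sketch: the model id and token value are placeholders, and only `get_llm`, `get_response`, `get_token_count()`, and the `"total_tokens"` key are taken from the diffs below.

```python
from promptolution.llms import get_llm

# Minimal sketch; get_token_count() and the "total_tokens" key are assumed
# from the TokenCountCallback diff further down this PR.
llm = get_llm("meta-llama/Meta-Llama-3-8B-Instruct", token="<api-token>")
responses = llm.get_response(["Classify the sentiment of: 'Great movie!'"])
counts = llm.get_token_count()
print(counts["total_tokens"])  # input + output tokens accumulated so far
```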
110 changes: 92 additions & 18 deletions promptolution/callbacks.py
@@ -1,7 +1,9 @@
"""Callback classes for logging, saving, and tracking optimization progress."""

import os
import time

import numpy as np
import pandas as pd
from tqdm import tqdm

@@ -14,24 +16,33 @@ def on_step_end(self, optimizer):

Args:
optimizer: The optimizer object that called the callback.

Returns:
Bool: True if the optimization should continue, False if it should stop.
"""
pass
return True

def on_epoch_end(self, optimizer):
"""Called at the end of each optimization epoch.

Args:
optimizer: The optimizer object that called the callback.

Returns:
Bool: True if the optimization should continue, False if it should stop.
"""
pass
return True

def on_train_end(self, optimizer):
"""Called at the end of the entire optimization process.

Args:
optimizer: The optimizer object that called the callback.

Returns:
Bool: True if the optimization should continue, False if it should stop.
"""
pass
return True


class LoggerCallback(Callback):
@@ -57,14 +68,21 @@ def on_step_end(self, optimizer):
self.logger.critical(f"*** Prompt {i}: Score: {score}")
self.logger.critical(f"{prompt}")

return True

def on_train_end(self, optimizer, logs=None):
"""Log information at the end of training.

Args:
optimizer: The optimizer object that called the callback.
logs: Additional information to log.
"""
self.logger.critical(f"Training ended - {logs}")
if logs is None:
self.logger.critical("Training ended")
else:
self.logger.critical(f"Training ended - {logs}")

return True


class CSVCallback(Callback):
@@ -73,25 +91,24 @@ class CSVCallback(Callback):
This callback saves prompts and scores at each step to a CSV file.

Attributes:
path (str): The path to the CSV file.
dir (str): Directory the CSV file is saved to.
step (int): The current step number.
"""

def __init__(self, path):
def __init__(self, dir):
"""Initialize the CSVCallback.

Args:
path (str): The path to the CSV file.
dir (str): Directory the CSV file is saved to.
"""
# if dir does not exist
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))

# create file in path with header: "step,prompt,score"
with open(path, "w") as f:
f.write("step,prompt,score\n")
self.path = path
if not os.path.exists(dir):
os.makedirs(dir)

self.dir = dir
self.step = 0
self.input_tokens = 0
self.output_tokens = 0
self.step_time = time.time()

def on_step_end(self, optimizer):
"""Save prompts and scores to csv.
@@ -101,17 +118,50 @@ def on_step_end(self, optimizer):
"""
self.step += 1
df = pd.DataFrame(
{"step": [self.step] * len(optimizer.prompts), "prompt": optimizer.prompts, "score": optimizer.scores}
{
"step": [self.step] * len(optimizer.prompts),
"input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
"output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
"time_elapsed": [time.time() - self.step_time] * len(optimizer.prompts),
"score": optimizer.scores,
"prompt": optimizer.prompts,
}
)
df.to_csv(self.path, mode="a", header=False, index=False)
self.step_time = time.time()
self.input_tokens = optimizer.meta_llm.input_token_count
self.output_tokens = optimizer.meta_llm.output_token_count

if not os.path.exists(self.dir + "step_results.csv"):
df.to_csv(self.dir + "step_results.csv", index=False)
else:
df.to_csv(self.dir + "step_results.csv", mode="a", header=False, index=False)

return True

def on_train_end(self, optimizer):
"""Called at the end of training.

Args:
optimizer: The optimizer object that called the callback.
"""
pass
df = pd.DataFrame(
dict(
steps=self.step,
input_tokens=optimizer.meta_llm.input_token_count,
output_tokens=optimizer.meta_llm.output_token_count,
time_elapsed=time.time() - optimizer.start_time,
score=np.array(optimizer.scores).mean(),
best_prompts=str(optimizer.prompts),
),
index=[0],
)

if not os.path.exists(self.dir + "train_results.csv"):
df.to_csv(self.dir + "train_results.csv", index=False)
else:
df.to_csv(self.dir + "train_results.csv", mode="a", header=False, index=False)

return True


class BestPromptCallback(Callback):
@@ -139,6 +189,8 @@ def on_step_end(self, optimizer):
self.best_score = optimizer.scores[0]
self.best_prompt = optimizer.prompts[0]

return True

def get_best_prompt(self):
"""Get the best prompt and score achieved during optimization.

@@ -173,10 +225,32 @@ def on_step_end(self, optimizer):
"""
self.pbar.update(1)

return True

def on_train_end(self, optimizer):
"""Close the progress bar at the end of training.

Args:
optimizer: The optimizer object that called the callback.
"""
self.pbar.close()

return True


class TokenCountCallback(Callback):
"""Callback for stopping optimization based on the total token count."""

def __init__(self, max_tokens_for_termination):
"""Initialize the TokenCountCallback."""
self.max_tokens_for_termination = max_tokens_for_termination

def on_step_end(self, optimizer):
"""Check if the total token count exceeds the maximum allowed. If so, stop the optimization."""
token_counts = optimizer.predictor.llm.get_token_count()
total_token_count = token_counts["total_tokens"]

if total_token_count > self.max_tokens_for_termination:
return False

return True
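
For context, a minimal sketch of how an optimizer loop might act on these boolean return values. The real loop lives in the optimizer classes, so everything here apart from the callback protocol (on_step_end / on_train_end returning True to continue, False to stop) is an assumption:

```python
# Sketch only: illustrates the callback protocol; the actual optimizers differ.
class ToyOptimizer:
    def __init__(self, callbacks):
        self.callbacks = callbacks
        self.prompts = []
        self.scores = []

    def _step(self):
        # placeholder for prompt generation and evaluation
        pass

    def optimize(self, n_steps):
        for _ in range(n_steps):
            self._step()
            # any callback returning False terminates the run early
            if not all(cb.on_step_end(self) for cb in self.callbacks):
                break
        for cb in self.callbacks:
            cb.on_train_end(self)
        return self.prompts
```

Under this protocol, a `TokenCountCallback` turns a token budget into a hard stop without touching the optimizer itself.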
8 changes: 6 additions & 2 deletions promptolution/config.py
@@ -17,15 +17,17 @@ class Config:
ds_path (str): Path to the dataset. Should not be None if used.
n_steps (int): Number of optimization steps. Should not be None if used.
optimizer (str): Name of the optimizer to use. Should not be None if used.
predictor (str): Name of the predictor to use. Defaults to "FirstOccurenceClassificator".
meta_llm (str): Name of the meta language model. Should not be None if used.
downstream_llm (str): Name of the downstream language model. Should not be None if used.
evaluation_llm (str): Name of the evaluation language model. Should not be None if used.
init_pop_size (int): Initial population size. Defaults to 10.
logging_dir (str): Directory for logging. Defaults to "logs/run.csv".
experiment_name (str): Name of the experiment. Defaults to "experiment".
include_task_desc (bool): Whether to include task description. Defaults to False.
task_description (str): Task Description fed to the optimizer. Defaults to None.
donor_random (bool): Whether to use random donor prompts for EvoPromptDE. Defaults to False.
random_seed (int): Random seed for reproducibility. Defaults to 42.
model_storage_path (str): Path to the model storage directory (used for VLLM). Defaults to "../models/".
selection_mode (str): Selection mode for EvoPromptGA. Defaults to "random".
meta_bs (int): Batch size for local meta LLM. Should not be None if llm is run locally. Defaults to None.
downstream_bs (int): Batch size for local downstream LLM.
@@ -46,16 +48,18 @@ class Config:
task_name: str = None
ds_path: Path = None
optimizer: str = None
predictor: Literal["MarkerBasedClassificator", "FirstOccurenceClassificator"] = "FirstOccurenceClassificator"
meta_llm: str = None
downstream_llm: str = None
evaluation_llm: str = None
n_steps: int = None
init_pop_size: int = None
logging_dir: Path = Path("logs/run.csv")
experiment_name: str = "experiment"
include_task_desc: bool = True
task_description: str = None
donor_random: bool = False
random_seed: int = 42
model_storage_path: Optional[Path] = Path("../models/")
selection_mode: Optional[Literal["random", "wheel", "tour"]] = "random"
meta_bs: Optional[int] = None
downstream_bs: Optional[int] = None
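A hedged example of a `Config` using the new fields. All values are placeholders; only the field names come from the diff above:

```python
from pathlib import Path

from promptolution.config import Config

config = Config(
    task_name="agnews",                       # placeholder task
    ds_path=Path("data/agnews"),              # placeholder dataset path
    optimizer="evopromptga",                  # placeholder optimizer name
    predictor="MarkerBasedClassificator",     # default is "FirstOccurenceClassificator"
    meta_llm="meta-llama/Meta-Llama-3-8B-Instruct",
    downstream_llm="meta-llama/Meta-Llama-3-8B-Instruct",
    evaluation_llm="meta-llama/Meta-Llama-3-8B-Instruct",
    n_steps=10,
    init_pop_size=10,
    model_storage_path=Path("../models/"),    # used for locally stored VLLM weights
    random_seed=42,
)
```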
17 changes: 12 additions & 5 deletions promptolution/helpers.py
@@ -9,7 +9,7 @@
from promptolution.exemplar_selectors import get_exemplar_selector
from promptolution.llms import get_llm
from promptolution.optimizers import get_optimizer
from promptolution.predictors import Classificator
from promptolution.predictors import FirstOccurrenceClassificator, MarkerBasedClassificator
from promptolution.tasks import get_task


@@ -27,7 +27,7 @@ def run_experiment(config: Config):
return df


def run_optimization(config: Config):
def run_optimization(config: Config, callbacks: List = None):
"""Run the optimization phase of the experiment.

Args:
@@ -37,8 +37,13 @@ def run_optimization(config: Config, callbacks: List = None):
List[str]: The optimized list of prompts.
"""
task = get_task(config)
llm = get_llm(config.meta_llm, token=config.api_token)
predictor = Classificator(llm, classes=task.classes)
llm = get_llm(config.meta_llm, token=config.api_token, model_storage_path=config.model_storage_path)
if config.predictor == "MarkerBasedClassificator":
predictor = MarkerBasedClassificator(llm, classes=task.classes)
elif config.predictor == "FirstOccurenceClassificator":
predictor = FirstOccurrenceClassificator(llm, classes=task.classes)
else:
raise ValueError(f"Predictor {config.predictor} not supported.")

if config.init_pop_size:
init_pop = np.random.choice(task.initial_population, size=config.init_pop_size, replace=True)
@@ -52,6 +57,8 @@
task=task,
predictor=predictor,
n_eval_samples=config.n_eval_samples,
callbacks=callbacks,
task_description=predictor.extraction_description,
)

prompts = optimizer.optimize(n_steps=config.n_steps)
@@ -76,7 +83,7 @@ def run_evaluation(config: Config, prompts: List[str]):
task = get_task(config, split="test")

llm = get_llm(config.evaluation_llm, token=config.api_token)
predictor = Classificator(llm, classes=task.classes)
predictor = FirstOccurrenceClassificator(llm, classes=task.classes)

scores = task.evaluate(prompts, predictor, subsample=True, n_samples=config.n_eval_samples)
df = pd.DataFrame(dict(prompt=prompts, score=scores))
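A usage sketch of the extended `run_optimization` signature, combining the new callbacks. The directory and token budget are placeholders, and `config` is assumed to be a populated `Config` like the one sketched above:

```python
from promptolution.callbacks import CSVCallback, TokenCountCallback
from promptolution.helpers import run_optimization

# config: a Config instance as constructed in the previous sketch
callbacks = [
    # note the trailing slash: CSVCallback concatenates dir + "step_results.csv"
    CSVCallback(dir="results/run_01/"),
    TokenCountCallback(max_tokens_for_termination=1_000_000),
]

prompts = run_optimization(config, callbacks=callbacks)
```

Because callbacks can now return False, the token budget above ends the run early instead of merely logging.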
7 changes: 4 additions & 3 deletions promptolution/llms/api_llm.py
@@ -3,7 +3,7 @@
import asyncio
import time
from logging import INFO, Logger
from typing import List
from typing import Any, List

import nest_asyncio
import openai
@@ -63,7 +63,7 @@ class APILLM(BaseLLM):
get_response_async: Asynchronously get responses for a list of prompts.
"""

def __init__(self, model_id: str, token: str = None):
def __init__(self, model_id: str, token: str = None, **kwargs: Any):
"""Initialize the APILLM with a specific model.

Args:
@@ -73,14 +73,15 @@ def __init__(self, model_id: str, token: str = None):
Raises:
ValueError: If an unknown model identifier is provided.
"""
super().__init__()
if "claude" in model_id:
self.model = ChatAnthropic(model=model_id, api_key=token)
elif "gpt" in model_id:
self.model = ChatOpenAI(model=model_id, api_key=token)
else:
self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=token)

def get_response(self, prompts: List[str]) -> List[str]:
def _get_response(self, prompts: List[str]) -> List[str]:
"""Get responses for a list of prompts in a synchronous manner.

This method includes retry logic for handling connection errors and rate limits.
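The rename of `get_response` to `_get_response` together with the added `super().__init__()` suggests that `BaseLLM.get_response` now wraps the subclass hook and accumulates token counts. A sketch of that pattern, under that assumption; the whitespace-based counting is a naive stand-in, not the library's implementation:

```python
from typing import List


class BaseLLMSketch:
    """Illustrative only: assumed shape of BaseLLM's wrapping behaviour."""

    def __init__(self):
        self.input_token_count = 0
        self.output_token_count = 0

    def get_response(self, prompts: List[str]) -> List[str]:
        responses = self._get_response(prompts)
        # naive whitespace count as a stand-in; the real class may use a tokenizer
        self.input_token_count += sum(len(p.split()) for p in prompts)
        self.output_token_count += sum(len(r.split()) for r in responses)
        return responses

    def get_token_count(self) -> dict:
        return {
            "input_tokens": self.input_token_count,
            "output_tokens": self.output_token_count,
            "total_tokens": self.input_token_count + self.output_token_count,
        }

    def _get_response(self, prompts: List[str]) -> List[str]:
        raise NotImplementedError
```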