Merged
53 commits
eb20e60
Feature/workflows (#8)
timo282 Oct 3, 2024
1f0119f
Fix/workflows (#11)
timo282 Oct 4, 2024
ba39fd6
Opro implementation (#7)
finitearth Oct 4, 2024
37591b7
Patch/pre commit config (#10)
finitearth Oct 4, 2024
5e2c0a9
Feature/prompt generation (#12)
finitearth Oct 4, 2024
689701a
Create LICENSE (#14)
mo374z Oct 5, 2024
f69bbb2
Refactor/remove deepinfra (#16)
mo374z Oct 9, 2024
1768297
Usability patches (#15)
finitearth Oct 12, 2024
65f02a5
Feature/examplar selection (#17)
finitearth Oct 13, 2024
e23dd74
Chore/docs release notes (#18)
timo282 Oct 13, 2024
25639c9
Merge branch dev accepting all incoming changes
finitearth Oct 13, 2024
2ac359b
revert Chore/docs release notes (#18)
finitearth Oct 13, 2024
3e650cc
revert last commit
finitearth Oct 13, 2024
3f4c757
updated release notes and read me
finitearth Oct 13, 2024
ac2af13
Feature/read from df (#21)
finitearth Nov 4, 2024
7987f99
Update pyproject.toml
finitearth Nov 19, 2024
7217011
Update release-notes.md
finitearth Nov 19, 2024
2727b6e
merge
finitearth Nov 19, 2024
87fd54c
merge
finitearth Nov 19, 2024
aa6b7d7
Merge branch 'dev' of https://github.com/finitearth/promptolution int…
finitearth Nov 19, 2024
6e69b58
merges
finitearth Nov 19, 2024
e935fa2
resolve merge mistakes
finitearth Nov 19, 2024
68c67f3
delete duplicated lines
finitearth Nov 19, 2024
bee6d8d
Update release-notes.md (#24)
mo374z Nov 19, 2024
3ca7893
Fix/dependencies (#28)
finitearth Feb 21, 2025
d6203d6
Merge branch 'main' into dev
timo282 Feb 21, 2025
683d926
Add vllm as feature and a llm_test_run_script
mo374z Mar 3, 2025
69837fa
small fixes in vllm class
mo374z Mar 3, 2025
7563712
differentiate between vllm and api inference
mo374z Mar 3, 2025
af6f9f8
set up experiment over multiple tasks and prompts
mo374z Mar 3, 2025
bc9997a
change csv saving
mo374z Mar 3, 2025
7958b86
add base llm super class
mo374z Mar 4, 2025
e82db35
add changes from PR review
mo374z Mar 4, 2025
0045de7
change some VLLM params
mo374z Mar 5, 2025
0b3c7cb
fix tensor parallel size to 1
mo374z Mar 5, 2025
a73c378
experiment with batch size
mo374z Mar 5, 2025
1f68410
experiment with larger batch sizes
mo374z Mar 5, 2025
f5fe188
add continuous batch llm
mo374z Mar 5, 2025
1330a9e
remove arg
mo374z Mar 5, 2025
c6dbb7b
remove continuous batch inference try
mo374z Mar 5, 2025
42ab6c9
add batching to vllm
mo374z Mar 5, 2025
0be3d06
add batching in script
mo374z Mar 5, 2025
16514f4
Merge branch 'dev' of https://github.com/finitearth/promptolution int…
timo282 Mar 5, 2025
c5ac101
Add release notes and increase version number
timo282 Mar 5, 2025
0eb701b
remove llm_test_run.py script
mo374z Mar 5, 2025
f4f9722
Merge branch 'feature/vllm' of https://github.com/finitearth/promptol…
mo374z Mar 5, 2025
fae0113
change system prompt
mo374z Mar 5, 2025
b6440c7
Merge pull request #30 from finitearth/feature/vllm
mo374z Mar 5, 2025
b61c9ee
Fix/vllm (#33)
mo374z Mar 9, 2025
2f6f499
Merge branch 'main' into dev
mo374z Mar 9, 2025
b873ee0
implement changes from review
mo374z Mar 9, 2025
739e568
Merge branch 'dev' of https://github.com/finitearth/promptolution int…
mo374z Mar 9, 2025
ec5b709
add typing to token count callback
mo374z Mar 9, 2025
1 change: 1 addition & 0 deletions .gitignore
@@ -7,4 +7,5 @@ __pycache__/
temp/
dist/
outputs/
results/
poetry.lock
14 changes: 14 additions & 0 deletions docs/release-notes.md
@@ -1,5 +1,19 @@
# Release Notes

## Release v1.3.0
### What's changed
#### Added features
* new features for the VLLM Wrapper (automatic batch size determination, accepting kwargs)
* allow callbacks to terminate optimization run
* add token count functionality
* renamed "Classificator"-Predictor to "FirstOccurenceClassificator"
* introduced "MarkerBasedClassifcator"
* automatic task description creation
* use task description in prompt creation
* implement CSV callbacks

**Full Changelog**: [here](https://github.com/finitearth/promptolution/compare/v1.2.0...v1.3.0)

## Release v1.2.0
### What's changed
#### Added features
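The v1.3.0 notes above describe callback-driven termination, token counting, and CSV logging. Below is a minimal usage sketch of how these pieces could fit together, based on the constructor signatures and `run_optimization` changes shown in the diffs that follow; the task name, dataset path, optimizer name, and model id are placeholders, not values from this PR.

```python
# Hedged usage sketch of the v1.3.0 callback features. Placeholder values are
# marked; only the class names and signatures come from the diffs below.
from promptolution.callbacks import CSVCallback, TokenCountCallback
from promptolution.config import Config
from promptolution.helpers import run_optimization

config = Config(
    task_name="my_task",                    # placeholder
    ds_path="data/my_task",                 # placeholder
    optimizer="evopromptga",                # placeholder optimizer name
    predictor="MarkerBasedClassificator",   # new predictor option in this release
    meta_llm="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model id
    n_steps=10,
)

callbacks = [
    # Writes step_results.csv and train_results.csv into the directory;
    # note the trailing slash, since filenames are appended to `dir`.
    CSVCallback(dir="results/"),
    # Terminates the run once the predictor's LLM has consumed 1M tokens.
    TokenCountCallback(
        max_tokens_for_termination=1_000_000,
        token_type_for_termination="total_tokens",
    ),
]

prompts = run_optimization(config, callbacks=callbacks)
```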
121 changes: 103 additions & 18 deletions promptolution/callbacks.py
@@ -1,7 +1,10 @@
"""Callback classes for logging, saving, and tracking optimization progress."""

import os
import time
from typing import Literal

import numpy as np
import pandas as pd
from tqdm import tqdm

@@ -14,24 +17,33 @@ def on_step_end(self, optimizer):

Args:
optimizer: The optimizer object that called the callback.

Returns:
Bool: True if the optimization should continue, False if it should stop.
"""
pass
return True

def on_epoch_end(self, optimizer):
"""Called at the end of each optimization epoch.

Args:
optimizer: The optimizer object that called the callback.

Returns:
Bool: True if the optimization should continue, False if it should stop.
"""
pass
return True

def on_train_end(self, optimizer):
"""Called at the end of the entire optimization process.

Args:
optimizer: The optimizer object that called the callback.

Returns:
Bool: True if the optimization should continue, False if it should stop.
"""
pass
return True


class LoggerCallback(Callback):
@@ -57,14 +69,21 @@ def on_step_end(self, optimizer):
self.logger.critical(f"*** Prompt {i}: Score: {score}")
self.logger.critical(f"{prompt}")

return True

def on_train_end(self, optimizer, logs=None):
"""Log information at the end of training.

Args:
optimizer: The optimizer object that called the callback.
logs: Additional information to log.
"""
self.logger.critical(f"Training ended - {logs}")
if logs is None:
self.logger.critical("Training ended")
else:
self.logger.critical(f"Training ended - {logs}")

return True


class CSVCallback(Callback):
@@ -73,25 +92,25 @@ class CSVCallback(Callback):
This callback saves prompts and scores at each step to a CSV file.

Attributes:
path (str): The path to the CSV file.
dir (str): Directory the CSV file is saved to.
step (int): The current step number.
"""

def __init__(self, path):
def __init__(self, dir):
"""Initialize the CSVCallback.

Args:
path (str): The path to the CSV file.
dir (str): Directory the CSV file is saved to.
"""
# if dir does not exist
if not os.path.exists(os.path.dirname(path)):
os.makedirs(os.path.dirname(path))

# create file in path with header: "step,prompt,score"
with open(path, "w") as f:
f.write("step,prompt,score\n")
self.path = path
if not os.path.exists(dir):
os.makedirs(dir)

self.dir = dir
self.step = 0
self.input_tokens = 0
self.output_tokens = 0
self.start_time = time.time()
self.step_time = time.time()

def on_step_end(self, optimizer):
"""Save prompts and scores to csv.
@@ -101,17 +120,50 @@ def on_step_end(self, optimizer):
"""
self.step += 1
df = pd.DataFrame(
{"step": [self.step] * len(optimizer.prompts), "prompt": optimizer.prompts, "score": optimizer.scores}
{
"step": [self.step] * len(optimizer.prompts),
"input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
"output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
"time_elapsed": [time.time() - self.step_time] * len(optimizer.prompts),
"score": optimizer.scores,
"prompt": optimizer.prompts,
}
)
df.to_csv(self.path, mode="a", header=False, index=False)
self.step_time = time.time()
self.input_tokens = optimizer.meta_llm.input_token_count
self.output_tokens = optimizer.meta_llm.output_token_count

if not os.path.exists(self.dir + "step_results.csv"):
df.to_csv(self.dir + "step_results.csv", index=False)
else:
df.to_csv(self.dir + "step_results.csv", mode="a", header=False, index=False)

return True

def on_train_end(self, optimizer):
"""Called at the end of training.

Args:
optimizer: The optimizer object that called the callback.
"""
pass
df = pd.DataFrame(
dict(
steps=self.step,
input_tokens=optimizer.meta_llm.input_token_count,
output_tokens=optimizer.meta_llm.output_token_count,
time_elapsed=time.time() - self.start_time,
score=np.array(optimizer.scores).mean(),
best_prompts=str(optimizer.prompts),
),
index=[0],
)

if not os.path.exists(self.dir + "train_results.csv"):
df.to_csv(self.dir + "train_results.csv", index=False)
else:
df.to_csv(self.dir + "train_results.csv", mode="a", header=False, index=False)

return True


class BestPromptCallback(Callback):
@@ -139,6 +191,8 @@ def on_step_end(self, optimizer):
self.best_score = optimizer.scores[0]
self.best_prompt = optimizer.prompts[0]

return True

def get_best_prompt(self):
"""Get the best prompt and score achieved during optimization.

@@ -173,10 +227,41 @@ def on_step_end(self, optimizer):
"""
self.pbar.update(1)

return True

def on_train_end(self, optimizer):
"""Close the progress bar at the end of training.

Args:
optimizer: The optimizer object that called the callback.
"""
self.pbar.close()

return True


class TokenCountCallback(Callback):
"""Callback for stopping optimization based on the total token count."""

def __init__(
self,
max_tokens_for_termination: int,
token_type_for_termination: Literal["input_tokens", "output_tokens", "total_tokens"],
):
"""Initialize the TokenCountCallback.

Args:
max_tokens_for_termination (int): Maximum number of tokens allowed before the algorithm is stopped.
token_type_for_termination (str): One of "input_tokens", "output_tokens", or "total_tokens".
"""
self.max_tokens_for_termination = max_tokens_for_termination
self.token_type_for_termination = token_type_for_termination

def on_step_end(self, optimizer):
"""Check if the total token count exceeds the maximum allowed. If so, stop the optimization."""
token_counts = optimizer.predictor.llm.get_token_count()

if token_counts[self.token_type_for_termination] > self.max_tokens_for_termination:
return False

return True
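A side effect of the new contract above is that the return value of `on_step_end` now decides whether optimization continues. A minimal sketch of a custom callback that uses this; `EarlyStopCallback` and its threshold are illustrative and not part of this PR.

```python
# Illustrative custom callback using the new return-value contract:
# returning False from on_step_end asks the optimizer to stop.
from promptolution.callbacks import Callback


class EarlyStopCallback(Callback):
    """Stop optimization once the top score reaches a target value (sketch)."""

    def __init__(self, target_score: float = 0.95):
        self.target_score = target_score

    def on_step_end(self, optimizer):
        # BestPromptCallback reads optimizer.scores[0] as the current best,
        # so the same convention is assumed here.
        if len(optimizer.scores) > 0 and optimizer.scores[0] >= self.target_score:
            return False  # terminate the run
        return True  # continue optimizing
```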
8 changes: 6 additions & 2 deletions promptolution/config.py
@@ -17,15 +17,17 @@ class Config:
ds_path (str): Path to the dataset. Should not be None if used.
n_steps (int): Number of optimization steps. Should not be None if used.
optimizer (str): Name of the optimizer to use. Should not be None if used.
predictor (str): Name of the predictor to use. Defaults to "FirstOccurenceClassificator".
meta_llm (str): Name of the meta language model. Should not be None if used.
downstream_llm (str): Name of the downstream language model. Should not be None if used.
evaluation_llm (str): Name of the evaluation language model. Should not be None if used.
init_pop_size (int): Initial population size. Defaults to 10.
logging_dir (str): Directory for logging. Defaults to "logs/run.csv".
experiment_name (str): Name of the experiment. Defaults to "experiment".
include_task_desc (bool): Whether to include task description. Defaults to False.
task_description (str): Task Description fed to the optimizer. Defaults to None.
donor_random (bool): Whether to use random donor prompts for EvoPromptDE. Defaults to False.
random_seed (int): Random seed for reproducibility. Defaults to 42.
model_storage_path (str): Path to the model storage directory (used for VLLM). Defaults to "../models/".
selection_mode (str): Selection mode for EvoPromptGA. Defaults to "random".
meta_bs (int): Batch size for local meta LLM. Should not be None if llm is run locally. Defaults to None.
downstream_bs (int): Batch size for local downstream LLM.
@@ -46,16 +48,18 @@ class Config:
task_name: str = None
ds_path: Path = None
optimizer: str = None
predictor: Literal["MarkerBasedClassificator", "FirstOccurenceClassificator"] = "FirstOccurenceClassificator"
meta_llm: str = None
downstream_llm: str = None
evaluation_llm: str = None
n_steps: int = None
init_pop_size: int = None
logging_dir: Path = Path("logs/run.csv")
experiment_name: str = "experiment"
include_task_desc: bool = True
task_description: str = None
donor_random: bool = False
random_seed: int = 42
model_storage_path: Optional[Path] = Path("../models/")
selection_mode: Optional[Literal["random", "wheel", "tour"]] = "random"
meta_bs: Optional[int] = None
downstream_bs: Optional[int] = None
17 changes: 12 additions & 5 deletions promptolution/helpers.py
@@ -9,7 +9,7 @@
from promptolution.exemplar_selectors import get_exemplar_selector
from promptolution.llms import get_llm
from promptolution.optimizers import get_optimizer
from promptolution.predictors import Classificator
from promptolution.predictors import FirstOccurrenceClassificator, MarkerBasedClassificator
from promptolution.tasks import get_task


@@ -27,7 +27,7 @@ def run_experiment(config: Config):
return df


def run_optimization(config: Config):
def run_optimization(config: Config, callbacks: List = None):
"""Run the optimization phase of the experiment.

Args:
@@ -37,8 +37,13 @@ def run_optimization(config: Config, callbacks: List = None):
List[str]: The optimized list of prompts.
"""
task = get_task(config)
llm = get_llm(config.meta_llm, token=config.api_token)
predictor = Classificator(llm, classes=task.classes)
llm = get_llm(config.meta_llm, token=config.api_token, model_storage_path=config.model_storage_path)
if config.predictor == "MarkerBasedClassificator":
predictor = MarkerBasedClassificator(llm, classes=task.classes)
elif config.predictor == "FirstOccurenceClassificator":
predictor = FirstOccurrenceClassificator(llm, classes=task.classes)
else:
raise ValueError(f"Predictor {config.predictor} not supported.")

if config.init_pop_size:
init_pop = np.random.choice(task.initial_population, size=config.init_pop_size, replace=True)
@@ -52,6 +57,8 @@ def run_optimization(config: Config, callbacks: List = None):
task=task,
predictor=predictor,
n_eval_samples=config.n_eval_samples,
callbacks=callbacks,
task_description=predictor.extraction_description,
)

prompts = optimizer.optimize(n_steps=config.n_steps)
@@ -76,7 +83,7 @@ def run_evaluation(config: Config, prompts: List[str]):
task = get_task(config, split="test")

llm = get_llm(config.evaluation_llm, token=config.api_token)
predictor = Classificator(llm, classes=task.classes)
predictor = FirstOccurrenceClassificator(llm, classes=task.classes)

scores = task.evaluate(prompts, predictor, subsample=True, n_samples=config.n_eval_samples)
df = pd.DataFrame(dict(prompt=prompts, score=scores))
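The `run_optimization` changes above dispatch on `config.predictor` and feed the predictor's extraction instructions to the optimizer as the task description. A small sketch of that wiring used directly, with a placeholder model id and label set; how `get_llm` maps ids to backends is not shown in this diff.

```python
# Sketch of the new predictor selection outside of run_optimization.
# Model id and class labels are placeholders.
from promptolution.llms import get_llm
from promptolution.predictors import FirstOccurrenceClassificator, MarkerBasedClassificator

llm = get_llm("meta-llama/Meta-Llama-3-8B-Instruct", token="<api-token>")  # placeholder

classes = ["positive", "negative"]  # placeholder label set

# Marker-based extraction (new in this release) vs. first-occurrence matching:
predictor = MarkerBasedClassificator(llm, classes=classes)
fallback = FirstOccurrenceClassificator(llm, classes=classes)

# The optimizer now also receives the predictor's extraction instructions
# as its task description (task_description=predictor.extraction_description above).
print(predictor.extraction_description)
```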
7 changes: 4 additions & 3 deletions promptolution/llms/api_llm.py
@@ -3,7 +3,7 @@
import asyncio
import time
from logging import INFO, Logger
from typing import List
from typing import Any, List

import nest_asyncio
import openai
@@ -63,7 +63,7 @@ class APILLM(BaseLLM):
get_response_async: Asynchronously get responses for a list of prompts.
"""

def __init__(self, model_id: str, token: str = None):
def __init__(self, model_id: str, token: str = None, **kwargs: Any):
"""Initialize the APILLM with a specific model.

Args:
@@ -73,14 +73,15 @@ def __init__(self, model_id: str, token: str = None, **kwargs: Any):
Raises:
ValueError: If an unknown model identifier is provided.
"""
super().__init__()
if "claude" in model_id:
self.model = ChatAnthropic(model=model_id, api_key=token)
elif "gpt" in model_id:
self.model = ChatOpenAI(model=model_id, api_key=token)
else:
self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=token)

def get_response(self, prompts: List[str]) -> List[str]:
def _get_response(self, prompts: List[str]) -> List[str]:
"""Get responses for a list of prompts in a synchronous manner.

This method includes retry logic for handling connection errors and rate limits.
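The rename of `get_response` to `_get_response` and the new `super().__init__()` call suggest a template-method pattern in the `BaseLLM` super class added earlier in this PR, with the base class keeping the `input_token_count`/`output_token_count` attributes and the `get_token_count()` dict consumed by the callbacks. The sketch below shows that likely shape only; it is not the actual implementation, and the whitespace-based token estimate is purely illustrative.

```python
# Plausible sketch of the BaseLLM template implied by this PR (not the real code).
from typing import List


class BaseLLMSketch:
    """Public get_response wraps the subclass hook and tracks token counts."""

    def __init__(self):
        self.input_token_count = 0
        self.output_token_count = 0

    def get_response(self, prompts: List[str]) -> List[str]:
        responses = self._get_response(prompts)
        # Illustrative accounting only; the real class may use a tokenizer.
        self.input_token_count += sum(len(p.split()) for p in prompts)
        self.output_token_count += sum(len(r.split()) for r in responses)
        return responses

    def get_token_count(self) -> dict:
        # Keys match those consumed by TokenCountCallback.
        return {
            "input_tokens": self.input_token_count,
            "output_tokens": self.output_token_count,
            "total_tokens": self.input_token_count + self.output_token_count,
        }

    def _get_response(self, prompts: List[str]) -> List[str]:
        raise NotImplementedError
```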