Merged

43 commits
f9f1d40
add token count, flexible batch size and kwargs to vllm class
mo374z Mar 6, 2025
b20495f
add testing script for implementation
mo374z Mar 6, 2025
e27fa6c
fix batch size calculation
mo374z Mar 6, 2025
01eeb6d
small changes
mo374z Mar 6, 2025
045ffb8
add revision test
mo374z Mar 6, 2025
ad54496
add argument to parser
mo374z Mar 6, 2025
fc8d779
max model len to int
mo374z Mar 6, 2025
469117c
remove script
mo374z Mar 6, 2025
6b543fa
Change version and Release notes
mo374z Mar 6, 2025
619ce65
changed callback behaviour and implemented token count callback
finitearth Mar 7, 2025
2588664
added super inits
finitearth Mar 7, 2025
8c365c7
allow for splits not based on whitespace (such as newline breaks, etc.)
finitearth Mar 8, 2025
7e7d2b5
include task descriptions
finitearth Mar 8, 2025
edcd28d
add tokenizer based token count to vllm class
mo374z Mar 8, 2025
f2d73d4
update test run script
mo374z Mar 8, 2025
a725384
use classifiers accordingly
mo374z Mar 8, 2025
b0f7931
small fix
mo374z Mar 8, 2025
30e1712
add storage path
mo374z Mar 8, 2025
80b19d2
helpers should use classificator
mo374z Mar 8, 2025
ec4861a
use different model
mo374z Mar 8, 2025
bf7f1df
changes in opro test
mo374z Mar 8, 2025
3969e03
change get_predictor function
mo374z Mar 8, 2025
bd05cd8
fix callback calling
mo374z Mar 8, 2025
96e1bf6
change optimizer test run script
mo374z Mar 8, 2025
62c8de7
small alignments
mo374z Mar 8, 2025
1aa5606
small alignments
mo374z Mar 8, 2025
7214658
small alignments
mo374z Mar 8, 2025
0b15410
some changes to match the current optimizer implementation
mo374z Mar 8, 2025
3967978
changes in template and config
finitearth Mar 9, 2025
9f8c0b6
allow for batching of prompt creation
finitearth Mar 9, 2025
8ecc6a8
v1.3.0 (#34)
mo374z Mar 9, 2025
859831c
fixed prompt creation with task description
finitearth Mar 9, 2025
f53e4d2
make classification task for prompt creation optional
finitearth Mar 9, 2025
c063039
fix meta_prompt_template
finitearth Mar 9, 2025
5e0b8f7
enable not forcing class output for marker-based classificator
finitearth Mar 9, 2025
b81d953
Merge branch 'main' into fix/prompt_creation
mo374z Mar 10, 2025
eeb6995
updated callbacks
finitearth Mar 11, 2025
147052e
add seeding to vllm and sampling params
finitearth Mar 11, 2025
984220b
add random seed to test script
mo374z Mar 11, 2025
aa26e5f
align with token / no token
mo374z Mar 11, 2025
5b483df
delete script
mo374z Mar 11, 2025
28020cc
Merge branch 'dev' into feature/deterministic
mo374z Mar 11, 2025
39c58e4
fix prompt creation if else
mo374z Mar 12, 2025
21 changes: 12 additions & 9 deletions promptolution/callbacks.py
@@ -1,7 +1,7 @@
"""Callback classes for logging, saving, and tracking optimization progress."""

import os
import time
from datetime import datetime
from typing import Literal

import numpy as np
@@ -64,7 +64,8 @@ def __init__(self, logger):
def on_step_end(self, optimizer):
"""Log information about the current step."""
self.step += 1
self.logger.critical(f"✨Step {self.step} ended✨")
time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
self.logger.critical(f"{time} - ✨Step {self.step} ended✨")
for i, (prompt, score) in enumerate(zip(optimizer.prompts, optimizer.scores)):
self.logger.critical(f"*** Prompt {i}: Score: {score}")
self.logger.critical(f"{prompt}")
@@ -78,10 +79,11 @@ def on_train_end(self, optimizer, logs=None):
optimizer: The optimizer object that called the callback.
logs: Additional information to log.
"""
time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
if logs is None:
self.logger.critical("Training ended")
self.logger.critical(f"{time} - Training ended")
else:
self.logger.critical(f"Training ended - {logs}")
self.logger.critical(f"{time} - Training ended - {logs}")

return True

@@ -109,8 +111,8 @@ def __init__(self, dir):
self.step = 0
self.input_tokens = 0
self.output_tokens = 0
self.start_time = time.time()
self.step_time = time.time()
self.start_time = datetime.now()
self.step_time = datetime.now()

def on_step_end(self, optimizer):
"""Save prompts and scores to csv.
@@ -124,12 +126,12 @@
"step": [self.step] * len(optimizer.prompts),
"input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
"output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
"time_elapsed": [time.time() - self.step_time] * len(optimizer.prompts),
"time_elapsed": [(datetime.now() - self.step_time).total_seconds()] * len(optimizer.prompts),
"score": optimizer.scores,
"prompt": optimizer.prompts,
}
)
self.step_time = time.time()
self.step_time = datetime.now()
self.input_tokens = optimizer.meta_llm.input_token_count
self.output_tokens = optimizer.meta_llm.output_token_count

@@ -151,7 +153,8 @@ def on_train_end(self, optimizer):
steps=self.step,
input_tokens=optimizer.meta_llm.input_token_count,
output_tokens=optimizer.meta_llm.output_token_count,
time_elapsed=time.time() - self.start_time,
time_elapsed=(datetime.now() - self.start_time).total_seconds(),
time=datetime.now(),
score=np.array(optimizer.scores).mean(),
best_prompts=str(optimizer.prompts),
),
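The callback changes replace time.time() with datetime objects, prefix the step logs with a timestamp, and report per-step durations in seconds. A minimal sketch of the timing pattern the CSVCallback now uses (the standalone helper below is illustrative, not part of the library):

from datetime import datetime

# A datetime marker is stored when a step begins ...
step_start = datetime.now()

def elapsed_seconds(since: datetime) -> float:
    """Wall-clock seconds elapsed since `since` (what `time_elapsed` now records)."""
    return (datetime.now() - since).total_seconds()

# ... converted to seconds when the step ends, then reset for the next step.
time_elapsed = elapsed_seconds(step_start)
step_start = datetime.now()

# Timestamp prefix used by the LoggerCallback, e.g. "12-03-25 14:07:31:512345"
timestamp = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")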
7 changes: 5 additions & 2 deletions promptolution/helpers.py
@@ -27,7 +27,7 @@ def run_experiment(config: Config):
return df


def run_optimization(config: Config, callbacks: List = None):
def run_optimization(config: Config, callbacks: List = None, use_token: bool = False):
"""Run the optimization phase of the experiment.

Args:
@@ -37,7 +37,10 @@ def run_optimization(config: Config, callbacks: List = None):
List[str]: The optimized list of prompts.
"""
task = get_task(config)
llm = get_llm(config.meta_llm, token=config.api_token, model_storage_path=config.model_storage_path)
if use_token:
llm = get_llm(config.meta_llm, token=config.api_token)
else:
llm = get_llm(config.meta_llm, model_storage_path=config.model_storage_path, seed=config.random_seed)
if config.predictor == "MarkerBasedClassificator":
predictor = MarkerBasedClassificator(llm, classes=task.classes)
elif config.predictor == "FirstOccurenceClassificator":
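run_optimization now branches on a use_token flag: with it, get_llm receives the API token; without it, the local model storage path and the random seed. A hedged usage sketch (the Config import path and all field values are placeholders; only the field names visible in this diff are taken from the code):

from promptolution.config import Config        # import path assumed
from promptolution.helpers import run_optimization

config = Config(
    meta_llm="meta-llama/Llama-3.1-8B-Instruct",  # placeholder model id
    api_token="hf_...",                           # only used when use_token=True
    model_storage_path="/models",                 # only used when use_token=False
    random_seed=42,
    # ... remaining task/optimizer fields omitted
)

# Local path: model_storage_path and random_seed are forwarded to get_llm.
prompts = run_optimization(config)

# API path: the token is forwarded instead.
prompts = run_optimization(config, use_token=True)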
14 changes: 6 additions & 8 deletions promptolution/llms/vllm.py
@@ -44,12 +44,12 @@
temperature: float = 0.1,
top_p: float = 0.9,
model_storage_path: str | None = None,
token: str | None = None,
dtype: str = "auto",
tensor_parallel_size: int = 1,
gpu_memory_utilization: float = 0.95,
max_model_len: int = 2048,
trust_remote_code: bool = False,
seed: int = 42,
**kwargs,
):
"""Initialize the VLLM with a specific model.
@@ -61,12 +61,12 @@
temperature (float, optional): Sampling temperature. Defaults to 0.1.
top_p (float, optional): Top-p sampling parameter. Defaults to 0.9.
model_storage_path (str, optional): Directory to store the model. Defaults to None.
token: (str, optional): Token for accessing the model - not used in implementation yet.
dtype (str, optional): Data type for model weights. Defaults to "float16".
tensor_parallel_size (int, optional): Number of GPUs for tensor parallelism. Defaults to 1.
gpu_memory_utilization (float, optional): Fraction of GPU memory to use. Defaults to 0.95.
max_model_len (int, optional): Maximum sequence length for the model. Defaults to 2048.
trust_remote_code (bool, optional): Whether to trust remote code. Defaults to False.
seed (int, optional): Random seed for the model. Defaults to 42.
**kwargs: Additional keyword arguments to pass to the LLM class initialization.

Note:
@@ -81,7 +81,9 @@
self.trust_remote_code = trust_remote_code

# Configure sampling parameters
self.sampling_params = SamplingParams(temperature=temperature, top_p=top_p, max_tokens=max_generated_tokens)
self.sampling_params = SamplingParams(
temperature=temperature, top_p=top_p, max_tokens=max_generated_tokens, seed=seed
)

# Initialize the vLLM engine with both explicit parameters and any additional kwargs
llm_params = {
@@ -93,6 +95,7 @@
"max_model_len": self.max_model_len,
"download_dir": model_storage_path,
"trust_remote_code": self.trust_remote_code,
"seed": seed,
**kwargs,
}

@@ -136,11 +139,6 @@ def _get_response(self, inputs: list[str]):
for input in inputs
]

# Count input tokens
for prompt in prompts:
input_tokens = self.tokenizer.encode(prompt)
self.input_token_count += len(input_tokens)

# generate responses for self.batch_size prompts at the same time
all_responses = []
for i in range(0, len(prompts), self.batch_size):
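The VLLM wrapper now seeds both SamplingParams and the engine, drops the unused token argument, and forwards extra keyword arguments to vllm.LLM. A construction sketch (the import path, model id, first positional argument, and the extra kwarg are assumptions; the remaining parameter names come from the diff):

from promptolution.llms.vllm import VLLM     # import path assumed

llm = VLLM(
    "meta-llama/Llama-3.1-8B-Instruct",      # placeholder model id (assumed first positional arg)
    max_generated_tokens=256,
    temperature=0.1,
    top_p=0.9,
    max_model_len=2048,
    model_storage_path="/models",            # passed to vLLM as download_dir
    seed=42,                                 # seeds SamplingParams and the vLLM engine
    enable_prefix_caching=True,              # example of an extra kwarg forwarded via **kwargs
)

responses = llm.get_response(["Classify the sentiment: The movie was great."])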
1 change: 1 addition & 0 deletions promptolution/optimizers/evoprompt_ga.py
@@ -81,6 +81,7 @@ def optimize(self, n_steps: int) -> List[str]:
if not continue_optimization:
break

self._on_train_end()
return self.prompts

def _crossover(self, prompts, scores) -> str:
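The added self._on_train_end() call makes the train-end hook fire once optimize() leaves its loop, including after an early break requested by a callback. A sketch of the hook pattern this relies on (the base-class body is an assumption; the callback method names match the callbacks.py diff):

class BaseOptimizer:                          # assumed base class, sketched for illustration
    def __init__(self, callbacks=None):
        self.callbacks = callbacks or []
        self.prompts, self.scores = [], []

    def _on_step_end(self) -> bool:
        # Run every callback; any one returning False requests an early stop.
        results = [cb.on_step_end(self) for cb in self.callbacks]
        return all(results)

    def _on_train_end(self):
        # With the new call in EvoPromptGA.optimize(), this always runs once training ends.
        for cb in self.callbacks:
            cb.on_train_end(self)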
12 changes: 6 additions & 6 deletions promptolution/predictors/classificator.py
@@ -75,12 +75,12 @@ class MarkerBasedClassificator(BasePredictor):
BasePredictor: The base class for predictors in the promptolution library.
"""

def __init__(self, llm, classes, marker="<final_answer>", *args, **kwargs):
def __init__(self, llm, classes=None, marker="<final_answer>", *args, **kwargs):
"""Initialize the Classificator.

Args:
llm: The language model to use for predictions.
classes (List[str]): The list of valid class labels.
classes (List[str]): The list of valid class labels. If None, does not force any class.
marker (str): The marker to use for extracting the class label.
*args, **kwargs: Additional arguments for the BasePredictor.
"""
@@ -101,11 +101,11 @@ def _extract_preds(self, preds: List[str], shape: Tuple[int, int]) -> np.ndarray
"""
response = []
for pred in preds:
predicted_class = pred.split(self.marker)[-1].strip()
if predicted_class not in self.classes:
predicted_class = self.classes[0]
pred = pred.split(self.marker)[-1].strip()
if self.classes is not None and pred not in self.classes:
pred = self.classes[0]

response.append(predicted_class)
response.append(pred)

response = np.array(response).reshape(*shape)
return response
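With classes now optional, the marker-based predictor keeps whatever text follows the last marker and only snaps it to a known label when a class list is provided. A standalone sketch of that extraction logic (simplified from _extract_preds; the reshape step in the diff is omitted):

import numpy as np

def extract_marker_preds(preds, classes=None, marker="<final_answer>"):
    """Take the text after the last marker; optionally force it into a known class."""
    out = []
    for pred in preds:
        label = pred.split(marker)[-1].strip()
        if classes is not None and label not in classes:
            label = classes[0]               # fall back to the first class, as in the diff
        out.append(label)
    return np.array(out)

# extract_marker_preds(["reasoning ... <final_answer> positive"], classes=["positive", "negative"])
# -> array(['positive'], ...)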
20 changes: 14 additions & 6 deletions promptolution/utils/prompt_creation.py
@@ -42,6 +42,7 @@ def create_prompts_from_samples(
n_samples: int = 3,
task_description: str = None,
n_prompts: int = 1,
get_uniform_labels: bool = False,
) -> List[str]:
"""Generate a set of prompts from dataset examples sampled from a given task.

@@ -59,13 +60,23 @@
n_samples (int): The number of samples to use for generating prompts.
task_description (str): The description of the task to include in the prompt.
n_prompts (int): The number of prompts to generate.
get_uniform_labels (bool): If True, samples are selected such that all classes are represented.

Returns:
List[str]: A list of generated prompts.
"""
if meta_prompt is None and task_description is None:
meta_prompt_template = PROMPT_CREATION_TEMPLATE
elif meta_prompt is None and task_description is not None:
meta_prompt_template = PROMPT_CREATION_TEMPLATE_TD.replace("<task_desc>", task_description)
elif meta_prompt is not None and task_description is None:
meta_prompt_template = meta_prompt
elif meta_prompt is not None and task_description is not None:
meta_prompt_template = meta_prompt.replace("<task_desc>", task_description)

meta_prompts = []
for _ in range(n_prompts):
if isinstance(task, ClassificationTask):
if isinstance(task, ClassificationTask) and get_uniform_labels:
# if classification task sample such that all classes are represented
unique_labels, counts = np.unique(task.ys, return_counts=True)
proportions = counts / len(task.ys)
@@ -87,13 +98,10 @@
xs = task.xs[indices].tolist()
ys = task.ys[indices].tolist()

if meta_prompt is None:
meta_prompt = PROMPT_CREATION_TEMPLATE
if task_description is None:
meta_prompt = PROMPT_CREATION_TEMPLATE_TD.replace("<task_desc>", task_description)
examples = "\n\n".join([f"Input: {x}\nOutput: {y}" for x, y in zip(xs, ys)])
meta_prompt = meta_prompt.replace("<input_output_pairs>", examples)
meta_prompt = meta_prompt_template.replace("<input_output_pairs>", examples)
meta_prompts.append(meta_prompt)

prompts = llm.get_response(meta_prompts)
prompts = [prompt.split("</prompt>")[0].split("<prompt>")[-1].strip() for prompt in prompts]

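Template selection now covers all four combinations of meta_prompt and task_description, and each generated prompt substitutes freshly sampled input/output pairs into the chosen template. A condensed sketch of the selection branch (the two template constants are placeholder stand-ins for the ones defined in the package):

# Placeholder stand-ins for the package's real constants:
PROMPT_CREATION_TEMPLATE = "Write an instruction for these examples:\n<input_output_pairs>"
PROMPT_CREATION_TEMPLATE_TD = "Task: <task_desc>\nWrite an instruction for these examples:\n<input_output_pairs>"

def select_meta_prompt_template(meta_prompt=None, task_description=None):
    """Pick the meta prompt template, mirroring the if/elif chain in the diff."""
    if meta_prompt is None and task_description is None:
        return PROMPT_CREATION_TEMPLATE
    if meta_prompt is None:
        return PROMPT_CREATION_TEMPLATE_TD.replace("<task_desc>", task_description)
    if task_description is None:
        return meta_prompt
    return meta_prompt.replace("<task_desc>", task_description)

# Per generated prompt, sampled examples are then substituted in:
# meta_prompt = template.replace("<input_output_pairs>", examples)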
7 changes: 6 additions & 1 deletion scripts/optimizer_test_run.py
@@ -16,6 +16,7 @@
parser.add_argument("--optimizer", default="evopromptde")
parser.add_argument("--n-steps", type=int, default=10)
parser.add_argument("--token", default=None)
parser.add_argument("--seed", type=int, default=187)
args = parser.parse_args()

config = Config(
@@ -29,8 +30,12 @@
evaluation_llm=args.model,
api_token=args.token,
model_storage_path=args.model_storage_path,
random_seed=args.seed,
)

prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/{args.model}/")])
if args.token is None:
prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/seedingtest/{args.model}/")])
else:
prompts = run_optimization(config, callbacks=[LoggerCallback(logger), CSVCallback(f"results/seedingtest/{args.model}/")], use_token=True)

logger.info(f"Optimized prompts: {prompts}")
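Using only the flags visible in this diff, a seeded local run might be launched as: python scripts/optimizer_test_run.py --optimizer evopromptde --n-steps 10 --seed 187 (the model and model-storage-path arguments are defined above the shown hunk and are assumed here); passing --token additionally switches run_optimization to the API-backed path.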