Skip to content

Commit b61c9ee

Browse files
mo374z and finitearth authored
Fix/vllm (#33)
* add token count, flexible batch size and kwargs to vllm class * add testing script for implementation * fix batch size calculation * small changes * add revision test * add argument to parser * max model len to int * remove script * Change version and Release notes * changed callback behaviour and implemented token count callback * added super inits * allow for splits not based on white space (such as new line break etc) * include task descriptions * add tokenizer based token count to vllm class * update test run script * use classifiers accordingly * small fix * add storage path * helpers should use classificator * use different model * changes in opro test * change get_predictor function * fix callback calling * change optimizer test run script * small alignments * small alignments * small alignments * some changes to match the current optimizer implementation * changes in template and config * allow for batching of prompt creation * update release notes and version * extend csvcallback functionality * change callback csv export * change step time calculation * small changes * remove llm_test_run script * update release notes * fix issues in token stepwise calculation * small fix --------- Co-authored-by: finitearth <t.zehle@gmail.com>
1 parent b6440c7 commit b61c9ee

22 files changed

+455
-152
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,5 @@ __pycache__/
77
temp/
88
dist/
99
outputs/
10+
results/
1011
poetry.lock

docs/release-notes.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# Release Notes
22

3+
## Release v1.3.0
4+
### What's changed
5+
#### Added features
6+
* new features for the VLLM Wrapper (automatic batch size determination, accepting kwargs)
7+
* allow callbacks to terminate optimization run
8+
* add token count functionality
9+
* renamed "Classificator"-Predictor to "FirstOccurrenceClassificator"
10+
* introduced "MarkerBasedClassificator"
11+
* automatic task description creation
12+
* use task description in prompt creation
13+
* implement CSV callbacks
14+
15+
**Full Changelog**: [here](https://github.com/finitearth/promptolution/compare/v1.2.0...v1.3.0)
16+
317
## Release v1.2.0
418
### What's changed
519
#### Added features

promptolution/callbacks.py

Lines changed: 92 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
"""Callback classes for logging, saving, and tracking optimization progress."""
22

33
import os
4+
import time
45

6+
import numpy as np
57
import pandas as pd
68
from tqdm import tqdm
79

@@ -14,24 +16,33 @@ def on_step_end(self, optimizer):
1416
1517
Args:
1618
optimizer: The optimizer object that called the callback.
19+
20+
Returns:
21+
Bool: True if the optimization should continue, False if it should stop.
1722
"""
18-
pass
23+
return True
1924

2025
def on_epoch_end(self, optimizer):
2126
"""Called at the end of each optimization epoch.
2227
2328
Args:
2429
optimizer: The optimizer object that called the callback.
30+
31+
Returns:
32+
Bool: True if the optimization should continue, False if it should stop.
2533
"""
26-
pass
34+
return True
2735

2836
def on_train_end(self, optimizer):
2937
"""Called at the end of the entire optimization process.
3038
3139
Args:
3240
optimizer: The optimizer object that called the callback.
41+
42+
Returns:
43+
Bool: True if the optimization should continue, False if it should stop.
3344
"""
34-
pass
45+
return True
3546

3647

3748
class LoggerCallback(Callback):
@@ -57,14 +68,21 @@ def on_step_end(self, optimizer):
5768
self.logger.critical(f"*** Prompt {i}: Score: {score}")
5869
self.logger.critical(f"{prompt}")
5970

71+
return True
72+
6073
def on_train_end(self, optimizer, logs=None):
6174
"""Log information at the end of training.
6275
6376
Args:
6477
optimizer: The optimizer object that called the callback.
6578
logs: Additional information to log.
6679
"""
67-
self.logger.critical(f"Training ended - {logs}")
80+
if logs is None:
81+
self.logger.critical("Training ended")
82+
else:
83+
self.logger.critical(f"Training ended - {logs}")
84+
85+
return True
6886

6987

7088
class CSVCallback(Callback):
@@ -73,25 +91,24 @@ class CSVCallback(Callback):
7391
This callback saves prompts and scores at each step to a CSV file.
7492
7593
Attributes:
76-
path (str): The path to the CSV file.
94+
dir (str): Directory the CSV file is saved to.
7795
step (int): The current step number.
7896
"""
7997

80-
def __init__(self, path):
98+
def __init__(self, dir):
8199
"""Initialize the CSVCallback.
82100
83101
Args:
84-
path (str): The path to the CSV file.
102+
dir (str): Directory the CSV file is saved to.
85103
"""
86-
# if dir does not exist
87-
if not os.path.exists(os.path.dirname(path)):
88-
os.makedirs(os.path.dirname(path))
89-
90-
# create file in path with header: "step,prompt,score"
91-
with open(path, "w") as f:
92-
f.write("step,prompt,score\n")
93-
self.path = path
104+
if not os.path.exists(dir):
105+
os.makedirs(dir)
106+
107+
self.dir = dir
94108
self.step = 0
109+
self.input_tokens = 0
110+
self.output_tokens = 0
111+
self.step_time = time.time()
95112

96113
def on_step_end(self, optimizer):
97114
"""Save prompts and scores to csv.
@@ -101,17 +118,50 @@ def on_step_end(self, optimizer):
101118
"""
102119
self.step += 1
103120
df = pd.DataFrame(
104-
{"step": [self.step] * len(optimizer.prompts), "prompt": optimizer.prompts, "score": optimizer.scores}
121+
{
122+
"step": [self.step] * len(optimizer.prompts),
123+
"input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
124+
"output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
125+
"time_elapsed": [time.time() - self.step_time] * len(optimizer.prompts),
126+
"score": optimizer.scores,
127+
"prompt": optimizer.prompts,
128+
}
105129
)
106-
df.to_csv(self.path, mode="a", header=False, index=False)
130+
self.step_time = time.time()
131+
self.input_tokens = optimizer.meta_llm.input_token_count
132+
self.output_tokens = optimizer.meta_llm.output_token_count
133+
134+
if not os.path.exists(self.dir + "step_results.csv"):
135+
df.to_csv(self.dir + "step_results.csv", index=False)
136+
else:
137+
df.to_csv(self.dir + "step_results.csv", mode="a", header=False, index=False)
138+
139+
return True
107140

108141
def on_train_end(self, optimizer):
109142
"""Called at the end of training.
110143
111144
Args:
112145
optimizer: The optimizer object that called the callback.
113146
"""
114-
pass
147+
df = pd.DataFrame(
148+
dict(
149+
steps=self.step,
150+
input_tokens=optimizer.meta_llm.input_token_count,
151+
output_tokens=optimizer.meta_llm.output_token_count,
152+
time_elapsed=time.time() - optimizer.start_time,
153+
score=np.array(optimizer.scores).mean(),
154+
best_prompts=str(optimizer.prompts),
155+
),
156+
index=[0],
157+
)
158+
159+
if not os.path.exists(self.dir + "train_results.csv"):
160+
df.to_csv(self.dir + "train_results.csv", index=False)
161+
else:
162+
df.to_csv(self.dir + "train_results.csv", mode="a", header=False, index=False)
163+
164+
return True
115165

116166

117167
class BestPromptCallback(Callback):
@@ -139,6 +189,8 @@ def on_step_end(self, optimizer):
139189
self.best_score = optimizer.scores[0]
140190
self.best_prompt = optimizer.prompts[0]
141191

192+
return True
193+
142194
def get_best_prompt(self):
143195
"""Get the best prompt and score achieved during optimization.
144196
@@ -173,10 +225,32 @@ def on_step_end(self, optimizer):
173225
"""
174226
self.pbar.update(1)
175227

228+
return True
229+
176230
def on_train_end(self, optimizer):
177231
"""Close the progress bar at the end of training.
178232
179233
Args:
180234
optimizer: The optimizer object that called the callback.
181235
"""
182236
self.pbar.close()
237+
238+
return True
239+
240+
241+
class TokenCountCallback(Callback):
242+
"""Callback for stopping optimization based on the total token count."""
243+
244+
def __init__(self, max_tokens_for_termination):
245+
"""Initialize the TokenCountCallback."""
246+
self.max_tokens_for_termination = max_tokens_for_termination
247+
248+
def on_step_end(self, optimizer):
249+
"""Check if the total token count exceeds the maximum allowed. If so, stop the optimization."""
250+
token_counts = optimizer.predictor.llm.get_token_count()
251+
total_token_count = token_counts["total_tokens"]
252+
253+
if total_token_count > self.max_tokens_for_termination:
254+
return False
255+
256+
return True

promptolution/config.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,17 @@ class Config:
1717
ds_path (str): Path to the dataset. Should not be None if used.
1818
n_steps (int): Number of optimization steps. Should not be None if used.
1919
optimizer (str): Name of the optimizer to use. Should not be None if used.
20+
predictor (str): Name of the predictor to use. Defaults to "FirstOccurenceClassificator".
2021
meta_llm (str): Name of the meta language model. Should not be None if used.
2122
downstream_llm (str): Name of the downstream language model. Should not be None if used.
2223
evaluation_llm (str): Name of the evaluation language model. Should not be None if used.
2324
init_pop_size (int): Initial population size. Defaults to 10.
2425
logging_dir (str): Directory for logging. Defaults to "logs/run.csv".
2526
experiment_name (str): Name of the experiment. Defaults to "experiment".
26-
include_task_desc (bool): Whether to include task description. Defaults to False.
27+
task_description (str): Task Description fed to the optimizer. Defaults to None.
2728
donor_random (bool): Whether to use random donor prompts for EvoPromptDE. Defaults to False.
2829
random_seed (int): Random seed for reproducibility. Defaults to 42.
30+
model_storage_path (str): Path to the model storage directory (used for VLLM). Defaults to "../models/".
2931
selection_mode (str): Selection mode for EvoPromptGA. Defaults to "random".
3032
meta_bs (int): Batch size for local meta LLM. Should not be None if llm is run locally. Defaults to None.
3133
downstream_bs (int): Batch size for local downstream LLM.
@@ -46,16 +48,18 @@ class Config:
4648
task_name: str = None
4749
ds_path: Path = None
4850
optimizer: str = None
51+
predictor: Literal["MarkerBasedClassificator", "FirstOccurenceClassificator"] = "FirstOccurenceClassificator"
4952
meta_llm: str = None
5053
downstream_llm: str = None
5154
evaluation_llm: str = None
5255
n_steps: int = None
5356
init_pop_size: int = None
5457
logging_dir: Path = Path("logs/run.csv")
5558
experiment_name: str = "experiment"
56-
include_task_desc: bool = True
59+
task_description: str = None
5760
donor_random: bool = False
5861
random_seed: int = 42
62+
model_storage_path: Optional[Path] = Path("../models/")
5963
selection_mode: Optional[Literal["random", "wheel", "tour"]] = "random"
6064
meta_bs: Optional[int] = None
6165
downstream_bs: Optional[int] = None

promptolution/helpers.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from promptolution.exemplar_selectors import get_exemplar_selector
1010
from promptolution.llms import get_llm
1111
from promptolution.optimizers import get_optimizer
12-
from promptolution.predictors import Classificator
12+
from promptolution.predictors import FirstOccurrenceClassificator, MarkerBasedClassificator
1313
from promptolution.tasks import get_task
1414

1515

@@ -27,7 +27,7 @@ def run_experiment(config: Config):
2727
return df
2828

2929

30-
def run_optimization(config: Config):
30+
def run_optimization(config: Config, callbacks: List = None):
3131
"""Run the optimization phase of the experiment.
3232
3333
Args:
@@ -37,8 +37,13 @@ def run_optimization(config: Config):
3737
List[str]: The optimized list of prompts.
3838
"""
3939
task = get_task(config)
40-
llm = get_llm(config.meta_llm, token=config.api_token)
41-
predictor = Classificator(llm, classes=task.classes)
40+
llm = get_llm(config.meta_llm, token=config.api_token, model_storage_path=config.model_storage_path)
41+
if config.predictor == "MarkerBasedClassificator":
42+
predictor = MarkerBasedClassificator(llm, classes=task.classes)
43+
elif config.predictor == "FirstOccurenceClassificator":
44+
predictor = FirstOccurrenceClassificator(llm, classes=task.classes)
45+
else:
46+
raise ValueError(f"Predictor {config.predictor} not supported.")
4247

4348
if config.init_pop_size:
4449
init_pop = np.random.choice(task.initial_population, size=config.init_pop_size, replace=True)
@@ -52,6 +57,8 @@ def run_optimization(config: Config):
5257
task=task,
5358
predictor=predictor,
5459
n_eval_samples=config.n_eval_samples,
60+
callbacks=callbacks,
61+
task_description=predictor.extraction_description,
5562
)
5663

5764
prompts = optimizer.optimize(n_steps=config.n_steps)
@@ -76,7 +83,7 @@ def run_evaluation(config: Config, prompts: List[str]):
7683
task = get_task(config, split="test")
7784

7885
llm = get_llm(config.evaluation_llm, token=config.api_token)
79-
predictor = Classificator(llm, classes=task.classes)
86+
predictor = FirstOccurrenceClassificator(llm, classes=task.classes)
8087

8188
scores = task.evaluate(prompts, predictor, subsample=True, n_samples=config.n_eval_samples)
8289
df = pd.DataFrame(dict(prompt=prompts, score=scores))

promptolution/llms/api_llm.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
import asyncio
44
import time
55
from logging import INFO, Logger
6-
from typing import List
6+
from typing import Any, List
77

88
import nest_asyncio
99
import openai
@@ -63,7 +63,7 @@ class APILLM(BaseLLM):
6363
get_response_async: Asynchronously get responses for a list of prompts.
6464
"""
6565

66-
def __init__(self, model_id: str, token: str = None):
66+
def __init__(self, model_id: str, token: str = None, **kwargs: Any):
6767
"""Initialize the APILLM with a specific model.
6868
6969
Args:
@@ -73,14 +73,15 @@ def __init__(self, model_id: str, token: str = None):
7373
Raises:
7474
ValueError: If an unknown model identifier is provided.
7575
"""
76+
super().__init__()
7677
if "claude" in model_id:
7778
self.model = ChatAnthropic(model=model_id, api_key=token)
7879
elif "gpt" in model_id:
7980
self.model = ChatOpenAI(model=model_id, api_key=token)
8081
else:
8182
self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=token)
8283

83-
def get_response(self, prompts: List[str]) -> List[str]:
84+
def _get_response(self, prompts: List[str]) -> List[str]:
8485
"""Get responses for a list of prompts in a synchronous manner.
8586
8687
This method includes retry logic for handling connection errors and rate limits.

0 commit comments

Comments
 (0)