
Commit 7c052a9

v1.3.2 (#40)
#### Added features

* Allow for configuration and evaluation of system prompts in all LLM-Classes
* CSV Callback is now FileOutputCallback and able to write Parquet files
* Fixed LLM-Call templates in VLLM
* Refined OPRO-implementation to be closer to the paper
1 parent c12ab62 commit 7c052a9

File tree: 16 files changed (+319, -173 lines)

docs/release-notes.md (12 additions, 0 deletions)

@@ -1,5 +1,15 @@
 # Release Notes
 
+## Release v1.3.2
+### What's changed
+#### Added features
+* Allow for configuration and evaluation of system prompts in all LLM-Classes
+* CSV Callback is now FileOutputCallback and able to write Parquet files
+* Fixed LLM-Call templates in VLLM
+* refined OPRO-implementation to be closer to the paper
+
+**Full Changelog**: [here](https://github.com/finitearth/promptolution/compare/v1.3.1...v1.3.2)
+
 ## Release v1.3.1
 ### What's changed
 #### Added features
@@ -9,6 +19,8 @@
 * generalize the Classificator
 * add verbosity and callback handling in EvoPromptGA
 * add timestamp to the callback
+* removed datasets from repo
+* changed task creation (now by default with a dataset)
 
 **Full Changelog**: [here](https://github.com/finitearth/promptolution/compare/v1.3.0...v1.3.1)
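As a quick illustration of the headline change, here is a minimal, hedged sketch of passing system prompts through the updated `get_response` signature. The `APILLM` class name, its constructor arguments, and the model id are assumptions for illustration and are not taken from this diff:

```python
# Hedged sketch of the new system_prompts argument on get_response.
# APILLM, its constructor arguments, and the model id are illustrative assumptions.
from promptolution.llms.api_llm import APILLM

llm = APILLM(model_id="meta-llama/Meta-Llama-3-8B-Instruct", token="<api-token>")

responses = llm.get_response(
    ["Classify the sentiment of: 'The plot was delightful.'"],
    system_prompts="Answer with exactly one word: positive or negative.",
)
print(responses[0])
```

Omitting `system_prompts` falls back to `DEFAULT_SYS_PROMPT`, as the base-class diff further below shows.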

promptolution/callbacks.py (35 additions, 48 deletions)

@@ -66,6 +66,8 @@ def on_step_end(self, optimizer):
         self.step += 1
         time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
         self.logger.critical(f"{time} - ✨Step {self.step} ended✨")
+        time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
+        self.logger.critical(f"{time} - ✨Step {self.step} ended✨")
         for i, (prompt, score) in enumerate(zip(optimizer.prompts, optimizer.scores)):
             self.logger.critical(f"*** Prompt {i}: Score: {score}")
             self.logger.critical(f"{prompt}")
@@ -80,40 +82,48 @@ def on_train_end(self, optimizer, logs=None):
             logs: Additional information to log.
         """
         time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
+        time = datetime.now().strftime("%d-%m-%y %H:%M:%S:%f")
         if logs is None:
             self.logger.critical(f"{time} - Training ended")
+            self.logger.critical(f"{time} - Training ended")
         else:
             self.logger.critical(f"{time} - Training ended - {logs}")
+            self.logger.critical(f"{time} - Training ended - {logs}")
 
         return True
 
 
-class CSVCallback(Callback):
-    """Callback for saving optimization progress to a CSV file.
+class FileOutputCallback(Callback):
+    """Callback for saving optimization progress to a specified file type.
 
-    This callback saves prompts and scores at each step to a CSV file.
+    This callback saves information about each step to a file.
 
     Attributes:
-        dir (str): Directory the CSV file is saved to.
+        dir (str): Directory the file is saved to.
         step (int): The current step number.
+        file_type (str): The type of file to save the output to.
     """
 
-    def __init__(self, dir):
-        """Initialize the CSVCallback.
+    def __init__(self, dir, file_type: Literal["parquet", "csv"] = "parquet"):
+        """Initialize the FileOutputCallback.
 
         Args:
             dir (str): Directory the CSV file is saved to.
+            file_type (str): The type of file to save the output to.
         """
         if not os.path.exists(dir):
             os.makedirs(dir)
 
-        self.dir = dir
-        self.dir = dir
+        self.file_type = file_type
+
+        if file_type == "parquet":
+            self.path = dir + "/step_results.parquet"
+        elif file_type == "csv":
+            self.path = dir + "/step_results.csv"
+        else:
+            raise ValueError(f"File type {file_type} not supported.")
+
         self.step = 0
-        self.input_tokens = 0
-        self.output_tokens = 0
-        self.start_time = datetime.now()
-        self.step_time = datetime.now()
 
     def on_step_end(self, optimizer):
         """Save prompts and scores to csv.
@@ -125,47 +135,24 @@ def on_step_end(self, optimizer):
         df = pd.DataFrame(
             {
                 "step": [self.step] * len(optimizer.prompts),
-                "input_tokens": [optimizer.meta_llm.input_token_count - self.input_tokens] * len(optimizer.prompts),
-                "output_tokens": [optimizer.meta_llm.output_token_count - self.output_tokens] * len(optimizer.prompts),
-                "time_elapsed": [(datetime.now() - self.step_time).total_seconds()] * len(optimizer.prompts),
+                "input_tokens": [optimizer.meta_llm.input_token_count] * len(optimizer.prompts),
+                "output_tokens": [optimizer.meta_llm.output_token_count] * len(optimizer.prompts),
+                "time": [datetime.now().total_seconds()] * len(optimizer.prompts),
                 "score": optimizer.scores,
                 "prompt": optimizer.prompts,
             }
         )
-        self.step_time = datetime.now()
-        self.input_tokens = optimizer.meta_llm.input_token_count
-        self.output_tokens = optimizer.meta_llm.output_token_count
-
-        if not os.path.exists(self.dir + "step_results.csv"):
-            df.to_csv(self.dir + "step_results.csv", index=False)
-        else:
-            df.to_csv(self.dir + "step_results.csv", mode="a", header=False, index=False)
-
-        return True
-
-    def on_train_end(self, optimizer):
-        """Called at the end of training.
-
-        Args:
-            optimizer: The optimizer object that called the callback.
-        """
-        df = pd.DataFrame(
-            dict(
-                steps=self.step,
-                input_tokens=optimizer.meta_llm.input_token_count,
-                output_tokens=optimizer.meta_llm.output_token_count,
-                time_elapsed=(datetime.now() - self.start_time).total_seconds(),
-                time=datetime.now(),
-                score=np.array(optimizer.scores).mean(),
-                best_prompts=str(optimizer.prompts),
-            ),
-            index=[0],
-        )
 
-        if not os.path.exists(self.dir + "train_results.csv"):
-            df.to_csv(self.dir + "train_results.csv", index=False)
-        else:
-            df.to_csv(self.dir + "train_results.csv", mode="a", header=False, index=False)
+        if self.file_type == "parquet":
+            if self.step == 1:
+                df.to_parquet(self.path, index=False)
+            else:
+                df.to_parquet(self.path, mode="a", index=False)
+        elif self.file_type == "csv":
+            if self.step == 1:
+                df.to_csv(self.path, index=False)
+            else:
+                df.to_csv(self.path, mode="a", header=False, index=False)
 
         return True
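A hedged usage sketch of the renamed callback: only the `FileOutputCallback` constructor comes from the diff, while the optimizer wiring (a `callbacks=[...]` argument on `EvoPromptGA`) is an assumption for illustration.

```python
# Hedged sketch: wiring FileOutputCallback into a run and inspecting its output.
# The optimizer wiring (callbacks=[...]) is an assumption for illustration.
import pandas as pd

from promptolution.callbacks import FileOutputCallback

callback = FileOutputCallback(dir="results", file_type="csv")

# optimizer = EvoPromptGA(..., callbacks=[callback])  # assumed wiring
# optimizer.optimize(n_steps=10)

# After a run, step_results.csv holds one row per prompt per step:
df = pd.read_csv("results/step_results.csv")
print(df.groupby("step")["score"].max())
```

Note that the default pandas/pyarrow writer cannot append to an existing Parquet file, so the `csv` option may be the more robust choice when results are written incrementally.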

promptolution/llms/api_llm.py (6 additions, 4 deletions)

@@ -10,19 +10,20 @@
 import requests
 from langchain_anthropic import ChatAnthropic
 from langchain_community.chat_models.deepinfra import ChatDeepInfra, ChatDeepInfraException
-from langchain_core.messages import HumanMessage
+from langchain_core.messages import HumanMessage, SystemMessage
 from langchain_openai import ChatOpenAI
 
 from promptolution.llms.base_llm import BaseLLM
 
 logger = Logger(__name__)
 
 
-async def invoke_model(prompt, model, semaphore):
+async def invoke_model(prompt, system_prompt, model, semaphore):
     """Asynchronously invoke a language model with retry logic.
 
     Args:
         prompt (str): The input prompt for the model.
+        system_prompt (str): The system prompt for the model.
         model: The language model to invoke.
         semaphore (asyncio.Semaphore): Semaphore to limit concurrent calls.
 
@@ -39,7 +40,7 @@ async def invoke_model(prompt, model, semaphore):
 
     while attempts < max_retries:
         try:
-            response = await model.ainvoke([HumanMessage(content=prompt)])
+            response = await model.ainvoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
             return response.content
         except ChatDeepInfraException as e:
             print(f"DeepInfra error: {e}. Attempt {attempts}/{max_retries}. Retrying in {delay} seconds...")
@@ -80,13 +81,14 @@ def __init__(self, model_id: str, token: str = None, **kwargs: Any):
         else:
             self.model = ChatDeepInfra(model_name=model_id, deepinfra_api_token=token)
 
-    def _get_response(self, prompts: List[str]) -> List[str]:
+    def _get_response(self, prompts: List[str], system_prompts: List[str] = None) -> List[str]:
         """Get responses for a list of prompts in a synchronous manner.
 
         This method includes retry logic for handling connection errors and rate limits.
 
         Args:
             prompts (list[str]): List of input prompts.
+            system_prompts (list[str]): List of system prompts. If not provided, uses default system_prompts
 
         Returns:
             list[str]: List of model responses.
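A self-contained sketch of the semaphore-limited invocation pattern with the new `SystemMessage`; the stub `EchoModel` stands in for a real LangChain chat model so the example runs without API credentials, and the retry logic from the diff is omitted for brevity:

```python
# Self-contained sketch of the core of invoke_model: a semaphore caps concurrency
# and a SystemMessage is sent alongside each HumanMessage.
# EchoModel is a stub so the example runs without API credentials.
import asyncio
from types import SimpleNamespace

from langchain_core.messages import HumanMessage, SystemMessage


class EchoModel:
    """Stand-in for a LangChain chat model; only ainvoke is used here."""

    async def ainvoke(self, messages):
        return SimpleNamespace(content=" | ".join(m.content for m in messages))


async def invoke_model(prompt, system_prompt, model, semaphore):
    async with semaphore:
        response = await model.ainvoke([SystemMessage(content=system_prompt), HumanMessage(content=prompt)])
        return response.content


async def main():
    semaphore = asyncio.Semaphore(2)  # cap concurrent model calls
    model = EchoModel()
    prompts = ["What is 2 + 2?", "Name a prime number."]
    results = await asyncio.gather(
        *(invoke_model(p, "You are a terse assistant.", model, semaphore) for p in prompts)
    )
    print(results)


asyncio.run(main())
```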

promptolution/llms/base_llm.py (21 additions, 5 deletions)

@@ -6,6 +6,8 @@
 
 import numpy as np
 
+from promptolution.templates import DEFAULT_SYS_PROMPT
+
 logger = logging.getLogger(__name__)
 
 
@@ -54,7 +56,7 @@ def update_token_count(self, inputs: List[str], outputs: List[str]):
         self.input_token_count += input_tokens
         self.output_token_count += output_tokens
 
-    def get_response(self, prompts: str) -> str:
+    def get_response(self, prompts: List[str], system_prompts: List[str] = None) -> List[str]:
         """Generate responses for the given prompts.
 
         This method calls the _get_response method to generate responses
@@ -64,31 +66,45 @@ def get_response(self, prompts: str) -> str:
         Args:
             prompts (str or List[str]): Input prompt(s). If a single string is provided,
                 it's converted to a list containing that string.
+            system_prompts (str or List[str]): System prompt(s) to provide context to the model.
 
         Returns:
             List[str]: A list of generated responses, one for each input prompt.
         """
+        if system_prompts is None:
+            system_prompts = DEFAULT_SYS_PROMPT
         if isinstance(prompts, str):
             prompts = [prompts]
-        responses = self._get_response(prompts)
-        self.update_token_count(prompts, responses)
+        if isinstance(system_prompts, str):
+            system_prompts = [system_prompts] * len(prompts)
+        responses = self._get_response(prompts, system_prompts)
+        self.update_token_count(prompts + system_prompts, responses)
 
         return responses
 
+    def set_generation_seed(self, seed: int):
+        """Set the random seed for reproducibility per request.
+
+        Args:
+            seed (int): Random seed value.
+        """
+        pass
+
     @abstractmethod
-    def _get_response(self, prompts: List[str]) -> List[str]:
+    def _get_response(self, prompts: List[str], system_prompts: List[str] = None) -> List[str]:
         """Generate responses for the given prompts.
 
         This method should be implemented by subclasses to define how
         the LLM generates responses.
 
         Args:
             prompts (List[str]): A list of input prompts.
+            system_prompts (List[str]): A list of system prompts to provide context to the model.
 
         Returns:
             List[str]: A list of generated responses corresponding to the input prompts.
         """
-        pass
+        raise NotImplementedError
 
 
 class DummyLLM(BaseLLM):
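To make the new normalization logic concrete, here is a self-contained sketch that mirrors the updated `get_response` flow with a toy subclass; `EchoLLM` and the local `DEFAULT_SYS_PROMPT` are stand-ins, not promptolution code, so the snippet runs without the package installed:

```python
# Mirror of the prompt/system-prompt normalization added to BaseLLM.get_response.
from typing import List

DEFAULT_SYS_PROMPT = "You are a helpful assistant."  # stand-in for promptolution.templates


class EchoLLM:
    def get_response(self, prompts, system_prompts=None) -> List[str]:
        if system_prompts is None:
            system_prompts = DEFAULT_SYS_PROMPT
        if isinstance(prompts, str):
            prompts = [prompts]
        if isinstance(system_prompts, str):
            # A single system prompt is broadcast to every user prompt.
            system_prompts = [system_prompts] * len(prompts)
        return self._get_response(prompts, system_prompts)

    def _get_response(self, prompts, system_prompts):
        return [f"[{s}] {p}" for p, s in zip(prompts, system_prompts)]


llm = EchoLLM()
print(llm.get_response("Summarise the release notes."))  # falls back to DEFAULT_SYS_PROMPT
print(llm.get_response(["a", "b"], system_prompts="Reply in one word."))
```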

promptolution/llms/local_llm.py (6 additions, 2 deletions)

@@ -50,7 +50,7 @@ def __init__(self, model_id: str, batch_size=8):
         self.pipeline.tokenizer.pad_token_id = self.pipeline.tokenizer.eos_token_id
         self.pipeline.tokenizer.padding_side = "left"
 
-    def _get_response(self, prompts: list[str]):
+    def _get_response(self, prompts: list[str], system_prompts: list[str]) -> list[str]:
         """Generate responses for a list of prompts using the local language model.
 
         Args:
@@ -63,8 +63,12 @@ def _get_response(self, prompts: list[str]):
         This method uses torch.no_grad() for inference to reduce memory usage.
         It handles both single and batch inputs, ensuring consistent output format.
         """
+        inputs = []
+        for prompt, sys_prompt in zip(prompts, system_prompts):
+            inputs.append([{"role": "system", "prompt": sys_prompt}, {"role": "user", "prompt": prompt}])
+
         with torch.no_grad():
-            response = self.pipeline(prompts, pad_token_id=self.pipeline.tokenizer.eos_token_id)
+            response = self.pipeline(inputs, pad_token_id=self.pipeline.tokenizer.eos_token_id)
 
         if len(response) != 1:
             response = [r[0] if isinstance(r, list) else r for r in response]
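The diff builds each message as `{"role": ..., "prompt": ...}`, while the conventional schema that Hugging Face chat templates expect is `role`/`content`. A hedged sketch of chat-formatted pipeline input using that conventional schema follows; the model id is illustrative and any chat-tuned model would do:

```python
# Hedged sketch of chat-formatted input for a transformers text-generation pipeline.
from transformers import pipeline

pipe = pipeline("text-generation", model="HuggingFaceTB/SmolLM2-135M-Instruct")

messages = [
    {"role": "system", "content": "You are a concise assistant."},
    {"role": "user", "content": "Give me one prompt-engineering tip."},
]

out = pipe(messages, max_new_tokens=32, pad_token_id=pipe.tokenizer.eos_token_id)
# The pipeline returns the conversation with the assistant's reply appended.
print(out[0]["generated_text"][-1]["content"])
```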

promptolution/llms/vllm.py (13 additions, 4 deletions)

@@ -108,7 +108,7 @@ def __init__(
         # Initialize tokenizer separately for potential pre-processing
         self.tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-    def _get_response(self, inputs: list[str]):
+    def _get_response(self, prompts: list[str], system_prompts: list[str]) -> list[str]:
         """Generate responses for a list of prompts using the vLLM engine.
 
         Args:
@@ -126,13 +126,14 @@ def _get_response(self, inputs: list[str]):
                 [
                     {
                         "role": "system",
-                        "content": "You are a helpful assistant.",
+                        "content": sys_prompt,
                     },
-                    {"role": "user", "content": input},
+                    {"role": "user", "content": prompt},
                 ],
                 tokenize=False,
+                add_generation_prompt=True,
             )
-            for input in inputs
+            for prompt, sys_prompt in zip(prompts, system_prompts)
         ]
 
         # generate responses for self.batch_size prompts at the same time
@@ -161,6 +162,14 @@ def update_token_count(self, inputs: List[str], outputs: List[str]):
         for output in outputs:
             self.output_token_count += len(self.tokenizer.encode(output))
 
+    def set_generation_seed(self, seed):
+        """Set the random seed for text generation.
+
+        Args:
+            seed (int): Random seed for text generation.
+        """
+        self.sampling_params.seed = seed
+
     def __del__(self):
         """Cleanup method to delete the LLM instance and free up GPU memory."""
         del self.llm
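A hedged sketch of the chat-template preprocessing and the per-request seed; the tokenizer id is illustrative, and vLLM itself is only needed for the commented-out part:

```python
# Hedged sketch of the chat-template step used above; tokenizer id is illustrative.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M-Instruct")

prompts = ["What does OPRO optimize?"]
system_prompts = ["You are a concise assistant."]

templated = [
    tokenizer.apply_chat_template(
        [
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": prompt},
        ],
        tokenize=False,
        add_generation_prompt=True,  # append the assistant header so generation starts a fresh reply
    )
    for prompt, sys_prompt in zip(prompts, system_prompts)
]
print(templated[0])

# With vLLM installed, a fixed seed makes sampling reproducible per request,
# mirroring set_generation_seed above:
# from vllm import SamplingParams
# sampling_params = SamplingParams(temperature=0.7, seed=42)
```

Without `add_generation_prompt=True`, many chat templates leave the string ending after the user turn, which can cause the model to continue the user message instead of answering it.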
