Prompt engineering toolkit for building, testing, and iterating on LLM prompts.
I kept copy-pasting prompts between scripts and losing track of which version worked best. Built this to fix that.
- Templates - reusable prompts with variables, optional blocks, and message formatting
- Runner - execute templates against any LLM with retry logic
- Testing - unit test your prompts (keyword checks, length limits, custom assertions)
- Message formatting - auto-convert System: / User: / Assistant: blocks to chat format
from promptlab import PromptTemplate
template = PromptTemplate(
template="System: You are a {role}.\n{?context}Context: {context}\n{/context}User: {question}",
name="qa-prompt",
version="2",
default_vars={"role": "helpful assistant"},
)
# simple render
prompt = template.render(question="What is RAG?")
# with optional context
prompt = template.render(question="What is RAG?", context="RAG is...")
# get chat messages
messages = template.to_messages(question="What is RAG?")
# [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}]
# check what variables are needed
print(template.required_variables) # ["question"]
print(template.variables)  # ["context", "question", "role"]

from promptlab import PromptRunner
runner = PromptRunner(llm_fn=my_api_call, max_retries=3)
result = runner.run(template, question="What is Python?")
print(result.response)
print(f"Took {result.latency_ms:.0f}ms, {result.attempts} attempt(s)")
# batch runs
results = runner.run_batch(template, [
{"question": "What is Python?"},
{"question": "What is JavaScript?"},
])

from promptlab import PromptTestSuite, PromptTest
suite = PromptTestSuite(template, llm_fn=my_api)
suite.add_test(PromptTest(
name="mentions key concepts",
variables={"question": "Explain RAG"},
expected_contains=["retrieval", "generation"],
expected_not_contains=["error", "I don't know"],
max_length=2000,
))
suite.add_test(PromptTest(
name="stays on topic",
variables={"question": "Explain RAG"},
checks=[lambda r: "retrieval" in r.lower()],
))
result = suite.run()
print(result.summary())
# Prompt Tests: 2/2 passed (100%)
# [PASS] mentions key concepts
# [PASS] stays on topic

pytest -v