Shared LLM client library for OpenAI-compatible APIs.
Add to your pyproject.toml:

```toml
dependencies = [
    "lsimons-llm @ git+https://github.com/lsimons-bot/lsimons-llm.git",
]
```

For async support:

```toml
dependencies = [
    "lsimons-llm[async] @ git+https://github.com/lsimons-bot/lsimons-llm.git",
]
```

When developing lsimons-llm alongside dependent projects, use editable installs to see changes immediately:

```bash
# In each dependent project directory (lsimons-bot, quarto4sbp, lsimons-agent)
uv pip install -e ../lsimons-llm
```

This overrides the git dependency with your local copy. Changes to lsimons-llm are available without pushing.
Projects using this library:
- lsimons-bot - Slack bot (async client)
- quarto4sbp - Quarto document tool (sync client)
- lsimons-agent - AI coding agent (sync client)
Set environment variables:

| Variable | Default | Description |
|---|---|---|
| `LLM_API_KEY` | (required) | API key for authentication |
| `LLM_BASE_URL` | `https://litellm.sbp.ai/v1` | API endpoint |
| `LLM_MODEL` | `azure/gpt-4o-mini` | Model name |
| `LLM_MAX_TOKENS` | `4096` | Maximum tokens per request |
| `LLM_TEMPERATURE` | `0.7` | Sampling temperature |
| `LLM_TIMEOUT` | `120` | Request timeout (seconds) |
| `LLM_MAX_RETRIES` | `3` | Maximum retry attempts |
Quick start:

```python
from lsimons_llm import chat

response = chat([{"role": "user", "content": "Hello!"}])
print(response)
```
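Messages are plain OpenAI-style dicts, so a conversation is just a list you keep growing. A sketch of a follow-up turn, assuming `chat` returns the assistant's reply as a string (as the `print` above suggests):

```python
from lsimons_llm import chat

# Assumption: chat() returns the assistant message content as a string.
messages = [{"role": "user", "content": "Hello!"}]
reply = chat(messages)

# Append the reply so the next call sees the full conversation.
messages.append({"role": "assistant", "content": reply})
messages.append({"role": "user", "content": "Say that again, but shorter."})
print(chat(messages))
```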
Using the client directly:

```python
from lsimons_llm import LLMClient, load_config

config = load_config()
with LLMClient(config) as client:
    response = client.chat([
        {"role": "system", "content": "You are helpful."},
        {"role": "user", "content": "Hello!"},
    ])
    print(response)
```

Async usage (requires the `async` extra):

```python
import asyncio

from lsimons_llm import load_config
from lsimons_llm.async_client import AsyncLLMClient

async def main():
    config = load_config()
    async with AsyncLLMClient(config) as client:
        response = await client.chat([{"role": "user", "content": "Hello!"}])
    print(response)

asyncio.run(main())
```
Overriding configuration:

```python
from lsimons_llm import LLMClient, load_config

# Override via load_config
config = load_config(model="gpt-4", temperature=0.9)

# Or override per-request
client = LLMClient(config)
response = client.chat(
    messages=[{"role": "user", "content": "Hello!"}],
    model="gpt-4-turbo",
    max_tokens=1000,
)
```

Tool calling:

```python
from lsimons_llm import LLMClient, load_config

config = load_config()
client = LLMClient(config)

tools = [{
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Get weather for a location",
        "parameters": {
            "type": "object",
            "properties": {
                "location": {"type": "string"}
            }
        }
    }
}]

# Use chat_raw to get the full response, including tool calls
response = client.chat_raw(
    messages=[{"role": "user", "content": "What's the weather in Amsterdam?"}],
    tools=tools,
)
```
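The shape of `chat_raw`'s return value isn't documented here; if it follows the OpenAI chat-completions schema, the requested tool calls could be read out like this (a sketch with assumed field names):

```python
import json

# Assumption: chat_raw returns an OpenAI-style completion as a dict.
# Adjust the access pattern if it returns an SDK object instead.
message = response["choices"][0]["message"]
for tool_call in message.get("tool_calls") or []:
    name = tool_call["function"]["name"]  # e.g. "get_weather"
    args = json.loads(tool_call["function"]["arguments"])  # arguments arrive JSON-encoded
    print(name, args)
```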
Development:

```bash
# Setup
uv venv && uv sync --all-groups

# Run tests
uv run pytest

# Lint and format
uv run ruff check .
uv run ruff format .

# Type check
uv run basedpyright
```

See LICENSE.md.