Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions wrangles/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,62 @@ def ai(
summary: bool = False,
**kwargs
) -> Union[dict, list]:
"""
type: object
description: Generate structured AI output for each recipe row.
additionalProperties: false
required:
- api_key
- output
properties:
api_key:
type: string
description: OpenAI-compatible API key.
input:
type:
- string
- array
description: Column(s) to concatenate into the prompt (defaults to all columns).
output:
type:
- string
- object
- array
description: Target schema; string/array shorthands are expanded automatically.
model:
type: string
description: Name of the OpenAI Responses model to call (e.g. gpt-5-mini).
threads:
type: integer
description: Maximum concurrent requests (default 20).
timeout:
type: integer
description: Per-request timeout in seconds.
retries:
type: integer
description: Number of retry attempts on failure.
messages:
type: array
description: Optional extra system/user messages included in the AI call.
url:
type: string
description: Override for the OpenAI-compatible endpoint.
strict:
type: boolean
description: Enforce JSON-schema validation on the response.
web_search:
type: boolean
description: When true, fetch supplemental DuckDuckGo context for each row before calling the model.
reasoning:
type: object
description: Responses API reasoning options (forwarded verbatim).
previous_response:
type: boolean
description: Chain responses by reusing previous_response_id for field-by-field calls.
summary:
type: boolean
description: Request summary text to be merged into the output.
"""

input_was_scalar = not isinstance(input, list)
input_list = [input] if input_was_scalar else input
Expand Down
109 changes: 54 additions & 55 deletions wrangles/recipe_wrangles/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,62 +24,61 @@ def ai(
**kwargs
) -> _pd.DataFrame:
"""
Generate one or more structured outputs from every row using the inner `wrangles.generate.ai` helper.

Recipe:
---
```yaml
wrangles:
- generate.ai:
input:
- product_name
- features
output:
short_description:
type: string
description: Short marketing copy.
category:
type: string
description: Product category.
api_key: ${OPENAI_API_KEY}
model: gpt-5-nano
web_search: false
strict: true
```

Python:
```python
import wrangles
import pandas as pd

data = pd.DataFrame({
"product_name": ["Widget One"],
"features": ["Lightweight; durable"]
})

df = wrangles.recipe.run(
recipe_path="recipe.wrgl.yml",
dataframe=data
)
```

:param df: Source DataFrame passed from the recipe runner.
:param api_key: OpenAI-compatible API key used by the inner generate function.
:param output: Schema describing the keys to create (string/list/dict mirroring recipe syntax).
:param input: Optional list of column names to combine into the prompt payload (defaults to all columns).
:param model: Name of the OpenAI Responses model to call.
:param threads: Maximum concurrent requests to issue (fan-out via ThreadPoolExecutor).
:param timeout: Seconds to wait on each request before timing out.
:param retries: Number of retries on non-success responses.
:param messages: Optional system/user message list forwarded to the inner AI call.
:param url: Override of the OpenAI-compatible endpoint.
:param strict: Forwarded to the JSON schema formatter to enforce strict validation.
:param web_search: When true, fetches supplemental DuckDuckGo context per row before calling the model.
:param reasoning: Optional reasoning configuration forwarded to the OpenAI Responses API.
:param kwargs: Any additional keyword arguments supported by `wrangles.generate.ai`.
:return: The original DataFrame with new columns injected (and `source` when present).
type: object
description: Generate structured AI output for each recipe row.
additionalProperties: false
required:
- api_key
- output
properties:
api_key:
type: string
description: OpenAI-compatible API key.
input:
type:
- string
- array
description: Column(s) to concatenate into the prompt (defaults to all columns).
output:
type:
- string
- object
- array
description: Target schema; string/array shorthands are expanded automatically.
model:
type: string
description: Name of the OpenAI Responses model to call (e.g. gpt-5-mini).
threads:
type: integer
description: Maximum concurrent requests (default 20).
timeout:
type: integer
description: Per-request timeout in seconds.
retries:
type: integer
description: Number of retry attempts on failure.
messages:
type: array
description: Optional extra messages forwarded to the inner generate helper.
url:
type: string
description: Override for the OpenAI-compatible endpoint.
strict:
type: boolean
description: Enforce JSON-schema validation on the response.
web_search:
type: boolean
description: When true, fetch supplemental DuckDuckGo context for each row before calling the model.
reasoning:
type: object
description: Responses API reasoning options (forwarded verbatim).
previous_response:
type: boolean
description: Chain responses by reusing previous_response_id for field-by-field calls.
summary:
type: boolean
description: Request summary text to be merged into the output.
"""

if input is not None:
if not isinstance(input, list):
input = [input]
Expand Down