Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 163 additions & 0 deletions runtime/ai/analyst_agent_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,169 @@ func TestAnalystOpenRTB(t *testing.T) {
})
}

// TestAnalystCharts exercises the analyst agent's chart creation against the
// OpenRTB test project. Each subtest creates its own session via newEval,
// invokes the analyst agent tool, and inspects the create_chart calls that
// were recorded under the agent call.
func TestAnalystCharts(t *testing.T) {
	// Bring up a runtime instance backed by the OpenRTB project files.
	count, files := testruntime.ProjectOpenRTB(t)
	opts := testruntime.InstanceOptions{
		AIConnector: "openai",
		Files:       files,
		FrontendURL: "https://ui.rilldata.com/-/dashboards/bids_metrics",
	}
	rt, instanceID := testruntime.NewInstanceWithOptions(t, opts)
	testruntime.RequireReconcileState(t, rt, instanceID, count, 0, 0)

	// axisField looks up spec[axis]["field"]. The boolean is false when the
	// axis entry is not an object or its "field" entry is not a string, in
	// which case callers skip the field assertion instead of failing.
	axisField := func(spec map[string]any, axis string) (string, bool) {
		encoding, ok := spec[axis].(map[string]any)
		if !ok {
			return "", false
		}
		name, ok := encoding["field"].(string)
		return name, ok
	}

	t.Run("BasicBarChart", func(t *testing.T) {
		sess := newEval(t, rt, instanceID)

		args := ai.AnalystAgentArgs{
			Prompt:  "Show me the top 10 advertisers by total bids as a bar chart. Use the bids_metrics dataset and the full available time range. Create a single chart and nothing else.",
			Explore: "bids_metrics",
		}
		out, err := sess.CallTool(t.Context(), ai.RoleUser, ai.AnalystAgentName, nil, args)
		require.NoError(t, err)

		calls := findChartCalls(sess, out.Call.ID)
		require.GreaterOrEqual(t, len(calls), 1, "expected at least one create_chart call")

		chart := requireValidChartSpec(t, sess, calls[0], []string{"bar_chart"})
		// Axis fields are only compared when the spec actually carries them.
		if name, ok := axisField(chart, "x"); ok {
			require.Equal(t, "advertiser_name", name)
		}
		if name, ok := axisField(chart, "y"); ok {
			require.Equal(t, "total_bids", name)
		}
	})

	t.Run("TimeSeriesChart", func(t *testing.T) {
		sess := newEval(t, rt, instanceID)

		args := ai.AnalystAgentArgs{
			Prompt:  "Show me the trend of total bids over time as a line chart. Use the bids_metrics dataset and the full available time range. Create a single chart and nothing else.",
			Explore: "bids_metrics",
		}
		out, err := sess.CallTool(t.Context(), ai.RoleUser, ai.AnalystAgentName, nil, args)
		require.NoError(t, err)

		calls := findChartCalls(sess, out.Call.ID)
		require.GreaterOrEqual(t, len(calls), 1, "expected at least one create_chart call")

		// Either a line or an area chart is accepted for a trend request.
		chart := requireValidChartSpec(t, sess, calls[0], []string{"line_chart", "area_chart"})
		if name, ok := axisField(chart, "x"); ok {
			require.Equal(t, "__time", name)
		}
		_, hasTimeGrain := chart["time_grain"]
		require.True(t, hasTimeGrain, "time series chart should have time_grain")
	})

	t.Run("ChartWithDashboardContext", func(t *testing.T) {
		sess := newEval(t, rt, instanceID)

		// Dashboard filter passed as context: auction_type = "First Price".
		filter := &metricsview.Expression{
			Condition: &metricsview.Condition{
				Operator: metricsview.OperatorEq,
				Expressions: []*metricsview.Expression{
					{Name: "auction_type"},
					{Value: "First Price"},
				},
			},
		}
		args := ai.AnalystAgentArgs{
			Prompt:    "Show me total bids by device OS as a bar chart. Create a single chart and nothing else.",
			Explore:   "bids_metrics",
			TimeStart: parseTestTime(t, "2023-09-11T00:00:00Z"),
			TimeEnd:   parseTestTime(t, "2023-09-14T00:00:00Z"),
			Where:     filter,
		}
		out, err := sess.CallTool(t.Context(), ai.RoleUser, ai.AnalystAgentName, nil, args)
		require.NoError(t, err)

		calls := findChartCalls(sess, out.Call.ID)
		require.GreaterOrEqual(t, len(calls), 1, "expected at least one create_chart call")

		// No chart type restriction here; only the time range is checked.
		chart := requireValidChartSpec(t, sess, calls[0], nil)

		// The chart's time range must reflect the time range given as context.
		timeRange, ok := chart["time_range"].(map[string]any)
		require.True(t, ok, "spec must contain time_range object")

		bounds := []struct {
			key      string
			want     string
			typeMsg  string
			valueMsg string
		}{
			{"start", "2023-09-11", "time_range.start must be a string", "time_range.start should honor the provided context"},
			{"end", "2023-09-14", "time_range.end must be a string", "time_range.end should honor the provided context"},
		}
		for _, b := range bounds {
			got, ok := timeRange[b.key].(string)
			require.True(t, ok, b.typeMsg)
			require.Contains(t, got, b.want, b.valueMsg)
		}
	})

	t.Run("ChartsDisabled", func(t *testing.T) {
		sess := newEval(t, rt, instanceID)

		args := ai.AnalystAgentArgs{
			Prompt:        "Show me the top advertisers by total bids. Create a chart if possible.",
			Explore:       "bids_metrics",
			DisableCharts: true,
		}
		out, err := sess.CallTool(t.Context(), ai.RoleUser, ai.AnalystAgentName, nil, args)
		require.NoError(t, err)

		// With charts disabled there must be no create_chart calls at all.
		calls := findChartCalls(sess, out.Call.ID)
		require.Empty(t, calls, "expected no create_chart calls when charts are disabled")

		// The agent must still have produced a result.
		content, err := sess.UnmarshalMessageContent(out.Result)
		require.NoError(t, err)
		require.NotNil(t, content)
	})
}

// findChartCalls returns the session messages that are create_chart tool
// calls whose parent is the call identified by parentID. The selection is
// done by s.Messages with the parent, message type and tool filters.
func findChartCalls(s *ai.Session, parentID string) []*ai.Message {
	chartCalls := s.Messages(
		ai.FilterByParent(parentID),
		ai.FilterByType(ai.MessageTypeCall),
		ai.FilterByTool(ai.CreateChartName),
	)
	return chartCalls
}

// requireValidChartSpec unmarshals the content of a create_chart call message,
// validates the structure of its arguments, and returns the nested spec map so
// callers can make further assertions on it.
//
// The test fails immediately unless all of the following hold:
//   - the unmarshaled content is an ai.CreateChartArgs or a map[string]any;
//   - "chart_type" is a non-empty string and, when validChartTypes is
//     non-empty, is one of its values;
//   - "spec" is an object;
//   - "spec.metrics_view" is a non-empty string;
//   - "spec" has a "time_range" key (its value is not inspected here).
//
// Pass a nil or empty validChartTypes to accept any chart type.
func requireValidChartSpec(t *testing.T, s *ai.Session, chartCall *ai.Message, validChartTypes []string) map[string]any {
	t.Helper()

	rawArgs, err := s.UnmarshalMessageContent(chartCall)
	require.NoError(t, err)

	// CreateChartArgs is a named type (map[string]any), so try both type assertions
	var args map[string]any
	switch v := rawArgs.(type) {
	case ai.CreateChartArgs:
		args = map[string]any(v)
	case map[string]any:
		args = v
	default:
		// Failf, not Fail: Fail does not format its failure message, so the
		// %T verb would be printed literally instead of the actual type.
		require.Failf(t, "unexpected create_chart args type", "expected CreateChartArgs or map[string]any, got %T", rawArgs)
	}

	chartType, ok := args["chart_type"].(string)
	require.True(t, ok, "chart_type must be a string")
	require.NotEmpty(t, chartType)
	if len(validChartTypes) > 0 {
		require.Contains(t, validChartTypes, chartType, "chart_type %q not in expected types %v", chartType, validChartTypes)
	}

	spec, ok := args["spec"].(map[string]any)
	require.True(t, ok, "spec must be an object")

	metricsView, ok := spec["metrics_view"].(string)
	require.True(t, ok, "spec.metrics_view must be a string")
	require.NotEmpty(t, metricsView)

	// Only the presence of time_range is checked; callers validate its contents.
	_, hasTimeRange := spec["time_range"]
	require.True(t, hasTimeRange, "spec must contain time_range")

	return spec
}

func parseTestTime(tst *testing.T, t string) time.Time {
ts, err := time.Parse(time.RFC3339, t)
require.NoError(tst, err)
Expand Down
74 changes: 74 additions & 0 deletions runtime/ai/evals/AnalystCharts_BasicBarChart.completions.yaml

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions runtime/ai/evals/AnalystCharts_BasicBarChart.messages.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
- id: e14fd200-1410-477f-bebe-a6ed9f3263a1
parent_id: ""
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:47.760676+05:30
index: 0
role: user
type: call
tool: analyst_agent
content_type: json
content: '{"prompt":"Show me the top 10 advertisers by total bids as a bar chart. Use the bids_metrics dataset and the full available time range. Create a single chart and nothing else.","explore":"bids_metrics","time_start":"0001-01-01T00:00:00Z","time_end":"0001-01-01T00:00:00Z","comparison_time_start":"0001-01-01T00:00:00Z","comparison_time_end":"0001-01-01T00:00:00Z","disable_charts":false,"is_report":false}'
- id: 70b66cca-e5d5-4518-b9ac-4e40898496fb
parent_id: e14fd200-1410-477f-bebe-a6ed9f3263a1
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:47.760701+05:30
index: 1
role: assistant
type: call
tool: query_metrics_view_summary
content_type: json
content: '{"metrics_view":"bids_metrics"}'
- id: 3f3e92d6-5906-4742-9f67-0ab402afe39b
parent_id: 70b66cca-e5d5-4518-b9ac-4e40898496fb
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:47.778892+05:30
index: 2
role: assistant
type: result
tool: query_metrics_view_summary
content_type: json
content: '{"data":{"dimensions":[{"data_type":"CODE_TIMESTAMP","max_value":"2023-09-16T00:00:00Z","min_value":"2023-09-09T00:00:00Z","name":"__time"},{"data_type":"CODE_STRING","example_value":"crsespanol.org","max_value":"zyp.test","min_value":"1800petmeds.com","name":"adomain"},{"data_type":"CODE_STRING","example_value":"CRS","max_value":"vertex","min_value":"6sense","name":"advertiser_name"},{"data_type":"CODE_STRING","example_value":"App","max_value":"Site","min_value":"App","name":"app_or_site"},{"data_type":"CODE_STRING","example_value":"Not Available","max_value":"zoosk.com","min_value":"1point3acres.com","name":"app_site_domain"},{"data_type":"CODE_STRING","example_value":"Web Browser \u0026 Explorer","max_value":"라인웹툰기본","min_value":"$''more","name":"app_site_name"},{"data_type":"CODE_STRING","example_value":"First Price","max_value":"Second Price","min_value":"First Price","name":"auction_type"},{"data_type":"CODE_STRING","example_value":"0.30-0.35","max_value":"9.50-10.00","min_value":"0.00-0.05","name":"bid_floor_bucket"},{"data_type":"CODE_STRING","example_value":"CRS_BrandBoom","max_value":"vertex_PrimeProfit","min_value":"#N/A","name":"campaign_name"},{"data_type":"CODE_STRING","example_value":"BANNER","max_value":"VIDEO","min_value":"BANNER","name":"creative_type"},{"data_type":"CODE_STRING","example_value":"iOS","max_value":"webOS","min_value":"Android","name":"device_os"},{"data_type":"CODE_STRING","example_value":"16","max_value":"XP","min_value":"10","name":"device_osv"},{"data_type":"CODE_STRING","example_value":"USA/NM","max_value":"wy","min_value":"000/00","name":"device_region"},{"data_type":"CODE_STRING","example_value":"Mobile/Tablet","max_value":"Tablet","min_value":"Connected Device","name":"device_type"},{"data_type":"CODE_STRING","example_value":"Not Interstitial","max_value":"Not Interstitial","min_value":"Interstitial/Full 
Screen","name":"interstitial"},{"data_type":"CODE_STRING","example_value":"CRS_BrandBoom_34297","max_value":"vertex_PrimeProfit_33969","min_value":"#N/A","name":"line_item_name"}],"time_range":{"data_type":"CODE_TIMESTAMP","max_value":"2023-09-16T00:00:00Z","min_value":"2023-09-09T00:00:00Z","name":"__time"}}}'
- id: 77814456-44e7-4452-8325-87caa14317d8
parent_id: e14fd200-1410-477f-bebe-a6ed9f3263a1
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:47.778896+05:30
index: 3
role: assistant
type: call
tool: get_metrics_view
content_type: json
content: '{"metrics_view":"bids_metrics"}'
- id: 92054cb4-e65c-456b-a995-ef0ecf0819b8
parent_id: 77814456-44e7-4452-8325-87caa14317d8
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:47.779238+05:30
index: 4
role: assistant
type: result
tool: get_metrics_view
content_type: json
content: '{"spec":{"connector":"duckdb","dimensions":[{"column":"__time","dataType":{"code":"CODE_TIMESTAMP","nullable":true},"displayName":"__time","name":"__time","smallestTimeGrain":"TIME_GRAIN_SECOND","type":"DIMENSION_TYPE_TIME"},{"column":"adomain","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Adomain","name":"adomain","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"advertiser_name","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Advertiser Name","name":"advertiser_name","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"app_or_site","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"App Or Site","name":"app_or_site","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"app_site_domain","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"App Site Domain","name":"app_site_domain","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"app_site_name","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"App Site Name","name":"app_site_name","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"auction_type","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Auction Type","name":"auction_type","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"bid_floor_bucket","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Bid Floor Bucket","name":"bid_floor_bucket","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"campaign_name","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Campaign Name","name":"campaign_name","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"creative_type","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Creative Type","name":"creative_type","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"device_os","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Device Os","name":"device_os","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"device_osv","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Device 
Osv","name":"device_osv","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"device_region","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Device Region","name":"device_region","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"device_type","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Device Type","name":"device_type","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"interstitial","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Interstitial","name":"interstitial","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"line_item_name","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Line Item Name","name":"line_item_name","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"placement_type","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Placement Type","name":"placement_type","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"platform_browser","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Platform Browser","name":"platform_browser","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"player_size","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Player Size","name":"player_size","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"privacy","dataType":{"code":"CODE_BOOL","nullable":true},"displayName":"Privacy","name":"privacy","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"pub_name","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Pub Name","name":"pub_name","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"sdk","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Sdk","name":"sdk","type":"DIMENSION_TYPE_CATEGORICAL"},{"column":"video_activity","dataType":{"code":"CODE_STRING","nullable":true},"displayName":"Video Activity","name":"video_activity","type":"DIMENSION_TYPE_CATEGORICAL"}],"displayName":"Bids Metrics","measures":[{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"Total Spend","displayName":"Overall 
Spend","expression":"sum(media_spend_usd)/1000","formatPreset":"currency_usd","name":"overall_spend","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_INT128","nullable":true},"description":"Total Bids","displayName":"Total Bids","expression":"sum(bid_cnt)","formatPreset":"humanize","name":"total_bids","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_INT128","nullable":true},"description":"Total Impressions","displayName":"Impressions","expression":"sum(imp_cnt)","formatPreset":"humanize","name":"impressions","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"Impressions / Bids","displayName":"Win Rate","expression":"sum(imp_cnt)*1.0/sum(bid_cnt)","formatPreset":"percentage","name":"win_rate","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_INT128","nullable":true},"description":"Total Clicks","displayName":"Clicks","expression":"sum(click_reg_cnt)","formatPreset":"humanize","name":"clicks","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"Click Through Rate","displayName":"Ctr","expression":"sum(click_reg_cnt)*1.0/nullif(sum(imp_cnt),0)","formatPreset":"percentage","name":"ctr","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_INT128","nullable":true},"description":"Total Video Starts","displayName":"Video Starts","expression":"sum(video_start_cnt)","formatPreset":"humanize","name":"video_starts","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_INT128","nullable":true},"description":"Total Video Completes","displayName":"Video Completes","expression":"sum(video_complete_cnt)","formatPreset":"humanize","name":"video_completes","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"Video Completion Rate","displayName":"Video Completion 
Rate","expression":"sum(video_complete_cnt)*1.0/sum(video_start_cnt)","formatPreset":"percentage","name":"video_completion_rate","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"Average Bid Price","displayName":"Avg Bid Price","expression":"sum(bid_price_usd)*1.0/sum(bid_cnt)/1000","formatPreset":"currency_usd","name":"avg_bid_price","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"eCPM","displayName":"Ecpm","expression":"sum(media_spend_usd)*1.0/1000/nullif(sum(imp_cnt),0)","formatPreset":"currency_usd","name":"ecpm","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"description":"Average Bid Floor","displayName":"Avg Bid Floor","expression":"sum(bid_floor)*1.0/sum(has_bid_floor_cnt)","formatPreset":"currency_usd","name":"avg_bid_floor","type":"MEASURE_TYPE_SIMPLE"},{"dataType":{"code":"CODE_FLOAT64","nullable":true},"displayName":"Bids 7Day Rolling Avg","expression":"AVG(total_bids)","name":"bids_7day_rolling_avg","referencedMeasures":["total_bids"],"requiredDimensions":[{"name":"__time"}],"type":"MEASURE_TYPE_DERIVED","window":{"frameExpression":"RANGE BETWEEN INTERVAL 6 DAY PRECEDING AND CURRENT ROW","orderBy":[{"name":"__time"}],"partition":true}}],"model":"bids_data","smallestTimeGrain":"TIME_GRAIN_HOUR","table":"bids_data","timeDimension":"__time"}}'
- id: bcf19a04-7c5c-4b50-b67b-d25bc02bc91c
parent_id: e14fd200-1410-477f-bebe-a6ed9f3263a1
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:50.097592+05:30
index: 5
role: assistant
type: call
tool: create_chart
content_type: json
content: '{"chart_type":"bar_chart","spec":{"color":"primary","metrics_view":"bids_metrics","time_range":{"end":"2023-09-16T00:00:00Z","start":"2023-09-09T00:00:00Z"},"x":{"field":"advertiser_name","limit":10,"showNull":true,"sort":"-y","type":"nominal"},"y":{"field":"total_bids","type":"quantitative","zeroBasedOrigin":true}}}'
- id: ad0a3b25-12a1-4aa0-996f-16dea90e10b9
parent_id: bcf19a04-7c5c-4b50-b67b-d25bc02bc91c
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:50.0996+05:30
index: 6
role: assistant
type: result
tool: create_chart
content_type: json
content: '{"chart_type":"bar_chart","spec":{"color":"primary","metrics_view":"bids_metrics","time_range":{"end":"2023-09-16T00:00:00Z","start":"2023-09-09T00:00:00Z"},"x":{"field":"advertiser_name","limit":10,"showNull":true,"sort":"-y","type":"nominal"},"y":{"field":"total_bids","type":"quantitative","zeroBasedOrigin":true}},"message":"Chart created successfully: bar_chart"}'
- id: 581de305-2676-4c39-9482-351b81ac01b6
parent_id: e14fd200-1410-477f-bebe-a6ed9f3263a1
session_id: d4a42618-c6ff-4f13-8cc3-cf1c257d3938
time: 2026-02-26T02:46:53.740663+05:30
index: 7
role: user
type: result
tool: analyst_agent
content_type: json
content: '{"response":"```chart\n{\"chart_type\":\"bar_chart\",\"spec\":{\"color\":\"primary\",\"metrics_view\":\"bids_metrics\",\"time_range\":{\"end\":\"2023-09-16T00:00:00Z\",\"start\":\"2023-09-09T00:00:00Z\"},\"x\":{\"field\":\"advertiser_name\",\"limit\":10,\"showNull\":true,\"sort\":\"-y\",\"type\":\"nominal\"},\"y\":{\"field\":\"total_bids\",\"type\":\"quantitative\",\"zeroBasedOrigin\":true}}}\n```"}'
Loading
Loading