Skip to content

Commit 183fdd0

Browse files
authored
contribute a prompt benchmarking script (#292)
- contribute a prompt benchmarking script This contributes a prompt benchmarking script; it gets label information for ~100 existing issues - assumed to be correctly labelled - and compares them against the labels predicted by the prompt. This will allow us to benchmark prompt changes to know whether we're improving things or not. Note that there are no prompt improvements in this PR (just some updates for area label changes). The benchmarking can be re-run via `dart tool/bench.dart`. --- - [x] I’ve reviewed the contributor guide and applied the relevant portions to this PR. <details> <summary>Contribution guidelines:</summary><br> - See our [contributor guide](https://github.com/dart-lang/.github/blob/main/CONTRIBUTING.md) for general expectations for PRs. - Larger or significant changes should be discussed in an issue before creating a PR. - Contributions to our repos should follow the [Dart style guide](https://dart.dev/guides/language/effective-dart) and use `dart format`. - Most changes should add an entry to the changelog and may need to [rev the pubspec package version](https://github.com/dart-lang/sdk/blob/main/docs/External-Package-Maintenance.md#making-a-change). - Changes to packages require [corresponding tests](https://github.com/dart-lang/.github/blob/main/CONTRIBUTING.md#Testing). Note that many Dart repos have a weekly cadence for reviewing PRs - please allow for some latency before initial review feedback. </details>
1 parent f7191b7 commit 183fdd0

File tree

9 files changed

+347
-65
lines changed

9 files changed

+347
-65
lines changed

pkgs/sdk_triage_bot/bin/triage.dart

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ void main(List<String> arguments) async {
4444

4545
var issue = results.rest.first;
4646
final dryRun = results.flag('dry-run');
47-
final force = results.flag('force');
47+
final forceTriage = results.flag('force');
4848

4949
// Accept either an issue number or a url (i.e.,
5050
// https://github.com/dart-lang/sdk/issues/55816).
@@ -69,7 +69,7 @@ void main(List<String> arguments) async {
6969
await triage(
7070
int.parse(issue),
7171
dryRun: dryRun,
72-
force: force,
72+
forceTriage: forceTriage,
7373
githubService: githubService,
7474
geminiService: geminiService,
7575
logger: Logger(),

pkgs/sdk_triage_bot/lib/src/gemini.dart

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,22 +6,26 @@ import 'package:google_generative_ai/google_generative_ai.dart';
66
import 'package:http/http.dart' as http;
77

88
class GeminiService {
9+
// gemini-1.5-pro-latest, gemini-1.5-flash-latest, gemini-1.0-pro-latest
10+
static const String classificationModel = 'models/gemini-1.5-flash-latest';
11+
static const String summarizationModel = 'models/gemini-1.5-flash-latest';
12+
913
final GenerativeModel _summarizeModel;
1014
final GenerativeModel _classifyModel;
1115

1216
GeminiService({
1317
required String apiKey,
1418
required http.Client httpClient,
1519
}) : _summarizeModel = GenerativeModel(
16-
model: 'models/gemini-1.5-flash-latest',
20+
model: summarizationModel,
1721
apiKey: apiKey,
1822
generationConfig: GenerationConfig(temperature: 0.2),
1923
httpClient: httpClient,
2024
),
2125
_classifyModel = GenerativeModel(
22-
// TODO(devconcarew): substitute our tuned model
26+
// TODO(devoncarew): substitute our tuned model
2327
// model: 'tunedModels/autotune-sdk-triage-tuned-prompt-1l96e2n',
24-
model: 'models/gemini-1.5-flash-latest',
28+
model: classificationModel,
2529
apiKey: apiKey,
2630
generationConfig: GenerationConfig(temperature: 0.2),
2731
httpClient: httpClient,
@@ -45,6 +49,6 @@ class GeminiService {
4549

4650
Future<String> _query(GenerativeModel model, String prompt) async {
4751
final response = await model.generateContent([Content.text(prompt)]);
48-
return response.text!.trim();
52+
return (response.text ?? '').trim();
4953
}
5054
}

pkgs/sdk_triage_bot/lib/src/github.dart

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -43,10 +43,15 @@ class GithubService {
4343

4444
Future<FetchIssuesResult> fetchIssues(
4545
String areaLabel, {
46+
required bool includeClosed,
4647
String? cursor,
4748
}) async {
4849
final result = await _query(QueryOptions(
49-
document: gql(_buildQueryString(areaLabel, cursor: cursor)),
50+
document: gql(_buildQueryString(
51+
areaLabel,
52+
cursor: cursor,
53+
includeClosed: includeClosed,
54+
)),
5055
fetchPolicy: FetchPolicy.noCache,
5156
parserFn: (data) {
5257
final search = data['search'] as Map<String, dynamic>;
@@ -104,41 +109,46 @@ Future<QueryResult<T>> _query<T>(QueryOptions<T> options) {
104109
return _client.query<T>(options);
105110
}
106111

107-
String _buildQueryString(String areaLabel, {String? cursor}) {
108-
final cursorRef = cursor == null ? null : '"$cursor"';
112+
String _buildQueryString(
113+
String areaLabel, {
114+
required bool includeClosed,
115+
String? cursor,
116+
}) {
117+
final cursorTerm = cursor == null ? '' : 'after: "$cursor"';
118+
final isOpen = includeClosed ? '' : 'is:open';
109119

110120
return '''{
111-
search(
112-
query: "repo:dart-lang/sdk is:issue is:open label:$areaLabel"
113-
type: ISSUE
114-
first: 100,
115-
after: $cursorRef
116-
) {
117-
edges {
118-
node {
119-
... on Issue {
120-
title
121-
number
122-
state
123-
bodyText
124-
labels(first: 10) {
125-
edges {
126-
node {
127-
name
121+
search(
122+
query: "repo:dart-lang/sdk is:issue $isOpen label:$areaLabel"
123+
type: ISSUE
124+
first: 100
125+
$cursorTerm
126+
) {
127+
edges {
128+
node {
129+
... on Issue {
130+
title
131+
number
132+
state
133+
bodyText
134+
labels(first: 10) {
135+
edges {
136+
node {
137+
name
138+
}
128139
}
129140
}
130141
}
131142
}
132143
}
144+
pageInfo {
145+
endCursor
146+
startCursor
147+
hasNextPage
148+
hasPreviousPage
149+
}
133150
}
134-
pageInfo {
135-
endCursor
136-
startCursor
137-
hasNextPage
138-
hasPreviousPage
139-
}
140-
}
141-
}''';
151+
}''';
142152
}
143153

144154
final GraphQLClient _client = _initGraphQLClient();
@@ -158,4 +168,17 @@ extension IssueExtension on Issue {
158168
///
159169
/// Note that the original text for the issue is returned in the `body` field.
160170
bool get hasComments => commentsCount > 0;
171+
172+
/// Returns whether this issue has already been triaged.
173+
///
174+
/// Generally, this means the issue has had an `area-` label applied to
175+
/// it, has had `needs-info` applied to it, or was closed.
176+
bool get alreadyTriaged {
177+
if (isClosed) return true;
178+
179+
return labels.any((label) {
180+
final name = label.name;
181+
return name == 'needs-info' || name.startsWith('area-');
182+
});
183+
}
161184
}

pkgs/sdk_triage_bot/lib/src/prompts.dart

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,11 @@ area-infrastructure: Use area-infrastructure for SDK infrastructure issues, like
2828
area-intellij: Tracking issues for the Dart IntelliJ plugin.
2929
area-language: Dart language related items (some items might be better tracked at github.com/dart-lang/language).
3030
area-meta: Cross-cutting, high-level issues (for tracking many other implementation issues, ...).
31+
area-native-interop: Used for native interop related issues, including FFI.
3132
area-pkg: Used for miscellaneous pkg/ packages not associated with specific area- teams.
3233
area-sdk: Use area-sdk for general purpose SDK issues (packaging, distribution, …).
3334
area-test: Cross-cutting test issues (use area- labels for specific failures; not used for package:test).
34-
area-vm: Use area-vm for VM related issues, including code coverage, FFI, and the AOT and JIT backends.
35+
area-vm: Use area-vm for VM related issues, including code coverage, and the AOT and JIT backends.
3536
area-web: Use area-web for Dart web related issues, including the DDC and dart2js compilers and JS interop.
3637
3738
Don't make up a new area.

pkgs/sdk_triage_bot/lib/triage.dart

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ final sdkSlug = RepositorySlug('dart-lang', 'sdk');
1717
Future<void> triage(
1818
int issueNumber, {
1919
bool dryRun = false,
20-
bool force = false,
20+
bool forceTriage = false,
2121
required GithubService githubService,
2222
required GeminiService geminiService,
2323
required Logger logger,
@@ -63,21 +63,22 @@ ${trimmedBody(comment.body ?? '')}
6363
}
6464

6565
// decide if we should triage
66-
final alreadyTriaged = labels.any((l) => l.startsWith('area-'));
67-
if (alreadyTriaged && !force) {
68-
logger.log('Exiting (issue is already triaged).');
69-
return;
66+
if (!forceTriage) {
67+
if (issue.alreadyTriaged) {
68+
logger.log('Exiting (issue is already triaged).');
69+
return;
70+
}
7071
}
7172

7273
// ask for the summary
7374
var bodyTrimmed = trimmedBody(issue.body);
7475
String summary;
7576
try {
76-
// Failures here can include things like gemini safety issues, ...
7777
summary = await geminiService.summarize(
7878
summarizeIssuePrompt(title: issue.title, body: bodyTrimmed),
7979
);
8080
} on GenerativeAIException catch (e) {
81+
// Failures here can include things like gemini safety issues, ...
8182
stderr.writeln('gemini: $e');
8283
exit(1);
8384
}
@@ -88,21 +89,21 @@ ${trimmedBody(comment.body ?? '')}
8889
logger.log('');
8990

9091
// ask for the 'area-' classification
91-
List<String> classification;
92+
List<String> newLabels;
9293
try {
93-
// Failures here can include things like gemini safety issues, ...
94-
classification = await geminiService.classify(
94+
newLabels = await geminiService.classify(
9595
assignAreaPrompt(
9696
title: issue.title, body: bodyTrimmed, lastComment: lastComment),
9797
);
9898
} on GenerativeAIException catch (e) {
99+
// Failures here can include things like gemini safety issues, ...
99100
stderr.writeln('gemini: $e');
100101
exit(1);
101102
}
102103

103104
logger.log('## gemini classification');
104105
logger.log('');
105-
logger.log(classification.toString());
106+
logger.log(newLabels.toString());
106107
logger.log('');
107108

108109
if (dryRun) {
@@ -113,7 +114,7 @@ ${trimmedBody(comment.body ?? '')}
113114
// perform changes
114115
logger.log('## github comment');
115116
logger.log('');
116-
logger.log('labels: $classification');
117+
logger.log('labels: $newLabels');
117118
logger.log('');
118119
logger.log(summary);
119120

@@ -122,17 +123,16 @@ ${trimmedBody(comment.body ?? '')}
122123
// create github comment
123124
await githubService.createComment(sdkSlug, issueNumber, comment);
124125

125-
final allLabels = await githubService.getAllLabels(sdkSlug);
126-
var newLabels = filterExistingLabels(allLabels, classification);
127-
if (newLabels.any((l) => l.startsWith('area-'))) {
128-
newLabels.add('triage-automation');
126+
final allRepoLabels = (await githubService.getAllLabels(sdkSlug)).toSet();
127+
final labelAdditions = newLabels.toSet().union(allRepoLabels).toList()
128+
..sort();
129+
if (labelAdditions.isNotEmpty) {
130+
labelAdditions.add('triage-automation');
129131
}
130-
// remove any duplicates
131-
newLabels = newLabels.toSet().toList();
132132

133133
// apply github labels
134134
if (newLabels.isNotEmpty) {
135-
await githubService.addLabelsToIssue(sdkSlug, issueNumber, newLabels);
135+
await githubService.addLabelsToIssue(sdkSlug, issueNumber, labelAdditions);
136136
}
137137

138138
logger.log('');

pkgs/sdk_triage_bot/test/triage_test.dart

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ void main() {
6363

6464
await triage(
6565
mockIssueNumber,
66-
force: true,
66+
forceTriage: true,
6767
githubService: githubService,
6868
geminiService: geminiService,
6969
logger: TestLogger(),

0 commit comments

Comments
 (0)