Skip to content

Commit 12df9fb

Browse files
authored
feat: add spam detection (#33)
1 parent 29b2ac2 commit 12df9fb

File tree

6 files changed

+211
-40
lines changed

6 files changed

+211
-40
lines changed

.env.example

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
1+
# The access token with permissions to the repository, where the webhook is installed,
2+
# as well as to the target repository, where the issues marked as spam will be transferred to
13
NUXT_GITHUB_TOKEN=
24
NUXT_WEBHOOK_GITHUB_SECRET_KEY=
5+
# The node_id of the repository that issues marked as spam are transferred to
6+
NUXT_GITHUB_TARGET_REPOSITORY_NODE_ID=

nuxt.config.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ export default defineNuxtConfig({
55
runtimeConfig: {
66
github: {
77
token: process.env.NUXT_GITHUB_TOKEN || '',
8+
targetRepositoryNodeId: process.env.NUXT_GITHUB_TARGET_REPOSITORY_NODE_ID || '',
89
},
910
},
1011
routeRules: {

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,8 @@
1515
"@nuxt/eslint": "^0.6.0",
1616
"@nuxthub/core": "^0.7.26",
1717
"nuxt": "^3.13.2",
18-
"nuxt-webhook-validators": "^0.1.1"
18+
"nuxt-webhook-validators": "^0.1.1",
19+
"zod": "^3.23.8"
1920
},
2021
"devDependencies": {
2122
"@nuxt/eslint-config": "^0.6.0",

pnpm-lock.yaml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

server/api/webhook.post.ts

Lines changed: 144 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,51 +1,104 @@
1+
import { z } from 'zod'
2+
import { isError } from 'h3'
3+
14
export default defineEventHandler(async (event) => {
5+
const runtimeConfig = useRuntimeConfig(event)
6+
27
const isValidWebhook = await isValidGithubWebhook(event)
38

49
if (!import.meta.dev && !isValidWebhook) {
510
throw createError({ statusCode: 401, message: 'Unauthorized: webhook is not valid' })
611
}
712

813
// TODO: implement as a GitHub app
9-
const { action, issue, repository /* installation */ } = await readBody(event)
14+
const { action, issue, repository /* installation */ } = await readValidatedBody(event, githubWebhookSchema.parse)
1015
if (action !== 'opened') return null
1116

12-
const body = (issue.body || '')
13-
.replace(/<!--.*?-->/g, ' ')
14-
.replace(/https:\/\/stackblitz.com\/github\/nuxt\/starter/g, '')
15-
16-
if (body.split(' ').length > 200) return null
17-
1817
const ai = hubAI()
1918

20-
const res = await ai.run('@hf/nousresearch/hermes-2-pro-mistral-7b', {
21-
messages: [
22-
{
23-
role: 'system', content: `You are a kind, helpful open-source maintainer that answers in JSON. Here\`s the json schema you must adhere to:\n<schema>\n${JSON.stringify(responseSchema)}\n</schema>\n`,
24-
},
25-
{ role: 'user', content: `# ${issue.title}\n\n${issue.body}` },
26-
],
27-
}) as { response?: string, tool_calls?: { name: string, arguments: unknown }[] }
28-
const answer = res.response?.trim() || ''
19+
let analyzedIssue: z.infer<typeof analyzedIssueSchema> | null = null
2920

21+
// Run the AI model and parse the response
3022
try {
31-
const value = JSON.parse(answer) as Response
32-
33-
const $github = useGitHubAPI(event)
34-
const promises: Array<Promise<unknown>> = []
35-
36-
const labels = []
23+
const res = await ai.run('@hf/nousresearch/hermes-2-pro-mistral-7b', {
24+
messages: [
25+
{
26+
role: 'system',
27+
content: `You are a kind, helpful open-source maintainer that answers in JSON. If the issue looks like spam (contains gibberish, nonsense, etc.), it is marked as spam. Do not mark issues as spam purely based on non-English content or bad grammar. Do not answer with anything else other than a valid JSON. Here\`s the json schema you must adhere to:\n<schema>\n${JSON.stringify(responseSchema)}\n</schema>\n`,
28+
},
29+
{ role: 'user', content: `# ${issue.title}\n\n${getNormalizedIssueContent(issue.body || '')}` },
30+
],
31+
})
32+
33+
const aiResponse = aiResponseSchema.parse(res)
34+
if (!aiResponse.response) {
35+
console.error('Missing AI response', res)
36+
throw createError({
37+
statusCode: 500,
38+
message: 'Missing AI response',
39+
})
40+
}
3741

38-
if (value.issueType === 'bug' && value.reproductionProvided === false) {
39-
labels.push('needs reproduction')
42+
try {
43+
analyzedIssue = analyzedIssueSchema.parse(JSON.parse(aiResponse.response.trim()))
4044
}
41-
if (value.issueType === 'bug' && value.possibleRegression === true) {
42-
labels.push('possible regression')
45+
catch (e) {
46+
console.error('Invalid AI response', aiResponse.response, e)
47+
throw createError({
48+
statusCode: 500,
49+
message: 'Invalid AI response',
50+
})
4351
}
44-
if (value.nitro === true) {
45-
labels.push('nitro')
52+
}
53+
catch (e) {
54+
if (isError(e)) {
55+
throw e
56+
}
57+
58+
console.error('Unknown AI error', e)
59+
throw createError({
60+
statusCode: 500,
61+
message: 'Unknown AI error',
62+
})
63+
}
64+
65+
const $github = useGitHubAPI(event)
66+
const promises: Array<Promise<unknown>> = []
67+
68+
// Update the GitHub issue
69+
try {
70+
const labels: IssueLabel[] = []
71+
72+
if (analyzedIssue.issueType === IssueType.Spam) {
73+
promises.push($github('graphql', {
74+
baseURL: 'https://api.github.com/',
75+
method: 'POST',
76+
body: {
77+
query: `
78+
mutation {
79+
transferIssue(input: { issueId: "${issue.node_id}", repositoryId: "${runtimeConfig.github.targetRepositoryNodeId}" }) {
80+
issue {
81+
number
82+
}
83+
}
84+
}
85+
`,
86+
},
87+
}))
4688
}
47-
if (value.issueType === 'documentation') {
48-
labels.push('documentation')
89+
else {
90+
if (analyzedIssue.issueType === IssueType.Bug && !analyzedIssue.reproductionProvided) {
91+
labels.push(IssueLabel.NeedsReproduction)
92+
}
93+
if (analyzedIssue.issueType === IssueType.Bug && analyzedIssue.possibleRegression) {
94+
labels.push(IssueLabel.PossibleRegression)
95+
}
96+
if (analyzedIssue.nitro) {
97+
labels.push(IssueLabel.Nitro)
98+
}
99+
if (analyzedIssue.issueType === IssueType.Documentation) {
100+
labels.push(IssueLabel.Documentation)
101+
}
49102
}
50103

51104
if (labels.length > 0) {
@@ -55,16 +108,18 @@ export default defineEventHandler(async (event) => {
55108
}))
56109
}
57110

58-
if (value.spokenLanguage.toLowerCase() !== 'english') {
111+
// Translate non-English issue titles to English
112+
if (analyzedIssue.spokenLanguage !== 'en' && analyzedIssue.issueType !== IssueType.Spam) {
59113
await ai.run('@cf/meta/m2m100-1.2b', {
60114
text: issue.title,
61-
source_lang: value.spokenLanguage.toLowerCase(),
115+
source_lang: analyzedIssue.spokenLanguage,
62116
target_lang: 'english',
63117
}).then(({ translated_text }) => {
118+
if (!translated_text || !translated_text.trim().length) return
64119
promises.push($github(`repos/${repository.full_name}/issues/${issue.number}`, {
65120
method: 'PATCH',
66121
body: {
67-
title: translated_text,
122+
title: `[${analyzedIssue?.spokenLanguage}:translated] ${translated_text}`,
68123
},
69124
}))
70125
}).catch(console.error)
@@ -74,20 +129,22 @@ export default defineEventHandler(async (event) => {
74129

75130
return null
76131
}
77-
catch (err) {
78-
console.log(err)
79-
console.error('Could not parse response from OpenAI', answer)
80-
throw createError({ message: 'Could not parse.' })
132+
catch (e) {
133+
console.error('Error updating issue', e)
134+
throw createError({
135+
statusCode: 500,
136+
message: 'Error updating issue',
137+
})
81138
}
82139
})
83140

84141
const responseSchema = {
85142
title: 'Issue Categorisation',
86143
type: 'object',
87144
properties: {
88-
issueType: { type: 'string', enum: ['bug', 'feature', 'documentation'] },
145+
issueType: { type: 'string', enum: ['bug', 'feature', 'documentation', 'spam'] },
89146
reproductionProvided: { type: 'boolean' },
90-
spokenLanguage: { type: 'string' },
147+
spokenLanguage: { type: 'string', comment: 'The language of the title in ISO 639-1 format. Do not include country codes, only language code.' },
91148
possibleRegression: {
92149
type: 'boolean',
93150
comment: 'If the issue is reported on upgrade to a new version of Nuxt, it is a possible regression.',
@@ -99,6 +156,54 @@ const responseSchema = {
99156
},
100157
} as const
101158

159+
// eslint-disable-next-line @typescript-eslint/no-unused-vars
102160
type Response = {
103161
[key in keyof typeof responseSchema['properties']]: typeof responseSchema['properties'][key]['type'] extends 'string' ? 'enum' extends keyof typeof responseSchema['properties'][key] ? typeof responseSchema['properties'][key]['enum'] extends Array<infer S> ? S : string : string : typeof responseSchema['properties'][key]['type'] extends 'boolean' ? boolean : unknown
104162
}
163+
164+
enum IssueLabel {
165+
NeedsReproduction = 'needs reproduction',
166+
PossibleRegression = 'possible regression',
167+
Nitro = 'nitro',
168+
Documentation = 'documentation',
169+
}
170+
171+
enum IssueType {
172+
Bug = 'bug',
173+
Feature = 'feature',
174+
Documentation = 'documentation',
175+
Spam = 'spam',
176+
}
177+
178+
/**
 * Validation schema for the subset of the GitHub issue webhook payload that the handler reads.
 *
 * `action` is validated as a plain string rather than the single literal `'opened'`:
 * restricting it at the schema level makes body validation fail for every other
 * issue action, which turns the handler's `action !== 'opened'` early return into dead code
 * and answers those deliveries with an error instead of silently ignoring them.
 */
const githubWebhookSchema = z.object({
  action: z.string(),
  issue: z.object({
    title: z.string(),
    // The body may be null in the payload, so accept it and let the caller default it.
    body: z.string().nullable(),
    number: z.number(),
    node_id: z.string(),
  }),
  repository: z.object({
    full_name: z.string(),
  }),
  // TODO: implement as a GitHub app
  installation: z.any().optional(),
})
192+
193+
const aiResponseSchema = z.object({
194+
response: z.string().optional(),
195+
tool_calls: z.array(z.object({
196+
name: z.string(),
197+
arguments: z.unknown(),
198+
})).optional(),
199+
})
200+
201+
// TODO: generate AI model schema from this?
202+
const analyzedIssueSchema = z.object({
203+
issueType: z.nativeEnum(IssueType),
204+
reproductionProvided: z.boolean().nullable().transform(v => v ?? false),
205+
spokenLanguage: z.string().nullable().transform(lang => getNormalizedLanguage(lang)).describe('The language of the title in ISO 639-1 format.'),
206+
possibleRegression: z.boolean().nullable().transform(v => v ?? false).describe('If the issue is reported on upgrade to a new version of Nuxt, it is a possible regression.'),
207+
nitro: z.boolean().nullable().transform(v => v ?? false).describe('If the issue is reported only in relation to a single deployment provider, it is possibly a Nitro issue.'),
208+
})
209+
.describe('Issue Categorisation')

server/utils/normalization.ts

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// Feature issues
const FEATURE_REQUEST_TITLE = '### Describe the feature'

// Bug report issues
const BUG_REPORT_REPRODUCTION_TITLE = '### Reproduction'
const BUG_REPORT_LOGS_TITLE = '### Logs'

// Maximum number of characters of issue content that is returned
const MAX_CONTENT_LENGTH = 5000

/**
 * Normalize the issue content by removing HTML comments, the stackblitz starter link and
 * diacritics, then narrowing it to the most relevant section.
 *
 * - If the feature request heading is present, the content starts at that heading.
 * - Otherwise, if the reproduction heading is present, the content starts at that heading and
 *   stops before the logs heading that follows it (if any).
 * - Otherwise the content starts at the beginning of the text.
 *
 * In every case at most `MAX_CONTENT_LENGTH` characters, counted from the start of the
 * selected section, are kept and the result is trimmed.
 *
 * @param txt The issue content to normalize.
 * @returns The normalized, truncated issue content.
 */
export function getNormalizedIssueContent(txt: string): string {
  const text = txt
    // `[\s\S]` instead of `.` so that comments spanning several lines are removed as well
    .replace(/<!--[\s\S]*?-->/g, ' ')
    .replace(/https:\/\/stackblitz\.com\/github\/nuxt\/starter/g, '')
    // Decompose accented characters and drop the combining marks
    .normalize('NFD')
    .replace(/[\u0300-\u036f]/g, '')
    .trim()

  // Trim feature requests
  const featureRequestContentStart = text.indexOf(FEATURE_REQUEST_TITLE)
  if (featureRequestContentStart !== -1) {
    // The end index is relative to the section start, not to the start of the text
    return text
      .slice(featureRequestContentStart, featureRequestContentStart + MAX_CONTENT_LENGTH)
      .trim()
  }

  // Trim bug reports
  const bugReportContentStart = text.indexOf(BUG_REPORT_REPRODUCTION_TITLE)
  if (bugReportContentStart !== -1) {
    const maxEnd = bugReportContentStart + MAX_CONTENT_LENGTH
    // Exclude logs from the content, if present. Only a logs heading located after the
    // reproduction heading can bound the section; an earlier one would produce an empty slice.
    const bugReportLogsStart = text.indexOf(BUG_REPORT_LOGS_TITLE, bugReportContentStart)
    const end = bugReportLogsStart !== -1 ? Math.min(bugReportLogsStart, maxEnd) : maxEnd

    return text.slice(bugReportContentStart, end).trim()
  }

  return text.slice(0, MAX_CONTENT_LENGTH).trim()
}
41+
42+
/**
43+
* Normalize the language code (ISO 639-1) to lowercase and remove the region code, if present.
44+
* @param lang The language code to normalize.
45+
* @returns The normalized language code or 'en' if the language code is not valid.
46+
*/
47+
export function getNormalizedLanguage(lang?: string | null) {
48+
if (!lang) {
49+
return 'en'
50+
}
51+
const language = lang.toLowerCase().split('-')[0]
52+
const langRegex = /^[a-z]{2}$/
53+
if (!langRegex.test(language)) {
54+
return 'en'
55+
}
56+
return language
57+
}

0 commit comments

Comments
 (0)