|
3 | 3 | # common intents
|
4 | 4 | def common():
|
5 | 5 | return {
|
6 |
| - "yes" : [{"stem":"yes"},{"stem":"igen"},{"stem":"aha"},{"stem":"ja","affix":["ja","h"]},{"stem":"ok","affix":["é","s","és","sa","ay"],"exc":[{"stem":"nem"}]},{"stem":"jól","inc":[{"stem":"ért","wordclass":"verb"}],"exc":[{"stem":"nem"}]},{"stem":"rendben","exc":[{"stem":"nincs"}]}], |
7 |
| - "no" : [{"stem":"no","max_words":3},{"stem":"nem","exc":[{"stem":"megy"},{"stem":"baj"},{"stem":"tud","wordclass":"verb"},{"stem":"ért","wordclass":"verb"}]},{"stem":"ne","exc":[{"stem":"haragudj","affix":["on"]}]},{"stem":"soha"},{"stem":"mégse","affix":["m"]},{"stem":"ros+z\s(v[aá]lasz|vic+|megold[aá]s)","wordclass":"regex"},{"stem":"nincs rendben"}], |
| 6 | + "yes" : [{"stem":"y","max_words":1},{"stem":"yes"},{"stem":"igen"},{"stem":"aha"},{"stem":"ja","affix":["ja","h"]},{"stem":"ok","affix":["é","s","és","sa","ay"],"exc":[{"stem":"nem"}]},{"stem":"jól","inc":[{"stem":"ért","wordclass":"verb"}],"exc":[{"stem":"nem"}]},{"stem":"rendben","exc":[{"stem":"nincs"}]},{"stem":"biztos","affix":["an"],"exc":[{"stem":"nem"},{"stem":"sem"}]},{"stem":"akarom","exc":[{"stem":"nem"},{"stem":"sem"}]},{"stem":"szeretné","match_stem":False,"affix":["k","m"],"exc":[{"stem":"nem"},{"stem":"sem"}],"max_words":2},{"stem":"kére","match_stem":False,"affix":["k","m"],"exc":[{"stem":"nem"},{"stem":"sem"}],"max_words":2}], |
| 7 | + "no" : [{"stem":"n","max_words":1},{"stem":"no","max_words":3},{"stem":"nem","exc":[{"stem":"megy"},{"stem":"baj"},{"stem":"tud","wordclass":"verb"},{"stem":"ért","wordclass":"verb"}]},{"stem":"ne","exc":[{"stem":"haragudj","affix":["on"]}]},{"stem":"soha"},{"stem":"mégse","affix":["m"]},{"stem":"ros+z\s(v[aá]lasz|vic+|megold[aá]s)","wordclass":"regex"},{"stem":"nincs rendben"}], |
8 | 8 | "hi" : [{"stem":"ha?i+","wordclass":"regex"},{"stem":"s+z+i+[aoó](ka|sztok)?","wordclass":"regex"},{"stem":"helló","affix":["ka"]},{"stem":"szer?[bv][au]sz(tok)?","wordclass":"regex"},{"stem":"hali","affix":["hó"]},{"stem":"(sz[eé]p|j[oó])\s?(reg+el|nap|est[eé])(o?t|[eéuü]nk)","wordclass":"regex"},{"stem":"[uü]dv([oö]z[oö]?l+(e[kt])?([eoö]m)?)?","wordclass":"regex"},{"stem":"örvendek"}],
|
9 | 9 | "bye" : [{"stem":"bye"},{"stem":"viszlát"},{"stem":"viszont látásra"},{"stem":"jó éj","affix":["t","szakát"]},{"stem":"jóccakát"},{"stem":"mennem kell"},{"stem":"csumi"},{"stem":"cs[aáoöő]+[oó]*(v[aá]z?)?","wordclass":"regex"},{"stem":"puszi"}],
|
10 | 10 | "thx" : [{"stem":"(ezer\s?)?(k[oö]s+z|k[oösz][oösz][oösz])(i(ke)?|ke|[oö]n[oö]m|[oö]nj[uü]k|[eoö]net(em)?|csi|ent+y[uüű])?(\s?sz[eé]pen)?","wordclass":"regex"},{"stem":"[ht][ht]x","wordclass":"regex"},{"stem":"t(ha|h?e)nks?\s?(you)?","wordclass":"regex"},{"stem":"danke"}],
|
@@ -156,7 +156,7 @@ def emoji():
|
156 | 156 | # entities you want to ignore in search results or disallow in user inputs
|
157 | 157 | def disallow():
|
158 | 158 | return {
|
159 |
| - "obscene" : [{"stem":"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?","wordclass":"regex","exc":[{"stem":"megye"}]},{"stem":"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|(kurva)?any[aá]d)([oö]?k)?r?[aáeoö]?(\w{0,2}[aeoöőu][dnklt]*)?(n[ae]k)?\b","wordclass":"regex","boundary":False},{"stem":"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?","wordclass":"regex"}], |
| 159 | + "obscene" : [{"stem":"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?","wordclass":"regex","exc":[{"stem":"megye"}]},{"stem":"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|kur[vw][aá]\w*(any[aá]d\w*)?|any[aá]d\w*)\b","wordclass":"regex","boundary":False},{"stem":"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?","wordclass":"regex"}], |
160 | 160 | "racist" : [{"stem":"(fek[aá]|nig+(er|a)|n[aá]ci|cig[oó]|cig[aá]n+y|gypsy|dzsip[oó]|zsidr?[ó])[aáeégklnmstv]*","wordclass":"regex","boundary":False}],
|
161 | 161 | "erotic" : [{"stem":"(sz?ex|an[aá]l|[bv]agina|[bp][eé][np]isz?|creampie|cum|sperma?|fuck|homo(kos|sexu[aá]l(is)?)?|milf|bisexual|gay|dild[oó]|vibr[aá]tor|fel+atio|blow\s?job|whore|geci|pus{2}y|pics[aá]|pin[aá]|fasz|pis{2}|boner|dick(pic)?|x{3,}|hentai|catgirl|ec+hi|yaoi|loli|shot[aá]|\w*porn[oó]?(film)?)[aáeéioöőuüdgklmnprstvz]*","wordclass":"regex","boundary":False},{"stem":"maki verem"}],
|
162 | 162 | "unpleasant" : [{"stem":"AIDS","wordclass":"noun"},{"stem":"HIV","ignorecase":False},{"stem":"Hitler","wordclass":"noun"},{"stem":"(Sz?t[aá]lin|Len+in)\w*","wordclass":"regex"},{"stem":"pedof[ií]l(i[aá])?[aokltv]*","wordclass":"regex"},{"stem":"(fur{2}y|bestiality|yif{2}y?)[aáeégklnmstv]*","wordclass":"regex"},{"stem":"mej?i?n\s?kamp+f+\w*","wordclass":"regex"},{"stem":"(any[aá]d|gy[oö]k[eé]r)\w*","wordclass":"regex"}],
|
|
0 commit comments