Skip to content

Commit 45d8a43

Browse files
committed
updated entities based on user feedback
- updated entities based on user feedback - increased version number to 1.1.8
1 parent bb2c10f commit 45d8a43

File tree

2 files changed

+4
-4
lines changed

2 files changed

+4
-4
lines changed

lara/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# Lara - Linguistic Aim Recognizer API
44

55
__all__ = 'nlp','parser','stemmer','entities'
6-
__version__ = '1.1.7'
6+
__version__ = '1.1.8'
77
__version_info__ = tuple(int(num) for num in __version__.split('.'))
88

99
import sys

lara/entities.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
# common intents
44
def common():
55
return {
6-
"yes" : [{"stem":"yes"},{"stem":"igen"},{"stem":"aha"},{"stem":"ja","affix":["ja","h"]},{"stem":"ok","affix":["é","s","és","sa","ay"],"exc":[{"stem":"nem"}]},{"stem":"jól","inc":[{"stem":"ért","wordclass":"verb"}],"exc":[{"stem":"nem"}]},{"stem":"rendben","exc":[{"stem":"nincs"}]}],
7-
"no" : [{"stem":"no","max_words":3},{"stem":"nem","exc":[{"stem":"megy"},{"stem":"baj"},{"stem":"tud","wordclass":"verb"},{"stem":"ért","wordclass":"verb"}]},{"stem":"ne","exc":[{"stem":"haragudj","affix":["on"]}]},{"stem":"soha"},{"stem":"mégse","affix":["m"]},{"stem":"ros+z\s(v[aá]lasz|vic+|megold[aá]s)","wordclass":"regex"},{"stem":"nincs rendben"}],
6+
"yes" : [{"stem":"y","max_words":1},{"stem":"yes"},{"stem":"igen"},{"stem":"aha"},{"stem":"ja","affix":["ja","h"]},{"stem":"ok","affix":["é","s","és","sa","ay"],"exc":[{"stem":"nem"}]},{"stem":"jól","inc":[{"stem":"ért","wordclass":"verb"}],"exc":[{"stem":"nem"}]},{"stem":"rendben","exc":[{"stem":"nincs"}]},{"stem":"biztos","affix":["an"],"exc":[{"stem":"nem"},{"stem":"sem"}]},{"stem":"akarom","exc":[{"stem":"nem"},{"stem":"sem"}]},{"stem":"szeretné","match_stem":False,"affix":["k","m"],"exc":[{"stem":"nem"},{"stem":"sem"}],"max_words":2},{"stem":"kére","match_stem":False,"affix":["k","m"],"exc":[{"stem":"nem"},{"stem":"sem"}],"max_words":2}],
7+
"no" : [{"stem":"n","max_words":1},{"stem":"no","max_words":3},{"stem":"nem","exc":[{"stem":"megy"},{"stem":"baj"},{"stem":"tud","wordclass":"verb"},{"stem":"ért","wordclass":"verb"}]},{"stem":"ne","exc":[{"stem":"haragudj","affix":["on"]}]},{"stem":"soha"},{"stem":"mégse","affix":["m"]},{"stem":"ros+z\s(v[aá]lasz|vic+|megold[aá]s)","wordclass":"regex"},{"stem":"nincs rendben"}],
88
"hi" : [{"stem":"ha?i+","wordclass":"regex"},{"stem":"s+z+i+[aoó](ka|sztok)?","wordclass":"regex"},{"stem":"helló","affix":["ka"]},{"stem":"szer?[bv][au]sz(tok)?","wordclass":"regex"},{"stem":"hali","affix":["hó"]},{"stem":"(sz[eé]p|j[oó])\s?(reg+el|nap|est[eé])(o?t|[eéuü]nk)","wordclass":"regex"},{"stem":"[uü]dv([oö]z[oö]?l+(e[kt])?([eoö]m)?)?","wordclass":"regex"},{"stem":"örvendek"}],
99
"bye" : [{"stem":"bye"},{"stem":"viszlát"},{"stem":"viszont látásra"},{"stem":"jó éj","affix":["t","szakát"]},{"stem":"jóccakát"},{"stem":"mennem kell"},{"stem":"csumi"},{"stem":"cs[aáoöő]+[oó]*(v[aá]z?)?","wordclass":"regex"},{"stem":"puszi"}],
1010
"thx" : [{"stem":"(ezer\s?)?(k[oö]s+z|k[oösz][oösz][oösz])(i(ke)?|ke|[oö]n[oö]m|[oö]nj[uü]k|[eoö]net(em)?|csi|ent+y[uüű])?(\s?sz[eé]pen)?","wordclass":"regex"},{"stem":"[ht][ht]x","wordclass":"regex"},{"stem":"t(ha|h?e)nks?\s?(you)?","wordclass":"regex"},{"stem":"danke"}],
@@ -156,7 +156,7 @@ def emoji():
156156
# entities you want to ignore in search results or disallow in user inputs
157157
def disallow():
158158
return {
159-
"obscene" : [{"stem":"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?","wordclass":"regex","exc":[{"stem":"megye"}]},{"stem":"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|(kurva)?any[aá]d)([oö]?k)?r?[aáeoö]?(\w{0,2}[aeoöőu][dnklt]*)?(n[ae]k)?\b","wordclass":"regex","boundary":False},{"stem":"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?","wordclass":"regex"}],
159+
"obscene" : [{"stem":"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?","wordclass":"regex","exc":[{"stem":"megye"}]},{"stem":"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|kur[vw][aá]\w*(any[aá]d\w*)?|any[aá]d\w*)\b","wordclass":"regex","boundary":False},{"stem":"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?","wordclass":"regex"}],
160160
"racist" : [{"stem":"(fek[aá]|nig+(er|a)|n[aá]ci|cig[oó]|cig[aá]n+y|gypsy|dzsip[oó]|zsidr?[ó])[aáeégklnmstv]*","wordclass":"regex","boundary":False}],
161161
"erotic" : [{"stem":"(sz?ex|an[aá]l|[bv]agina|[bp][eé][np]isz?|creampie|cum|sperma?|fuck|homo(kos|sexu[aá]l(is)?)?|milf|bisexual|gay|dild[oó]|vibr[aá]tor|fel+atio|blow\s?job|whore|geci|pus{2}y|pics[aá]|pin[aá]|fasz|pis{2}|boner|dick(pic)?|x{3,}|hentai|catgirl|ec+hi|yaoi|loli|shot[aá]|\w*porn[oó]?(film)?)[aáeéioöőuüdgklmnprstvz]*","wordclass":"regex","boundary":False},{"stem":"maki verem"}],
162162
"unpleasant" : [{"stem":"AIDS","wordclass":"noun"},{"stem":"HIV","ignorecase":False},{"stem":"Hitler","wordclass":"noun"},{"stem":"(Sz?t[aá]lin|Len+in)\w*","wordclass":"regex"},{"stem":"pedof[ií]l(i[aá])?[aokltv]*","wordclass":"regex"},{"stem":"(fur{2}y|bestiality|yif{2}y?)[aáeégklnmstv]*","wordclass":"regex"},{"stem":"mej?i?n\s?kamp+f+\w*","wordclass":"regex"},{"stem":"(any[aá]d|gy[oö]k[eé]r)\w*","wordclass":"regex"}],

0 commit comments

Comments
 (0)