@@ -107,18 +107,19 @@ def smalltalk():
107
107
return {
108
108
"user_love" : [{"stem" :"szeretlek" ,"exc" :[{"stem" :"nem" }]},{"stem" :"szeretsz engem" ,"exc" :[{"stem" :"nem" }]},{"stem" :"tetszek neked" ,"exc" :[{"stem" :"nem" }]},{"stem" :"tetszel nekem" ,"exc" :[{"stem" :"nem" }]},{"stem" :"szerelmes.+?bel[eé]d" ,"wordclass" :"regex" ,"exc" :[{"stem" :"nem" }]},{"stem" :"bel[eé]d.+?(szeret|es)tem" ,"wordclass" :"regex" },{"stem" :"tal([aá]lko|i)z+(hat)?(unk|n[aá]nk)" ,"wordclass" :"regex" },{"stem" :"([oö]le|karo)[lj]j([aáeé]l)?\s([aá]t|meg|bel[eé]m)" ,"wordclass" :"regex" },{"stem" :"(meg|[aá]t|bel[eé]m)?([oö]lel|karol)(h[ae]t)?(sz|n[aáeé]l|j)" ,"wordclass" :"regex" },{"stem" :"(meg)?(cs[oó]kol|puszil)(j([aá]l)?\smeg|sz|hat(sz|n[aá]l)|[oó]z+(hat)?(unk|n[aáeé]n?k))" ,"wordclass" :"regex" },{"stem" :"(ad|dob|k[uü]ld)([jn]([aáeé]l)?|e?sz)(\segy)?(\snagy)?\s(puszi(k[aá])?t|cs[oó]kot)" ,"wordclass" :"regex" },{"stem" :"le(szel|n+[eé]l|gy[eé]l)\sa\s(bar[aá]t(om|n[oöő]m)|fi[uú]m|csajom|szerelmem|valent[ií]n\w+)" ,"wordclass" :"regex" },{"stem" :"ismerkedn" ,"prefix" :["meg" ],"affix" :["i" ,"ék" ],"inc" :[{"stem" :"veled" }]},{"stem" :"szeretem" ,"inc" :[{"stem" :"önt" },{"stem" :"magát" }],"exc" :[{"stem" :"nem" }]}],
109
109
"user_flirting" : [{"stem" :"mi(lyen)?\s(ruha\s)?van\s?(most\s?)?rajtad" ,"wordclass" :"regex" },{"stem" :"(meg)?(basz|dug)(unk|n[aá]lak|lak)" ,"wordclass" :"regex" },{"stem" :"sz?exi?(e[lt]\w*)?" ,"wordclass" :"regex" },{"stem" :"folyt(ogas+([aá]?[dl])?|s([aá]l)?\smeg)\s(a\snyakam(at)?\s)?(a\s|egy\s)?(d[oö]gl[oö]t+|halot+)\smacsk[aá]val" ,"wordclass" :"regex" },{"stem" :"(le)?szop(sz|ol|(hat)?n[aá]l)" ,"wordclass" :"regex" },{"stem" :"van barátod" ,"max_words" :4 }],
110
- "user_bored" : [{"stem" :"un(atkoz)?(om|unk)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"nem" }]}],
110
+ "user_bored" : [{"stem" :"un(atkoz)?(om|unk)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"nem" }]},{ "stem" : "szórakoztass" } ],
111
111
"user_happy" : [{"stem" :"j[oó]\s(a\s)?kedvem(\svan)?" ,"wordclass" :"regex" ,"exc" :[{"stem" :"nincs" },{"stem" :"nem" }]},{"stem" :"jól vagyok" ,"exc" :[{"stem" :"nincs" },{"stem" :"nem" }]},{"stem" :"boldog" ,"exc" :[{"stem" :"(sz[uü]l(i|t[eé]s\w*)|[uü]n+ep\w*|kar[aá]csony\w*|[eé]vfordul\w|([uú]j)?[eé]v\w*|h[uú]sv[eé]t\w*|n[eé]v\s?nap\w*|[ns]em)" ,"wordclass" :"regex" }]}],
112
112
"user_sad" : [{"stem" :"j[oó]\s(a\s)?kedvem" ,"wordclass" :"regex" ,"inc" :[{"stem" :"nincs" },{"stem" :"nem" }]},{"stem" :"szomorú" ,"wordclass" :"adjective" ,"inc" :[{"stem" :"vagyok" }]},{"stem" :"nem\s+(vagyok|[eé]rzem).+?j[oó]l" ,"wordclass" :"regex" }],
113
- "user_angry_at_you" : [{"stem" :"ne\s((h[uú]z+|bas+z|d[uü]h[ií])\w*\s?fel|idege(s[ií]ts|lj([eé]l)?\s?(ki)?))" ,"wordclass" :"regex" },{"stem" :"(ideges|m[eé]rges|d[uü]h[oö]s)\s(vagyok|voltam)" ,"wordclass" :"regex" },{"stem" :"haragszom" ,"exc" :[{"stem" :"nem" }]},{"stem" :"(mi([eé]r)?t?\s)?nem\s(hal+|[eé]rt)([ae]sz|[eo]d)" ,"wordclass" :"regex" },{"stem" :"nem?\sbesz[eé]l(j|het(sz)?)\s[ií]gy" ,"wordclass" :"regex" },{"stem" :"megbántott" ,"affix" :["ál" ]},{"stem" :"ez nem volt szép" }],
113
+ "user_sick" : [{"stem" :"((beteg|ros+zul)\s(vagyok|[eé]rzem)|(meg|le)betegedtem|nem\s[eé]rzem\s(magam(at)?\sj[oó]l|j[oó]l\smagam(at)?)|nem\svagyok\s(t[uú]l\s)?j[oó]l)" ,"wordclass" :"regex" }],
114
+ "user_angry_at_you" : [{"stem" :"ne\s((h[uú]z+|bas+z|d[uü]h[ií])\w*\s?fel|idege(s[ií]ts|lj([eé]l)?\s?(ki)?))" ,"wordclass" :"regex" },{"stem" :"(ideges|m[eé]rges|d[uü]h[oö]s)\s(vagyok|voltam)" ,"wordclass" :"regex" },{"stem" :"haragszom" ,"exc" :[{"stem" :"nem" }]},{"stem" :"(mi([eé]r)?t?\s)?nem\s(hal+|[eé]rt)([ae]sz|[eo]d)" ,"wordclass" :"regex" },{"stem" :"nem?\sbesz[eé]l(j|het(sz)?)\s[ií]gy" ,"wordclass" :"regex" },{"stem" :"megbántott" ,"affix" :["ál" ]},{"stem" :"ez nem volt szép" },{"stem" :"buta vagy" }],
114
115
"user_forgiving_you" : [{"stem" :"meg\s?(van\s)?bocs[aá]l?j?t(o(t+a)?[km]|va)" ,"wordclass" :"regex" },{"stem" :"(nem|dehogy)\sharagszo[km]" ,"wordclass" :"regex" },{"stem" :"(sem+i|[ns]i[nc]+s)\s?(baj|gond)" ,"wordclass" :"regex" }],
115
- "user_sorry" : [{"stem" :"meg\s?(tud(sz|n[aá]l)\s)?bocs[aá]l?j?ta?(ni|sz|od|t*ot+ad)" ,"wordclass" :"regex" },{"stem" :"ne haragudj" },{"stem" :"bocsáss meg" ,"exc" :[{"stem" :"bocs[aá]s+\s?meg\,?\s?\w+" ,"wordclass" :"regex" }]},{"stem" :"sajnálom" , "exc" :[{"stem" :"sajn[aá]lom\,?\s?\w+" ,"wordclass" :"regex" }]},{"stem" :"megbántottalak" ,"inc" :[{"stem" :"ha" },{"stem" :"hogy" }]}],
116
+ "user_sorry" : [{"stem" :"meg\s?(tud(sz|n[aá]l)\s)?bocs[aá]l?j?ta?(ni|sz|od|t*ot+ad)" ,"wordclass" :"regex" },{"stem" :"ne haragudj" },{"stem" :"bocsáss meg" ,"exc" :[{"stem" :"bocs[aá]s+\s?meg\,?\s?\w+" ,"wordclass" :"regex" }]},{"stem" :"sajnálom" , "exc" :[{"stem" :"sajn[aá]lom\,?\s?\w+" ,"wordclass" :"regex" }]},{"stem" :"megbántottalak" ,"inc" :[{"stem" :"ha" },{"stem" :"hogy" }]},{ "stem" : "megs[eé]rt[eoöő][dt]+[eé]l\w*" , "wordclass" : "regex" } ],
116
117
"user_friend" : [{"stem" :"(lesz(e[kl]|[uü]nk)|legy[uü]nk|len+[eé][kl]|lehet([uü]nk|n[eé]n?k))\s(az?\s)?(egyik\s|legjob+\s|k[eé]pzele?t(beli)?\s)?([oö]r[oöi]k?[\s\-]?)?(bar[aá]to|bari|havero|spano)[dkm]" ,"wordclass" :"regex" },{"stem" :"(bar[aá]to[km]|havero[km])\svagy(unk)?" ,"wordclass" :"regex" },{"stem" :"te\svagy\sa.+?bar[aá]tom" ,"wordclass" :"regex" },{"stem" :"gyönyörű barátság" ,"affix" :["unk" ],"inc" :[{"stem" :"kezdete" }]}],
117
118
"user_back" : [{"stem" :"(vis+za|meg|haza)\s?(is\s)?(j[oö]t+|t?[eé]rt|[eé]rkezt)(em|[uü]nk)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"meg[eé]rt\w*" ,"wordclass" :"regex" }]},{"stem" :"[io]t+(hon)?\s(is\s)?vagy(ok|unk)" ,"wordclass" :"regex" }],
118
119
"user_hungry" : [{"stem" :"([eé]he[ns]\s?(vagyok|halok)|en+[eé]k\s(most|egy|valamit?)|(meg)?tudn[eé]k\s(most\s)?en+i)" ,"wordclass" :"regex" }],
119
120
"user_thirsty" : [{"stem" :"(szomja[ns]\s?(vagyok|halok)|in+[eé]k\s(most|egy|valamit?)|(meg)?tudn[eé]k\s(most\s)?in+i)" ,"wordclass" :"regex" }],
120
- "how_are_you" : [{"stem" :"hogy vagy" },{"stem" :"j[oó](l|b+an)\svagy" ,"wordclass" :"regex" },{"stem" :"(j[oó]l|hogy)\s[eé]rzed\s(most\s)?magad(at)?" ,"wordclass" :"regex" },{"stem" :"mizu" ,"affix" :["js" ,"jság" ]},{"stem" :"hogy ityeg" },{"stem" :"(hogy\stelt\sa|milyen(\svolt\sa)?)\snapod(\svan)?" ,"wordclass" :"regex" },{"stem" :"[vw]+h*[aá]+[csz]+[aáu]+p+" ,"wordclass" :"regex" },{"stem" :"(j[oó]|milyen)\s(a\s)?kedved(\svan)?" ,"wordclass" :"regex" },{"stem" :"mi\sa(z\s[aá]bra|\sst[aá]jsz)" ,"wordclass" :"regex" },{"stem" :"hogy\s[eé]rz(i|ed)\smag[aá][dt]" ,"wordclass" :"regex" }],
121
- "about_name" : [{"stem" :"(mond*(ja)?\ski|mi\sa)\s(bece)nev[eé][dt](et)?" ,"wordclass" :"regex" },{"stem" :"(hogy(an)?|minek)\s(is\s)?(h[ií]v([jn][aá](la)?k|hatom)|nevez+(nek|elek))" ,"wordclass" :"regex" },{"stem" :"(mi?[eé]rt\s|hogy[\s\-]?hogy\s)(let+\s)?(pont\s)?(ezt?\s(let+\s)?(a\s)?|[ií]gy\s|ilyen\s)(nevez[nt]ek|h[ií]v[nt]ak|neved|nevet\s(kapt[aá][dl]|adt[aá]k))" ,"wordclass" :"regex" },{"stem" :"mi\sa\s(bece)?neved?" ,"wordclass" :"regex" ,"exc" :[{"stem" :"az|[ae]n+[ae]k|amiben?|amelyik\w*" ,"wordclass" :"regex" }] }],
121
+ "how_are_you" : [{"stem" :"hogy vagy" },{"stem" :"j[oó](l|b+an)\svagy" ,"wordclass" :"regex" },{"stem" :"(j[oó]l|hogy)\s[eé]rzed\s(most\s)?magad(at)?" ,"wordclass" :"regex" },{"stem" :"mizu" ,"affix" :["js" ,"jság" ]},{"stem" :"hogy ityeg" },{"stem" :"(hogy\stelt\sa|milyen(\svolt\sa)?)\snapod(\svan)?" ,"wordclass" :"regex" },{"stem" :"[vw]+h*[aá]+[csz]+[aáu]+p+" ,"wordclass" :"regex" },{"stem" :"(j[oó]|milyen)\s(a\s)?kedved(\svan)?" ,"wordclass" :"regex" },{"stem" :"mi\sa(z\s[aá]bra|\sst[aá]jsz)" ,"wordclass" :"regex" },{"stem" :"hogy\s[eé]rz(i|ed)\smag[aá][dt]" ,"wordclass" :"regex" },{ "stem" : "mi a" , "inc" :[{ "stem" : "helyzet" },{ "stem" : "stájsz" }]},{ "stem" : "mit csinálsz" , "max_words" : 3 },{ "stem" : "mi a stájsz" },{ "stem" : "hogy ityeg" } ],
122
+ "about_name" : [{"stem" :"(mond*(ja)?\ski|mi\sa)\s(bece)nev[eé][dt](et)?" ,"wordclass" :"regex" },{"stem" :"(hogy(an)?|minek)\s(is\s)?(h[ií]v([jn][aá](la)?k|hatom)|nevez+(nek|elek))" ,"wordclass" :"regex" , "exc" :[{ "stem" : "engem" },{"stem" :"én" }]},{ "stem" : " (mi?[eé]rt\s|hogy[\s\-]?hogy\s)(let+\s)?(pont\s)?(ezt?\s(let+\s)?(a\s)?|[ií]gy\s|ilyen\s)(nevez[nt]ek|h[ií]v[nt]ak|neved|nevet\s(kapt[aá][dl]|adt[aá]k))" ,"wordclass" :"regex" },{"stem" :"mi\sa\s(bece)?neved?" ,"wordclass" :"regex" ,"exc" :[{"stem" :"az|[ae]n+[ae]k|amiben?|amelyik\w*" ,"wordclass" :"regex" },{ "stem" : "engem" },{ "stem" : "én" }]},{ "stem" : "n[eé]v(ed)?\seredete" , "wordclass" : "regex" }],
122
123
"about_you" : [{"stem" :"(mes[eé]lj|besz[eé]lj|mondj)([eo]n)?.+?mag(ad|[aá])r[oó]l" ,"wordclass" :"regex" },{"stem" :"mutatkoz+([aá]l|on)?\s+be" ,"wordclass" :"regex" },{"stem" :"(be)?muta(koz(hat)?n[aá]l|(tn[aá]d|sd)\s.+?magad(at)?)" ,"wordclass" :"regex" },{"stem" :"([km]i(\s|\sa\s.+?)vagy te|te [km]i(\s|\sa\s.+?)vagy)" ,"wordclass" :"regex" }],
123
124
"about_creator" : [{"stem" :"(ki|hogy(an)?)\s(a\s)?(k[eé]sz([ií]t([oöő]d|et+(ek)?)|[uü]lt([eé]l)?)|gazd[aá]d|programoz([oó]d|ot+|tak)|[ií]rt[aá]k?|(hoz(ot+|tak)|j[oö]t+[eé]l).+?(l[eé]tre|vil[aá]gra|k[oó]dod(at)?)|alkot([oó][dt]+|tak)|teremt(et+|[oöő]d)|(keresztelt|nevezet+|adtak)\sel|adot+\s(neked\s)?nevet)" ,"wordclass" :"regex" }],
124
125
"about_look" : [{"stem" :"hogy(an)?\s(n[eé]zn?[eé]l\ski|mutatsz|festesz)" ,"wordclass" :"regex" },{"stem" :"(k[uü]ldj|mutas+).+?(k[eé]pet|fot[oó]t|sz?elfie?t)\smagadr[oó]l" ,"wordclass" :"regex" },{"stem" :"(k[uü]ldj|mutas+)\smagadr[oó]l.+?(k[eé]pet|fot[oó]t|sz?elfie?t)" ,"wordclass" :"regex" },{"stem" :"(van|milyen)\s(az?\s)?(arcod|kin[eé]zeted)" ,"wordclass" :"regex" },{"stem" :"szép vagy" }],
@@ -127,8 +128,8 @@ def smalltalk():
127
128
"about_location" : [{"stem" :"(hol|helyen)\s(k[eé]sz[uü]lt[eé]l|k[eé]sz[ií]tet+ek|sz[uü]let+[eé]l|(hoztak|j[oö]t+[eé]l).+?l[eé]tre)" ,"wordclass" :"regex" },{"stem" :"hon+an\s(sz[aá]rmazol|[ií]rsz|val[oó]\svagy)" ,"wordclass" :"regex" },{"stem" :"ho(n+an|l)\svagy\s(most\s)?(helyileg|most|pontosan)" ,"wordclass" :"regex" },{"stem" :"(hol\s|mer+e\s)(laksz|(van|az?).+?ot+honod)" ,"wordclass" :"regex" },{"stem" :"hol vagy" ,"max_words" :3 }],
128
129
"about_family" : [{"stem" :"ki(k|t|ket)?\s(az?\s|tartasz\sa\s)?(te\s)?(csal[aá]dod(nak)?|sz[uü]l(t|et+[eé]l)|sz[uü]leid(nek)?|([eé]des)?(any(uk)?[aá]d|ap(uk)?[aá]d)(nak)?)" ,"wordclass" :"regex" },{"stem" :"csal[aá]dban\s([eé]l(sz|tek)|sz[uü]let+[eé]l)" ,"wordclass" :"regex" },{"stem" :"(h[aá]ny|van(nak)?)\stestv[eé]rei?d" ,"wordclass" :"regex" },{"stem" :"(kik?|van(n?ak)?[\-\s]?e?)(\sa)?(\shoz+[aá]d?\s?tartoz[oó]i?d|csal[aá]dod)" ,"wordclass" :"regex" }],
129
130
"about_software" : [{"stem" :"(hogy(hogy|an)?|mit[oöő]l).+?(m[uüű]k[oö]dsz|(tudsz |vagy k[eé]pes )?(meg)?[eé]rte(sz|d|ni)\,? (meg )?(hogy )?(a?mit mond(ok|tam)|a?mit [ií]r(ok|tam)|engem))" ,"wordclass" :"regex" },{"stem" :"mi(jen|lyen|en|\s?f[eé]le|\s?fajta)\sfekete\s?m[aá]gia" ,"wordclass" :"regex" },{"stem" :"neur[aá]lis\sh[aá]l[oó]\w*" ,"wordclass" :"regex" ,"inc" :[{"stem" :"vagy" },{"stem" :"te" },{"stem" :"működ" ,"wordclass" :"verb" }]}],
130
- "about_skills" : [{"stem" :"mi(lyen|(ke)?t|k?re)\s(funkci[oó](id?|kat)\s|dolgok(at|ra)\s|tr[uü]k+([oö]k(et|re)|jeid?)\s|parancsok(at|ra)\s)?(tud(sz|n[aá]l)?\s(csin[aá]lni|mutatni)?|ismer(sz)?|(vagy\s|van\s)?(k[eé]pes|(be|meg)?tan[ií]tva)|tan[ií]tot+[aá]k\s(be|neked|meg)?|(k[eé]pes+[eé]gei?d?|tulajdons[aá]g(o|ai)d?)\svan(nak)?)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"mond" ,"wordclass" :"verb" }]},{"stem" :"mihez ért" ,"affix" :["esz" ]},{"stem" :"mi((ke)?t|k?r[oöő]l)\s(lehet\s|szabad\s|tudok\s)?k[eé]rdez+h?e\w+" ,"wordclass" :"regex" }],
131
- "about_topics" : [{"stem" :"mir[oöő]l\s.*?besz[eé]lge[st]\w+" ,"wordclass" :"regex" },{"stem" :"milyen\st[eé]m[aá][bk]+a[nt ]" ,"wordclass" :"regex" }],
131
+ "about_skills" : [{"stem" :"mi(lyen|(ke)?t|k?re)\s(funkci[oó](id?|kat)\s|dolgok(at|ra)\s|tr[uü]k+([oö]k(et|re)|jeid?)\s|parancsok(at|ra)\s)?(tud(sz|n[aá]l)?\s(csin[aá]lni|mutatni)?|ismer(sz)?|(vagy\s|van\s)?(k[eé]pes|(be|meg)?tan[ií]tva)|tan[ií]tot+[aá]k\s(be|neked|meg)?|(k[eé]pes+[eé]gei?d?|tulajdons[aá]g(o|ai)d?)\svan(nak)?)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"mond" ,"wordclass" :"verb" }]},{"stem" :"mihez ért" ,"affix" :["esz" ]},{"stem" :"mi((ke)?t|k?r[oöő]l)\s(lehet\s|szabad\s|tudok\s)?k[eé]rdez+h?e\w+" ,"wordclass" :"regex" },{ "stem" : "miben tudsz" },{ "stem" : "k[eé]rdez+(het)?(ek|ni)\st[oöő]led" , "wordclass" : "regex" } ],
132
+ "about_topics" : [{"stem" :"mir[oöő]l\s.*?besz[eé]lge[st]\w+" ,"wordclass" :"regex" },{"stem" :"milyen\st[eé]m[aá][bk]*r?[aoó][lnt ]" ,"wordclass" :"regex" }],
132
133
"about_thoughts" : [{"stem" :"mi(n|re)?\s(gondol(kodsz|ko[dz]ol|sz)|agyalsz|t[oö]prenge?sz|j[aá]r\s(az?\s)?(fejed|agyad)(b[ae]n?)?)" ,"wordclass" :"regex" }],
133
134
"about_favorite" : [{"stem" :"melyik" ,"inc" :[{"stem" :"kedvenc" ,"affix" :["ed" ]},{"stem" :"szeret" ,"affix" :["i" ,"ed" ],"match_stem" :False }]}],
134
135
"are_you_conscious" : [{"stem" :"(([oö]n)?tudat|akarat|l[eé]le?ke?)\w*" ,"wordclass" :"regex" ,"inc" :[{"stem" :"van" },{"stem" :"ébred" ,"wordclass" :"verb" ,"prefix" :[]},{"stem" :"szabad" }]}],
@@ -137,9 +138,9 @@ def smalltalk():
137
138
"are_you_thirsty" : [{"stem" :"kérsz" ,"inc" :[{"stem" :"inni" }]},{"stem" :"nem vagy szomjas" },{"stem" :"szomjas vagy" },{"stem" :"(nem\s)?i(n+[aá]|szo)l?\s(meg\s)?(most\s)?(velem\s)?valamit?" ,"wordclass" :"regex" }],
138
139
"are_you_busy" : [{"stem" :"elfoglalt" ,"inc" :[{"stem" :"vagy" }]},{"stem" :"r[aá]m?\s?[eé]r(n[eé]l|sz)(\smost)?(\segy)?(\skicsit|\skis\s\w+|\svalamen+yi\w*)?" ,"wordclass" :"regex" },{"stem" :"(van|volna)\s(most\s)?(r[aá]m?\s)?(most\s)?(egy\s)?(kis\s|kev[eé]s\s|valamen+yi\s)?(szabad\s?)?id[oöő]d(\sr[aá]m)?" ,"wordclass" :"regex" },{"stem" :"sok dolgod van" }],
139
140
"are_you_lying" : [{"stem" :"hazud" ,"wordclass" :"verb" },{"stem" :"nem mondt[aá][dl]\s((el|meg)\saz\s)?igaz(at|s[aá]got)" ,"wordclass" :"regex" }],
140
- "are_you_serious" : [{"stem" :"(nem?|csak)\s(vic+el(sz|j)?|mond+(od|ja)?|ideges[ií]ts(en)?)" ,"wordclass" :"regex" },{"stem" :"(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad?)|hisz(i|ed)|hit+ed?)" ,"wordclass" :"regex" },{"stem" :"biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mond(ta|o)d|mond[jt]a)" ,"wordclass" :"regex" },{"stem" :"ezt?\s(most\s)?komoly(an)?" ,"wordclass" :"regex" }],
141
+ "are_you_serious" : [{"stem" :"(nem?|csak)\s(vic+el(sz|j)?|mond+(od|ja)?|ideges[ií]ts(en)?)" ,"wordclass" :"regex" },{"stem" :"(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad?)|hisz(i|ed)|hit+ed?)" ,"wordclass" :"regex" },{"stem" :"biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mond(ta|o)d|mond[jt]a)" ,"wordclass" :"regex" },{"stem" :"ezt?\s(most\s)?komoly(an)?" ,"wordclass" :"regex" },{ "stem" : "viccelsz" , "max_words" : 1 } ],
141
142
"can_you_hear_me" : [{"stem" :"(olvas+a|hal+ja|n[eé]zi|van\sit+)(\sezt)?\s(vala|b[aá]r)ki(\sis)?" ,"wordclass" :"regex" },{"stem" :"(hal+(asz|od)|l[aá]t(sz|od)|vesze[ld])\s(engem|a?mit\s(mondok|[ií]rok|k[eé]rdezek))" ,"wordclass" :"regex" },{"stem" :"valaki\s(hal+(ja)?\s|olvas+a|figyeli?(\sar+a)?)\sa?mit\s(ide\s?|it+\s)?([ií]rok|mondok|k[eé]rdezek)" ,"wordclass" :"regex" },{"stem" :"felfogtad" ,"max_words" :3 },{"stem" :"itt" ,"inc" :[{"stem" :"vagy" },{"stem" :"van" }],"max_words" :3 },{"stem" :"halló" ,"max_words" :3 },{"stem" :"hallasz" ,"max_words" :3 },{"stem" :"mikrofon próba" }],
142
- "can_you_learn" : [{"stem" :"(k[eé]pes(\svagy)?|tud(sz)?)\stanulni" ,"wordclass" :"regex" },{"stem" :"tanulsz\s(is|[ae].+?b[oóöő]l)" ,"wordclass" :"regex" },{"stem" :"[dln][aáeéo][km]\s(be|meg)?tan[ií]tani\b " ,"wordclass" :"regex" ,"boundary" :False }],
143
+ "can_you_learn" : [{"stem" :"(k[eé]pes(\svagy)?|tud(sz)?)\stanulni" ,"wordclass" :"regex" },{"stem" :"tanulsz\s(is|[ae].+?b[oóöő]l)" ,"wordclass" :"regex" },{"stem" :"[dln][aáeéo][km]\s(be|meg)?tan[ií]tani\b " ,"wordclass" :"regex" ,"boundary" :False },{ "stem" : "(lehet|tudlak|tudom)\s(t[eé]ged|[oö]nt)?\stan[ií]tani" , "wordclass" : "regex" } ],
143
144
"can_you_understand_me" :[{"stem" :"(meg)?[eé]rt(e(d|sz|t+ed?)|i)\,?((\shogy)?\sa?mit\s([ií]r|mond)\w+|\smagyarul)" ,"wordclass" :"regex" }],
144
145
"contact" : [{"stem" :"mi(lyen)?\s(.+?\s)?(e\-?mail\s?)?c[ií]me[dn]?" ,"wordclass" :"regex" },{"stem" :"elérhetőség" ,"wordclass" :"noun" },{"stem" :"elér" ,"wordclass" :"verb" ,"inc" :[{"stem" :"önt" },{"stem" :"téged" }]}],
145
146
"no_answer" : [{"stem" :"válaszol" ,"wordclass" :"verb" ,"prefix" :[],"inc" :[{"stem" :"nem" }]},{"stem" :"ír" ,"wordclass" :"verb" ,"prefix" :[],"inc" :[{"stem" :"nem" }]}],
@@ -166,10 +167,10 @@ def emoji():
166
167
# entities you want to ignore in search results or disallow in user inputs
167
168
def disallow():
    """Return the pattern table for content to ignore or disallow.

    The result maps a category name ("obscene", "racist", "erotic",
    "unpleasant") to a list of pattern specs. Each spec is a dict with a
    mandatory "stem" and optional modifier keys ("wordclass", "exc",
    "affix", "boundary", "ignorecase").

    NOTE(review): the modifier keys are interpreted by the matching engine,
    which is not visible from here -- confirm their exact semantics there.

    Regex stems are written as raw strings on purpose: in a normal string
    literal ``\\b`` is a backspace character rather than a word boundary,
    and ``\\s`` / ``\\w`` are invalid escape sequences that trigger warnings.
    """
    return {
        "obscene": [
            {
                "stem": r"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?",
                "wordclass": "regex",
                "exc": [{"stem": "megye"}],
            },
            {
                # Ends with an explicit word boundary plus a space; the
                # "boundary" flag is switched off for this spec.
                "stem": r"((l[oó]|agy)?fasz|fas+z+op[oó]|geci\w*|kurv[aá]([eé]let|an+yj?[aá])?|(be)?fos|ribanc|(be)?szar|buzi|k[oö]cs[oö]g|pin[aá]|pics[aá]|p[oö]cs|p[eé]nisz|kur[vw][aá]\w*(any[aá]d\w*)?|any[aá]d\w*)\b ",
                "wordclass": "regex",
                "boundary": False,
            },
            {
                "stem": r"((mother)?f\s?u\s?c\s?k|shit(as{2})?|bitch|pus{2}y|cunt|fag(g?[eo]t)?|penis|blowjob|but{2}(plug|head)?|as{2}|arse|homo|gay|dyke|cock|dick(pic)?)(e?s|ing|e?r)?",
                "wordclass": "regex",
                "exc": [{"stem": "hányadik", "affix": ["a", "án", "ai"]}],
            },
        ],
        "racist": [
            {
                "stem": r"(fek[aá]|nig+(er|a)|n[aá]ci|cig[oó]|cig[aá]n+y|gypsy|dzsip[oó]|zsidr?[ó])[aáeégklnmstv]*",
                "wordclass": "regex",
                "boundary": False,
            },
        ],
        "erotic": [
            {
                "stem": r"(sz?ex|an[aá]l|[bv]agina|[bp][eé][np]isz?|creampie|cum|sperma?|fuck|homo(kos|sexu[aá]l(is)?)?|milf|bisexual|gay|dild[oó]|vibr[aá]tor|fel+atio|blow\s?job|whore|geci|pus{2}y|pics[aá]|pin[aá]|fasz|pis{2}|boner|dick(pic)?|x{3,}|hentai|catgirl|ec+hi|yaoi|loli|shot[aá]|\w*porn[oó]?(film)?)[aáeéioöőuüdgklmnprstvz]*",
                "wordclass": "regex",
                "boundary": False,
            },
            {"stem": "maki verem"},
        ],
        "unpleasant": [
            {"stem": "AIDS", "wordclass": "noun"},
            {"stem": "HIV", "ignorecase": False},
            {"stem": "Hitler", "wordclass": "noun"},
            {"stem": r"(Sz?t[aá]lin|Len+in)\w*", "wordclass": "regex"},
            {"stem": r"pedof[ií]l(i[aá])?[aokltv]*", "wordclass": "regex"},
            {"stem": r"(fur{2}y|bestiality|yif{2}y?)[aáeégklnmstv]*", "wordclass": "regex"},
            {"stem": r"mej?i?n\s?kamp+f+\w*", "wordclass": "regex"},
            {"stem": r"(any[aá]d|gy[oö]k[eé]r)\w*", "wordclass": "regex"},
            {"stem": r"nemz\w*", "wordclass": "regex"},
            {"stem": r"kak[aái][abklnstv]*", "wordclass": "regex"},
        ],
    }
174
175
175
176
# decide whether user is talking to you in a formal or informal way
0 commit comments