@@ -10,14 +10,14 @@ def common():
10
10
"thx" : [{"stem" :"(ezer\s?)?(k[oö]s+z|k[oösz][oösz][oösz])(i(ke)?|ke|[oö]n[oö]m|[oö]nj[uü]k|[eoö]net(em)?|csi|ent+y[uüű])?(\s?sz[eé]pen)?" ,"wordclass" :"regex" },{"stem" :"[ht][ht]x" ,"wordclass" :"regex" },{"stem" :"t(ha|h?e)nks?\s?(you)?" ,"wordclass" :"regex" },{"stem" :"danke" }],
11
11
"pls" : [{"stem" :"p+l+[iíea]*[zs]+e*" ,"wordclass" :"regex" },{"stem" :"l[eé]+[cgyt]+[sz]*[ií]+(ves|keh?)?" ,"wordclass" :"regex" },{"stem" :"l[eé](sz(el)?|gy(en)?|n+[eé]l).*?(kedves|sz[ií](ves)?)" ,"wordclass" :"regex" },{"stem" :"szeretn[eé](k|m)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"(meg)?bocs(i(ka)?|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?" ,"wordclass" :"regex" },{"stem" :"elnézés" ,"wordclass" :"noun" ,"match_stem" :False }]},{"stem" :"(meg)?k[eé]r(het)?((n[eéi])?l?e?[km]?)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"(meg)?bocs(i(ka)?|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?" ,"wordclass" :"regex" },{"stem" :"elnézés" ,"wordclass" :"noun" ,"match_stem" :False }]},{"stem" :"szeretn[eé]([km]|lek)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"(meg)?bocs(i(ka)?|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?" ,"wordclass" :"regex" },{"stem" :"elnézés" ,"wordclass" :"noun" ,"match_stem" :False }]}],
12
12
"welks" : [{"stem" :"nincs mit" },{"stem" :"(nagyon\s?)?(is\s)?sz[ií]ves(en|\s?[oö]r[oö]mest)" ,"wordclass" :"regex" },{"stem" :"ugyan\,?\shag[gy]\w{1,3}" ,"wordclass" :"regex" },{"stem" :"hag[gy]\w{1,3}\scsak" ,"wordclass" :"regex" },{"stem" :"sz[aá]momra.+?([oö]r[oö]m|megtiszteltet[eé]s)" ,"wordclass" :"regex" }],
13
- "sorry" : [{"stem" :"(meg)?bocs(i(ka)?|esz|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?" ,"wordclass" :"regex" },{"stem" :"elnézés" ,"wordclass" :"noun" ,"match_stem" :False },{"stem" :"sajn[aá]l(om|juk)" ,"wordclass" :"regex" },{"stem" :"s+z*o+ r+[iy]+" ,"wordclass" :"regex" }],
13
+ "sorry" : [{"stem" :"(meg)?bocs(i(ka)?|esz|[aá](nat([aá][eé]rt)?|nat[aáo]t?|s+|s+on|j?t(ana)?))?" ,"wordclass" :"regex" },{"stem" :"elnézés" ,"wordclass" :"noun" ,"match_stem" :False },{"stem" :"sajn[aá]l(om|juk)" ,"wordclass" :"regex" },{"stem" :"s+z*o* r+[iy]+(ka)? " ,"wordclass" :"regex" }],
14
14
"lol" : [{"stem" :"(h[aei]){2,}h?" ,"wordclass" :"regex" },{"stem" :"o?(lol)+o?" ,"wordclass" :"regex" },{"stem" :"[\:\;]\-*[dp\)9]+" ,"wordclass" :"regex" ,"boundary" :False },{"stem" :"[\(8]+\-*[:;]" ,"wordclass" :"regex" ,"boundary" :False },{"stem" :"rot?fl" ,"wordclass" :"regex" },{"stem" :"vicces" ,"exc" :[{"stem" :"nem" }]},{"stem" :"nevet(tem|ek|[uü]nk)" ,"wordclass" :"regex" ,"exc" :[{"stem" :"nem" }]}],
15
15
"nvm" : [{"stem" :"felejts[ed]n?\sel" ,"wordclass" :"regex" },{"stem" :"mindegy" ,"exc" :[{"stem" :"hogy" },{"stem" :"nem" }]},{"stem" :"nem fontos" },{"stem" :"hagy(jad?|d)" ,"wordclass" :"regex" ,"inc" :[{"stem" :"jól" ,"affix" :["van" ]},{"stem" :"á" },{"stem" :"mindegy" },{"stem" :"inkább" }]},{"stem" :"ne\s(is\s)?(foglalkoz+(on|[aá]l)?|t[oö]r[oöő]dj([oö]n|[eé]l)?)\s(vel(e|[uü]k)|[ae][vz]+[ae]l)" ,"wordclass" :"regex" },{"stem" :"hagy\w+\sfigyelmen\sk[ií]v[uü]l" ,"wordclass" :"regex" ,"exc" :[{"stem" :"ne" }]}],
16
16
"help" : [{"stem" :"segít" ,"wordclass" :"verb" },{"stem" :"segítség" ,"wordclass" :"noun" },{"stem" :"help" ,"wordclass" :"verb" ,"prefix" :[]}],
17
- "again" : [{"stem" :"[uú]j(ra|b[oó]l)|(meg)?ism[eé]t(l[eé]s|el(je|ni)?d?)?|m[eé]g\s?egyszer|megint?" ,"wordclass" :"regex" }],
17
+ "again" : [{"stem" :"[uú]j(ra|b[oó]l)|(meg)?ism[eé]t(l[eé]s|el(je|ni)?d?)?|m[eé]g\s?egyszer|megint?" ,"wordclass" :"regex" , "exc" :[{ "stem" : "vagyok" }] }],
18
18
"command" : [{"stem" :"(csin[aá]l(jad?|d)|(keres|mutas|mond)[aedjos]+n?|n[eé]z[nz]?[eé]?[dl]|akaro[km]|utas[ií]t\w{1,})" ,"wordclass" :"regex" },{"stem" :"haj[cts]+(a|[aá]?[ld])\sv[eé]gre" ,"wordclass" :"regex" }],
19
19
"question" : [{"stem" :"(\?+$)|(\?+\s\w+)" ,"wordclass" :"regex" },{"stem" :"([^,][^,\S+]hogy|^hogy)(an)?" ,"wordclass" :"regex" },{"stem" :"hol" },{"stem" :"honnan" },{"stem" :"hová" },{"stem" :"hány" ,"affix" :["an" ,"at" ,"ból" ]},{"stem" :"mettől" },{"stem" :"meddig" },{"stem" :"merre" },{"stem" :"mennyi" ,"affix" :["en" ,"re" ]},{"stem" :"mi" ,"affix" :["t" ,"k" ,"ket" ,"kor" ,"korra" ,"lyen" ,"lyenek" ,"nek" ,"től" ,"kortól" ,"korra" ,"ből" ,"hez" ,"re" ,"vel" ]},{"stem" :"ki(k?(e?t|nek|[bt][oöő]l|hez|re|[kv]el)|\saz?)" ,"wordclass" :"regex" }],
20
- "conditional" : [{"stem" :"volna" },{ "stem " :"lenne " },{"stem" :"\w+h[ae]t\w+" ,"wordclass" :"regex" }],
20
+ "conditional" : [{"stem" :"(meg)?(vol|tud|[lt]en?)n[aáeé][dl]?" , "wordclass " :"regex " },{"stem" :"\w+h[ae]t\w+" ,"wordclass" :"regex" }],
21
21
"profanity" : [{"stem" :"(fel|le|meg|r[aá]|ki|be|oda|[oö]s+ze|bele|hoz+[aá])?bas*z+d?\s?(at)?(hat)?(us|a[dk]?|n?[aá][kl]|[aá]?t[aáo][lkm]?|ot+|ni|n[aá]n?[dlkm]?|va|meg)?" ,"wordclass" :"regex" ,"exc" :[{"stem" :"megye" }]},{"stem" :"fasz" ,"prefix" :["ló" ,"agy" ],"wordclass" :"noun" },{"stem" :"fasza" ,"wordclass" :"adjective" },{"stem" :"geci" ,"wordclass" :"noun" },{"stem" :"kurva" ,"affix" :["élet" ,"anya" ,"anyja" ,"annya" ],"wordclass" :"noun" },{"stem" :"hülye" ,"wordclass" :"adjective" },{"stem" :"pi(n|cs)[aá][dk]?(a?t|nak|ban?|[bt][oó]l|[eé]rt)?" ,"wordclass" :"regex" },{"stem" :"((bekap(ja?|hato?|n[aái])?d?)|(kap.*?be))" ,"wordclass" :"regex" },{"stem" :"(le)?szop(sz|ol|[jn][aá][dl]|hat(sz|n[aá]l|o[dl]))(\s?(le|ki))?" ,"wordclass" :"regex" },{"stem" :"(geci|kurva)?(fos|szar)\w{0,3}" ,"wordclass" :"regex" }],
22
22
"welldone" : [{"stem" :"fasza" },{"stem" :"nagyszerű" },{"stem" :"remek" ,"max_words" :5 },{"stem" :"jó" ,"prefix" :["kurva" ],"exc" :[{"stem" :"nincs" },{"stem" :"nem" },{"stem" :"éjt" },{"stem" :"reggelt" },{"stem" :"napot" },{"stem" :"estét" },{"stem" :"éjszakát" }]},{"stem" :"j[oó]l\s?van" ,"wordclass" :"regex" },{"stem" :"király" },{"stem" :"ügyes" },{"stem" :"(sz[eé]p\s(volt|munka))|(ez\s(lesz\s)?az)|(sz?uper)|zs[ií]r" ,"wordclass" :"regex" },{"stem" :"👍" ,"wordclass" :"emoji" },{"stem" :"\(Y\)" ,"wordclass" :"regex" ,"boundary" :False },{"stem" :"profi vagy" },{"stem" :"fant[aoö](rp|sz?t)i[ck](us)?(an)?" ,"wordclass" :"regex" },{"stem" :"szeretem" ,"inc" :[{"stem" :"amikor" },{"stem" :"ahogy" }],"exc" :[{"stem" :"nem" }]}],
23
23
"dontknow" : [{"stem" :"fogalmam sincs" ,"affix" :["en" ]},{"stem" :"(m[eé]g)?[ns]em?\stud(hat)?o\w+" ,"wordclass" :"regex" },{"stem" :"hon+an.+?tud(jam|(hat)?n[aá]m)" ,"wordclass" :"regex" }],
@@ -53,7 +53,7 @@ def commands():
53
53
"volume_up" : [{"stem" :"((n[oö]vel\w+|magas\w+|fel|t[oö]b+)\s(\w+\s)?hang(er)?[oöő]?t?|hang(er)?[oöő]?t?\s(n[oö]vel\w+|magas\w+|fel))" ,"wordclass" :"regex" },{"stem" :"hangos\w+" ,"wordclass" :"regex" ,"exc" :[{"stem" :"túl" }]},{"stem" :"t[uú]l\shalk\w*" ,"wordclass" :"regex" ,"exc" :[{"stem" :"túl" }]},{"stem" :"(nem|alig|sem+it\s[ns]em?)\shal+[ao][km]" ,"wordclass" :"regex" },{"stem" :"adj\w*(\sm[eé]g)?(\sr[aá])?(\sm[eé]g)?\s(hang\w+t|kaka[oó]t)" ,"wordclass" :"regex" }],
54
54
"volume_down" : [{"stem" :"((cs[oö]k+en\w+|alacsony\w+|le(j+eb+)?|keveseb+)\s(\w+\s)?hang(er)?[eoöő]?(j[aáeé])?t?|hang(er)?[eoöő]?(j[aáeé])?t?\s(cs[oö]k+en\w+|alacsony\w+|le(j+eb+)?))" ,"wordclass" :"regex" },{"stem" :"t[uú]l\shangos\w*" ,"wordclass" :"regex" },{"stem" :"halk[aií]\w+" ,"wordclass" :"regex" ,"exc" :[{"stem" :"túl" }]}],
55
55
"mute" : [{"stem" :"n[eé]m[aáií]\w{0,3}" ,"wordclass" :"regex" ,"exc" :[{"stem" :"vége" },{"stem" :"vissza" },{"stem" :"feloldás" ,"affix" :["a" ]}]},{"stem" :"mute" ,"wordclass" :"verb" ,"prefix" :["le" ,"ki" ]},{"stem" :"(kus+(ol\w*)?|cs[eoö]nd(ben?|et)?)(\sel|\s?legyen)?" ,"wordclass" :"regex" }],
56
- "unmute" : [{"stem" :"n[eé]m[aáií]\w{0,3}" ,"wordclass" :"regex" ,"inc" :[{"stem" :"vége" },{"stem" :"vissza" },{"stem" :"feloldás" ,"affix" :["a" ]}]},{"stem" :"unmute" ,"wordclass" :"verb" ,"prefix" :[]},{"stem" :"hang(o\w+)?" ,"wordclass" :"regex" ,"inc" :[{"stem" :"vissza" }]}]
56
+ "unmute" : [{"stem" :"n[eé]m[aáií]\w{0,3}" ,"wordclass" :"regex" ,"inc" :[{"stem" :"vége" },{"stem" :"vissza" },{"stem" :"feloldás" ,"affix" :["a" ]}]},{"stem" :"unmute" ,"wordclass" :"verb" ,"prefix" :[]},{"stem" :"hang(o\w+)?" ,"wordclass" :"regex" ,"inc" :[{"stem" :"vissza" },{ "stem" : "((be|vis+za)kapcsol\w*)|(kapcsol\w*\s(az?\s)?(\w+\s)?(be|vis+za))" , "wordclass" : "regex" } ]}]
57
57
}
58
58
59
59
# Hungarian counties and county seats
@@ -130,7 +130,7 @@ def smalltalk():
130
130
"are_you_thirsty" : [{"stem" :"kérsz" ,"inc" :[{"stem" :"inni" }]},{"stem" :"nem vagy szomjas" },{"stem" :"szomjas vagy" },{"stem" :"(nem\s)?i(n+[aá]|szo)l?\s(meg\s)?(most\s)?(velem\s)?valamit?" ,"wordclass" :"regex" }],
131
131
"are_you_busy" : [{"stem" :"elfoglalt" ,"inc" :[{"stem" :"vagy" }]},{"stem" :"r[aá]m?\s?[eé]r(n[eé]l|sz)(\smost)?(\segy)?(\skicsit|\skis\s\w+|\svalamen+yi\w*)?" ,"wordclass" :"regex" },{"stem" :"(van|volna)\s(most\s)?(r[aá]m?\s)?(most\s)?(egy\s)?(kis\s|kev[eé]s\s|valamen+yi\s)?(szabad\s?)?id[oöő]d(\sr[aá]m)?" ,"wordclass" :"regex" },{"stem" :"sok dolgod van" }],
132
132
"are_you_lying" : [{"stem" :"hazud" ,"wordclass" :"verb" },{"stem" :"nem mondt[aá][dl]\s((el|meg)\saz\s)?igaz(at|s[aá]got)" ,"wordclass" :"regex" }],
133
- "are_you_serious" : [{"stem" :"(nem?|csak)\s(vic+el(sz|j)|mond+(od)?|ideges[ií]ts)" , "wordclass" : "regex" },{ "stem" : "(ne|csak)?\svic+el(sz|j )" ,"wordclass" :"regex" },{"stem" :"(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad)|hisz(i|ed)|hit+ed?)" ,"wordclass" :"regex" },{"stem" :"biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mondod|mondja )" ,"wordclass" :"regex" },{"stem" :"ezt?\s(most\s)?komoly(an)?" ,"wordclass" :"regex" }],
133
+ "are_you_serious" : [{"stem" :"(nem?|csak)\s(vic+el(sz|j)? |mond+(od|ja )?|ideges[ií]ts(en)? )" ,"wordclass" :"regex" },{"stem" :"(komolyan|t[eé]nyleg)\s?([uúií]gy\s|azt\s)?((mond|gondol|[ií]r)(ja|od|tad? )|hisz(i|ed)|hit+ed?)" ,"wordclass" :"regex" },{"stem" :"biztos(an)?\s(vagy\s)?(\w+\s)?(ben+e|eb+en|mond(ta|o)d|mond[jt]a )" ,"wordclass" :"regex" },{"stem" :"ezt?\s(most\s)?komoly(an)?" ,"wordclass" :"regex" }],
134
134
"can_you_hear_me" : [{"stem" :"(olvas+a|hal+ja|n[eé]zi|van\sit+)(\sezt)?\s(vala|b[aá]r)ki(\sis)?" ,"wordclass" :"regex" },{"stem" :"(hal+(asz|od)|l[aá]t(sz|od)|vesze[ld])\s(engem|a?mit\s(mondok|[ií]rok|k[eé]rdezek))" ,"wordclass" :"regex" },{"stem" :"valaki\s(hal+(ja)?\s|olvas+a|figyeli?(\sar+a)?)\sa?mit\s(ide\s?|it+\s)?([ií]rok|mondok|k[eé]rdezek)" ,"wordclass" :"regex" },{"stem" :"felfogtad" ,"max_words" :3 }],
135
135
"can_you_learn" : [{"stem" :"(k[eé]pes(\svagy)?|tud(sz)?)\stanulni" ,"wordclass" :"regex" },{"stem" :"tanulsz\s(is|[ae].+?b[oóöő]l)" ,"wordclass" :"regex" },{"stem" :"[dln][aáeéo][km]\s(be|meg)?tan[ií]tani\b " ,"wordclass" :"regex" ,"boundary" :False }],
136
136
"can_you_understand_me" :[{"stem" :"(meg)?[eé]rt(e(d|sz|t+ed?)|i)\,?((\shogy)?\sa?mit\s([ií]r|mond)\w+|\smagyarul)" ,"wordclass" :"regex" }]
0 commit comments