Skip to content

Commit

Permalink
Merge pull request #45 from szegedai/bugfix
Browse files (browse the repository at this point in the history)
Minor improvements
  • Loading branch information
nsomabalint authored Jul 9, 2024
2 parents 5fbd132 + 0178301 commit d77b5d3
Show file tree
Hide file tree
Showing 5 changed files with 16 additions and 5 deletions.
2 changes: 1 addition & 1 deletion hun_date_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

__all__ = ["DatetimeTextualizer", "DatetimeExtractor", "datetime2text", "text2datetime", "text2date", "text2time"]

__version__ = "0.2.7"
__version__ = "0.2.8"
4 changes: 3 additions & 1 deletion hun_date_parser/date_parser/date_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,9 @@ def has_month_already_pass(now, month):
continue

if bool(group[0].strip(" ")):
if 'jovo' in remove_accent(group[0]):
if ('jovo' in remove_accent(group[0])
# HACK: 'jovok' (e.g. "jövök májusban") also contains 'jovo' but must not be read as "next year"
and 'jovok' not in remove_accent(group[0])):
group_res['date_parts'].append(Year(now.year + 1, 'named_month'))
elif 'tavaly' in remove_accent(group[0]):
group_res['date_parts'].append(Year(now.year - 1, 'named_month'))
Expand Down
6 changes: 3 additions & 3 deletions hun_date_parser/date_parser/patterns.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,14 @@
R_MULTI = r'(.*)\bvagy\b(.*)|(.*)\bés\b(.*)'

# schemas
R_TOLIG = r'(.*-?t[oóöő]l\b)(.*-?ig\b)'
R_TOL = r'([:\w ]*-?t[oóöő]l\b).*'
R_TOLIG = r'(.*-?t[oóöő]l\b|.* [oó]ta\b)(.*-?ig\b)'
R_TOL = r'([:\w ]*-?t[oóöő]l\b|.* [oó]ta\b).*'
R_IG = r'([:\w ]*-?ig\b)'

# hyper day level patterns
R_ISO_DATE = r'(\b\d{4,4})(?:[-\\/\. ] ?(1[0-2]|0?[1-9]))?(?:[-\\/\. ] ?(1[0-9]|2[0-9]|3[01]|0?[1-9]))?'
R_REV_ISO_DATE = r'\b(1[0-9]|2[0-9]|3[01]|0?[1-9])[-\\/\. ] ?(1[0-2]|0?[1-9])[-\\/\. ] ?(\b\d{4,4})'
R_NAMED_MONTH = r'(j[oöő]v[oöő].*?|tavaly.*?)?(\bjan(?:\b|\.|u[aá]r){1}|\bfeb(?:\b|r\.|\.|ru[aá]r){1}|\bm[aá]r(?:\b|c\b|c\.|\.|cius){1}|\b[aá]pr(?:\b|\.|ilis){1}\b|m[aá]j(?:\b|\.|us){1}|\bj[uú]n(?:\b|\.|ius){1}|\bj[uú]l(?:\b|\.|ius){1}|\baug(?:\b|\.|usztus){1}|\bszep(?:t\b|t\.|\b|\.|tember){1}|\bokt(?:\b|\.|[oó]ber){1}|\bnov(?:\b|\.|ember){1}|\bdec(?:\b|\.|ember))(?: ([1-3][0-9]|[1-9]|egy.{0,5}|kett[őöo].{0,5}|h[aá]rom.{0,5}|n[eée]gy.{0,5}|öt.{0,5}|hat.{0,5}|h[eé]t.{0,5}|nyolc.{0,5}|kilenc.{0,5}|tíz.{0,5}|tizen.{0,10}|huszon.{0,10}|harminc.{0,10}|elsej[eé].{0,5}|második[aá].{0,5}|harmadik[aá].{0,5}|negyedik[eé].{0,5}|ötödik[eé].{0,5}|hatodik[aá].{0,5}|hetedik[eé].{0,5}|nyolcadik[aá].{0,5}|kilencedik[eé].{0,5}|tizedik[eé].{0,5}|tizen.{0,10}|huszadik[aá].{0,5}|huszon.{0,10}|harmincadik[aá].{0,5}))?'
R_NAMED_MONTH = r'\b(j[oöő]v[oöő].*?|tavaly.*?)?(\bjan(?:\b|\.|u[aá]r){1}|\bfeb(?:\b|r\.|\.|ru[aá]r){1}|\bm[aá]r(?:\b|c\b|c\.|\.|cius){1}|\b[aá]pr(?:\b|\.|ilis){1}\b|m[aá]j(?:\b|\.|us){1}|\bj[uú]n(?:\b|\.|ius){1}|\bj[uú]l(?:\b|\.|ius){1}|\baug(?:\b|\.|usztus){1}|\bszep(?:t\b|t\.|\b|\.|tember){1}|\bokt(?:\b|\.|[oó]ber){1}|\bnov(?:\b|\.|ember){1}|\bdec(?:\b|\.|ember))(?: ([1-3][0-9]|[1-9]|egy.{0,5}|kett[őöo].{0,5}|h[aá]rom.{0,5}|n[eée]gy.{0,5}|öt.{0,5}|hat.{0,5}|h[eé]t.{0,5}|nyolc.{0,5}|kilenc.{0,5}|tíz.{0,5}|tizen.{0,10}|huszon.{0,10}|harminc.{0,10}|elsej[eé].{0,5}|második[aá].{0,5}|harmadik[aá].{0,5}|negyedik[eé].{0,5}|ötödik[eé].{0,5}|hatodik[aá].{0,5}|hetedik[eé].{0,5}|nyolcadik[aá].{0,5}|kilencedik[eé].{0,5}|tizedik[eé].{0,5}|tizen.{0,10}|huszadik[aá].{0,5}|huszon.{0,10}|harmincadik[aá].{0,5}))?'
R_RELATIVE_MONTH = r'(?:(\blegut[oó]bbi|\butols[oó]|\bmúlt|\but[oó]bbi|\bezen|\bebben|\baktu[aá]lis|\bj[oöő]v[oöő]|\bk[oö]vetkez[oőö]|\bk[oö]vetkezend[oőö]).*)? a?h[oó]nap'

R_WEEKDAY = r'(?:(el[oő]z[oő]|m[uú]lt|ezen|j[oöő]v[oöő]).*)?(h[eé]tf[oő]|kedd|szerd[aá]|cs[uü]t[oö]rt[oö]k|p[eé]ntek|szombat|vas[aá]rnap)'
Expand Down
5 changes: 5 additions & 0 deletions test/test_datetime_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,11 @@
("az idei események", [datetime(2020, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
("az idén történtek", [datetime(2020, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
("az ebben az évben történtek", [datetime(2020, 1, 1), datetime(2020, 12, 31, 23, 59, 59)]),
("bejövő hívás májusban", [datetime(2020, 5, 1), datetime(2020, 5, 31, 23, 59, 59)]),
("jövök májusban", [datetime(2020, 5, 1), datetime(2020, 5, 31, 23, 59, 59)]),
(" május óta", [datetime(2020, 5, 1), None]),
(" május 5 óta", [datetime(2020, 5, 5), None]),
(" majus 5 ota", [datetime(2020, 5, 5), None]),
]


Expand Down
4 changes: 4 additions & 0 deletions test/test_structure_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@ def test_match_interval():
('reggeltől estig', {'start_date': 'reggeltől', 'end_date': 'estig'}),
('kedden', {}),
('2020 január másodikától jövő év közepéig', {'start_date': '2020 január másodikától', 'end_date': 'jövő év közepéig'}),
('2020 január másodika óta jövő év közepéig',
{'start_date': '2020 január másodika óta', 'end_date': 'jövő év közepéig'}),
('2020 decemberétől', {'start_date': '2020 decemberétől', 'end_date': 'OPEN'}),
('2020 decembere óta', {'start_date': '2020 decembere óta', 'end_date': 'OPEN'}),
('január 1 óta', {'start_date': 'január 1 óta', 'end_date': 'OPEN'}),
('ma reggeltől bármikor', {'start_date': 'ma reggeltől', 'end_date': 'OPEN'}),
('egészen péntekig jó lesz', {'start_date': 'OPEN', 'end_date': 'egészen péntekig'}),
('2020-10-12-től 2020-11-01-ig', {'start_date': '2020-10-12-től', 'end_date': '2020-11-01-ig'})]
Expand Down

0 comments on commit d77b5d3

Please sign in to comment.