diff --git a/CHANGELOG.md b/CHANGELOG.md index 8806e4d3bd..0c9f38ce08 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,8 @@ +## 0.3.5-dev0 + +* Add new pattern to recognize plain text dash bullets +* Add test for bullet patterns + ## 0.3.4 * Python-3.7 compat diff --git a/test_unstructured/partition/test_text_type.py b/test_unstructured/partition/test_text_type.py index 060251c2b3..c53064b456 100644 --- a/test_unstructured/partition/test_text_type.py +++ b/test_unstructured/partition/test_text_type.py @@ -88,6 +88,7 @@ def test_is_possible_title(text, expected, monkeypatch): ("⦿ This is a fine point!", True), (" This is a fine point!", True), ("* This is a fine point!", True), + ("- This is a fine point!", True), ("This is NOT a fine point!", False), # No bullet point ("I love morse code! ● ● ● --- ● ● ●", False), # Not at the beginning ], diff --git a/unstructured/__version__.py b/unstructured/__version__.py index 527ff71284..016dbc7725 100644 --- a/unstructured/__version__.py +++ b/unstructured/__version__.py @@ -1 +1 @@ -__version__ = "0.3.4" # pragma: no cover +__version__ = "0.3.5-dev0" # pragma: no cover diff --git a/unstructured/nlp/patterns.py b/unstructured/nlp/patterns.py index ec730f55cd..ac29ce3974 100644 --- a/unstructured/nlp/patterns.py +++ b/unstructured/nlp/patterns.py @@ -26,6 +26,7 @@ "\u2767", "\u29BE", "\u29BF", + "\u002D", "", "\*", # noqa: W605 NOTE(robinson) - skipping qa because we need the escape for the regex "\x95",