Skip to content

Commit 0df160f

Browse files
committed
v0.6.10 fix str_to_tree to support unicode characters
1 parent d525f61 commit 0df160f

File tree

4 files changed

+44
-15
lines changed

4 files changed

+44
-15
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.
44
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7+
## [0.6.10] - 2023-01-23
8+
### Fixed
9+
- Tree Construct: `str_to_tree` to accept prefixes to support unicode characters in node names.
10+
711
## [0.6.9] - 2023-01-22
812
### Added
913
- Tree Construct: `str_to_tree` to construct tree from tree string.
@@ -177,6 +181,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
177181
- Utility Iterator: Tree traversal methods.
178182
- Workflow To Do App: Tree use case with to-do list implementation.
179183

184+
[0.6.10]: https://github.com/kayjan/bigtree/compare/v0.6.9...v0.6.10
180185
[0.6.9]: https://github.com/kayjan/bigtree/compare/v0.6.8...v0.6.9
181186
[0.6.8]: https://github.com/kayjan/bigtree/compare/v0.6.7...v0.6.8
182187
[0.6.7]: https://github.com/kayjan/bigtree/compare/v0.6.6...v0.6.7

bigtree/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
__version__ = "0.6.9"
1+
__version__ = "0.6.10"
22

33
from bigtree.binarytree.construct import list_to_binarytree
44
from bigtree.dag.construct import dataframe_to_dag, dict_to_dag, list_to_dag

bigtree/tree/construct.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
from collections import OrderedDict
23
from typing import List, Tuple, Type
34

@@ -409,13 +410,14 @@ def add_dataframe_to_tree_by_name(
409410

410411
def str_to_tree(
411412
tree_string: str,
413+
tree_prefix_list: List[str] = [],
412414
node_type: Type[Node] = Node,
413415
) -> Node:
414416
r"""Construct tree from tree string
415417
416418
>>> from bigtree import str_to_tree, print_tree
417419
>>> tree_str = 'a\n├── b\n│ ├── d\n│ └── e\n│ ├── g\n│ └── h\n└── c\n └── f'
418-
>>> root = str_to_tree(tree_str)
420+
>>> root = str_to_tree(tree_str, tree_prefix_list=["├──", "└──"])
419421
>>> print_tree(root)
420422
a
421423
├── b
@@ -428,6 +430,8 @@ def str_to_tree(
428430
429431
Args:
430432
tree_string (str): String to construct tree
433+
tree_prefix_list (list): List of prefixes that mark the end of the tree branch/stem and the start of the node name, optional.
434+
If not specified, unicode characters and whitespace are inferred as the prefix.
431435
node_type (Type[Node]): node type of tree to be created, defaults to Node
432436
433437
Returns:
@@ -443,21 +447,25 @@ def str_to_tree(
443447
prefix_length = None
444448
cur_parent = tree_root
445449
for node_str in tree_list[1:]:
446-
node_name = node_str.encode("ascii", "ignore").decode("ascii").lstrip()
450+
if len(tree_prefix_list):
451+
node_name = re.split("|".join(tree_prefix_list), node_str)[-1].lstrip()
452+
else:
453+
node_name = node_str.encode("ascii", "ignore").decode("ascii").lstrip()
447454

448455
# Find node parent
449456
if not prefix_length:
450457
prefix_length = node_str.index(node_name)
451458
if not prefix_length:
452459
raise ValueError(
453-
f"Invalid prefix, prefix should be unicode character or whitespace, check: {node_str}"
460+
f"Invalid prefix, prefix should be unicode character or whitespace, "
461+
f"otherwise specify one or more prefixes in `tree_prefix_list`, check: {node_str}"
454462
)
455-
node_prefix = node_str.index(node_name)
456-
if node_prefix % prefix_length:
463+
node_prefix_length = node_str.index(node_name)
464+
if node_prefix_length % prefix_length:
457465
raise ValueError(
458466
f"Tree string have different prefix length, check branch: {node_str}"
459467
)
460-
while cur_parent.depth > node_prefix / prefix_length:
468+
while cur_parent.depth > node_prefix_length / prefix_length:
461469
cur_parent = cur_parent.parent
462470

463471
# Link node

tests/tree/test_construct.py

Lines changed: 24 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -865,6 +865,30 @@ def test_str_to_tree(self):
865865
assert_tree_structure_basenode_root_generic(root)
866866
assert_tree_structure_node_root_generic(root)
867867

868+
def test_str_to_tree_with_prefix(self):
869+
root = str_to_tree(self.tree_str, tree_prefix_list=["─"])
870+
assert_tree_structure_basenode_root_generic(root)
871+
assert_tree_structure_node_root_generic(root)
872+
873+
def test_str_to_tree_with_multiple_prefix(self):
874+
root = str_to_tree(self.tree_str, tree_prefix_list=["├──", "└──"])
875+
assert_tree_structure_basenode_root_generic(root)
876+
assert_tree_structure_node_root_generic(root)
877+
878+
def test_ascii_character_error(self):
879+
tree_str = "a\n|-- b\n| |-- d\n| +-- e\n| |-- g\n| +-- h\n+-- c\n +-- f"
880+
with pytest.raises(ValueError) as exc_info:
881+
str_to_tree(tree_str)
882+
assert str(exc_info.value).startswith(
883+
"Invalid prefix, prefix should be unicode character or whitespace, otherwise specify one or more prefixes"
884+
)
885+
886+
def test_ascii_character_with_prefix(self):
887+
tree_str = "a\n|-- b\n| |-- d\n| +-- e\n| |-- g\n| +-- h\n+-- c\n +-- f"
888+
root = str_to_tree(tree_str, tree_prefix_list=["-"])
889+
assert_tree_structure_basenode_root_generic(root)
890+
assert_tree_structure_node_root_generic(root)
891+
868892
def test_empty_string(self):
869893
with pytest.raises(ValueError) as exc_info:
870894
str_to_tree("")
@@ -881,14 +905,6 @@ def test_empty_newline_string(self):
881905
== "Tree string does not contain any data, check `tree_string`"
882906
)
883907

884-
def test_invalid_prefix(self):
885-
tree_str = "a\n|-- b\n| |-- d\n| +-- e\n| |-- g\n| +-- h\n+-- c\n +-- f"
886-
with pytest.raises(ValueError) as exc_info:
887-
str_to_tree(tree_str)
888-
assert str(exc_info.value).startswith(
889-
"Invalid prefix, prefix should be unicode character or whitespace"
890-
)
891-
892908
def test_unequal_prefix_length(self):
893909
tree_str = "a\n├── b\n│ ├── d\n│ └── e\n│ ├── g\n│ └── h\n└── c\n └── f"
894910
with pytest.raises(ValueError) as exc_info:

0 commit comments

Comments
 (0)