Skip to content

Commit 0ad5a76

Browse files
authored
fix: 修复分段时,特殊情况会丢失数据 #938 (#946)
1 parent e59e262 commit 0ad5a76

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

apps/common/util/split_model.py

Lines changed: 9 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,7 +27,7 @@ def get_level_block(text, level_content_list, level_content_index, cursor):
2727
level_content_list) else None
2828
start_index = text.index(start_content, cursor)
2929
end_index = text.index(next_content, start_index + 1) if next_content is not None else len(text)
30-
return text[start_index+len(start_content):end_index], end_index
30+
return text[start_index + len(start_content):end_index], end_index
3131

3232

3333
def to_tree_obj(content, state='title'):
@@ -303,17 +303,20 @@ def parse_to_tree(self, text: str, index=0):
303303
level_content_list.insert(0, to_tree_obj(""))
304304

305305
cursor = 0
306-
for i in range(len(level_content_list)):
307-
block, cursor = get_level_block(text, level_content_list, i, cursor)
306+
level_title_content_list = [item for item in level_content_list if item.get('state') == 'title']
307+
for i in range(len(level_title_content_list)):
308+
start_content: str = level_title_content_list[i].get('content')
309+
if cursor < text.index(start_content, cursor):
310+
level_content_list.insert(0, to_tree_obj(text[cursor: text.index(start_content, cursor)], 'block'))
311+
block, cursor = get_level_block(text, level_title_content_list, i, cursor)
308312
if len(block) == 0:
309-
level_content_list[i]['children'] = [to_tree_obj("", "block")]
310313
continue
311314
children = self.parse_to_tree(text=block, index=index + 1)
312-
level_content_list[i]['children'] = children
315+
level_title_content_list[i]['children'] = children
313316
first_child_idx_in_block = block.lstrip().index(children[0]["content"].lstrip())
314317
if first_child_idx_in_block != 0:
315318
inner_children = self.parse_to_tree(block[:first_child_idx_in_block], index + 1)
316-
level_content_list[i]['children'].extend(inner_children)
319+
level_title_content_list[i]['children'].extend(inner_children)
317320
return level_content_list
318321

319322
def parse(self, text: str):

0 commit comments

Comments (0)