Skip to content

Commit 003b341

Browse files
authored
Merge pull request #41 from ewdlop/ewdlop-patch-33-Create-merge_html_elements.py
Create merge_html_elements.py
2 parents 687120d + d301ad4 commit 003b341

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

Python/merge_html_elements.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
from lxml import html, etree
2+
from itertools import zip_longest
3+
4+
def merge_elements(a: etree._Element, b: etree._Element):
    """Merge two element trees position by position and yield the result.

    This is a generator. Depending on the inputs it yields:

    * nothing, when both ``a`` and ``b`` are ``None``;
    * the one element that is present, when the other side is ``None``;
    * ``a`` and then ``b`` unchanged, when their tags differ or when they
      are not regular elements (e.g. comments, processing instructions);
    * otherwise a single newly created element with the shared tag.

    For a merged element, the attributes of ``a`` are applied first and
    those of ``b`` second (``b`` wins on a shared attribute name), ``text``
    is the concatenation of both texts, and the children are paired by
    position and merged recursively. The ``tail`` of ``a`` and ``b`` is not
    carried over to the merged element.

    Neither input tree is modified: children that end up in the merged
    tree without being merged are deep copies of the originals.

    Args:
        a: Left element, or ``None`` when the left side has no node here.
        b: Right element, or ``None`` when the right side has no node here.

    Yields:
        Zero, one or two elements as described above.
    """
    from copy import deepcopy  # local import keeps the block self-contained

    if a is None and b is None:
        return  # nothing on either side
    if a is None:
        yield b  # right side only
        return
    if b is None:
        yield a  # left side only
        return

    # Only regular elements with the same tag are merged. Comments and
    # processing instructions expose a factory function as ``.tag`` rather
    # than a name, so they cannot be rebuilt through ``etree.Element`` and
    # are reported as a difference instead.
    if a.tag != b.tag or not isinstance(a.tag, str):
        yield a
        yield b
        return

    # 1) Create the new node.
    merged = etree.Element(a.tag)

    # 2) Merge attributes: left first, then right (right overrides).
    merged.attrib.update(a.attrib)
    merged.attrib.update(b.attrib)

    # 3) Merge text.
    merged.text = (a.text or '') + (b.text or '')

    # 4) Recurse over the children pairwise. Iterating the inputs directly
    #    is safe because nothing below mutates them.
    for left_child, right_child in zip_longest(a, b):
        for sub in merge_elements(left_child, right_child):
            # A yielded node that still has a parent belongs to one of the
            # input trees; appending it directly would *move* it out of
            # that tree, so append an independent copy instead.
            if sub.getparent() is not None:
                sub = deepcopy(sub)
            merged.append(sub)

    # 5) Hand the finished node to the caller.
    yield merged
41+
42+
43+
# --------- Usage example ---------
# Two small lists whose <li> children are paired by position when merged.
html1 = """<ul>
<li id="a">apple</li>
<li id="b">banana</li>
</ul>"""

html2 = """<ul>
<li id="b">BANANA</li>
<li id="c">cherry</li>
</ul>"""

# Parse both fragments into element trees.
dom1, dom2 = (html.fromstring(markup) for markup in (html1, html2))

# Both roots are <ul>, so the generator yields exactly one merged element;
# the one-item tuple target unpacks it (and fails loudly otherwise).
(merged_ul,) = merge_elements(dom1, dom2)

print(html.tostring(merged_ul, encoding="unicode", pretty_print=True))

0 commit comments

Comments
 (0)