Skip to content

Commit dc45681

Browse files
authored
Merge pull request #124 from openlawlibrary/merge-all-hyperlink
Merge all hyperlink
2 parents 485dac0 + d585c0f commit dc45681

File tree

7 files changed

+181
-22
lines changed

7 files changed

+181
-22
lines changed

docx/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from docx.api import Document # noqa
44

5-
__version__ = '0.8.10.29'
5+
__version__ = '0.8.10.30'
66

77

88
# register custom Part classes with opc package reader

docx/oxml/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -287,3 +287,6 @@ def OxmlElement(nsptag_str, attrs=None, nsdecls=None):
287287

288288
from .text.symbol import CT_Sym
289289
register_element_cls('w:sym', CT_Sym)
290+
291+
from .text.hyperlink import CT_Hyperlink
292+
register_element_cls('w:hyperlink', CT_Hyperlink)

docx/oxml/text/hyperlink.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# encoding: utf-8
2+
3+
"""
4+
Custom element classes related to hyperlinks (CT_Hyperlink).
5+
"""
6+
7+
8+
from ..simpletypes import ST_String
9+
from ..xmlchemy import (
10+
BaseOxmlElement, OptionalAttribute, ZeroOrMore
11+
)
12+
13+
class CT_Hyperlink(BaseOxmlElement):
    """
    ``<w:hyperlink>`` element, a hyperlink wrapping zero or more ``<w:r>``
    runs.

    The link target is carried by one of two optional string attributes:
    `relationship_id` (``r:id``) or `anchor` (``w:anchor``).
    """
    # Optional ``w:anchor`` attribute, read and written as a string.
    anchor = OptionalAttribute('w:anchor', ST_String)
    # Optional ``r:id`` attribute, read and written as a string.
    relationship_id = OptionalAttribute('r:id', ST_String)
    # Zero or more ``<w:r>`` child elements.
    r = ZeroOrMore('w:r')

docx/oxml/text/paragraph.py

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,36 @@ class CT_P(BaseOxmlElement):
1717
pPr = ZeroOrOne('w:pPr', successors=('w:bookmarkEnd',))
1818
r = ZeroOrMore('w:r', successors=('w:bookmarkEnd',))
1919
bookmarkEnd = ZeroOrMore('w:bookmarkEnd')
20+
hyperlink = ZeroOrMore('w:hyperlink')
2021

2122
def _insert_pPr(self, pPr):
2223
self.insert(0, pPr)
2324
return pPr
2425

26+
def add_hyperlink(self, text, reference):
    """
    Add a new ``<w:hyperlink>`` element to this paragraph and return it.

    The hyperlink receives a single run whose text is `text` and whose
    style is ``'Hyperlink'``.

    `reference` is interpreted by prefix only: a value starting with
    ``rId`` is stored in the `relationship_id` attribute of the
    ``<w:hyperlink>``; any other value is stored in its `anchor`
    attribute (i.e. it is taken to be a bookmark name). This method does
    not create a relationship itself; the caller must already have
    registered the URL and pass the resulting relationship id.
    """
    hyperlink = self._add_hyperlink()
    run = hyperlink._add_r()
    run.text = text
    run.style = 'Hyperlink'
    # NOTE(review): a bookmark whose name happens to start with "rId" is
    # indistinguishable from a relationship id here.
    is_relationship_id = reference.startswith('rId')
    if not is_relationship_id:
        hyperlink.anchor = reference
    else:
        hyperlink.relationship_id = reference
    return hyperlink
49+
2550
def add_p_before(self):
2651
"""
2752
Return a new ``<w:p>`` element inserted directly prior to this one.
@@ -121,13 +146,15 @@ def style(self, style):
121146
pPr = self.get_or_add_pPr()
122147
pPr.style = style
123148

124-
def iter_r_lst_recursive(self):
149+
def iter_r_and_hyperlinks(self, return_hyperlinks=False):
125150
"""
126151
Override xmlchemy generated list of runs to include runs from
127152
hyperlinks and content controls.
153+
If the argument `return_hyperlinks` is `True` then the hyperlinks
154+
will be yielded as `CT_Hyperlink`.
128155
"""
129156

130-
def get_runs(elem):
157+
def get_el(elem):
131158
# Two flags used to remove hidden parts of complex field characters.
132159
ignoreRun = 0 # used to count nesting of ``<w:fldChar>``, if it's 0 then the run property is not inside a hidden part of ``<w:fldChar>``
133160
hasSeparate = False
@@ -161,6 +188,11 @@ def get_runs(elem):
161188
# yields runs that have at least one visible element
162189
if len(child) > 0:
163190
yield child
164-
elif child.tag in (qn('w:hyperlink'), qn('w:sdt'), qn('w:sdtContent'), qn('w:smartTag'),):
165-
yield from get_runs(child)
166-
yield from get_runs(self)
191+
elif child.tag == qn('w:hyperlink'):
192+
if return_hyperlinks:
193+
yield child
194+
else:
195+
yield from get_el(child)
196+
elif child.tag in (qn('w:sdt'), qn('w:sdtContent'), qn('w:smartTag'),):
197+
yield from get_el(child)
198+
yield from get_el(self)

docx/shared.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from __future__ import absolute_import, print_function, unicode_literals
88
from functools import wraps
99

10+
from docx.exceptions import PythonDocxError
11+
1012

1113
class Length(int):
1214
"""
@@ -275,3 +277,35 @@ def wrapper(self, *args, **kwargs):
275277
return out
276278
return wrapper
277279
return decorator
280+
281+
282+
def find_containing_document(element):
    """
    Walk up the `_parent` chain starting at `element` and return the first
    ancestor that is a `Document` instance.

    Raises `PythonDocxError` if an object without a `_parent` attribute is
    reached before a `Document` is found.
    """
    # Imported here rather than at module level, presumably to avoid a
    # circular import between `docx.shared` and `docx.document`.
    from .document import Document
    current = element
    while hasattr(current, '_parent'):
        owner = current._parent
        if isinstance(owner, Document):
            return owner
        current = owner
    raise PythonDocxError(f'{type(current)} has no `_parent` property.')
295+
296+
297+
def is_valid_url(url):
    """
    Return `True` if `url` is a string containing a well-formed ``http://``
    or ``https://`` URL, `False` otherwise.

    The host may be a dotted domain name, ``localhost`` or a dotted-quad
    IPv4 address, optionally followed by a port and a path/query.
    Non-string values (including `None`) are never valid.
    """
    if not isinstance(url, str):
        return False
    import re
    regex = re.compile(
        r'^https?://'  # http:// or https://
        # Domain labels followed by an alphabetic TLD. The TLD may be up to
        # 63 characters (the DNS label limit) so that long TLDs such as
        # ``.technology`` are accepted.
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
        r'localhost|'  # localhost...
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or ip
        r'(?::\d+)?'  # optional port
        # ``\Z`` rather than ``$``: ``$`` also matches just before a trailing
        # newline, which would let "http://host\n" pass validation.
        r'(?:/?|[/?]\S+)\Z', re.IGNORECASE)
    return regex.match(url) is not None

docx/text/hyperlink.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
from ..shared import Parented, find_containing_document
2+
3+
class Hyperlink(Parented):
    """
    Proxy object wrapping a ``<w:hyperlink>`` element found in a paragraph.
    """

    def __init__(self, h, parent):
        super(Hyperlink, self).__init__(parent)
        self._h = h
        self._parent = parent

    @property
    def text(self):
        """
        The text of every run in the hyperlink, joined into one string.
        Empty string when the hyperlink has no run list.
        """
        runs = self._h.r_lst
        if runs is None:
            return ''
        return ''.join(run.text for run in runs)

    @property
    def link(self):
        """
        Target of the hyperlink: the `target_ref` of the relationship named
        by the element's `relationship_id` when that attribute is set,
        otherwise the value of its `anchor` attribute (a bookmark name).
        """
        rel_id = self._h.relationship_id
        if not rel_id:
            return self._h.anchor
        # NOTE(review): the relationship is looked up on the containing
        # document's part; confirm this is right for hyperlinks that live
        # in other parts (e.g. headers/footers).
        document = find_containing_document(self)
        return document.part.rels[rel_id].target_ref

docx/text/paragraph.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,14 @@
1515
from ..enum.style import WD_STYLE_TYPE
1616
from .parfmt import ParagraphFormat
1717
from .run import Run
18-
from ..shared import Parented, Length, lazyproperty, Inches, cache, bust_cache
18+
from .hyperlink import Hyperlink
19+
from ..shared import Parented, Length, find_containing_document, is_valid_url, lazyproperty, Inches, cache, bust_cache
1920
from ..oxml.ns import nsmap
21+
from ..oxml.text.hyperlink import CT_Hyperlink
2022
from docx.bookmark import BookmarkParent
2123
from docx.parts.image import ImagePart
2224

25+
from docx.opc.constants import RELATIONSHIP_TYPE as RT
2326

2427
# Decorator for all text changing functions used to invalidate text cache.
2528
text_changing = bust_cache(('text', 'run_text'))
@@ -51,6 +54,19 @@ def add_footnote(self):
5154
footnote = document._add_footnote(new_fr_id)
5255
return footnote
5356

57+
@text_changing
def add_hyperlink(self, text, reference):
    """
    Append a ``<w:hyperlink>`` element displaying `text` to this paragraph
    and return it wrapped in a |Hyperlink|.

    `reference` is either a URL or a bookmark name. When it passes
    `is_valid_url`, a hyperlink relationship is registered on the
    containing document's part and the resulting relationship id is
    passed to the underlying element; otherwise `reference` is passed
    through unchanged as a bookmark name.

    Decorated with `text_changing`, like `add_run`, because the new
    hyperlink adds a run to the paragraph and the cached text must be
    invalidated.
    """
    if is_valid_url(reference):
        # Store the URL as a relationship and refer to it by rId. The
        # third positional argument is presumably `is_external` -- confirm
        # against `Part.relate_to`.
        document = find_containing_document(self)
        reference = document.part.relate_to(reference, RT.HYPERLINK, True)
    return Hyperlink(self._p.add_hyperlink(text, reference), self)
69+
5470
@text_changing
5571
def add_run(self, text=None, style=None):
5672
"""
@@ -110,7 +126,6 @@ def set_std_placeholder_text(r, text=None):
110126
active_placeholder = sdtPr._add_active_placeholder()
111127
active_placeholder.set('{%s}val' % nsmap['w'], 'true')
112128

113-
114129
sdt = self._p._new_sdt()
115130

116131
sdtPr = sdt._add_sdtPr()
@@ -164,6 +179,14 @@ def clear(self):
164179
self._p.clear_content()
165180
return self
166181

182+
@property
def runs_and_hyperlinks(self):
    """
    List of |Run| and |Hyperlink| proxies, in the order yielded by
    `iter_r_and_hyperlinks(return_hyperlinks=True)` on the underlying
    paragraph element. ``<w:hyperlink>`` elements become |Hyperlink|
    objects; everything else is wrapped as a |Run|.
    """
    items = []
    for element in self._p.iter_r_and_hyperlinks(return_hyperlinks=True):
        proxy_cls = Hyperlink if isinstance(element, CT_Hyperlink) else Run
        items.append(proxy_cls(element, self))
    return items
189+
167190
@property
168191
def footnotes(self):
169192
"""
@@ -179,6 +202,14 @@ def footnotes(self):
179202
footnote_list.append(footnotes[ref_id])
180203
return footnote_list
181204

205+
@property
def hyperlinks(self):
    """
    List of |Hyperlink| proxies, one for each element in the
    `hyperlink_lst` of the underlying paragraph element.
    """
    proxies = []
    for h in self._p.hyperlink_lst:
        proxies.append(Hyperlink(h, self))
    return proxies
212+
182213
def insert_paragraph_before(self, text=None, style=None, ilvl=None):
183214
"""
184215
Return a newly created paragraph, inserted directly before this
@@ -269,7 +300,7 @@ def remove_text(self, start=0, end=-1):
269300
runend = runstart + len(run.text)
270301
if runstart <= start and end <= runend:
271302
run.text = run.text[:(start-runstart)] \
272-
+ run.text[(end-runstart):]
303+
+ run.text[(end-runstart):]
273304
if not run.text:
274305
run._r.getparent().remove(run._r)
275306
return self
@@ -334,7 +365,8 @@ def lvl_from_para_props(self):
334365
"""
335366
if self._lvl_from_para_props is None:
336367
try:
337-
self._lvl_from_para_props = self._p.lvl_from_para_props(self.part.numbering_part._element)
368+
self._lvl_from_para_props = self._p.lvl_from_para_props(
369+
self.part.numbering_part._element)
338370
except (AttributeError, NotImplementedError):
339371
return None
340372
return self._lvl_from_para_props
@@ -384,7 +416,7 @@ def runs(self):
384416
Sequence of |Run| instances corresponding to the <w:r> elements in
385417
this paragraph.
386418
"""
387-
return [Run(r, self) for r in self._p.iter_r_lst_recursive()]
419+
return [Run(r, self) for r in self._p.iter_r_and_hyperlinks()]
388420

389421
@property
390422
def bookmark_starts(self):
@@ -445,7 +477,7 @@ def set_li_lvl(self, styles, prev, ilvl):
445477
prev_el = prev._element if prev else None
446478
_ilvl = 0 if ilvl is None else ilvl
447479
self._p.set_li_lvl(self.part.numbering_part._element,
448-
self.part.cached_styles, prev_el, _ilvl)
480+
self.part.cached_styles, prev_el, _ilvl)
449481

450482
@property
451483
@cache
@@ -506,7 +538,7 @@ def insert_text(self, position, new_text):
506538
runend += len(run.text)
507539
if runend >= position:
508540
run.text = run.text[:(position-runstart)] \
509-
+ new_text + run.text[(position-runstart):]
541+
+ new_text + run.text[(position-runstart):]
510542
break
511543
return self
512544

@@ -618,7 +650,8 @@ def _inner_get_tabstops(obj):
618650

619651
obj = obj.paragraph_format
620652

621-
tabstops.extend([round(ts.position.inches, 2) for ts in obj.tab_stops])
653+
tabstops.extend([round(ts.position.inches, 2)
654+
for ts in obj.tab_stops])
622655
clear_t_stops = [round(ts.position.inches, 2)
623656
for ts in obj.tab_stops
624657
if ts._element.attrib['{%s}val' % nsmap['w']] == 'clear']
@@ -637,16 +670,21 @@ def apply_formatting(source, first_line_indent=None, left_indent=None):
637670

638671
# Apply paragraph styles by priority (from lowest to highest).
639672
# Formatting from the base style has the lowest priority.
640-
first_line_indent = get_base_style_attr(self.style, 'first_line_indent')
673+
first_line_indent = get_base_style_attr(
674+
self.style, 'first_line_indent')
641675
left_indent = get_base_style_attr(self.style, 'left_indent')
642676
# Next, we apply formatting from numbering properties defined in paragraph style.
643-
first_line_indent, left_indent = apply_formatting(self.style_numbering_format, first_line_indent, left_indent)
677+
first_line_indent, left_indent = apply_formatting(
678+
self.style_numbering_format, first_line_indent, left_indent)
644679
# Then formatting from paragraph style.
645-
first_line_indent, left_indent = apply_formatting(self.style.paragraph_format, first_line_indent, left_indent)
680+
first_line_indent, left_indent = apply_formatting(
681+
self.style.paragraph_format, first_line_indent, left_indent)
646682
# Next, formatting from numbering properties defined in direct paragraph properties is applied.
647-
first_line_indent, left_indent = apply_formatting(self.para_numbering_format, first_line_indent, left_indent)
683+
first_line_indent, left_indent = apply_formatting(
684+
self.para_numbering_format, first_line_indent, left_indent)
648685
# Finally, we apply formatting from direct paragraph formatting.
649-
first_line_indent, left_indent = apply_formatting(self.paragraph_format, first_line_indent, left_indent)
686+
first_line_indent, left_indent = apply_formatting(
687+
self.paragraph_format, first_line_indent, left_indent)
650688

651689
# Get explicitly set indentation
652690
if first_line_indent is not None:
@@ -664,13 +702,15 @@ def apply_formatting(source, first_line_indent=None, left_indent=None):
664702

665703
# Find out the number of tabs at the beginning of the paragraph.
666704
# Ignore regular spaces.
667-
tab_count = self.text[:len(self.text) - len(self.text.lstrip())].count('\t')
705+
tab_count = self.text[:len(self.text) -
706+
len(self.text.lstrip())].count('\t')
668707

669708
if tab_count:
670709

671710
# Get tab stops but only those to the right of first line indent as the previous
672711
# don't affect the indentation.
673-
tab_stops = [ts for ts in get_tabstops(self) if ts > first_line_indent]
712+
tab_stops = [ts for ts in get_tabstops(
713+
self) if ts > first_line_indent]
674714

675715
# If the first line indent is left of the paragraph indent, first tab will tab to
676716
# the paragraph indent.
@@ -695,7 +735,8 @@ def apply_formatting(source, first_line_indent=None, left_indent=None):
695735

696736
# Let's round up to the first tab char indent. If already rounded add one.
697737
tab_count -= 1
698-
indent = math.ceil(indent) if not indent.is_integer() else indent + 1
738+
indent = math.ceil(
739+
indent) if not indent.is_integer() else indent + 1
699740

700741
# The remaining tab chars just adds whole indents.
701742
indent += tab_count

0 commit comments

Comments
 (0)