Skip to content

Commit bd433f7

Browse files
authored
ENH: Parse and format comb fields in text widget annotations (#3519)
This patch implements comb-formatted appearance streams. Closes #2153.
1 parent c0caa5d commit bd433f7

File tree

2 files changed

+94
-18
lines changed

2 files changed

+94
-18
lines changed

pypdf/generic/_appearance_stream.py

Lines changed: 62 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,9 @@ def _generate_appearance_stream_data(
145145
font_size: float = 0.0,
146146
font_color: str = "0 g",
147147
is_multiline: bool = False,
148-
alignment: TextAlignment = TextAlignment.LEFT
148+
alignment: TextAlignment = TextAlignment.LEFT,
149+
is_comb: bool = False,
150+
max_length: Optional[int] = None
149151
) -> bytes:
150152
"""
151153
Generates the raw bytes of the PDF appearance stream for a text field.
@@ -168,6 +170,10 @@ def _generate_appearance_stream_data(
168170
graphics state string (e.g., "0 g" for black).
169171
is_multiline: A boolean indicating if the text field is multiline.
170172
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
173+
is_comb: Boolean that designates fixed-length fields, where every character
174+
fills one "cell", such as in a postcode.
175+
max_length: Used if is_comb is set. The maximum number of characters (i.e. the
176+
number of cells) of a fixed-length field.
171177
172178
Returns:
173179
A byte string containing the PDF content stream data.
@@ -196,6 +202,17 @@ def _generate_appearance_stream_data(
196202
text,
197203
is_multiline,
198204
)
205+
elif is_comb:
206+
if max_length and len(text) > max_length:
207+
logger_warning (
208+
f"Length of text {text} exceeds maximum length ({max_length}) of field, input truncated.",
209+
__name__
210+
)
211+
# We act as if each character is one line, because we draw it separately later on
212+
lines = [(
213+
font_descriptor.text_width(char) * font_size / 1000,
214+
char
215+
) for index, char in enumerate(text) if index < (max_length or len(text))]
199216
else:
200217
lines = [(
201218
font_descriptor.text_width(line) * font_size / 1000,
@@ -222,7 +239,15 @@ def _generate_appearance_stream_data(
222239

223240
# Calculate the desired absolute starting X for the current line
224241
desired_abs_x_start: float = 0
225-
if alignment == TextAlignment.RIGHT:
242+
if is_comb and max_length:
243+
# Calculate the width of a cell for one character
244+
cell_width = rectangle.width / max_length
245+
# Space from the left edge of the cell to the character's baseline start
246+
# line_width here is the *actual* character width in points for the single character 'line'
247+
centering_offset_in_cell = (cell_width - line_width) / 2
248+
# Absolute start X = (cell index, i.e. line_number) * cell width + centering offset
249+
desired_abs_x_start = (line_number * cell_width) + centering_offset_in_cell
250+
elif alignment == TextAlignment.RIGHT:
226251
desired_abs_x_start = rectangle.width - 2 - line_width
227252
elif alignment == TextAlignment.CENTER:
228253
desired_abs_x_start = (rectangle.width - line_width) / 2
@@ -236,6 +261,8 @@ def _generate_appearance_stream_data(
236261
y_rel_offset: float = 0
237262
if line_number == 0:
238263
y_rel_offset = y_offset # Initial vertical position
264+
elif is_comb:
265+
y_rel_offset = 0.0 # DO NOT move vertically for subsequent characters
239266
else:
240267
y_rel_offset = - font_size * 1.4 # Move down by line height
241268

@@ -266,7 +293,9 @@ def __init__(
266293
font_size: float = 0.0,
267294
font_color: str = "0 g",
268295
is_multiline: bool = False,
269-
alignment: TextAlignment = TextAlignment.LEFT
296+
alignment: TextAlignment = TextAlignment.LEFT,
297+
is_comb: bool = False,
298+
max_length: Optional[int] = None
270299
) -> None:
271300
"""
272301
Initializes a TextStreamAppearance object.
@@ -286,6 +315,10 @@ def __init__(
286315
font_color: The font color string.
287316
is_multiline: A boolean indicating if the text field is multiline.
288317
alignment: Text alignment, can be TextAlignment.LEFT, .RIGHT, or .CENTER.
318+
is_comb: Boolean that designates fixed-length fields, where every character
319+
fills one "cell", such as in a postcode.
320+
max_length: Used if is_comb is set. The maximum number of characters (i.e. the
321+
number of cells) of a fixed-length field.
289322
290323
"""
291324
super().__init__()
@@ -331,11 +364,13 @@ def __init__(
331364
rectangle,
332365
font_descriptor,
333366
font_glyph_byte_map,
334-
font_name,
335-
font_size,
336-
font_color,
337-
is_multiline,
338-
alignment
367+
font_name=font_name,
368+
font_size=font_size,
369+
font_color=font_color,
370+
is_multiline=is_multiline,
371+
alignment=alignment,
372+
is_comb=is_comb,
373+
max_length=max_length
339374
)
340375

341376
self[NameObject("/Type")] = NameObject("/XObject")
@@ -439,12 +474,8 @@ def from_text_annotation(
439474
if not is_null_or_none(font_resource):
440475
font_resource = cast(DictionaryObject, font_resource.get_object())
441476

442-
# Retrieve field text, selected values and formatting information
443-
is_multiline = False
477+
# Retrieve field text and selected values
444478
field_flags = field.get(FieldDictionaryAttributes.Ff, 0)
445-
alignment = field.get("/Q", TextAlignment.LEFT)
446-
if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
447-
is_multiline = True
448479
if (
449480
field.get(FieldDictionaryAttributes.FT, "/Tx") == "/Ch" and
450481
field_flags & FieldDictionaryAttributes.FfBits.Combo == 0
@@ -460,17 +491,30 @@ def from_text_annotation(
460491
# Escape parentheses (PDF 1.7 reference, table 3.2, Literal Strings)
461492
text = text.replace("\\", "\\\\").replace("(", r"\(").replace(")", r"\)")
462493

494+
# Retrieve formatting information
495+
is_comb = False
496+
max_length = None
497+
if field_flags & FieldDictionaryAttributes.FfBits.Comb:
498+
is_comb = True
499+
max_length = annotation.get("/MaxLen")
500+
is_multiline = False
501+
if field_flags & FieldDictionaryAttributes.FfBits.Multiline:
502+
is_multiline = True
503+
alignment = field.get("/Q", TextAlignment.LEFT)
504+
463505
# Create the TextStreamAppearance instance
464506
new_appearance_stream = cls(
465507
text,
466508
selection,
467509
rectangle,
468510
font_resource,
469-
font_name,
470-
font_size,
471-
font_color,
472-
is_multiline,
473-
alignment
511+
font_name=font_name,
512+
font_size=font_size,
513+
font_color=font_color,
514+
is_multiline=is_multiline,
515+
alignment=alignment,
516+
is_comb=is_comb,
517+
max_length=max_length
474518
)
475519
if AnnotationDictionaryAttributes.AP in annotation:
476520
for key, value in (

tests/test_appearance_stream.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,38 @@
33
from pypdf.generic._appearance_stream import TextStreamAppearance
44

55

6+
def test_comb():
7+
rectangle = (0.0, 0.0, 197.285, 18.455)
8+
font_size = 10.0
9+
text = "01234567"
10+
max_length = 10
11+
is_comb = True
12+
appearance_stream = TextStreamAppearance(
13+
text, rectangle=rectangle, font_size=font_size, is_comb=is_comb, max_length=max_length
14+
)
15+
assert appearance_stream.get_data() == (
16+
b"q\n/Tx BMC \nq\n1 1 196.285 17.455 re\nW\nBT\n/Helv 10.0 Tf 0 g\n"
17+
b"7.084250000000001 7.454999999999998 Td\n(0) Tj\n"
18+
b"19.7285 0.0 Td\n(1) Tj\n"
19+
b"19.728500000000004 0.0 Td\n(2) Tj\n"
20+
b"19.728499999999997 0.0 Td\n(3) Tj\n"
21+
b"19.728499999999997 0.0 Td\n(4) Tj\n"
22+
b"19.728499999999997 0.0 Td\n(5) Tj\n"
23+
b"19.72850000000001 0.0 Td\n(6) Tj\n"
24+
b"19.728499999999997 0.0 Td\n(7) Tj\nET\nQ\nEMC\nQ\n"
25+
)
26+
27+
rectangle = (0.0, 0.0, 20.852, 20.84)
28+
text = "AA"
29+
max_length = 1
30+
appearance_stream = TextStreamAppearance(
31+
text, rectangle=rectangle, font_size=font_size, is_comb=is_comb, max_length=max_length
32+
)
33+
assert appearance_stream.get_data() == (
34+
b"q\n/Tx BMC \nq\n1 1 19.852 19.84 re\nW\nBT\n/Helv 10.0 Tf 0 g\n7.091 9.84 Td\n(A) Tj\nET\nQ\nEMC\nQ\n"
35+
)
36+
37+
638
def test_scale_text():
739
rectangle = (0, 0, 9.1, 55.4)
840
font_size = 10.1

0 commit comments

Comments
 (0)