Skip to content

Commit 7dfb46e

Browse files
minorninth authored and Skia Commit-Bot committed
Re-land: Support adding attributes to PDF document structure nodes.
Originally landed: https://skia-review.googlesource.com/c/skia/+/268878 Reverted: https://skia-review.googlesource.com/c/skia/+/271858 The issue was with compilation when PDF support is disabled. See the diff between patchsets 1 and 2. This is an important part of writing a tagged PDF. Many of the nodes in the document structure tree need additional attributes, just like in HTML. This change aims to add support for a few useful attributes, not to be comprehensive. Bug: chromium:1039816 Change-Id: I15f8b6c41d4fdaa4b6e21775ab6d26ec57eb0f5d Reviewed-on: https://skia-review.googlesource.com/c/skia/+/271916 Commit-Queue: Dominic Mazzoni <dmazzoni@chromium.org> Reviewed-by: Mike Reed <reed@google.com>
1 parent d2d4c5e commit 7dfb46e

File tree

7 files changed

+269
-14
lines changed

7 files changed

+269
-14
lines changed

RELEASE_NOTES.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ Milestone 82
2323
related calls is treated as a request to do no color correction at decode
2424
time.
2525

26+
* Add new APIs to add attributes to document structure nodes when
27+
creating a tagged PDF.
28+
2629
* Remove CGFontRef parameter from SkCreateTypefaceFromCTFont.
2730
Use CTFontManagerCreateFontDescriptorFromData instead of
2831
CGFontCreateWithDataProvider to create CTFonts to avoid memory use issues.

gn/tests.gni

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ tests_sources = [
169169
"$_tests/PDFMetadataAttributeTest.cpp",
170170
"$_tests/PDFOpaqueSrcModeToSrcOverTest.cpp",
171171
"$_tests/PDFPrimitivesTest.cpp",
172+
"$_tests/PDFTaggedTableTest.cpp",
172173
"$_tests/PDFTaggedTest.cpp",
173174
"$_tests/PackBitsTest.cpp",
174175
"$_tests/PackedConfigsTextureTest.cpp",

include/docs/SkPDFDocument.h

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,21 @@
55

66
#include "include/core/SkDocument.h"
77

8+
#include <vector>
9+
10+
#include "include/core/SkColor.h"
811
#include "include/core/SkMilestone.h"
912
#include "include/core/SkScalar.h"
1013
#include "include/core/SkString.h"
1114
#include "include/core/SkTime.h"
15+
#include "include/private/SkNoncopyable.h"
1216

1317
#define SKPDF_STRING(X) SKPDF_STRING_IMPL(X)
1418
#define SKPDF_STRING_IMPL(X) #X
1519

1620
class SkExecutor;
21+
class SkPDFArray;
22+
class SkPDFTagTree;
1723

1824
namespace SkPDF {
1925

@@ -71,16 +77,42 @@ enum class DocumentStructureType {
7177
kForm, //!< Form control (not like an HTML FORM element)
7278
};
7379

80+
/** Attributes for nodes in the PDF tree. */
81+
class SK_API AttributeList : SkNoncopyable {
82+
public:
83+
AttributeList();
84+
~AttributeList();
85+
86+
// Each attribute must have an owner (e.g. "Layout", "List", "Table", etc)
87+
// and an attribute name (e.g. "BBox", "RowSpan", etc.) from PDF32000_2008 14.8.5,
88+
// and then a value of the proper type according to the spec.
89+
void appendInt(const char* owner, const char* name, int value);
90+
void appendFloat(const char* owner, const char* name, float value);
91+
void appendString(const char* owner, const char* name, const char* value);
92+
void appendFloatArray(const char* owner,
93+
const char* name,
94+
const std::vector<float>& value);
95+
void appendStringArray(const char* owner,
96+
const char* name,
97+
const std::vector<SkString>& value);
98+
99+
private:
100+
friend class ::SkPDFTagTree;
101+
102+
std::unique_ptr<SkPDFArray> fAttrs;
103+
};
104+
74105
/** A node in a PDF structure tree, giving a semantic representation
75106
of the content. Each node ID is associated with content
76107
by passing the SkCanvas and node ID to SkPDF::SetNodeId() when drawing.
77108
NodeIDs should be unique within each tree.
78109
*/
79110
struct StructureElementNode {
80-
const StructureElementNode* fChildren = nullptr;
81-
size_t fChildCount;
82-
int fNodeId;
83-
DocumentStructureType fType;
111+
StructureElementNode* fChildren = nullptr;
112+
size_t fChildCount = 0;
113+
int fNodeId = 0;
114+
DocumentStructureType fType = DocumentStructureType::kNonStruct;
115+
AttributeList fAttributes;
84116
};
85117

86118
/** Optional metadata to be passed into the PDF factory function.
@@ -149,7 +181,7 @@ struct Metadata {
149181
a semantic representation of the content. The caller
150182
should retain ownership.
151183
*/
152-
const StructureElementNode* fStructureElementTreeRoot = nullptr;
184+
StructureElementNode* fStructureElementTreeRoot = nullptr;
153185

154186
/** Executor to handle threaded work within PDF Backend. If this is nullptr,
155187
then all work will be done serially on the main thread. To have worker

src/pdf/SkDocument_PDF_None.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,14 @@
88
#include "include/core/SkCanvas.h"
99
#include "include/docs/SkPDFDocument.h"
1010

11+
// Empty stand-in for SkPDFArray. SkPDF::AttributeList owns a
// std::unique_ptr<SkPDFArray>, and its defaulted destructor defined later in
// this file needs a complete type in order to compile.
class SkPDFArray {};
12+
1113
// This variant never creates a document: both arguments are ignored and the
// caller always receives nullptr.
sk_sp<SkDocument> SkPDF::MakeDocument(SkWStream*, const SkPDF::Metadata&) { return nullptr; }
1214

1315
// Records structure-node id `n` on canvas `c` by drawing an annotation with an
// all-zero rect, the key "PDF_Node_Key", and a copy of the raw bytes of `n`.
void SkPDF::SetNodeId(SkCanvas* c, int n) {
    const sk_sp<SkData> payload = SkData::MakeWithCopy(&n, sizeof(n));
    c->drawAnnotation({0, 0, 0, 0}, "PDF_Node_Key", payload.get());
}
18+
19+
// Defaulted here so the AttributeList constructor declared in SkPDFDocument.h
// has a definition in this translation unit.
SkPDF::AttributeList::AttributeList() = default;
20+
21+
// Defaulted here, after the SkPDFArray definition in this file, because
// destroying the std::unique_ptr<SkPDFArray> member requires a complete type.
SkPDF::AttributeList::~AttributeList() = default;

src/pdf/SkPDFTag.cpp

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,70 @@ static const char* tag_name_from_type(SkPDF::DocumentStructureType type) {
6666
SK_ABORT("bad tag");
6767
}
6868

69+
// Defined out of line: the public header only forward-declares SkPDFArray,
// while AttributeList holds a std::unique_ptr<SkPDFArray>.
SkPDF::AttributeList::AttributeList() = default;
70+
71+
// Defined out of line for the same reason as the constructor: releasing the
// std::unique_ptr<SkPDFArray> member needs the complete SkPDFArray type.
SkPDF::AttributeList::~AttributeList() = default;
72+
73+
void SkPDF::AttributeList::appendInt(
74+
const char* owner, const char* name, int value) {
75+
if (!fAttrs)
76+
fAttrs = SkPDFMakeArray();
77+
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
78+
attrDict->insertName("O", owner);
79+
attrDict->insertInt(name, value);
80+
fAttrs->appendObject(std::move(attrDict));
81+
}
82+
83+
void SkPDF::AttributeList::appendFloat(
84+
const char* owner, const char* name, float value) {
85+
if (!fAttrs)
86+
fAttrs = SkPDFMakeArray();
87+
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
88+
attrDict->insertName("O", owner);
89+
attrDict->insertScalar(name, value);
90+
fAttrs->appendObject(std::move(attrDict));
91+
}
92+
93+
void SkPDF::AttributeList::appendString(
94+
const char* owner, const char* name, const char* value) {
95+
if (!fAttrs)
96+
fAttrs = SkPDFMakeArray();
97+
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
98+
attrDict->insertName("O", owner);
99+
attrDict->insertName(name, value);
100+
fAttrs->appendObject(std::move(attrDict));
101+
}
102+
103+
void SkPDF::AttributeList::appendFloatArray(
104+
const char* owner, const char* name, const std::vector<float>& value) {
105+
if (!fAttrs)
106+
fAttrs = SkPDFMakeArray();
107+
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
108+
attrDict->insertName("O", owner);
109+
std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
110+
for (float element : value) {
111+
pdfArray->appendScalar(element);
112+
}
113+
attrDict->insertObject(name, std::move(pdfArray));
114+
fAttrs->appendObject(std::move(attrDict));
115+
}
116+
117+
void SkPDF::AttributeList::appendStringArray(
118+
const char* owner,
119+
const char* name,
120+
const std::vector<SkString>& value) {
121+
if (!fAttrs)
122+
fAttrs = SkPDFMakeArray();
123+
std::unique_ptr<SkPDFDict> attrDict = SkPDFMakeDict();
124+
attrDict->insertName("O", owner);
125+
std::unique_ptr<SkPDFArray> pdfArray = SkPDFMakeArray();
126+
for (SkString element : value) {
127+
pdfArray->appendName(element);
128+
}
129+
attrDict->insertObject(name, std::move(pdfArray));
130+
fAttrs->appendObject(std::move(attrDict));
131+
}
132+
69133
struct SkPDFTagNode {
70134
SkPDFTagNode* fChildren = nullptr;
71135
size_t fChildCount = 0;
@@ -82,16 +146,18 @@ struct SkPDFTagNode {
82146
kYes,
83147
kNo,
84148
} fCanDiscard = kUnknown;
149+
std::unique_ptr<SkPDFArray> fAttributes;
85150
};
86151

87152
// NOTE(review): the argument passed to fArena is presumably a size hint worth
// four SkPDFTagNode objects — confirm against SkArenaAlloc's constructor.
SkPDFTagTree::SkPDFTagTree() : fArena(4 * sizeof(SkPDFTagNode)) {}
88153

89154
// Defaulted in this file rather than inline in the class declaration.
SkPDFTagTree::~SkPDFTagTree() = default;
90155

91-
static void copy(const SkPDF::StructureElementNode& node,
92-
SkPDFTagNode* dst,
93-
SkArenaAlloc* arena,
94-
SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
156+
// static
157+
void SkPDFTagTree::Copy(SkPDF::StructureElementNode& node,
158+
SkPDFTagNode* dst,
159+
SkArenaAlloc* arena,
160+
SkTHashMap<int, SkPDFTagNode*>* nodeMap) {
95161
nodeMap->set(node.fNodeId, dst);
96162
size_t childCount = node.fChildCount;
97163
SkPDFTagNode* children = arena->makeArray<SkPDFTagNode>(childCount);
@@ -100,14 +166,15 @@ static void copy(const SkPDF::StructureElementNode& node,
100166
dst->fType = node.fType;
101167
dst->fChildren = children;
102168
for (size_t i = 0; i < childCount; ++i) {
103-
copy(node.fChildren[i], &children[i], arena, nodeMap);
169+
Copy(node.fChildren[i], &children[i], arena, nodeMap);
104170
}
171+
dst->fAttributes = std::move(node.fAttributes.fAttrs);
105172
}
106173

107-
void SkPDFTagTree::init(const SkPDF::StructureElementNode* node) {
174+
void SkPDFTagTree::init(SkPDF::StructureElementNode* node) {
108175
if (node) {
109176
fRoot = fArena.make<SkPDFTagNode>();
110-
copy(*node, fRoot, &fArena, &fNodeMap);
177+
Copy(*node, fRoot, &fArena, &fNodeMap);
111178
}
112179
}
113180

@@ -184,6 +251,13 @@ SkPDFIndirectReference prepare_tag_tree_to_emit(SkPDFIndirectReference parent,
184251
dict.insertName("S", tag_name_from_type(node->fType));
185252
dict.insertRef("P", parent);
186253
dict.insertObject("K", std::move(kids));
254+
SkString idString;
255+
idString.printf("%d", node->fNodeId);
256+
dict.insertName("ID", idString.c_str());
257+
if (node->fAttributes) {
258+
dict.insertObject("A", std::move(node->fAttributes));
259+
}
260+
187261
return doc->emit(dict, ref);
188262
}
189263

@@ -223,4 +297,3 @@ SkPDFIndirectReference SkPDFTagTree::makeStructTreeRoot(SkPDFDocument* doc) {
223297
structTreeRoot.insertRef("ParentTree", doc->emit(parentTree));
224298
return doc->emit(structTreeRoot, ref);
225299
}
226-

src/pdf/SkPDFTag.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,17 @@ class SkPDFTagTree {
2121
public:
2222
SkPDFTagTree();
2323
~SkPDFTagTree();
24-
void init(const SkPDF::StructureElementNode*);
24+
void init(SkPDF::StructureElementNode*);
2525
void reset();
2626
int getMarkIdForNodeId(int nodeId, unsigned pageIndex);
2727
SkPDFIndirectReference makeStructTreeRoot(SkPDFDocument* doc);
2828

2929
private:
30+
static void Copy(SkPDF::StructureElementNode& node,
31+
SkPDFTagNode* dst,
32+
SkArenaAlloc* arena,
33+
SkTHashMap<int, SkPDFTagNode*>* nodeMap);
34+
3035
SkArenaAlloc fArena;
3136
SkTHashMap<int, SkPDFTagNode*> fNodeMap;
3237
SkPDFTagNode* fRoot = nullptr;

tests/PDFTaggedTableTest.cpp

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/*
2+
* Copyright 2020 Google Inc.
3+
*
4+
* Use of this source code is governed by a BSD-style license that can be
5+
* found in the LICENSE file.
6+
*/
7+
#include "tests/Test.h"
8+
9+
#include "include/core/SkCanvas.h"
10+
#include "include/core/SkFont.h"
11+
#include "include/core/SkStream.h"
12+
#include "include/docs/SkPDFDocument.h"
13+
14+
using PDFTag = SkPDF::StructureElementNode;
15+
16+
// Test building a tagged PDF containing a table.
17+
// Add this to args.gn to output the PDF to a file:
18+
// extra_cflags = [ "-DSK_PDF_TEST_TAGS_OUTPUT_PATH=\"/tmp/table.pdf\"" ]
19+
DEF_TEST(SkPDF_tagged_table, r) {
20+
REQUIRE_PDF_DOCUMENT(SkPDF_tagged, r);
21+
#ifdef SK_PDF_TEST_TAGS_OUTPUT_PATH
22+
SkFILEWStream outputStream(SK_PDF_TEST_TAGS_OUTPUT_PATH);
23+
#else
24+
SkDynamicMemoryWStream outputStream;
25+
#endif
26+
27+
SkSize pageSize = SkSize::Make(612, 792); // U.S. Letter
28+
29+
SkPDF::Metadata metadata;
30+
metadata.fTitle = "Example Tagged Table PDF";
31+
metadata.fCreator = "Skia";
32+
SkTime::DateTime now;
33+
SkTime::GetDateTime(&now);
34+
metadata.fCreation = now;
35+
metadata.fModified = now;
36+
37+
constexpr int kRowCount = 5;
38+
constexpr int kColCount = 4;
39+
const char* cellData[kRowCount * kColCount] = {
40+
"Car", "Engine", "City MPG", "Highway MPG",
41+
"Mitsubishi Mirage ES", "Gas", "28", "47",
42+
"Toyota Prius Three", "Hybrid", "43", "59",
43+
"Nissan Leaf SL", "Electric", "N/A", nullptr,
44+
"Tesla Model 3", nullptr, "N/A", nullptr
45+
};
46+
47+
// The document tag.
48+
PDFTag root;
49+
root.fNodeId = 1;
50+
root.fType = SkPDF::DocumentStructureType::kDocument;
51+
root.fChildCount = 2;
52+
PDFTag rootChildren[2];
53+
54+
// Heading.
55+
PDFTag& h1 = rootChildren[0];
56+
h1.fNodeId = 2;
57+
h1.fType = SkPDF::DocumentStructureType::kH1;
58+
h1.fChildCount = 0;
59+
60+
// Table.
61+
PDFTag& table = rootChildren[1];
62+
table.fNodeId = 3;
63+
table.fType = SkPDF::DocumentStructureType::kTable;
64+
table.fChildCount = 5;
65+
table.fAttributes.appendFloatArray("Layout", "BBox", {72, 72, 360, 360});
66+
67+
PDFTag rows[kRowCount];
68+
PDFTag all_cells[kRowCount * kColCount];
69+
for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
70+
PDFTag& row = rows[rowIndex];
71+
row.fNodeId = 4 + rowIndex;
72+
row.fType = SkPDF::DocumentStructureType::kTR;
73+
row.fChildCount = kColCount;
74+
PDFTag* cells = &all_cells[rowIndex * kColCount];
75+
76+
for (int colIndex = 0; colIndex < kColCount; colIndex++) {
77+
int cellIndex = rowIndex * kColCount + colIndex;
78+
PDFTag& cell = cells[colIndex];
79+
cell.fNodeId = 10 + cellIndex;
80+
if (!cellData[cellIndex])
81+
cell.fType = SkPDF::DocumentStructureType::kNonStruct;
82+
else if (rowIndex == 0 || colIndex == 0)
83+
cell.fType = SkPDF::DocumentStructureType::kTH;
84+
else
85+
cell.fType = SkPDF::DocumentStructureType::kTD;
86+
cell.fChildCount = 0;
87+
88+
if (cellIndex == 13) {
89+
cell.fAttributes.appendInt("Table", "RowSpan", 2);
90+
} else if (cellIndex == 14 || cellIndex == 18) {
91+
cell.fAttributes.appendInt("Table", "ColSpan", 2);
92+
} else if (cell.fType == SkPDF::DocumentStructureType::kTH) {
93+
cell.fAttributes.appendString(
94+
"Table", "Scope", rowIndex == 0 ? "Column" : "Row");
95+
}
96+
}
97+
row.fChildren = cells;
98+
}
99+
table.fChildren = rows;
100+
root.fChildren = rootChildren;
101+
102+
metadata.fStructureElementTreeRoot = &root;
103+
sk_sp<SkDocument> document = SkPDF::MakeDocument(
104+
&outputStream, metadata);
105+
106+
SkPaint paint;
107+
paint.setColor(SK_ColorBLACK);
108+
109+
SkCanvas* canvas =
110+
document->beginPage(pageSize.width(),
111+
pageSize.height());
112+
SkPDF::SetNodeId(canvas, 2);
113+
SkFont font(nullptr, 36);
114+
canvas->drawString("Tagged PDF Table", 72, 72, font, paint);
115+
116+
font.setSize(14);
117+
for (int rowIndex = 0; rowIndex < kRowCount; rowIndex++) {
118+
for (int colIndex = 0; colIndex < kColCount; colIndex++) {
119+
int cellIndex = rowIndex * kColCount + colIndex;
120+
const char* str = cellData[cellIndex];
121+
if (!str)
122+
continue;
123+
124+
int x = 72 + colIndex * 108 + (colIndex > 0 ? 72 : 0);
125+
int y = 144 + rowIndex * 48;
126+
127+
SkPDF::SetNodeId(canvas, 10 + cellIndex);
128+
canvas->drawString(str, x, y, font, paint);
129+
}
130+
}
131+
132+
document->endPage();
133+
document->close();
134+
outputStream.flush();
135+
}

0 commit comments

Comments
 (0)