Skip to content

Commit b8942ba

Browse files
committed
Address comments from Ellis
1 parent a8f5e1a commit b8942ba

File tree

11 files changed

+189
-148
lines changed

11 files changed

+189
-148
lines changed

llvm/include/llvm/CodeGenData/CodeGenData.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ class CodeGenData {
107107
/// Global outlined hash tree that has outlined hash sequences across modules.
108108
std::unique_ptr<OutlinedHashTree> PublishedHashTree;
109109

110-
/// This flag is set when -fcgdata-generate is passed.
111-
/// Or, it can be mutated with -ftwo-codegen-rounds during two codegen runs.
110+
/// This flag is set when -fcodegen-data-generate is passed.
111+
/// Or, it can be mutated with -fcodegen-data-thinlto-two-rounds.
112112
bool EmitCGData;
113113

114114
/// This is a singleton instance which is thread-safe. Unlike profile data
@@ -174,7 +174,7 @@ namespace IndexedCGData {
174174
const uint64_t Magic = 0x81617461646763ff; // "\xffcgdata\x81"
175175

176176
enum CGDataVersion {
177-
// Version 1 is the first version. This version support the outlined
177+
// Version 1 is the first version. This version supports the outlined
178178
// hash tree.
179179
Version1 = 1,
180180
CurrentVersion = CG_DATA_INDEX_VERSION

llvm/lib/CodeGenData/CodeGenDataReader.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -144,23 +144,23 @@ Error TextCodeGenDataReader::read() {
144144

145145
// Parse the custom header line by line.
146146
while (Line->starts_with(":")) {
147-
StringRef Str = Line->substr(1);
147+
StringRef Str = Line->drop_front().rtrim();
148148
if (Str.equals_insensitive("outlined_hash_tree"))
149149
DataKind |= CGDataKind::FunctionOutlinedHashTree;
150150
else
151151
return error(cgdata_error::bad_header);
152152
++Line;
153153
}
154154

155-
// We treat an empty header (that as a comment # only) as a valid header.
155+
// We treat an empty header (that is a comment # only) as a valid header.
156156
if (Line.is_at_eof()) {
157157
if (DataKind != CGDataKind::Unknown)
158158
return error(cgdata_error::bad_header);
159159
return Error::success();
160160
}
161161

162162
// The YAML docs follow after the header.
163-
const char *Pos = (*Line).data();
163+
const char *Pos = Line->data();
164164
size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
165165
reinterpret_cast<size_t>(Pos);
166166
yaml::Input YOS(StringRef(Pos, Size));

llvm/lib/CodeGenData/CodeGenDataWriter.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -108,8 +108,8 @@ Error CodeGenDataWriter::writeHeader(CGDataOStream &COS) {
108108

109109
Header.OutlinedHashTreeOffset = 0;
110110

111-
// Only write out up to the CGDataKind. We need to remember the offest of the
112-
// remaing fields to allow back patching later.
111+
// Only write up to the CGDataKind. We need to remember the offset of the
112+
// remaining fields to allow back-patching later.
113113
COS.write(Header.Magic);
114114
COS.write32(Header.Version);
115115
COS.write32(Header.DataKind);

llvm/test/tools/llvm-cgdata/dump.test

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@ RUN: split-file %s %t
55
RUN: llvm-cgdata dump -binary %t/dump.cgtext -o %t/dump.cgdata
66
RUN: llvm-cgdata dump -text %t/dump.cgdata -o %t/dump-round.cgtext
77
RUN: llvm-cgdata dump -binary %t/dump-round.cgtext -o %t/dump-round.cgdata
8+
RUN: llvm-cgdata dump -text %t/dump-round.cgtext -o %t/dump-round-round.cgtext
89
RUN: diff %t/dump.cgdata %t/dump-round.cgdata
10+
RUN: diff %t/dump-round.cgtext %t/dump-round-round.cgtext
911

1012
;--- dump.cgtext
1113
# Outlined stable hash tree

llvm/test/tools/llvm-cgdata/empty.test

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
1+
# Test no input file
2+
RUN: not llvm-cgdata dump -o - 2>&1 | FileCheck %s --check-prefix=NOFILE
3+
NOFILE: error: No such file or directory
4+
15
# Test for empty cgdata file, which is invalid.
26
RUN: touch %t_emptyfile.cgtext
3-
RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text -o - 2>&1 | FileCheck %s --check-prefix ERROR
4-
ERROR: {{.}}emptyfile.cgtext: empty codegen data
7+
RUN: not llvm-cgdata dump %t_emptyfile.cgtext -text 2>&1 | FileCheck %s --check-prefix=EMPTY
8+
EMPTY: {{.}}emptyfile.cgtext: empty codegen data
59

610
# Test for empty header in the text format. It can be converted to a valid binary file.
711
RUN: printf '#' > %t_emptyheader.cgtext
812
RUN: llvm-cgdata dump %t_emptyheader.cgtext -binary -o %t_emptyheader.cgdata
913

1014
# Without any cgdata other than the header, no data shows by default.
11-
RUN: llvm-cgdata show %t_emptyheader.cgdata | FileCheck %s --allow-empty --check-prefix EMPTY
12-
EMPTY-NOT: any
15+
RUN: llvm-cgdata show %t_emptyheader.cgdata | count 0
1316

1417
# The version number appears when asked, as it's in the header
15-
RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix VERSION
16-
VERSION: Version: {{.}}
18+
RUN: llvm-cgdata show --cgdata-version %t_emptyheader.cgdata | FileCheck %s --check-prefix=VERSION
19+
VERSION: Version: 1
1720

1821
# When converting a binary file (w/ the header only) to a text file, it's an empty file as the text format does not have an explicit header.
19-
RUN: llvm-cgdata dump %t_emptyheader.cgdata -text -o - | FileCheck %s --allow-empty --check-prefix EMPTY
22+
RUN: llvm-cgdata dump %t_emptyheader.cgdata -text | count 0
2023

2124
# Synthesize a header only cgdata.
2225
# struct Header {

llvm/test/tools/llvm-cgdata/error.test

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,31 +8,31 @@
88
# uint64_t OutlinedHashTreeOffset;
99
# }
1010
RUN: touch %t_empty.cgdata
11-
RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix EMPTY
11+
RUN: not llvm-cgdata show %t_empty.cgdata 2>&1 | FileCheck %s --check-prefix=EMPTY
1212
EMPTY: {{.}}cgdata: empty codegen data
1313

1414
# Not a magic.
1515
RUN: printf '\xff' > %t_malformed.cgdata
16-
RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix MALFORMED
16+
RUN: not llvm-cgdata show %t_malformed.cgdata 2>&1 | FileCheck %s --check-prefix=MALFORMED
1717
MALFORMED: {{.}}cgdata: malformed codegen data
1818

1919
# The minimum header size is 24.
2020
RUN: printf '\xffcgdata\x81' > %t_corrupt.cgdata
21-
RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix CORRUPT
21+
RUN: not llvm-cgdata show %t_corrupt.cgdata 2>&1 | FileCheck %s --check-prefix=CORRUPT
2222
CORRUPT: {{.}}cgdata: invalid codegen data (file header is corrupt)
2323

2424
# The current version is 1 while the header says 2.
2525
RUN: printf '\xffcgdata\x81' > %t_version.cgdata
2626
RUN: printf '\x02\x00\x00\x00' >> %t_version.cgdata
2727
RUN: printf '\x00\x00\x00\x00' >> %t_version.cgdata
2828
RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_version.cgdata
29-
RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix BAD_VERSION
29+
RUN: not llvm-cgdata show %t_version.cgdata 2>&1 | FileCheck %s --check-prefix=BAD_VERSION
3030
BAD_VERSION: {{.}}cgdata: unsupported codegen data version
3131

3232
# Header says an outlined hash tree, but the file ends after the header.
3333
RUN: printf '\xffcgdata\x81' > %t_eof.cgdata
3434
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
3535
RUN: printf '\x01\x00\x00\x00' >> %t_eof.cgdata
3636
RUN: printf '\x18\x00\x00\x00\x00\x00\x00\x00' >> %t_eof.cgdata
37-
RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix EOF
37+
RUN: not llvm-cgdata show %t_eof.cgdata 2>&1 | FileCheck %s --check-prefix=EOF
3838
EOF: {{.}}cgdata: end of File

llvm/test/tools/llvm-cgdata/merge-archive.test

Lines changed: 47 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -2,17 +2,30 @@
22

33
RUN: split-file %s %t
44

5+
# Synthesize raw cgdata without the header (24 bytes) from the indexed cgdata.
6+
RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
7+
RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-1-bytes.txt
8+
RUN: sed -i "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-1.ll
59
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-1.ll -o %t/merge-1.o
10+
11+
# Synthesize raw cgdata without the header (24 bytes) from the indexed cgdata.
12+
RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
13+
RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-2-bytes.txt
14+
RUN: sed -i "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-2.ll
615
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-2.ll -o %t/merge-2.o
16+
17+
# Make an archive from two object files
718
RUN: llvm-ar rcs %t/merge-archive.a %t/merge-1.o %t/merge-2.o
19+
20+
# Merge the archive into the codegen data file.
821
RUN: llvm-cgdata merge %t/merge-archive.a -o %t/merge-archive.cgdata
922
RUN: llvm-cgdata show %t/merge-archive.cgdata | FileCheck %s
1023
CHECK: Outlined hash tree:
1124
CHECK-NEXT: Total Node Count: 4
1225
CHECK-NEXT: Terminal Node Count: 2
1326
CHECK-NEXT: Depth: 2
1427

15-
RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix TREE
28+
RUN: llvm-cgdata dump %t/merge-archive.cgdata | FileCheck %s --check-prefix=TREE
1629
TREE: # Outlined stable hash tree
1730
TREE-NEXT: :outlined_hash_tree
1831
TREE-NEXT: ---
@@ -34,42 +47,41 @@ TREE-NEXT: Terminals: 4
3447
TREE-NEXT: SuccessorIds: [ ]
3548
TREE-NEXT: ...
3649

50+
;--- raw-1.cgtext
51+
:outlined_hash_tree
52+
0:
53+
Hash: 0x0
54+
Terminals: 0
55+
SuccessorIds: [ 1 ]
56+
1:
57+
Hash: 0x1
58+
Terminals: 0
59+
SuccessorIds: [ 2 ]
60+
2:
61+
Hash: 0x2
62+
Terminals: 4
63+
SuccessorIds: [ ]
64+
...
65+
3766
;--- merge-1.ll
67+
@.data = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
3868

39-
; The .data is encoded in a binary form based on the following yaml form. See serialize() in OutlinedHashTreeRecord.cpp
40-
;---
41-
;0:
42-
; Hash: 0x0
43-
; Terminals: 0
44-
; SuccessorIds: [ 1 ]
45-
;1:
46-
; Hash: 0x1
47-
; Terminals: 0
48-
; SuccessorIds: [ 2 ]
49-
;2:
50-
; Hash: 0x2
51-
; Terminals: 4
52-
; SuccessorIds: [ ]
53-
;...
5469

55-
@.data = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00\00\00\00\00\04\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
70+
;--- raw-2.cgtext
71+
:outlined_hash_tree
72+
0:
73+
Hash: 0x0
74+
Terminals: 0
75+
SuccessorIds: [ 1 ]
76+
1:
77+
Hash: 0x1
78+
Terminals: 0
79+
SuccessorIds: [ 2 ]
80+
2:
81+
Hash: 0x3
82+
Terminals: 5
83+
SuccessorIds: [ ]
84+
...
5685

5786
;--- merge-2.ll
58-
59-
; The .data is encoded in a binary form based on the following yaml form. See serialize() in OutlinedHashTreeRecord.cpp
60-
;---
61-
;0:
62-
; Hash: 0x0
63-
; Terminals: 0
64-
; SuccessorIds: [ 1 ]
65-
;1:
66-
; Hash: 0x1
67-
; Terminals: 0
68-
; SuccessorIds: [ 2 ]
69-
;2:
70-
; Hash: 0x3
71-
; Terminals: 5
72-
; SuccessorIds: [ ]
73-
;...
74-
75-
@.data = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\03\00\00\00\00\00\00\00\05\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
87+
@.data = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"

llvm/test/tools/llvm-cgdata/merge-concat.test

Lines changed: 45 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
RUN: split-file %s %t
44

5+
# Synthesize two sets of raw cgdata without the header (24 bytes) from the indexed cgdata.
6+
# Concatenate them in merge-concat.ll
7+
RUN: llvm-cgdata dump -binary %t/raw-1.cgtext -o %t/raw-1.cgdata
8+
RUN: od -t x1 -j 24 -An %t/raw-1.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-1-bytes.txt
9+
RUN: sed -i "s/<RAW_1_BYTES>/$(cat %t/raw-1-bytes.txt)/g" %t/merge-concat.ll
10+
RUN: llvm-cgdata dump -binary %t/raw-2.cgtext -o %t/raw-2.cgdata
11+
RUN: od -t x1 -j 24 -An %t/raw-2.cgdata | tr -d '\n' | sed 's/ /\\\\/g' > %t/raw-2-bytes.txt
12+
RUN: sed -i "s/<RAW_2_BYTES>/$(cat %t/raw-2-bytes.txt)/g" %t/merge-concat.ll
13+
514
RUN: llc -filetype=obj -mtriple arm64-apple-darwin %t/merge-concat.ll -o %t/merge-concat.o
615
RUN: llvm-cgdata merge %t/merge-concat.o -o %t/merge-concat.cgdata
716
RUN: llvm-cgdata show %t/merge-concat.cgdata | FileCheck %s
@@ -10,7 +19,7 @@ CHECK-NEXT: Total Node Count: 4
1019
CHECK-NEXT: Terminal Node Count: 2
1120
CHECK-NEXT: Depth: 2
1221

13-
RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix TREE
22+
RUN: llvm-cgdata dump %t/merge-concat.cgdata | FileCheck %s --check-prefix=TREE
1423
TREE: # Outlined stable hash tree
1524
TREE-NEXT: :outlined_hash_tree
1625
TREE-NEXT: ---
@@ -32,37 +41,40 @@ TREE-NEXT: Terminals: 4
3241
TREE-NEXT: SuccessorIds: [ ]
3342
TREE-NEXT: ...
3443

35-
;--- merge-concat.ll
44+
;--- raw-1.cgtext
45+
:outlined_hash_tree
46+
0:
47+
Hash: 0x0
48+
Terminals: 0
49+
SuccessorIds: [ 1 ]
50+
1:
51+
Hash: 0x1
52+
Terminals: 0
53+
SuccessorIds: [ 2 ]
54+
2:
55+
Hash: 0x2
56+
Terminals: 4
57+
SuccessorIds: [ ]
58+
...
3659

37-
; In an linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other word, the following two trees are encoded back-to-back in a binary format.
38-
;---
39-
;0:
40-
; Hash: 0x0
41-
; Terminals: 0
42-
; SuccessorIds: [ 1 ]
43-
;1:
44-
; Hash: 0x1
45-
; Terminals: 0
46-
; SuccessorIds: [ 2 ]
47-
;2:
48-
; Hash: 0x2
49-
; Terminals: 4
50-
; SuccessorIds: [ ]
51-
;...
52-
;---
53-
;0:
54-
; Hash: 0x0
55-
; Terminals: 0
56-
; SuccessorIds: [ 1 ]
57-
;1:
58-
; Hash: 0x1
59-
; Terminals: 0
60-
; SuccessorIds: [ 2 ]
61-
;2:
62-
; Hash: 0x3
63-
; Terminals: 5
64-
; SuccessorIds: [ ]
65-
;...
60+
;--- raw-2.cgtext
61+
:outlined_hash_tree
62+
0:
63+
Hash: 0x0
64+
Terminals: 0
65+
SuccessorIds: [ 1 ]
66+
1:
67+
Hash: 0x1
68+
Terminals: 0
69+
SuccessorIds: [ 2 ]
70+
2:
71+
Hash: 0x3
72+
Terminals: 5
73+
SuccessorIds: [ ]
74+
...
75+
76+
;--- merge-concat.ll
6677

67-
@.data1 = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\02\00\00\00\00\00\00\00\04\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
68-
@.data2 = private unnamed_addr constant [72 x i8] c"\03\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\01\00\00\00\00\00\00\00\00\00\00\00\01\00\00\00\02\00\00\00\02\00\00\00\03\00\00\00\00\00\00\00\05\00\00\00\00\00\00\00", section "__DATA,__llvm_outline"
78+
; In a linked executable (as opposed to an object file), cgdata in __llvm_outline might be concatenated. Although this is not a typical workflow, we simply support this case to parse cgdata that is concatenated. In other words, the following two trees are encoded back-to-back in a binary format.
79+
@.data1 = private unnamed_addr constant [72 x i8] c"<RAW_1_BYTES>", section "__DATA,__llvm_outline"
80+
@.data2 = private unnamed_addr constant [72 x i8] c"<RAW_2_BYTES>", section "__DATA,__llvm_outline"

0 commit comments

Comments
 (0)