Skip to content

Commit dbc1647

Browse files
committed
[llvm-rc] Add basic RC scripts parsing ability.
For now, the parser supports only a limited set of statements and resources; support will be extended in follow-up patches. Thanks to Nico Weber (thakis) for his original work in this area. Differential Revision: https://reviews.llvm.org/D36340 llvm-svn: 311175
1 parent ac6a5aa commit dbc1647

18 files changed

+729
-1
lines changed
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
meh IcOn "hello.bmp"
2+
Icon Icon "Icon"
3+
4+
LANGUAGE 5, 12
5+
6+
STRINGTABLE
7+
LANGUAGE 1, 1
8+
CHARACTERISTICS 500
9+
LANGUAGE 3, 4
10+
VERSION 14
11+
{
12+
1 "hello"
13+
2 "world"
14+
}
15+
STRINGTABLE BEGIN END
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LANGUAGE
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LANGUAGE 5 7
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
LANGUAGE 5,, 7
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
STRINGTABLE
2+
CHARACTERISTICS
3+
BEGIN
4+
100 "No integer after CHARACTERISTICS."
5+
END
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
& ICON "WeirdResourceName.ico"
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
HELLO
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
HELLO WORLD
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
STRINGTABLE
2+
VERSION 8
3+
{
4+
1 "hello"
5+
2
6+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
STRINGTABLE
2+
NONSENSETYPE 12 34
3+
BEGIN
4+
END

llvm/test/tools/llvm-rc/parser.test

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
; RUN: llvm-rc /V %p/Inputs/parser-correct-everything.rc | FileCheck %s --check-prefix PGOOD
2+
3+
; PGOOD: Icon (meh): "hello.bmp"
4+
; PGOOD-NEXT: Icon (Icon): "Icon"
5+
; PGOOD-NEXT: Language: 5, Sublanguage: 12
6+
; PGOOD-NEXT: StringTable:
7+
; PGOOD-NEXT: Option: Language: 1, Sublanguage: 1
8+
; PGOOD-NEXT: Option: Characteristics: 500
9+
; PGOOD-NEXT: Option: Language: 3, Sublanguage: 4
10+
; PGOOD-NEXT: Option: Version: 14
11+
; PGOOD-NEXT: 1 => "hello"
12+
; PGOOD-NEXT: 2 => "world"
13+
; PGOOD-NEXT: StringTable:
14+
15+
16+
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-no-string.rc 2> %t2
17+
; RUN: FileCheck %s --check-prefix PSTRINGTABLE1 --input-file %t2
18+
19+
; PSTRINGTABLE1: llvm-rc: Error parsing file: expected string, got }
20+
21+
22+
; RUN: not llvm-rc /V %p/Inputs/parser-stringtable-weird-option.rc 2> %t3
23+
; RUN: FileCheck %s --check-prefix PSTRINGTABLE2 --input-file %t3
24+
25+
; PSTRINGTABLE2: llvm-rc: Error parsing file: expected optional statement type, BEGIN or '{', got NONSENSETYPE
26+
27+
28+
; RUN: not llvm-rc /V %p/Inputs/parser-eof.rc 2> %t4
29+
; RUN: FileCheck %s --check-prefix PEOF --input-file %t4
30+
31+
; PEOF: llvm-rc: Error parsing file: expected integer, got <EOF>
32+
33+
34+
; RUN: not llvm-rc /V %p/Inputs/parser-no-characteristics-arg.rc 2> %t5
35+
; RUN: FileCheck %s --check-prefix PCHARACTERISTICS1 --input-file %t5
36+
37+
; PCHARACTERISTICS1: llvm-rc: Error parsing file: expected integer, got BEGIN
38+
39+
40+
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-token.rc 2> %t6
41+
; RUN: FileCheck %s --check-prefix PNONSENSE1 --input-file %t6
42+
43+
; PNONSENSE1: llvm-rc: Error parsing file: expected int or identifier, got &
44+
45+
46+
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type.rc 2> %t7
47+
; RUN: FileCheck %s --check-prefix PNONSENSE2 --input-file %t7
48+
49+
; PNONSENSE2: llvm-rc: Error parsing file: expected resource type, got WORLD
50+
51+
52+
; RUN: not llvm-rc /V %p/Inputs/parser-nonsense-type-eof.rc 2> %t8
53+
; RUN: FileCheck %s --check-prefix PNONSENSE3 --input-file %t8
54+
55+
; PNONSENSE3: llvm-rc: Error parsing file: expected int or identifier, got <EOF>
56+
57+
58+
; RUN: not llvm-rc /V %p/Inputs/parser-language-no-comma.rc 2> %t9
59+
; RUN: FileCheck %s --check-prefix PLANGUAGE1 --input-file %t9
60+
61+
; PLANGUAGE1: llvm-rc: Error parsing file: expected ',', got 7
62+
63+
64+
; RUN: not llvm-rc /V %p/Inputs/parser-language-too-many-commas.rc 2> %t10
65+
; RUN: FileCheck %s --check-prefix PLANGUAGE2 --input-file %t10
66+
67+
; PLANGUAGE2: llvm-rc: Error parsing file: expected integer, got ,

llvm/test/tools/llvm-rc/tokenizer.test

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
; RUN: llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
1+
; RUN: not llvm-rc /V %p/Inputs/tokens.rc | FileCheck %s
2+
; llvm-rc fails now on this sample because it is an invalid resource file
3+
; script. We silence the error message and just analyze the output.
24

35
; CHECK: Int: 1; int value = 1
46
; CHECK-NEXT: Plus: +

llvm/tools/llvm-rc/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@ add_public_tablegen_target(RcTableGen)
1010

1111
add_llvm_tool(llvm-rc
1212
llvm-rc.cpp
13+
ResourceScriptParser.cpp
14+
ResourceScriptStmt.cpp
1315
ResourceScriptToken.cpp
1416
)
Lines changed: 265 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
//===-- ResourceScriptParser.cpp --------------------------------*- C++-*-===//
2+
//
3+
// The LLVM Compiler Infrastructure
4+
//
5+
// This file is distributed under the University of Illinois Open Source
6+
// License. See LICENSE.TXT for details.
7+
//
8+
//===---------------------------------------------------------------------===//
9+
//
10+
// This implements the parser defined in ResourceScriptParser.h.
11+
//
12+
//===---------------------------------------------------------------------===//
13+
14+
#include "ResourceScriptParser.h"

#include "llvm/Support/ErrorHandling.h"
15+
16+
// Evaluate an expression yielding llvm::Error and, if it holds an error,
// forward that error out of the enclosing function.
//
// The body is wrapped in do { } while (0) so that the macro behaves like a
// single statement: the caller supplies the trailing semicolon, and the macro
// can be used as the sole statement of an unbraced if/else without capturing
// the following `else`.
#define RETURN_IF_ERROR(Expr)                                                  \
  do {                                                                         \
    if (auto Err = (Expr))                                                     \
      return std::move(Err);                                                   \
  } while (0)
20+
21+
// Evaluate an expression yielding llvm::Expected<T> and bind the result to a
// new local named Name. If the result holds an error, that error is taken out
// and returned from the enclosing function; otherwise Name stays in scope for
// the code that follows. Because it declares a variable in the caller's scope,
// this macro expands to two statements and must not be used as the sole
// statement of an unbraced if/else/loop.
#define ASSIGN_OR_RETURN(Name, Call)                                           \
  auto Name = (Call);                                                          \
  if (!Name)                                                                   \
    return Name.takeError();
27+
28+
namespace llvm {
29+
namespace rc {
30+
31+
// Build a parse error located at CurLoc. The message has the form
//   "Error parsing file: expected <Expected>, got <token>"
// where <token> is the text of the token at CurLoc, or "<EOF>" when CurLoc is
// the end of the token stream.
RCParser::ParserError::ParserError(const Twine Expected, const LocIter CurLoc,
                                   const LocIter End)
    : ErrorLoc(CurLoc), FileEnd(End) {
  // Work out what was actually found before assembling the message.
  const auto Found = CurLoc == End ? "<EOF>" : CurLoc->value();
  CurMessage =
      "Error parsing file: expected " + Expected.str() + ", got " + Found.str();
}
37+
38+
// Out-of-line definition of ParserError's static ID member.
// NOTE(review): presumably the type-identification anchor that llvm::ErrorInfo
// subclasses are expected to provide -- confirm against ResourceScriptParser.h.
char RCParser::ParserError::ID = 0;
39+
40+
// Construct a parser whose token storage is initialized from TokenList, with
// the cursor placed on the first token.
// NOTE(review): CurLoc and End are computed from the Tokens member, so this
// presumably relies on Tokens being declared before them in the class --
// confirm against ResourceScriptParser.h.
RCParser::RCParser(const std::vector<RCToken> &TokenList)
    : Tokens(TokenList),
      CurLoc(Tokens.begin()),
      End(Tokens.end()) {}
42+
43+
// Construct a parser that takes over TokenList by move, with the cursor placed
// on the first token.
// NOTE(review): CurLoc and End are computed from the Tokens member, so this
// presumably relies on Tokens being declared before them in the class --
// confirm against ResourceScriptParser.h.
RCParser::RCParser(std::vector<RCToken> &&TokenList)
    : Tokens(std::move(TokenList)),
      CurLoc(Tokens.begin()),
      End(Tokens.end()) {}
45+
46+
// True once the cursor has reached the end of the token stream.
bool RCParser::isEof() const {
  return End == CurLoc;
}
47+
48+
// Parse one top-level resource definition starting at the cursor.
//
// Returns the parsed resource, or an error if the input does not start with
// an int/identifier, names an unknown resource type, or the body of the
// resource fails to parse.
RCParser::ParseType RCParser::parseSingleResource() {
  // Normally the first token is the resource's name. LANGUAGE and STRINGTABLE
  // are the exception: they carry no name, so the first token is already the
  // type and we dispatch on it right away.
  ASSIGN_OR_RETURN(ResName, readTypeOrName());

  if (ResName->equalsLower("LANGUAGE"))
    return parseLanguageResource();
  if (ResName->equalsLower("STRINGTABLE"))
    return parseStringTableResource();

  // Named resource: the token we hold is the name, the next one is the type.
  ASSIGN_OR_RETURN(ResType, readTypeOrName());

  // ICON is the only named resource type understood here; anything else is
  // reported against the type token that was just read.
  if (!ResType->equalsLower("ICON"))
    return getExpectedError("resource type", /* IsAlreadyRead = */ true);

  ParseType Parsed = parseIconResource();

  // Attach the name only when the body parsed successfully; an error is
  // passed through untouched.
  if (Parsed)
    (*Parsed)->setName(*ResName);

  return Parsed;
}
76+
77+
// True if there is a next token and it is of kind TokenKind. At end of input
// this is always false.
bool RCParser::isNextTokenKind(Kind TokenKind) const {
  if (isEof())
    return false;
  return look().kind() == TokenKind;
}
80+
81+
// Peek at the token under the cursor without advancing. Must not be called at
// end of input (asserted).
const RCToken &RCParser::look() const {
  assert(!isEof());
  const RCToken &Current = *CurLoc;
  return Current;
}
85+
86+
// Return the token under the cursor and advance past it. Must not be called
// at end of input (asserted).
const RCToken &RCParser::read() {
  assert(!isEof());
  const RCToken &Current = *CurLoc;
  ++CurLoc;
  return Current;
}
90+
91+
// Skip the token under the cursor. Must not be called at end of input
// (asserted).
void RCParser::consume() {
  assert(!isEof());
  ++CurLoc;
}
95+
96+
// Read an integer token and return its value. If the next token is not an
// integer (or there is none), nothing is consumed and an "expected integer"
// error is returned.
Expected<uint32_t> RCParser::readInt() {
  if (isNextTokenKind(Kind::Int))
    return read().intValue();
  return getExpectedError("integer");
}
101+
102+
// Read a string token and return its text. If the next token is not a string
// (or there is none), nothing is consumed and an "expected string" error is
// returned.
Expected<StringRef> RCParser::readString() {
  if (isNextTokenKind(Kind::String))
    return read().value();
  return getExpectedError("string");
}
107+
108+
// Read an identifier token and return its text. If the next token is not an
// identifier (or there is none), nothing is consumed and an "expected
// identifier" error is returned.
Expected<StringRef> RCParser::readIdentifier() {
  if (isNextTokenKind(Kind::Identifier))
    return read().value();
  return getExpectedError("identifier");
}
113+
114+
// Read a token that may serve as a resource name or a resource type.
//
// Only identifiers and integers are accepted here; the original RC tool is
// considerably more permissive. On any other token (or at end of input)
// nothing is consumed and an "expected int or identifier" error is returned.
Expected<IntOrString> RCParser::readTypeOrName() {
  const bool IsInt = isNextTokenKind(Kind::Int);
  if (!IsInt && !isNextTokenKind(Kind::Identifier))
    return getExpectedError("int or identifier");

  // Wrap either the numeric value or the identifier text, depending on which
  // kind of token was found.
  const RCToken &Tok = read();
  return IsInt ? IntOrString(Tok.intValue()) : IntOrString(Tok.value());
}
126+
127+
// Consume the next token if it is of kind TokenKind and return success.
// Otherwise the cursor is left untouched and an error is returned whose
// "expected" part describes the requested kind.
Error RCParser::consumeType(Kind TokenKind) {
  if (isNextTokenKind(TokenKind)) {
    consume();
    return Error::success();
  }

  // One case per token kind is generated from the shared token list: long
  // tokens are described by their stringized name, short tokens by their
  // stringized character argument.
  switch (TokenKind) {
#define TOKEN(TokenName)                                                       \
  case Kind::TokenName:                                                        \
    return getExpectedError(#TokenName);
#define SHORT_TOKEN(TokenName, TokenCh)                                        \
  case Kind::TokenName:                                                        \
    return getExpectedError(#TokenCh);
#include "ResourceScriptTokenList.h"
#undef SHORT_TOKEN
#undef TOKEN
  }

  // Every case above returns, so control only gets here for a value outside
  // the Kind enumeration (assuming ResourceScriptTokenList.h lists every
  // Kind). Mark that explicitly instead of flowing off the end of a non-void
  // function, which is undefined behaviour and triggers -Wreturn-type on GCC.
  llvm_unreachable("All case options exist.");
}
145+
146+
// Consume the next token only if it is of kind TokenKind. Returns true when a
// token was consumed and false (leaving the cursor untouched) otherwise.
bool RCParser::consumeOptionalType(Kind TokenKind) {
  if (!isNextTokenKind(TokenKind))
    return false;

  consume();
  return true;
}
154+
155+
// Read a comma-separated list of integers containing at least MinCount and at
// most MaxCount elements.
//
// Reading stops at the first token that does not fit the list. If at least
// MinCount integers have been read by then, the failure is discarded and the
// integers read so far are returned; otherwise the failure is returned.
// Requires MinCount <= MaxCount (asserted).
Expected<SmallVector<uint32_t, 8>>
RCParser::readIntsWithCommas(size_t MinCount, size_t MaxCount) {
  assert(MinCount <= MaxCount);

  using IntList = SmallVector<uint32_t, 8>;
  IntList Values;

  // Decides how a read failure is reported: it is a real error only while we
  // are still short of MinCount elements.
  auto OnFailure = [&](llvm::Error Err) -> Expected<IntList> {
    if (Values.size() >= MinCount) {
      consumeError(std::move(Err));
      return Values;
    }
    return std::move(Err);
  };

  while (Values.size() < MaxCount) {
    // Every element but the first must be preceded by a comma. The RC tool
    // is inconsistent about requiring them; we always do.
    if (!Values.empty()) {
      if (auto CommaError = consumeType(Kind::Comma))
        return OnFailure(std::move(CommaError));
    }

    auto Next = readInt();
    if (!Next)
      return OnFailure(Next.takeError());
    Values.push_back(*Next);
  }

  return Values;
}
186+
187+
// As for now, we ignore the extended set of statements.
188+
Expected<OptionalStmtList> RCParser::parseOptionalStatements(bool IsExtended) {
189+
OptionalStmtList Result;
190+
191+
// The last statement is always followed by the start of the block.
192+
while (!isNextTokenKind(Kind::BlockBegin)) {
193+
ASSIGN_OR_RETURN(SingleParse, parseSingleOptionalStatement(IsExtended));
194+
Result.addStmt(std::move(*SingleParse));
195+
}
196+
197+
return Result;
198+
}
199+
200+
// Parse one optional statement: an identifier naming the statement
// (CHARACTERISTICS, LANGUAGE or VERSION, compared case-insensitively)
// followed by that statement's arguments. The bool parameter is unused.
//
// Any other identifier yields an error reported against the identifier that
// was just read.
Expected<std::unique_ptr<OptionalStmt>>
RCParser::parseSingleOptionalStatement(bool) {
  ASSIGN_OR_RETURN(Keyword, readIdentifier());

  if (Keyword->equals_lower("CHARACTERISTICS"))
    return parseCharacteristicsStmt();
  if (Keyword->equals_lower("LANGUAGE"))
    return parseLanguageStmt();
  if (Keyword->equals_lower("VERSION"))
    return parseVersionStmt();

  return getExpectedError("optional statement type, BEGIN or '{'",
                          /* IsAlreadyRead = */ true);
}
213+
214+
// Parse a top-level LANGUAGE resource.
RCParser::ParseType RCParser::parseLanguageResource() {
  // A top-level LANGUAGE has the same shape as the optional LANGUAGE
  // statement, so parse it as one; on success the result converts up to an
  // RCResource in the return.
  return parseLanguageStmt();
}
219+
220+
// Parse the body of an ICON resource: a single string argument. Returns an
// "expected string" error if that argument is missing.
RCParser::ParseType RCParser::parseIconResource() {
  ASSIGN_OR_RETURN(Location, readString());
  return make_unique<IconResource>(*Location);
}
224+
225+
// Parse a STRINGTABLE resource: optional statements, a block-begin token,
// zero or more "<integer id> <string>" entries, and a block-end token.
// Returns the populated table, or the first error encountered.
RCParser::ParseType RCParser::parseStringTableResource() {
  ASSIGN_OR_RETURN(Options, parseOptionalStatements());
  RETURN_IF_ERROR(consumeType(Kind::BlockBegin));

  auto Table = make_unique<StringTableResource>(std::move(*Options));

  while (true) {
    // The block-end token terminates the table.
    if (consumeOptionalType(Kind::BlockEnd))
      break;

    // An entry is the string's integer ID followed directly by the string.
    // Some documentation examples show a comma between the two, but we stick
    // to the strict single-statement form and do not accept one.
    ASSIGN_OR_RETURN(EntryId, readInt());
    ASSIGN_OR_RETURN(EntryText, readString());
    Table->addString(*EntryId, *EntryText);
  }

  return Table;
}
243+
244+
// Parse the arguments of a LANGUAGE statement: exactly two comma-separated
// integers, passed to LanguageResource in the order they were read.
RCParser::ParseOptionType RCParser::parseLanguageStmt() {
  ASSIGN_OR_RETURN(LangIds, readIntsWithCommas(/* min = */ 2, /* max = */ 2));

  uint32_t First = (*LangIds)[0];
  uint32_t Second = (*LangIds)[1];
  return make_unique<LanguageResource>(First, Second);
}
248+
249+
// Parse the argument of a CHARACTERISTICS statement: a single integer.
// Returns an "expected integer" error if it is missing.
RCParser::ParseOptionType RCParser::parseCharacteristicsStmt() {
  ASSIGN_OR_RETURN(Value, readInt());
  return make_unique<CharacteristicsStmt>(*Value);
}
253+
254+
// Parse the argument of a VERSION statement: a single integer. Returns an
// "expected integer" error if it is missing.
RCParser::ParseOptionType RCParser::parseVersionStmt() {
  ASSIGN_OR_RETURN(Value, readInt());
  return make_unique<VersionStmt>(*Value);
}
258+
259+
// Create a ParserError saying that Message was expected.
//
// The error normally points at the token under the cursor. When IsAlreadyRead
// is true the offending token has already been consumed, so the error points
// at the token just before the cursor instead; the caller must have read at
// least one token in that case.
Error RCParser::getExpectedError(const Twine Message, bool IsAlreadyRead) {
  const auto ErrorAt = IsAlreadyRead ? std::prev(CurLoc) : CurLoc;
  return make_error<ParserError>(Message, ErrorAt, End);
}
263+
264+
} // namespace rc
265+
} // namespace llvm

0 commit comments

Comments
 (0)