Skip to content

Commit fc11020

Browse files
authored
[Support] Validate number of arguments passed to formatv() (#105745)
Change formatv() to validate that the number of arguments passed matches number of replacement fields in the format string, and that the replacement indices do not contain holes. To support cases where this cannot be guaranteed, introduce a formatv() overload that allows disabling validation with a bool flag as its first argument.
1 parent 9edd998 commit fc11020

File tree

7 files changed

+205
-58
lines changed

7 files changed

+205
-58
lines changed

clang/lib/StaticAnalyzer/Checkers/StdLibraryFunctionsChecker.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1401,7 +1401,10 @@ void StdLibraryFunctionsChecker::checkPostCall(const CallEvent &Call,
14011401
ErrnoNote =
14021402
llvm::formatv("After calling '{0}' {1}", FunctionName, ErrnoNote);
14031403
} else {
1404-
CaseNote = llvm::formatv(Case.getNote().str().c_str(), FunctionName);
1404+
// Disable formatv() validation as the case note may not always have the
1405+
// {0} placeholder for function name.
1406+
CaseNote =
1407+
llvm::formatv(false, Case.getNote().str().c_str(), FunctionName);
14051408
}
14061409
const SVal RV = Call.getReturnValue();
14071410

llvm/benchmarks/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,4 @@ set(LLVM_LINK_COMPONENTS
55
add_benchmark(DummyYAML DummyYAML.cpp PARTIAL_SOURCES_INTENDED)
66
add_benchmark(xxhash xxhash.cpp PARTIAL_SOURCES_INTENDED)
77
add_benchmark(GetIntrinsicForClangBuiltin GetIntrinsicForClangBuiltin.cpp PARTIAL_SOURCES_INTENDED)
8+
add_benchmark(FormatVariadicBM FormatVariadicBM.cpp PARTIAL_SOURCES_INTENDED)

llvm/benchmarks/FormatVariadicBM.cpp

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
//===- FormatVariadicBM.cpp - formatv() benchmark ---------- --------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "benchmark/benchmark.h"
10+
#include "llvm/Support/FormatVariadic.h"
11+
#include <algorithm>
12+
#include <string>
13+
#include <vector>
14+
15+
using namespace llvm;
16+
using namespace std;
17+
18+
// Generate a list of format strings that have `NumReplacements` replacements
19+
// by permuting the replacements and some literal text.
20+
static vector<string> getFormatStrings(int NumReplacements) {
21+
vector<string> Components;
22+
for (int I = 0; I < NumReplacements; I++)
23+
Components.push_back("{" + to_string(I) + "}");
24+
// Intersperse these with some other literal text (_).
25+
const string_view Literal = "____";
26+
for (char C : Literal)
27+
Components.push_back(string(1, C));
28+
29+
vector<string> Formats;
30+
do {
31+
string Concat;
32+
for (const string &C : Components)
33+
Concat += C;
34+
Formats.emplace_back(Concat);
35+
} while (next_permutation(Components.begin(), Components.end()));
36+
return Formats;
37+
}
38+
39+
// Generate the set of formats to exercise outside the benchmark code.
40+
static const vector<vector<string>> Formats = {
41+
getFormatStrings(1), getFormatStrings(2), getFormatStrings(3),
42+
getFormatStrings(4), getFormatStrings(5),
43+
};
44+
45+
// Benchmark formatv() for a variety of format strings and 1-5 replacements.
46+
static void BM_FormatVariadic(benchmark::State &state) {
47+
for (auto _ : state) {
48+
for (const string &Fmt : Formats[0])
49+
formatv(Fmt.c_str(), 1).str();
50+
for (const string &Fmt : Formats[1])
51+
formatv(Fmt.c_str(), 1, 2).str();
52+
for (const string &Fmt : Formats[2])
53+
formatv(Fmt.c_str(), 1, 2, 3).str();
54+
for (const string &Fmt : Formats[3])
55+
formatv(Fmt.c_str(), 1, 2, 3, 4).str();
56+
for (const string &Fmt : Formats[4])
57+
formatv(Fmt.c_str(), 1, 2, 3, 4, 5).str();
58+
}
59+
}
60+
61+
BENCHMARK(BM_FormatVariadic);
62+
63+
BENCHMARK_MAIN();

llvm/include/llvm/Support/FormatVariadic.h

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -67,23 +67,20 @@ class formatv_object_base {
6767
protected:
6868
StringRef Fmt;
6969
ArrayRef<support::detail::format_adapter *> Adapters;
70-
71-
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
72-
size_t &Align, char &Pad);
73-
74-
static std::pair<ReplacementItem, StringRef>
75-
splitLiteralAndReplacement(StringRef Fmt);
70+
bool Validate;
7671

7772
formatv_object_base(StringRef Fmt,
78-
ArrayRef<support::detail::format_adapter *> Adapters)
79-
: Fmt(Fmt), Adapters(Adapters) {}
73+
ArrayRef<support::detail::format_adapter *> Adapters,
74+
bool Validate)
75+
: Fmt(Fmt), Adapters(Adapters), Validate(Validate) {}
8076

8177
formatv_object_base(formatv_object_base const &rhs) = delete;
8278
formatv_object_base(formatv_object_base &&rhs) = default;
8379

8480
public:
8581
void format(raw_ostream &S) const {
86-
for (auto &R : parseFormatString(Fmt)) {
82+
const auto Replacements = parseFormatString(Fmt, Adapters.size(), Validate);
83+
for (const auto &R : Replacements) {
8784
if (R.Type == ReplacementType::Empty)
8885
continue;
8986
if (R.Type == ReplacementType::Literal) {
@@ -101,9 +98,10 @@ class formatv_object_base {
10198
Align.format(S, R.Options);
10299
}
103100
}
104-
static SmallVector<ReplacementItem, 2> parseFormatString(StringRef Fmt);
105101

106-
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec);
102+
// Parse and optionally validate format string (in debug builds).
103+
static SmallVector<ReplacementItem, 2>
104+
parseFormatString(StringRef Fmt, size_t NumArgs, bool Validate);
107105

108106
std::string str() const {
109107
std::string Result;
@@ -149,8 +147,8 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
149147
};
150148

151149
public:
152-
formatv_object(StringRef Fmt, Tuple &&Params)
153-
: formatv_object_base(Fmt, ParameterPointers),
150+
formatv_object(StringRef Fmt, Tuple &&Params, bool Validate)
151+
: formatv_object_base(Fmt, ParameterPointers, Validate),
154152
Parameters(std::move(Params)) {
155153
ParameterPointers = std::apply(create_adapters(), Parameters);
156154
}
@@ -247,15 +245,22 @@ template <typename Tuple> class formatv_object : public formatv_object_base {
247245
// assertion. Otherwise, it will try to do something reasonable, but in general
248246
// the details of what that is are undefined.
249247
//
248+
249+
// formatv() with validation enable/disable controlled by the first argument.
250250
template <typename... Ts>
251-
inline auto formatv(const char *Fmt, Ts &&...Vals)
251+
inline auto formatv(bool Validate, const char *Fmt, Ts &&...Vals)
252252
-> formatv_object<decltype(std::make_tuple(
253253
support::detail::build_format_adapter(std::forward<Ts>(Vals))...))> {
254254
using ParamTuple = decltype(std::make_tuple(
255255
support::detail::build_format_adapter(std::forward<Ts>(Vals))...));
256-
return formatv_object<ParamTuple>(
257-
Fmt, std::make_tuple(support::detail::build_format_adapter(
258-
std::forward<Ts>(Vals))...));
256+
auto Params = std::make_tuple(
257+
support::detail::build_format_adapter(std::forward<Ts>(Vals))...);
258+
return formatv_object<ParamTuple>(Fmt, std::move(Params), Validate);
259+
}
260+
261+
// formatv() with validation enabled.
262+
template <typename... Ts> inline auto formatv(const char *Fmt, Ts &&...Vals) {
263+
return formatv<Ts...>(true, Fmt, std::forward<Ts>(Vals)...);
259264
}
260265

261266
} // end namespace llvm

llvm/lib/Support/FormatVariadic.cpp

Lines changed: 72 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ static std::optional<AlignStyle> translateLocChar(char C) {
2525
LLVM_BUILTIN_UNREACHABLE;
2626
}
2727

28-
bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29-
size_t &Align, char &Pad) {
28+
static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
29+
size_t &Align, char &Pad) {
3030
Where = AlignStyle::Right;
3131
Align = 0;
3232
Pad = ' ';
@@ -35,8 +35,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
3535

3636
if (Spec.size() > 1) {
3737
// A maximum of 2 characters at the beginning can be used for something
38-
// other
39-
// than the width.
38+
// other than the width.
4039
// If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
4140
// contains the width.
4241
// Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
@@ -55,8 +54,7 @@ bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
5554
return !Failed;
5655
}
5756

58-
std::optional<ReplacementItem>
59-
formatv_object_base::parseReplacementItem(StringRef Spec) {
57+
static std::optional<ReplacementItem> parseReplacementItem(StringRef Spec) {
6058
StringRef RepString = Spec.trim("{}");
6159

6260
// If the replacement sequence does not start with a non-negative integer,
@@ -82,15 +80,14 @@ formatv_object_base::parseReplacementItem(StringRef Spec) {
8280
RepString = StringRef();
8381
}
8482
RepString = RepString.trim();
85-
if (!RepString.empty()) {
86-
assert(false && "Unexpected characters found in replacement string!");
87-
}
83+
assert(RepString.empty() &&
84+
"Unexpected characters found in replacement string!");
8885

8986
return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
9087
}
9188

92-
std::pair<ReplacementItem, StringRef>
93-
formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
89+
static std::pair<ReplacementItem, StringRef>
90+
splitLiteralAndReplacement(StringRef Fmt) {
9491
while (!Fmt.empty()) {
9592
// Everything up until the first brace is a literal.
9693
if (Fmt.front() != '{') {
@@ -143,15 +140,77 @@ formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
143140
return std::make_pair(ReplacementItem{Fmt}, StringRef());
144141
}
145142

143+
#ifndef NDEBUG
144+
#define ENABLE_VALIDATION 1
145+
#else
146+
#define ENABLE_VALIDATION 0 // Conveniently enable validation in release mode.
147+
#endif
148+
146149
SmallVector<ReplacementItem, 2>
147-
formatv_object_base::parseFormatString(StringRef Fmt) {
150+
formatv_object_base::parseFormatString(StringRef Fmt, size_t NumArgs,
151+
bool Validate) {
148152
SmallVector<ReplacementItem, 2> Replacements;
149-
ReplacementItem I;
153+
154+
#if ENABLE_VALIDATION
155+
const StringRef SavedFmtStr = Fmt;
156+
size_t NumExpectedArgs = 0;
157+
#endif
158+
150159
while (!Fmt.empty()) {
160+
ReplacementItem I;
151161
std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
152162
if (I.Type != ReplacementType::Empty)
153163
Replacements.push_back(I);
164+
#if ENABLE_VALIDATION
165+
if (I.Type == ReplacementType::Format)
166+
NumExpectedArgs = std::max(NumExpectedArgs, I.Index + 1);
167+
#endif
168+
}
169+
170+
#if ENABLE_VALIDATION
171+
if (!Validate)
172+
return Replacements;
173+
174+
// Perform additional validation. Verify that the number of arguments matches
175+
// the number of replacement indices and that there are no holes in the
176+
// replacement indices.
177+
178+
// When validation fails, return an array of replacement items that
179+
// will print an error message as the outout of this formatv() (used when
180+
// validation is enabled in release mode).
181+
auto getErrorReplacements = [SavedFmtStr](StringLiteral ErrorMsg) {
182+
return SmallVector<ReplacementItem, 2>{
183+
ReplacementItem("Invalid formatv() call: "), ReplacementItem(ErrorMsg),
184+
ReplacementItem(" for format string: "), ReplacementItem(SavedFmtStr)};
185+
};
186+
187+
if (NumExpectedArgs != NumArgs) {
188+
errs() << formatv(
189+
"Expected {0} Args, but got {1} for format string '{2}'\n",
190+
NumExpectedArgs, NumArgs, SavedFmtStr);
191+
assert(0 && "Invalid formatv() call");
192+
return getErrorReplacements("Unexpected number of arguments");
193+
}
194+
195+
// Find the number of unique indices seen. All replacement indices
196+
// are < NumExpectedArgs.
197+
SmallVector<bool> Indices(NumExpectedArgs);
198+
size_t Count = 0;
199+
for (const ReplacementItem &I : Replacements) {
200+
if (I.Type != ReplacementType::Format || Indices[I.Index])
201+
continue;
202+
Indices[I.Index] = true;
203+
++Count;
204+
}
205+
206+
if (Count != NumExpectedArgs) {
207+
errs() << formatv(
208+
"Replacement field indices cannot have holes for format string '{0}'\n",
209+
SavedFmtStr);
210+
assert(0 && "Invalid format string");
211+
return getErrorReplacements("Replacement indices have holes");
154212
}
213+
#endif // ENABLE_VALIDATION
155214
return Replacements;
156215
}
157216

0 commit comments

Comments
 (0)