Skip to content

Commit 94d3cd2

Browse files
committed
Add a file-table-tform tool: manipulating tabular string data files.
Signed-off-by: Konstantin S Bobrovsky <konstantin.s.bobrovsky@intel.com>
1 parent 96de11d commit 94d3cd2

File tree

9 files changed

+373
-0
lines changed

9 files changed

+373
-0
lines changed
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[A|B|C|D]
2+
aaa|bbb|100|XXX
3+
ccc|ddd|200|YYY
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[11|12]
2+
00|11
3+
22|33
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
[AA|C|D]
2+
00|100|XXX
3+
22|200|YYY
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
-- Remove column B:
2+
RUN: file-table-tform --extract=A,C,D %S/Inputs/a.txt -o b.txt
3+
4+
-- Rename column A to AA:
5+
RUN: file-table-tform --rename=A,AA b.txt -o c.txt
6+
7+
-- Replace column AA in c.txt with column 11 from %S/Inputs/a0.txt
8+
RUN: file-table-tform --replace=AA,11 c.txt %S/Inputs/a0.txt -o d.txt
9+
10+
-- Verify result
11+
RUN: diff d.txt %S/Inputs/gold.txt

llvm/tools/LLVMBuild.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
subdirectories =
1919
bugpoint
2020
dsymutil
21+
file-table-tform
2122
llc
2223
lli
2324
llvm-ar
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
set(LLVM_LINK_COMPONENTS
2+
Support
3+
)
4+
5+
add_llvm_tool(file-table-tform
6+
file-table-tform.cpp
7+
)
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
;===- ./tools/file-table-tform/LLVMBuild.txt -------------------*- Conf -*--===;
2+
;
3+
; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
; See https://llvm.org/LICENSE.txt for license information.
5+
; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
;
7+
;===------------------------------------------------------------------------===;
8+
;
9+
; This is an LLVMBuild description file for the components in this subdirectory.
10+
;
11+
; For more information on the LLVMBuild system, please see:
12+
;
13+
; http://llvm.org/docs/LLVMBuild.html
14+
;
15+
;===------------------------------------------------------------------------===;
16+
17+
[component_0]
type = Tool
name = file-table-tform
parent = Tools
; Keep in sync with LLVM_LINK_COMPONENTS in this tool's CMakeLists.txt.
required_libraries = Support
Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,322 @@
1+
//===- file-table-tform.cpp - transform files with tables of strings ------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// This tool transforms a series of input file tables into a single output file
9+
// table according to operations passed on the command line. Operations'
10+
// arguments are input files, and some operations like 'rename' take a single
11+
// input, others like 'replace' take two. Operations are executed in
12+
// command-line order and consume needed amount of inputs left-to-right in the
13+
// command-line order. Table files and operation example:
14+
// $ cat a.txt
15+
// [Code|Symbols|Properties]
16+
// a_0.bc|a_0.sym|a_0.props
17+
// a_1.bc|a_1.sym|a_1.props
18+
//
19+
// $ cat b.txt
20+
// [Files|Attrs]
21+
// a_0.spv|aa.attr
22+
// a_1.spv|bb.attr
23+
//
24+
// $ file-table-tform --replace=Code,Files a.txt b.txt -o c.txt
25+
//
26+
// $ cat c.txt
27+
// [Code|Symbols|Properties]
28+
// a_0.spv|a_0.sym|a_0.props
29+
// a_1.spv|a_1.sym|a_1.props
30+
//
31+
// The tool for now supports only linear transformation sequences like shown on
32+
// the graph below. 'op*' represent operations, 'Input' is the main input file,
33+
// 'Output' is the single output file, edges are directed and designate inputs
34+
// and outputs of the operations.
35+
// File1 File3
36+
// \ \
37+
// Input - op1 - op2 - op3 - Output
38+
// /
39+
// File2
40+
// More complex transformation trees such as:
41+
// File0 - op0 File3
42+
// \ \
43+
// Input - op1 - op2 - op3 - Output
44+
// /
45+
// File2
46+
// are not supported. For now, "File0 - op0" transformation must be done in a
47+
// separate tool invocation.
48+
// TODO support SQL-like transformation style if the tool ever evolves.
49+
50+
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/SimpleTable.h"
#include "llvm/Support/WithColor.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <cassert>
#include <functional>
#include <iterator>
#include <map>
#include <memory>
#include <string>
#include <system_error>
#include <utility>
63+
64+
using namespace llvm;
65+
66+
static StringRef ToolName; // set in main first thing
67+
68+
cl::OptionCategory FileTableTformCat{"file-table-tform Options"};
69+
70+
static cl::list<std::string> Inputs(cl::Positional, cl::ZeroOrMore,
71+
cl::desc("<input filenames>"),
72+
cl::cat(FileTableTformCat));
73+
74+
static cl::opt<std::string> Output("o", cl::Required,
75+
cl::desc("<output filename>"),
76+
cl::value_desc("filename"),
77+
cl::cat(FileTableTformCat));
78+
79+
static constexpr char OPT_REPLACE[] = "replace";
80+
static constexpr char OPT_RENAME[] = "rename";
81+
static constexpr char OPT_EXTRACT[] = "extract";
82+
83+
static cl::list<std::string> TformReplace{
84+
OPT_REPLACE, cl::ZeroOrMore, cl::desc("replace a column"),
85+
cl::value_desc("<column name or ordinal>"), cl::cat(FileTableTformCat)};
86+
87+
static cl::list<std::string> TformRename{
88+
OPT_RENAME, cl::ZeroOrMore, cl::desc("rename a column"),
89+
cl::value_desc("<old_name>,<new_name>"), cl::cat(FileTableTformCat)};
90+
91+
static cl::list<std::string> TformExtract{
92+
OPT_EXTRACT, cl::ZeroOrMore,
93+
cl::desc("extract column(s) identified by names"),
94+
cl::value_desc("<name1>,<name2>,..."), cl::cat(FileTableTformCat)};
95+
96+
static cl::opt<bool> DropTitles{"drop_titles", cl::Optional,
97+
cl::desc("drop column titles"),
98+
cl::cat(FileTableTformCat)};
99+
100+
/// Builds an error for a failure inside the tool itself. The message is
/// prefixed with the tool name and carries no convertible error code.
Error makeToolError(Twine Msg) {
  // Render the Twine right away so nothing refers to temporaries later on.
  const std::string Text =
      ("*** " + llvm::Twine(ToolName) + " ERROR: " + Msg).str();
  return make_error<StringError>(Twine(Text), inconvertibleErrorCode());
}
105+
106+
/// Builds an error for a file system / IO failure. The message is prefixed
/// with the tool name and the error carries errc::io_error.
Error makeIOError(Twine Msg) {
  // Render the Twine right away so nothing refers to temporaries later on.
  const std::string Text =
      ("*** " + Twine(ToolName) + " SYSTEM ERROR: " + Msg).str();
  return make_error<StringError>(Twine(Text), errc::io_error);
}
110+
111+
/// Builds an error for incorrect command line usage. The message is prefixed
/// with the tool name and the error carries errc::invalid_argument.
Error makeUserError(Twine Msg) {
  // Render the Twine right away so nothing refers to temporaries later on.
  const std::string Text =
      ("*** " + Twine(ToolName) + " usage ERROR: " + Msg).str();
  return createStringError(errc::invalid_argument, Twine(Text));
}
115+
116+
struct TformCmd {
117+
using UPtrTy = std::unique_ptr<TformCmd>;
118+
StringRef Kind;
119+
SmallVector<StringRef, 2> Args;
120+
SmallVector<StringRef, 2> Inputs;
121+
122+
TformCmd() = default;
123+
TformCmd(StringRef Kind) : Kind(Kind) {}
124+
125+
static Expected<UPtrTy> create(StringRef Kind, StringRef RawArg = "") {
126+
UPtrTy Res = std::make_unique<TformCmd>(Kind);
127+
Error E = Res->parseArg(RawArg);
128+
if (E)
129+
return std::move(E);
130+
return std::move(Res);
131+
}
132+
133+
using InpIt = cl::list<std::string>::iterator;
134+
135+
Error consumeSingleInput(InpIt &Cur, const InpIt End) {
136+
if (Cur == End)
137+
return makeUserError("no input for '" + Twine(Kind) + "' command");
138+
if (!llvm::sys::fs::exists(*Cur))
139+
return makeIOError("file not found: " + Twine(*Cur));
140+
Inputs.push_back(*Cur);
141+
Cur++;
142+
return Error::success();
143+
}
144+
145+
using Func = std::function<Error(TformCmd *)>;
146+
147+
Error consumeInput(InpIt Cur, const InpIt End) {
148+
Func F =
149+
StringSwitch<Func>(Kind)
150+
.Case(OPT_REPLACE,
151+
[&](TformCmd *Cmd) {
152+
return Cmd->consumeSingleInput(Cur, End);
153+
})
154+
.Case(OPT_RENAME, [&](TformCmd *Cmd) { return Error::success(); })
155+
.Case(OPT_EXTRACT, [&](TformCmd *Cmd) { return Error::success(); });
156+
return F(this);
157+
}
158+
159+
Error parseArg(StringRef Arg) {
160+
Func F =
161+
StringSwitch<Func>(Kind)
162+
// need '-> Error' return type declaration in the lambdas below as
163+
// it can't be deduced automatically
164+
.Case(OPT_REPLACE,
165+
[&](TformCmd *Cmd) -> Error {
166+
// argument is <column name>
167+
if (Arg.empty())
168+
return makeUserError("empty argument in " +
169+
Twine(OPT_REPLACE));
170+
Arg.split(Args, ',');
171+
if (Args.size() != 2 || Args[0].empty() || Args[1].empty())
172+
return makeUserError("invalid argument in " +
173+
Twine(OPT_REPLACE));
174+
return Error::success();
175+
})
176+
.Case(OPT_RENAME,
177+
[&](TformCmd *Cmd) -> Error {
178+
// argument is <old_name>,<new_name>
179+
if (Arg.empty())
180+
return makeUserError("empty argument in " +
181+
Twine(OPT_RENAME));
182+
auto Names = Arg.split(',');
183+
if (Names.first.empty() || Names.second.empty())
184+
return makeUserError("invalid argument in " +
185+
Twine(OPT_RENAME));
186+
Args.push_back(Names.first);
187+
Args.push_back(Names.second);
188+
return Error::success();
189+
})
190+
.Case(OPT_EXTRACT, [&](TformCmd *Cmd) -> Error {
191+
// argument is <name1>,<name2>,... (1 or more)
192+
if (Arg.empty())
193+
return makeUserError("empty argument in " + Twine(OPT_RENAME));
194+
SmallVector<StringRef, 3> Names;
195+
Arg.split(Names, ',');
196+
if (std::find(Names.begin(), Names.end(), "") != Names.end())
197+
return makeUserError("empty name in " + Twine(OPT_RENAME));
198+
std::copy(Names.begin(), Names.end(), std::back_inserter(Args));
199+
return Error::success();
200+
});
201+
return F(this);
202+
}
203+
204+
Error execute(util::SimpleTable &Table) {
205+
Func F =
206+
StringSwitch<Func>(Kind)
207+
.Case(OPT_REPLACE,
208+
[&](TformCmd *Cmd) -> Error {
209+
// argument is <column name>
210+
assert(Cmd->Args.size() == 2 && Cmd->Inputs.size() == 1);
211+
Expected<util::SimpleTable::UPtrTy> Table1 =
212+
util::SimpleTable::read(Cmd->Inputs[0]);
213+
if (!Table1)
214+
return Table1.takeError();
215+
Error Res =
216+
Table.replaceColumn(Args[0], *Table1->get(), Args[1]);
217+
return Res ? std::move(Res) : std::move(Error::success());
218+
})
219+
.Case(OPT_RENAME,
220+
[&](TformCmd *Cmd) -> Error {
221+
// argument is <old_name>,<new_name>
222+
assert(Args.size() == 2);
223+
Error Res = Table.renameColumn(Args[0], Args[1]);
224+
return Res ? std::move(Res) : std::move(Error::success());
225+
})
226+
.Case(OPT_EXTRACT, [&](TformCmd *Cmd) -> Error {
227+
// argument is <name1>,<name2>,... (1 or more)
228+
assert(!Args.empty());
229+
Error Res = Table.peelColumns(Args);
230+
return Res ? std::move(Res) : std::move(Error::success());
231+
});
232+
return F(this);
233+
}
234+
};
235+
236+
// Takes ownership of the llvm::Error produced by expression E. If it holds a
// failure, logs it to stderr and makes the *enclosing function* return 1;
// otherwise does nothing. Only usable inside functions returning int.
// Wrapped in do { } while (false) so that the expansion is a single statement
// and "if (c) CHECK_AND_EXIT(e); else ..." parses as intended.
#define CHECK_AND_EXIT(E)                                                      \
  do {                                                                         \
    Error LocE = std::move(E);                                                 \
    if (LocE) {                                                                \
      logAllUnhandledErrors(std::move(LocE), WithColor::error(errs()));        \
      return 1;                                                                \
    }                                                                          \
  } while (false)
244+
245+
int main(int argc, char **argv) {
246+
ToolName = argv[0]; // make tool name available for functions in this source
247+
InitLLVM X{argc, argv};
248+
249+
cl::HideUnrelatedOptions(FileTableTformCat);
250+
cl::ParseCommandLineOptions(
251+
argc, argv,
252+
"File table transformation tool.\n"
253+
"Inputs and output of this tool is a \"file table\" files containing\n"
254+
"2D table of strings with optional row of column titles. Based on\n"
255+
"transformation actions passed via the command line, the tool "
256+
"transforms the first input file table and emits a new one as a result.\n"
257+
"\n"
258+
"Transformation actions are:\n"
259+
"- replace a column\n"
260+
"- rename a column\n"
261+
"- extract column(s)\n");
262+
263+
std::map<int, TformCmd::UPtrTy> Cmds;
264+
265+
// Partially construct commands (w/o input information). Can't fully construct
266+
// yet, as an order across all command line options-commands needs to be
267+
// established first to properly map inputs to commands.
268+
269+
auto Lists = {std::addressof(TformReplace), std::addressof(TformRename),
270+
std::addressof(TformExtract)};
271+
272+
for (const auto *L : Lists) {
273+
for (auto It = L->begin(); It != L->end(); It++) {
274+
Expected<TformCmd::UPtrTy> Cmd = TformCmd::create(L->ArgStr, *It);
275+
276+
if (!Cmd)
277+
CHECK_AND_EXIT(Cmd.takeError());
278+
const int Pos = L->getPosition(It - L->begin());
279+
Cmds.emplace(Pos, std::move(Cmd.get()));
280+
}
281+
}
282+
// finish command construction first w/o execution to make sure command line
283+
// is valid
284+
auto CurInput = Inputs.begin();
285+
const auto EndInput = Inputs.end();
286+
// first input is the "current" - it will undergo the transformation sequence
287+
if (CurInput == EndInput)
288+
CHECK_AND_EXIT(makeUserError("no inputs"));
289+
std::string &InputFile = *CurInput++;
290+
291+
for (auto &P : Cmds) {
292+
TformCmd::UPtrTy &Cmd = P.second;
293+
// this will advance cur iterator as far as needed
294+
Error E = Cmd->consumeInput(CurInput, EndInput);
295+
CHECK_AND_EXIT(E);
296+
}
297+
// commands are constructed, command line is correct - read input and execute
298+
// transformations on it
299+
300+
Expected<util::SimpleTable::UPtrTy> Table =
301+
util::SimpleTable::read(InputFile);
302+
if (!Table)
303+
CHECK_AND_EXIT(Table.takeError());
304+
305+
for (auto &P : Cmds) {
306+
TformCmd::UPtrTy &Cmd = P.second;
307+
Error Res = Cmd->execute(*Table->get());
308+
CHECK_AND_EXIT(Res);
309+
}
310+
// Finally, write the result
311+
std::error_code EC;
312+
raw_fd_ostream Out{Output, EC, sys::fs::OpenFlags::OF_None};
313+
314+
if (EC)
315+
CHECK_AND_EXIT(createFileError(Output, EC));
316+
Table->get()->write(Out, !DropTitles);
317+
318+
if (Out.has_error())
319+
CHECK_AND_EXIT(createFileError(Output, Out.error()));
320+
Out.close();
321+
return 0;
322+
}

0 commit comments

Comments
 (0)