|
| 1 | +//===- file-table-tform.cpp - transform files with tables of strings ------===// |
| 2 | +// |
| 3 | +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | +// |
| 7 | +//===----------------------------------------------------------------------===// |
| 8 | +// This tool transforms a series of input file tables into a single output |
| 9 | +// file table according to operations passed on the command line. Operations' |
| 10 | +// arguments are input files; some operations like 'rename' take a single |
| 11 | +// input, others like 'replace' take two. Operations are executed in |
| 12 | +// command-line order and consume the needed number of inputs left-to-right |
| 13 | +// in the command-line order. Table files and operation example: |
| 14 | +// $ cat a.txt |
| 15 | +// [Code|Symbols|Properties] |
| 16 | +// a_0.bc|a_0.sym|a_0.props |
| 17 | +// a_1.bc|a_1.sym|a_1.props |
| 18 | +// |
| 19 | +// $ cat b.txt |
| 20 | +// [Files|Attrs] |
| 21 | +// a_0.spv|aa.attr |
| 22 | +// a_1.spv|bb.attr |
| 23 | +// |
| 24 | +// $ file-table-tform --replace=Code,Files a.txt b.txt -o c.txt |
| 25 | +// |
| 26 | +// $ cat c.txt |
| 27 | +// [Code|Symbols|Properties] |
| 28 | +// a_0.spv|a_0.sym|a_0.props |
| 29 | +// a_1.spv|a_1.sym|a_1.props |
| 30 | +// |
| 31 | +// The tool for now supports only linear transformation sequences like the one |
| 32 | +// shown in the graph below. 'op*' represent operations, 'Input' is the main |
| 33 | +// input file, 'Output' is the single output file, edges are directed and |
| 34 | +// designate inputs and outputs of the operations. |
| 35 | +// File1 File3 |
| 36 | +// \ \ |
| 37 | +// Input - op1 - op2 - op3 - Output |
| 38 | +// / |
| 39 | +// File2 |
| 40 | +// More complex transformation trees such as: |
| 41 | +// File0 - op0 File3 |
| 42 | +// \ \ |
| 43 | +// Input - op1 - op2 - op3 - Output |
| 44 | +// / |
| 45 | +// File2 |
| 46 | +// are not supported. For now, "File0 - op0" transformation must be done in a |
| 47 | +// separate tool invocation. |
| 48 | +// TODO support SQL-like transformation style if the tool ever evolves. |
| 49 | + |
| 50 | +#include "llvm/ADT/SmallVector.h" |
| 51 | +#include "llvm/ADT/Twine.h" |
| 52 | +#include "llvm/Support/CommandLine.h" |
| 53 | +#include "llvm/Support/Errc.h" |
| 54 | +#include "llvm/Support/InitLLVM.h" |
| 55 | +#include "llvm/Support/SimpleTable.h" |
| 56 | +#include "llvm/Support/WithColor.h" |
| 57 | + |
| 58 | +#include <algorithm> |
| 59 | +#include <functional> |
| 60 | +#include <map> |
| 61 | +#include <memory> |
| 62 | +#include <string> |
| 63 | + |
| 64 | +using namespace llvm; |
| 65 | + |
| 66 | +static StringRef ToolName; // set in main first thing |
| 67 | + |
| 68 | +cl::OptionCategory FileTableTformCat{"file-table-tform Options"}; |
| 69 | + |
| 70 | +static cl::list<std::string> Inputs(cl::Positional, cl::ZeroOrMore, |
| 71 | + cl::desc("<input filenames>"), |
| 72 | + cl::cat(FileTableTformCat)); |
| 73 | + |
| 74 | +static cl::opt<std::string> Output("o", cl::Required, |
| 75 | + cl::desc("<output filename>"), |
| 76 | + cl::value_desc("filename"), |
| 77 | + cl::cat(FileTableTformCat)); |
| 78 | + |
| 79 | +static constexpr char OPT_REPLACE[] = "replace"; |
| 80 | +static constexpr char OPT_RENAME[] = "rename"; |
| 81 | +static constexpr char OPT_EXTRACT[] = "extract"; |
| 82 | + |
| 83 | +static cl::list<std::string> TformReplace{ |
| 84 | + OPT_REPLACE, cl::ZeroOrMore, cl::desc("replace a column"), |
| 85 | + cl::value_desc("<column name or ordinal>"), cl::cat(FileTableTformCat)}; |
| 86 | + |
| 87 | +static cl::list<std::string> TformRename{ |
| 88 | + OPT_RENAME, cl::ZeroOrMore, cl::desc("rename a column"), |
| 89 | + cl::value_desc("<old_name>,<new_name>"), cl::cat(FileTableTformCat)}; |
| 90 | + |
| 91 | +static cl::list<std::string> TformExtract{ |
| 92 | + OPT_EXTRACT, cl::ZeroOrMore, |
| 93 | + cl::desc("extract column(s) identified by names"), |
| 94 | + cl::value_desc("<name1>,<name2>,..."), cl::cat(FileTableTformCat)}; |
| 95 | + |
| 96 | +static cl::opt<bool> DropTitles{"drop_titles", cl::Optional, |
| 97 | + cl::desc("drop column titles"), |
| 98 | + cl::cat(FileTableTformCat)}; |
| 99 | + |
| 100 | +Error makeToolError(Twine Msg) { |
| 101 | + return make_error<StringError>("*** " + llvm::Twine(ToolName) + |
| 102 | + " ERROR: " + Msg, |
| 103 | + inconvertibleErrorCode()); |
| 104 | +} |
| 105 | + |
| 106 | +Error makeIOError(Twine Msg) { |
| 107 | + return make_error<StringError>( |
| 108 | + "*** " + Twine(ToolName) + " SYSTEM ERROR: " + Msg, errc::io_error); |
| 109 | +} |
| 110 | + |
| 111 | +Error makeUserError(Twine Msg) { |
| 112 | + return createStringError(errc::invalid_argument, |
| 113 | + "*** " + Twine(ToolName) + " usage ERROR: " + Msg); |
| 114 | +} |
| 115 | + |
| 116 | +struct TformCmd { |
| 117 | + using UPtrTy = std::unique_ptr<TformCmd>; |
| 118 | + StringRef Kind; |
| 119 | + SmallVector<StringRef, 2> Args; |
| 120 | + SmallVector<StringRef, 2> Inputs; |
| 121 | + |
| 122 | + TformCmd() = default; |
| 123 | + TformCmd(StringRef Kind) : Kind(Kind) {} |
| 124 | + |
| 125 | + static Expected<UPtrTy> create(StringRef Kind, StringRef RawArg = "") { |
| 126 | + UPtrTy Res = std::make_unique<TformCmd>(Kind); |
| 127 | + Error E = Res->parseArg(RawArg); |
| 128 | + if (E) |
| 129 | + return std::move(E); |
| 130 | + return std::move(Res); |
| 131 | + } |
| 132 | + |
| 133 | + using InpIt = cl::list<std::string>::iterator; |
| 134 | + |
| 135 | + Error consumeSingleInput(InpIt &Cur, const InpIt End) { |
| 136 | + if (Cur == End) |
| 137 | + return makeUserError("no input for '" + Twine(Kind) + "' command"); |
| 138 | + if (!llvm::sys::fs::exists(*Cur)) |
| 139 | + return makeIOError("file not found: " + Twine(*Cur)); |
| 140 | + Inputs.push_back(*Cur); |
| 141 | + Cur++; |
| 142 | + return Error::success(); |
| 143 | + } |
| 144 | + |
| 145 | + using Func = std::function<Error(TformCmd *)>; |
| 146 | + |
| 147 | + Error consumeInput(InpIt Cur, const InpIt End) { |
| 148 | + Func F = |
| 149 | + StringSwitch<Func>(Kind) |
| 150 | + .Case(OPT_REPLACE, |
| 151 | + [&](TformCmd *Cmd) { |
| 152 | + return Cmd->consumeSingleInput(Cur, End); |
| 153 | + }) |
| 154 | + .Case(OPT_RENAME, [&](TformCmd *Cmd) { return Error::success(); }) |
| 155 | + .Case(OPT_EXTRACT, [&](TformCmd *Cmd) { return Error::success(); }); |
| 156 | + return F(this); |
| 157 | + } |
| 158 | + |
| 159 | + Error parseArg(StringRef Arg) { |
| 160 | + Func F = |
| 161 | + StringSwitch<Func>(Kind) |
| 162 | + // need '-> Error' return type declaration in the lambdas below as |
| 163 | + // it can't be deduced automatically |
| 164 | + .Case(OPT_REPLACE, |
| 165 | + [&](TformCmd *Cmd) -> Error { |
| 166 | + // argument is <column name> |
| 167 | + if (Arg.empty()) |
| 168 | + return makeUserError("empty argument in " + |
| 169 | + Twine(OPT_REPLACE)); |
| 170 | + Arg.split(Args, ','); |
| 171 | + if (Args.size() != 2 || Args[0].empty() || Args[1].empty()) |
| 172 | + return makeUserError("invalid argument in " + |
| 173 | + Twine(OPT_REPLACE)); |
| 174 | + return Error::success(); |
| 175 | + }) |
| 176 | + .Case(OPT_RENAME, |
| 177 | + [&](TformCmd *Cmd) -> Error { |
| 178 | + // argument is <old_name>,<new_name> |
| 179 | + if (Arg.empty()) |
| 180 | + return makeUserError("empty argument in " + |
| 181 | + Twine(OPT_RENAME)); |
| 182 | + auto Names = Arg.split(','); |
| 183 | + if (Names.first.empty() || Names.second.empty()) |
| 184 | + return makeUserError("invalid argument in " + |
| 185 | + Twine(OPT_RENAME)); |
| 186 | + Args.push_back(Names.first); |
| 187 | + Args.push_back(Names.second); |
| 188 | + return Error::success(); |
| 189 | + }) |
| 190 | + .Case(OPT_EXTRACT, [&](TformCmd *Cmd) -> Error { |
| 191 | + // argument is <name1>,<name2>,... (1 or more) |
| 192 | + if (Arg.empty()) |
| 193 | + return makeUserError("empty argument in " + Twine(OPT_RENAME)); |
| 194 | + SmallVector<StringRef, 3> Names; |
| 195 | + Arg.split(Names, ','); |
| 196 | + if (std::find(Names.begin(), Names.end(), "") != Names.end()) |
| 197 | + return makeUserError("empty name in " + Twine(OPT_RENAME)); |
| 198 | + std::copy(Names.begin(), Names.end(), std::back_inserter(Args)); |
| 199 | + return Error::success(); |
| 200 | + }); |
| 201 | + return F(this); |
| 202 | + } |
| 203 | + |
| 204 | + Error execute(util::SimpleTable &Table) { |
| 205 | + Func F = |
| 206 | + StringSwitch<Func>(Kind) |
| 207 | + .Case(OPT_REPLACE, |
| 208 | + [&](TformCmd *Cmd) -> Error { |
| 209 | + // argument is <column name> |
| 210 | + assert(Cmd->Args.size() == 2 && Cmd->Inputs.size() == 1); |
| 211 | + Expected<util::SimpleTable::UPtrTy> Table1 = |
| 212 | + util::SimpleTable::read(Cmd->Inputs[0]); |
| 213 | + if (!Table1) |
| 214 | + return Table1.takeError(); |
| 215 | + Error Res = |
| 216 | + Table.replaceColumn(Args[0], *Table1->get(), Args[1]); |
| 217 | + return Res ? std::move(Res) : std::move(Error::success()); |
| 218 | + }) |
| 219 | + .Case(OPT_RENAME, |
| 220 | + [&](TformCmd *Cmd) -> Error { |
| 221 | + // argument is <old_name>,<new_name> |
| 222 | + assert(Args.size() == 2); |
| 223 | + Error Res = Table.renameColumn(Args[0], Args[1]); |
| 224 | + return Res ? std::move(Res) : std::move(Error::success()); |
| 225 | + }) |
| 226 | + .Case(OPT_EXTRACT, [&](TformCmd *Cmd) -> Error { |
| 227 | + // argument is <name1>,<name2>,... (1 or more) |
| 228 | + assert(!Args.empty()); |
| 229 | + Error Res = Table.peelColumns(Args); |
| 230 | + return Res ? std::move(Res) : std::move(Error::success()); |
| 231 | + }); |
| 232 | + return F(this); |
| 233 | + } |
| 234 | +}; |
| 235 | + |
// Takes ownership of the llvm::Error \p E; if it holds a failure, logs it to
// the error stream and makes the enclosing function return 1. A success value
// is simply consumed. The body is wrapped in do/while so that the macro
// followed by ';' forms exactly one statement and is safe inside unbraced
// if/else.
#define CHECK_AND_EXIT(E)                                                      \
  do {                                                                         \
    Error LocE = std::move(E);                                                 \
    if (LocE) {                                                                \
      logAllUnhandledErrors(std::move(LocE), WithColor::error(errs()));        \
      return 1;                                                                \
    }                                                                          \
  } while (false)
| 244 | + |
| 245 | +int main(int argc, char **argv) { |
| 246 | + ToolName = argv[0]; // make tool name available for functions in this source |
| 247 | + InitLLVM X{argc, argv}; |
| 248 | + |
| 249 | + cl::HideUnrelatedOptions(FileTableTformCat); |
| 250 | + cl::ParseCommandLineOptions( |
| 251 | + argc, argv, |
| 252 | + "File table transformation tool.\n" |
| 253 | + "Inputs and output of this tool is a \"file table\" files containing\n" |
| 254 | + "2D table of strings with optional row of column titles. Based on\n" |
| 255 | + "transformation actions passed via the command line, the tool " |
| 256 | + "transforms the first input file table and emits a new one as a result.\n" |
| 257 | + "\n" |
| 258 | + "Transformation actions are:\n" |
| 259 | + "- replace a column\n" |
| 260 | + "- rename a column\n" |
| 261 | + "- extract column(s)\n"); |
| 262 | + |
| 263 | + std::map<int, TformCmd::UPtrTy> Cmds; |
| 264 | + |
| 265 | + // Partially construct commands (w/o input information). Can't fully construct |
| 266 | + // yet, as an order across all command line options-commands needs to be |
| 267 | + // established first to properly map inputs to commands. |
| 268 | + |
| 269 | + auto Lists = {std::addressof(TformReplace), std::addressof(TformRename), |
| 270 | + std::addressof(TformExtract)}; |
| 271 | + |
| 272 | + for (const auto *L : Lists) { |
| 273 | + for (auto It = L->begin(); It != L->end(); It++) { |
| 274 | + Expected<TformCmd::UPtrTy> Cmd = TformCmd::create(L->ArgStr, *It); |
| 275 | + |
| 276 | + if (!Cmd) |
| 277 | + CHECK_AND_EXIT(Cmd.takeError()); |
| 278 | + const int Pos = L->getPosition(It - L->begin()); |
| 279 | + Cmds.emplace(Pos, std::move(Cmd.get())); |
| 280 | + } |
| 281 | + } |
| 282 | + // finish command construction first w/o execution to make sure command line |
| 283 | + // is valid |
| 284 | + auto CurInput = Inputs.begin(); |
| 285 | + const auto EndInput = Inputs.end(); |
| 286 | + // first input is the "current" - it will undergo the transformation sequence |
| 287 | + if (CurInput == EndInput) |
| 288 | + CHECK_AND_EXIT(makeUserError("no inputs")); |
| 289 | + std::string &InputFile = *CurInput++; |
| 290 | + |
| 291 | + for (auto &P : Cmds) { |
| 292 | + TformCmd::UPtrTy &Cmd = P.second; |
| 293 | + // this will advance cur iterator as far as needed |
| 294 | + Error E = Cmd->consumeInput(CurInput, EndInput); |
| 295 | + CHECK_AND_EXIT(E); |
| 296 | + } |
| 297 | + // commands are constructed, command line is correct - read input and execute |
| 298 | + // transformations on it |
| 299 | + |
| 300 | + Expected<util::SimpleTable::UPtrTy> Table = |
| 301 | + util::SimpleTable::read(InputFile); |
| 302 | + if (!Table) |
| 303 | + CHECK_AND_EXIT(Table.takeError()); |
| 304 | + |
| 305 | + for (auto &P : Cmds) { |
| 306 | + TformCmd::UPtrTy &Cmd = P.second; |
| 307 | + Error Res = Cmd->execute(*Table->get()); |
| 308 | + CHECK_AND_EXIT(Res); |
| 309 | + } |
| 310 | + // Finally, write the result |
| 311 | + std::error_code EC; |
| 312 | + raw_fd_ostream Out{Output, EC, sys::fs::OpenFlags::OF_None}; |
| 313 | + |
| 314 | + if (EC) |
| 315 | + CHECK_AND_EXIT(createFileError(Output, EC)); |
| 316 | + Table->get()->write(Out, !DropTitles); |
| 317 | + |
| 318 | + if (Out.has_error()) |
| 319 | + CHECK_AND_EXIT(createFileError(Output, Out.error())); |
| 320 | + Out.close(); |
| 321 | + return 0; |
| 322 | +} |
0 commit comments