Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 73 additions & 79 deletions common/src/tsv_utils/common/utils.d
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ $(LIST
* [getTsvFieldValue] - A convenience function when only a single value is needed
from an input line.

* [throwIfWindowsNewlineOnUnix] - A utility for Unix platform builds to detecting
Windows newlines in input.
* [throwIfWindowsNewline] - A utility for detecting Windows newlines in input.
)

Copyright (c) 2015-2020, eBay Inc.
Expand Down Expand Up @@ -1426,103 +1425,98 @@ if (isSomeChar!C)
assertThrown(assertNotThrown!ConvException(getTsvFieldValue!double("abc\tdef", 2, '\t')));
}

/** [Yes|No.newlineWasRemoved] is a template parameter to throwIfWindowsNewlineOnUnix.
* A Yes value indicates the Unix newline was already removed, as might be done via
* std.File.byLine or similar mechanism.
*/
/**
Yes|No.newlineWasRemoved is a template parameter to throwIfWindowsNewline. A Yes
value indicates the Unix newline was already removed, as might be done via
std.stdio.File.byLine or similar mechanism.
*/
alias NewlineWasRemoved = Flag!"newlineWasRemoved";

/**
throwIfWindowsLineNewlineOnUnix is used to throw an exception if a Windows/DOS
line ending is found on a build compiled for a Unix platform. This is used by
the TSV Utilities to detect Window/DOS line endings and terminate processing
with an error message to the user.
*/
void throwIfWindowsNewlineOnUnix
throwIfWindowsNewline throws an exception if the 'line' argument ends with a
Windows/DOS line ending. This is used by TSV Utilities tools to detect Windows/DOS
line endings and terminate processing with an error message to the user.

The 'nlWasRemoved' template parameter (default: Yes.newlineWasRemoved) indicates
whether the Unix newline character was already removed from 'line'. If Yes, the CR
left over from a Windows CRLF is the last character and is what gets detected. If No,
the line must end with the full CRLF pair. Yes is useful when reading files in binary
mode and stripping Unix newlines.
*/
void throwIfWindowsNewline
(NewlineWasRemoved nlWasRemoved = Yes.newlineWasRemoved)
(const char[] line, const char[] filename, size_t lineNum)
{
version(Posix)
static if (nlWasRemoved)
{
static if (nlWasRemoved)
{
immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r';
}
else
{
immutable bool hasWindowsLineEnding =
line.length > 1 &&
line[$ - 2] == '\r' &&
line[$ - 1] == '\n';
}
immutable bool hasWindowsLineEnding = line.length != 0 && line[$ - 1] == '\r';
}
else
{
immutable bool hasWindowsLineEnding =
line.length > 1 &&
line[$ - 2] == '\r' &&
line[$ - 1] == '\n';
}

if (hasWindowsLineEnding)
{
import std.format;
throw new Exception(
format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s",
(filename == "-") ? "Standard Input" : filename, lineNum));
}
if (hasWindowsLineEnding)
{
import std.format;
throw new Exception(
format("Windows/DOS line ending found. Convert file to Unix newlines before processing (e.g. 'dos2unix').\n File: %s, Line: %s",
(filename == "-") ? "Standard Input" : filename, lineNum));
}
}

// throwIfWindowsNewlineOnUnix
// throwIfWindowsNewline
@safe unittest
{
/* Note: Currently only building on Posix. Need to add non-Posix test cases
* if Windows builds are ever done.
*/
version(Posix)
{
import std.exception;
import std.exception;

assertNotThrown(throwIfWindowsNewlineOnUnix("", "afile.tsv", 1));
assertNotThrown(throwIfWindowsNewlineOnUnix("a", "afile.tsv", 2));
assertNotThrown(throwIfWindowsNewlineOnUnix("ab", "afile.tsv", 3));
assertNotThrown(throwIfWindowsNewlineOnUnix("abc", "afile.tsv", 4));
assertNotThrown(throwIfWindowsNewline("", "afile.tsv", 1));
assertNotThrown(throwIfWindowsNewline("a", "afile.tsv", 2));
assertNotThrown(throwIfWindowsNewline("ab", "afile.tsv", 3));
assertNotThrown(throwIfWindowsNewline("abc", "afile.tsv", 4));

assertThrown(throwIfWindowsNewlineOnUnix("\r", "afile.tsv", 1));
assertThrown(throwIfWindowsNewlineOnUnix("a\r", "afile.tsv", 2));
assertThrown(throwIfWindowsNewlineOnUnix("ab\r", "afile.tsv", 3));
assertThrown(throwIfWindowsNewlineOnUnix("abc\r", "afile.tsv", 4));
assertThrown(throwIfWindowsNewline("\r", "afile.tsv", 1));
assertThrown(throwIfWindowsNewline("a\r", "afile.tsv", 2));
assertThrown(throwIfWindowsNewline("ab\r", "afile.tsv", 3));
assertThrown(throwIfWindowsNewline("abc\r", "afile.tsv", 4));

assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\n", "afile.tsv", 1));
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\n", "afile.tsv", 2));
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3));
assertNotThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4));
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("\n", "afile.tsv", 1));
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("a\n", "afile.tsv", 2));
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("ab\n", "afile.tsv", 3));
assertNotThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("abc\n", "afile.tsv", 4));

assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5));
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6));
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7));
assertThrown(throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8));
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "afile.tsv", 5));
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("a\r\n", "afile.tsv", 6));
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("ab\r\n", "afile.tsv", 7));
assertThrown(throwIfWindowsNewline!(No.newlineWasRemoved)("abc\r\n", "afile.tsv", 8));

/* Standard Input formatting. */
import std.algorithm : endsWith;
bool exceptionCaught = false;
/* Standard Input formatting. */
import std.algorithm : endsWith;
bool exceptionCaught = false;

try (throwIfWindowsNewlineOnUnix("\r", "-", 99));
catch (Exception e)
{
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
exceptionCaught = true;
}
finally
{
assert(exceptionCaught);
exceptionCaught = false;
}
try (throwIfWindowsNewline("\r", "-", 99));
catch (Exception e)
{
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
exceptionCaught = true;
}
finally
{
assert(exceptionCaught);
exceptionCaught = false;
}

try (throwIfWindowsNewlineOnUnix!(No.newlineWasRemoved)("\r\n", "-", 99));
catch (Exception e)
{
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
exceptionCaught = true;
}
finally
{
assert(exceptionCaught);
exceptionCaught = false;
}
try (throwIfWindowsNewline!(No.newlineWasRemoved)("\r\n", "-", 99));
catch (Exception e)
{
assert(e.msg.endsWith("File: Standard Input, Line: 99"));
exceptionCaught = true;
}
finally
{
assert(exceptionCaught);
exceptionCaught = false;
}
}

Expand Down
10 changes: 5 additions & 5 deletions tsv-filter/src/tsv_utils/tsv-filter.d
Original file line number Diff line number Diff line change
Expand Up @@ -753,7 +753,7 @@ struct TsvFilterOptions
import std.getopt;
import std.path : baseName, stripExtension;
import tsv_utils.common.getopt_inorder;
import tsv_utils.common.utils : throwIfWindowsNewlineOnUnix;
import tsv_utils.common.utils : throwIfWindowsNewline;

bool helpVerbose = false; // --help-verbose
bool helpOptions = false; // --help-options
Expand Down Expand Up @@ -974,7 +974,7 @@ struct TsvFilterOptions

if (hasHeader)
{
throwIfWindowsNewlineOnUnix(inputSources.front.header, inputSources.front.name, 1);
throwIfWindowsNewline(inputSources.front.header, inputSources.front.name, 1);
headerFields = inputSources.front.header.split(delim).to!(string[]);
fieldListArgProcessing();
}
Expand All @@ -995,7 +995,7 @@ void tsvFilter(ref TsvFilterOptions cmdopt)
import std.algorithm : all, any, splitter;
import std.range;
import tsv_utils.common.utils : BufferedOutputRange, bufferedByLine, InputSourceRange,
throwIfWindowsNewlineOnUnix;
throwIfWindowsNewline;

/* inputSources must be an InputSourceRange and include at least stdin. */
assert(!cmdopt.inputSources.empty);
Expand Down Expand Up @@ -1028,11 +1028,11 @@ void tsvFilter(ref TsvFilterOptions cmdopt)

foreach (inputStream; cmdopt.inputSources)
{
if (cmdopt.hasHeader) throwIfWindowsNewlineOnUnix(inputStream.header, inputStream.name, 1);
if (cmdopt.hasHeader) throwIfWindowsNewline(inputStream.header, inputStream.name, 1);

foreach (lineNum, line; inputStream.file.bufferedByLine.enumerate(fileBodyStartLine))
{
if (lineNum == 1) throwIfWindowsNewlineOnUnix(line, inputStream.name, lineNum);
if (lineNum == 1) throwIfWindowsNewline(line, inputStream.name, lineNum);

/* Copy the needed number of fields to the fields array. */
int fieldIndex = -1;
Expand Down
14 changes: 7 additions & 7 deletions tsv-join/src/tsv_utils/tsv-join.d
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ struct TsvJoinOptions
import std.path : baseName, stripExtension;
import std.typecons : Yes, No;
import tsv_utils.common.fieldlist;
import tsv_utils.common.utils : throwIfWindowsNewlineOnUnix;
import tsv_utils.common.utils : throwIfWindowsNewline;

bool helpVerbose = false; // --help-verbose
bool helpFields = false; // --help-fields
Expand Down Expand Up @@ -356,10 +356,10 @@ struct TsvJoinOptions
{
if (!filterSource.front.byLine.empty)
{
throwIfWindowsNewlineOnUnix(filterSource.front.byLine.front, filterSource.front.name, 1);
throwIfWindowsNewline(filterSource.front.byLine.front, filterSource.front.name, 1);
filterFileHeaderFields = filterSource.front.byLine.front.split(delim).to!(string[]);
}
throwIfWindowsNewlineOnUnix(inputSources.front.header, inputSources.front.name, 1);
throwIfWindowsNewline(inputSources.front.header, inputSources.front.name, 1);
inputSourceHeaderFields = inputSources.front.header.split(delim).to!(string[]);
fieldListArgProcessing();
}
Expand Down Expand Up @@ -403,7 +403,7 @@ int main(string[] cmdArgs)
void tsvJoin(ref TsvJoinOptions cmdopt)
{
import tsv_utils.common.utils : ByLineSourceRange, bufferedByLine, BufferedOutputRange,
isFlushableOutputRange, InputFieldReordering, InputSourceRange, throwIfWindowsNewlineOnUnix;
isFlushableOutputRange, InputFieldReordering, InputSourceRange, throwIfWindowsNewline;
import std.algorithm : splitter;
import std.array : join;
import std.range;
Expand Down Expand Up @@ -511,7 +511,7 @@ void tsvJoin(ref TsvJoinOptions cmdopt)

debug writeln(" --> [key]:[append] => [", key, "]:[", appendValues, "]");

if (lineNum == 1) throwIfWindowsNewlineOnUnix(line, filterStream.name, lineNum);
if (lineNum == 1) throwIfWindowsNewline(line, filterStream.name, lineNum);

if (lineNum == 1 && cmdopt.hasHeader)
{
Expand Down Expand Up @@ -576,13 +576,13 @@ void tsvJoin(ref TsvJoinOptions cmdopt)

foreach (inputStream; cmdopt.inputSources)
{
if (cmdopt.hasHeader) throwIfWindowsNewlineOnUnix(inputStream.header, inputStream.name, 1);
if (cmdopt.hasHeader) throwIfWindowsNewline(inputStream.header, inputStream.name, 1);

foreach (lineNum, line; inputStream.file.bufferedByLine.enumerate(fileBodyStartLine))
{
debug writeln("[input line] |", line, "|");

if (lineNum == 1) throwIfWindowsNewlineOnUnix(line, inputStream.name, lineNum);
if (lineNum == 1) throwIfWindowsNewline(line, inputStream.name, lineNum);

/*
* Next block checks if the input line matches a hash entry. Two cases:
Expand Down
Loading