From 40e08f5e73f8663721d8d00b72f6d9762a269b40 Mon Sep 17 00:00:00 2001 From: Max Horn Date: Mon, 9 Apr 2018 23:59:37 +0200 Subject: [PATCH] scanner+io: handle line continuation in io.c ... and not in the scanner. This is much simpler, and also ensures uniform treatment of line continuations everywhere. Several new test cases are added to demonstrate this. This leads to one change in behavior: line continuations inside of triple quoted strings are now handled, while before they would just insert a backslash followed by a newline into the string. This change is intentional. A test case is adjusted accordingly. --- src/io.c | 40 +++++++++++- src/io.h | 5 +- src/scanner.c | 94 +++++++--------------------- tst/testinstall/linecontinuation.tst | 15 ++++- 4 files changed, 78 insertions(+), 76 deletions(-) diff --git a/src/io.c b/src/io.c index 2746857877..25dc213517 100644 --- a/src/io.c +++ b/src/io.c @@ -195,14 +195,50 @@ Char GET_NEXT_CHAR(void) } else STATE(In)++; + + // handle line continuation, i.e., backslash followed by new line; and + // also the case when we run out of buffered data + while (*STATE(In) == '\\' || *STATE(In) == 0) { + // first check if we run out of data; in that case, get more, and try + // again + if (*STATE(In) == 0) + GetLine(); + // else, we must have seen a backslash; so check now if it starts a + // line continuation, i.e., whether it is followed by a line terminator + else if (STATE(In)[1] == '\n') // LF for UNIX line ends + STATE(In) += 2; + else if (STATE(In)[1] == '\r') // CR+LF for DOS/Windows line ends + STATE(In) += (STATE(In)[2] == '\n') ? 3 : 2; + // if we just saw a backslash, without a line terminator after it, stop + // the loop and return it + else + break; + } + + return *STATE(In); +} + +// GET_NEXT_CHAR_NO_LC is like GET_NEXT_CHAR, but does not handle +// line continuations. This is used when skipping to the end of the +// current line, when handling comment lines. 
+static Char GET_NEXT_CHAR_NO_LC(void) +{ + if (STATE(In) == &IO()->Pushback) { + STATE(In) = IO()->RealIn; + } + else + STATE(In)++; + if (!*STATE(In)) GetLine(); + return *STATE(In); } Char PEEK_NEXT_CHAR(void) { assert(IS_CHAR_PUSHBACK_EMPTY()); + // store the current character IO()->Pushback = *STATE(In); @@ -220,11 +256,11 @@ Char PEEK_CURR_CHAR(void) return *STATE(In); } -void IGNORE_REST_OF_LINE(void) +void SKIP_TO_END_OF_LINE(void) { Char c = *STATE(In); while (c != '\n' && c != '\r' && c != '\377') - c = GET_NEXT_CHAR(); + c = GET_NEXT_CHAR_NO_LC(); } diff --git a/src/io.h b/src/io.h index 636c796fa0..3b6150d7d3 100644 --- a/src/io.h +++ b/src/io.h @@ -25,8 +25,9 @@ extern Char GET_NEXT_CHAR(void); extern Char PEEK_NEXT_CHAR(void); extern Char PEEK_CURR_CHAR(void); -// skip the rest of the current line -extern void IGNORE_REST_OF_LINE(void); +// skip the rest of the current line, ignoring line continuations +// (used to handle comments) +extern void SKIP_TO_END_OF_LINE(void); /**************************************************************************** ** diff --git a/src/scanner.c b/src/scanner.c index 737f738e9b..a06c3dfd4e 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -191,7 +191,7 @@ void Match ( */ static void GetIdent(void) { - Int i, fetch; + Int i; Int isQuoted; /* initially it could be a keyword */ @@ -201,24 +201,13 @@ static void GetIdent(void) Char c = PEEK_CURR_CHAR(); for ( i=0; IsIdent(c) || IsDigit(c) || c=='\\'; i++ ) { - fetch = 1; /* handle escape sequences */ /* we ignore '\ newline' by decrementing i, except at the very start of the identifier, when we cannot do that so we recurse instead */ if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' && i == 0 ) { GetSymbol(); return; } - else if ( c == '\r' ) { - c = GET_NEXT_CHAR(); - if ( c == '\n' ) { - if (i == 0) { GetSymbol(); return; } - else i--; - } - else {STATE(Value)[i] = '\r'; fetch = 0;} - } - else if ( c == '\n' && i < SAFE_VALUE_SIZE-1 ) i--; - else if ( c == 'n' && i 
< SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n'; + if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n'; else if ( c == 't' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\t'; else if ( c == 'r' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\r'; else if ( c == 'b' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\b'; @@ -234,7 +223,7 @@ static void GetIdent(void) } /* read the next character */ - if (fetch) c = GET_NEXT_CHAR(); + c = GET_NEXT_CHAR(); } @@ -327,35 +316,24 @@ static void GetIdent(void) ** exponent digit. ** */ -static Char GetCleanedChar( UInt *wasEscaped ) { - Char c = GET_NEXT_CHAR(); - *wasEscaped = 0; - if (c == '\\') { - c = GET_NEXT_CHAR(); - if ( c == '\n') - return GetCleanedChar(wasEscaped); - else if ( c == '\r' ) { - if ( PEEK_NEXT_CHAR() == '\n' ) { - GET_NEXT_CHAR(); // skip the \n - return GetCleanedChar(wasEscaped); - } - else { +static Char GetCleanedChar(UInt * wasEscaped) +{ + Char c = GET_NEXT_CHAR(); + *wasEscaped = 0; + if (c == '\\') { + c = GET_NEXT_CHAR(); *wasEscaped = 1; - return '\r'; - } - } - else { - *wasEscaped = 1; - if ( c == 'n') return '\n'; - else if ( c == 't') return '\t'; - else if ( c == 'r') return '\r'; - else if ( c == 'b') return '\b'; - else if ( c == '>') return '\01'; - else if ( c == '<') return '\02'; - else if ( c == 'c') return '\03'; + switch (c) { + case 'n': return '\n'; + case 't': return '\t'; + case 'r': return '\r'; + case 'b': return '\b'; + case '>': return '\01'; + case '<': return '\02'; + case 'c': return '\03'; + } } - } - return c; + return c; } @@ -712,30 +690,17 @@ static Char GetEscapedChar(void) */ static void GetStr(void) { - Int i = 0, fetch; + Int i = 0; Char c = PEEK_CURR_CHAR(); /* read all characters into 'Value' */ for ( i = 0; i < SAFE_VALUE_SIZE-1 && c != '"' && c != '\n' && c != '\377'; i++ ) { - fetch = 1; /* handle escape sequences */ if ( c == '\\' ) { c = GET_NEXT_CHAR(); - /* if next is another '\\' followed by '\n' it must be ignored */ - while ( c == '\\' && 
PEEK_NEXT_CHAR() == '\n' ) { - c = GET_NEXT_CHAR(); // skip '\\' - c = GET_NEXT_CHAR(); // skip '\n' - } - if ( c == '\n' ) i--; - else if ( c == '\r' ) { - c = GET_NEXT_CHAR(); - if ( c == '\n' ) i--; - else {STATE(Value)[i] = '\r'; fetch = 0;} - } else { - STATE(Value)[i] = GetEscapedChar(); - } + STATE(Value)[i] = GetEscapedChar(); } /* put normal chars into 'Value' but only if there is room */ @@ -744,7 +709,7 @@ static void GetStr(void) } /* read the next character */ - if (fetch) c = GET_NEXT_CHAR(); + c = GET_NEXT_CHAR(); } @@ -971,7 +936,7 @@ void GetSymbol ( void ) /* skip over <spaces>, <tabs>, <newlines> and comments */ while (c==' '||c=='\t'||c=='\n'||c=='\r'||c=='\f'||c=='#') { if ( c == '#' ) - IGNORE_REST_OF_LINE(); + SKIP_TO_END_OF_LINE(); c = GET_NEXT_CHAR(); } @@ -988,8 +953,6 @@ void GetSymbol ( void ) break; case '!': STATE(Symbol) = S_ILLEGAL; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '.' ) { STATE(Symbol) = S_BDOT; GET_NEXT_CHAR(); break; } if ( c == '[' ) { STATE(Symbol) = S_BLBRACK; GET_NEXT_CHAR(); break; } if ( c == '{' ) { STATE(Symbol) = S_BLBRACE; GET_NEXT_CHAR(); break; } @@ -1003,11 +966,6 @@ void GetSymbol ( void ) case ',': STATE(Symbol) = S_COMMA; GET_NEXT_CHAR(); break; case ':': STATE(Symbol) = S_COLON; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { - c = GET_NEXT_CHAR(); - if ( c == '\n' ) - { c = GET_NEXT_CHAR(); } - } if ( c == '=' ) { STATE(Symbol) = S_ASSIGN; c = GET_NEXT_CHAR(); break; } break; @@ -1019,21 +977,15 @@ void GetSymbol ( void ) case '=': STATE(Symbol) = S_EQ; GET_NEXT_CHAR(); break; case '<': STATE(Symbol) = S_LT; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '=' ) { STATE(Symbol) = S_LE; c = GET_NEXT_CHAR(); break; } if ( c == '>' ) { STATE(Symbol) = S_NE; c = GET_NEXT_CHAR(); break; } break; case '>': STATE(Symbol) = S_GT; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( 
c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '=' ) { STATE(Symbol) = S_GE; c = GET_NEXT_CHAR(); break; } break; case '+': STATE(Symbol) = S_PLUS; GET_NEXT_CHAR(); break; case '-': STATE(Symbol) = S_MINUS; c = GET_NEXT_CHAR(); - if ( c == '\\' ) { c = GET_NEXT_CHAR(); - if ( c == '\n' ) { c = GET_NEXT_CHAR(); } } if ( c == '>' ) { STATE(Symbol)=S_MAPTO; c = GET_NEXT_CHAR(); break; } break; case '*': STATE(Symbol) = S_MULT; GET_NEXT_CHAR(); break; diff --git a/tst/testinstall/linecontinuation.tst b/tst/testinstall/linecontinuation.tst index ea3e8ba9fe..ee4e62eaa6 100644 --- a/tst/testinstall/linecontinuation.tst +++ b/tst/testinstall/linecontinuation.tst @@ -12,7 +12,7 @@ gap> x:="foo\ # in triple quoted string gap> x:="""haha\ > !"""; -"haha\\\n!" +"haha!" # break keywords and operators like :=, <=, >= etc. in the middle gap> 1 m\ @@ -22,6 +22,19 @@ gap> x :\ > =1; 1 +# inside range expressions +gap> [1.\ +> .4]; +[ 1 .. 4 ] + +# inside triple dots +gap> {x..\ +> .}->x; +function( x... ) ... end +gap> {x.\ +> ..}->x; +function( x... ) ... end + # however, in comments, you cannot use line continuations: gap> # 1234\ gap> 5;