Skip to content

Commit

Permalink
scanner+io: handle line continuation in io.c
Browse files Browse the repository at this point in the history
... and not in the scanner. This is much simpler, and also ensures
uniform treatment of line continuations everywhere. Several new test
cases are added to demonstrate this.

This leads to one change in behavior: line continuations inside of
triple quoted strings are now handled, while before they would just
insert a backslash followed by a newline into the string. This change
is intentional. A test case is adjusted accordingly.
  • Loading branch information
fingolfin committed Apr 10, 2018
1 parent 765a9cc commit 40e08f5
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 76 deletions.
40 changes: 38 additions & 2 deletions src/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,50 @@ Char GET_NEXT_CHAR(void)
}
else
STATE(In)++;

// handle line continuation, i.e., backslash followed by new line; and
// also the case when we run out of buffered data
while (*STATE(In) == '\\' || *STATE(In) == 0) {
// first check if we run out of data; in that case, get more, and try
// again
if (*STATE(In) == 0)
GetLine();
// else, we must have seen a backslash; so check now if it starts a
// line continuation, i.e., whether it is followed by a line terminator
else if (STATE(In)[1] == '\n') // LF for UNIX line ends
STATE(In) += 2;
else if (STATE(In)[1] == '\r') // CR+LF for DOS/Windows line ends
STATE(In) += (STATE(In)[2] == '\n') ? 3 : 2;
// if we just saw a backslash, without a line terminator after it, stop
// the loop and return it
else
break;
}

return *STATE(In);
}

// GET_NEXT_CHAR_NO_LC advances the input by one character and returns the
// character now at the input position, like GET_NEXT_CHAR, except that a
// backslash followed by a line terminator is NOT treated as a line
// continuation. It is used when skipping to the end of the current line
// while handling comment lines.
static Char GET_NEXT_CHAR_NO_LC(void)
{
    // either step forward by one character or, if the input pointer
    // currently refers to the pushback slot, redirect it to IO()->RealIn
    if (STATE(In) != &IO()->Pushback)
        STATE(In)++;
    else
        STATE(In) = IO()->RealIn;

    // a zero byte means we ran out of buffered data, so ask for more
    if (*STATE(In) == '\0')
        GetLine();

    return *STATE(In);
}

Char PEEK_NEXT_CHAR(void)
{
assert(IS_CHAR_PUSHBACK_EMPTY());

// store the current character
IO()->Pushback = *STATE(In);

Expand All @@ -220,11 +256,11 @@ Char PEEK_CURR_CHAR(void)
return *STATE(In);
}

void IGNORE_REST_OF_LINE(void)
void SKIP_TO_END_OF_LINE(void)
{
Char c = *STATE(In);
while (c != '\n' && c != '\r' && c != '\377')
c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR_NO_LC();
}


Expand Down
5 changes: 3 additions & 2 deletions src/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ extern Char GET_NEXT_CHAR(void);
extern Char PEEK_NEXT_CHAR(void);
extern Char PEEK_CURR_CHAR(void);

// skip the rest of the current line
extern void IGNORE_REST_OF_LINE(void);
// skip the rest of the current line, ignoring line continuations
// (used to handle comments)
extern void SKIP_TO_END_OF_LINE(void);

/****************************************************************************
**
Expand Down
94 changes: 23 additions & 71 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ void Match (
*/
static void GetIdent(void)
{
Int i, fetch;
Int i;
Int isQuoted;

/* initially it could be a keyword */
Expand All @@ -201,24 +201,13 @@ static void GetIdent(void)
Char c = PEEK_CURR_CHAR();
for ( i=0; IsIdent(c) || IsDigit(c) || c=='\\'; i++ ) {

fetch = 1;
/* handle escape sequences */
/* we ignore '\ newline' by decrementing i, except at the
very start of the identifier, when we cannot do that
so we recurse instead */
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' && i == 0 ) { GetSymbol(); return; }
else if ( c == '\r' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' ) {
if (i == 0) { GetSymbol(); return; }
else i--;
}
else {STATE(Value)[i] = '\r'; fetch = 0;}
}
else if ( c == '\n' && i < SAFE_VALUE_SIZE-1 ) i--;
else if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n';
if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n';
else if ( c == 't' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\t';
else if ( c == 'r' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\r';
else if ( c == 'b' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\b';
Expand All @@ -234,7 +223,7 @@ static void GetIdent(void)
}

/* read the next character */
if (fetch) c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR();

}

Expand Down Expand Up @@ -327,35 +316,24 @@ static void GetIdent(void)
** exponent digit.
**
*/
static Char GetCleanedChar( UInt *wasEscaped ) {
Char c = GET_NEXT_CHAR();
*wasEscaped = 0;
if (c == '\\') {
c = GET_NEXT_CHAR();
if ( c == '\n')
return GetCleanedChar(wasEscaped);
else if ( c == '\r' ) {
if ( PEEK_NEXT_CHAR() == '\n' ) {
GET_NEXT_CHAR(); // skip the \n
return GetCleanedChar(wasEscaped);
}
else {
static Char GetCleanedChar(UInt * wasEscaped)
{
Char c = GET_NEXT_CHAR();
*wasEscaped = 0;
if (c == '\\') {
c = GET_NEXT_CHAR();
*wasEscaped = 1;
return '\r';
}
}
else {
*wasEscaped = 1;
if ( c == 'n') return '\n';
else if ( c == 't') return '\t';
else if ( c == 'r') return '\r';
else if ( c == 'b') return '\b';
else if ( c == '>') return '\01';
else if ( c == '<') return '\02';
else if ( c == 'c') return '\03';
switch (c) {
case 'n': return '\n';
case 't': return '\t';
case 'r': return '\r';
case 'b': return '\b';
case '>': return '\01';
case '<': return '\02';
case 'c': return '\03';
}
}
}
return c;
return c;
}


Expand Down Expand Up @@ -712,30 +690,17 @@ static Char GetEscapedChar(void)
*/
static void GetStr(void)
{
Int i = 0, fetch;
Int i = 0;
Char c = PEEK_CURR_CHAR();

/* read all characters into 'Value' */
for ( i = 0; i < SAFE_VALUE_SIZE-1 && c != '"'
&& c != '\n' && c != '\377'; i++ ) {

fetch = 1;
/* handle escape sequences */
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
/* if next is another '\\' followed by '\n' it must be ignored */
while ( c == '\\' && PEEK_NEXT_CHAR() == '\n' ) {
c = GET_NEXT_CHAR(); // skip '\\'
c = GET_NEXT_CHAR(); // skip '\n'
}
if ( c == '\n' ) i--;
else if ( c == '\r' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' ) i--;
else {STATE(Value)[i] = '\r'; fetch = 0;}
} else {
STATE(Value)[i] = GetEscapedChar();
}
STATE(Value)[i] = GetEscapedChar();
}

/* put normal chars into 'Value' but only if there is room */
Expand All @@ -744,7 +709,7 @@ static void GetStr(void)
}

/* read the next character */
if (fetch) c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR();

}

Expand Down Expand Up @@ -971,7 +936,7 @@ void GetSymbol ( void )
/* skip over <spaces>, <tabs>, <newlines> and comments */
while (c==' '||c=='\t'||c=='\n'||c=='\r'||c=='\f'||c=='#') {
if ( c == '#' )
IGNORE_REST_OF_LINE();
SKIP_TO_END_OF_LINE();
c = GET_NEXT_CHAR();
}

Expand All @@ -988,8 +953,6 @@ void GetSymbol ( void )
break;

case '!': STATE(Symbol) = S_ILLEGAL; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '.' ) { STATE(Symbol) = S_BDOT; GET_NEXT_CHAR(); break; }
if ( c == '[' ) { STATE(Symbol) = S_BLBRACK; GET_NEXT_CHAR(); break; }
if ( c == '{' ) { STATE(Symbol) = S_BLBRACE; GET_NEXT_CHAR(); break; }
Expand All @@ -1003,11 +966,6 @@ void GetSymbol ( void )
case ',': STATE(Symbol) = S_COMMA; GET_NEXT_CHAR(); break;

case ':': STATE(Symbol) = S_COLON; c = GET_NEXT_CHAR();
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' )
{ c = GET_NEXT_CHAR(); }
}
if ( c == '=' ) { STATE(Symbol) = S_ASSIGN; c = GET_NEXT_CHAR(); break; }
break;

Expand All @@ -1019,21 +977,15 @@ void GetSymbol ( void )

case '=': STATE(Symbol) = S_EQ; GET_NEXT_CHAR(); break;
case '<': STATE(Symbol) = S_LT; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '=' ) { STATE(Symbol) = S_LE; c = GET_NEXT_CHAR(); break; }
if ( c == '>' ) { STATE(Symbol) = S_NE; c = GET_NEXT_CHAR(); break; }
break;
case '>': STATE(Symbol) = S_GT; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '=' ) { STATE(Symbol) = S_GE; c = GET_NEXT_CHAR(); break; }
break;

case '+': STATE(Symbol) = S_PLUS; GET_NEXT_CHAR(); break;
case '-': STATE(Symbol) = S_MINUS; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '>' ) { STATE(Symbol)=S_MAPTO; c = GET_NEXT_CHAR(); break; }
break;
case '*': STATE(Symbol) = S_MULT; GET_NEXT_CHAR(); break;
Expand Down
15 changes: 14 additions & 1 deletion tst/testinstall/linecontinuation.tst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gap> x:="foo\
# in triple quoted string
gap> x:="""haha\
> !""";
"haha\\\n!"
"haha!"

# break keywords and operators like :=, <=, >= etc. in the middle
gap> 1 m\
Expand All @@ -22,6 +22,19 @@ gap> x :\
> =1;
1

# inside range expressions
gap> [1.\
> .4];
[ 1 .. 4 ]

# inside triple dots
gap> {x..\
> .}->x;
function( x... ) ... end
gap> {x.\
> ..}->x;
function( x... ) ... end

# however, in comments, you cannot use line continuations:
gap> # 1234\
gap> 5;
Expand Down

0 comments on commit 40e08f5

Please sign in to comment.