Skip to content

Commit

Permalink
scanner+io: handle line continuation in io.c
Browse files Browse the repository at this point in the history
... and not in the scanner. This is much simpler, and also ensures
uniform treatment of line continuations everywhere. Several new test
cases are added to demonstrate this.

This leads to one change in behavior: line continuations inside
triple-quoted strings are now handled, whereas before they would just
insert a backslash followed by a newline into the string. This change
is intentional. A test case is adjusted accordingly.
  • Loading branch information
fingolfin committed Apr 10, 2018
1 parent 765a9cc commit 9c704a9
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 73 deletions.
32 changes: 31 additions & 1 deletion src/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,44 @@ Char GET_NEXT_CHAR(void)
}
else
STATE(In)++;

// handle line continuation, i.e., backslash followed by new line or CRLF;
// and also the end of a line in general
while (*STATE(In) == '\\' || *STATE(In) == 0) {
if (!*STATE(In))
GetLine();
else if (STATE(In)[1] == '\n')
STATE(In) += 2;
else if (STATE(In)[1] == '\r')
STATE(In) += (STATE(In)[2] == '\n') ? 3 : 2;
else
break;
}

return *STATE(In);
}

// Variant of GET_NEXT_CHAR that performs no line continuation handling:
// a backslash in front of a line break is returned like any other
// character. It is used while skipping to the end of the current line,
// i.e. when discarding the remainder of a comment line.
//
// Returns the character that is current after advancing by one position.
static Char GET_NEXT_CHAR_NO_LC(void)
{
    // step forward; when currently positioned on the pushback slot,
    // continue at the position saved in RealIn instead
    if (STATE(In) != &IO()->Pushback)
        STATE(In)++;
    else
        STATE(In) = IO()->RealIn;

    // a NUL byte means the current line is used up; GetLine() is called
    // (presumably to fetch the next line) before the current character
    // is read back
    if (*STATE(In) == '\0')
        GetLine();

    return *STATE(In);
}

Char PEEK_NEXT_CHAR(void)
{
assert(IS_CHAR_PUSHBACK_EMPTY());

// store the current character
IO()->Pushback = *STATE(In);

Expand All @@ -224,7 +254,7 @@ void IGNORE_REST_OF_LINE(void)
{
Char c = *STATE(In);
while (c != '\n' && c != '\r' && c != '\377')
c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR_NO_LC();
}


Expand Down
3 changes: 2 additions & 1 deletion src/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ extern Char GET_NEXT_CHAR(void);
extern Char PEEK_NEXT_CHAR(void);
extern Char PEEK_CURR_CHAR(void);

// skip the rest of the current line
// skip the rest of the current line, ignoring line continuations
// (used to handle comments)
extern void IGNORE_REST_OF_LINE(void);

/****************************************************************************
Expand Down
92 changes: 22 additions & 70 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ void Match (
*/
static void GetIdent(void)
{
Int i, fetch;
Int i;
Int isQuoted;

/* initially it could be a keyword */
Expand All @@ -201,24 +201,13 @@ static void GetIdent(void)
Char c = PEEK_CURR_CHAR();
for ( i=0; IsIdent(c) || IsDigit(c) || c=='\\'; i++ ) {

fetch = 1;
/* handle escape sequences */
/* we ignore '\ newline' by decrementing i, except at the
very start of the identifier, when we cannot do that
so we recurse instead */
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' && i == 0 ) { GetSymbol(); return; }
else if ( c == '\r' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' ) {
if (i == 0) { GetSymbol(); return; }
else i--;
}
else {STATE(Value)[i] = '\r'; fetch = 0;}
}
else if ( c == '\n' && i < SAFE_VALUE_SIZE-1 ) i--;
else if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n';
if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n';
else if ( c == 't' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\t';
else if ( c == 'r' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\r';
else if ( c == 'b' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\b';
Expand All @@ -234,7 +223,7 @@ static void GetIdent(void)
}

/* read the next character */
if (fetch) c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR();

}

Expand Down Expand Up @@ -327,35 +316,24 @@ static void GetIdent(void)
** exponent digit.
**
*/
static Char GetCleanedChar( UInt *wasEscaped ) {
Char c = GET_NEXT_CHAR();
*wasEscaped = 0;
if (c == '\\') {
c = GET_NEXT_CHAR();
if ( c == '\n')
return GetCleanedChar(wasEscaped);
else if ( c == '\r' ) {
if ( PEEK_NEXT_CHAR() == '\n' ) {
GET_NEXT_CHAR(); // skip the \n
return GetCleanedChar(wasEscaped);
}
else {
static Char GetCleanedChar(UInt * wasEscaped)
{
Char c = GET_NEXT_CHAR();
*wasEscaped = 0;
if (c == '\\') {
c = GET_NEXT_CHAR();
*wasEscaped = 1;
return '\r';
}
}
else {
*wasEscaped = 1;
if ( c == 'n') return '\n';
else if ( c == 't') return '\t';
else if ( c == 'r') return '\r';
else if ( c == 'b') return '\b';
else if ( c == '>') return '\01';
else if ( c == '<') return '\02';
else if ( c == 'c') return '\03';
switch (c) {
case 'n': return '\n';
case 't': return '\t';
case 'r': return '\r';
case 'b': return '\b';
case '>': return '\01';
case '<': return '\02';
case 'c': return '\03';
}
}
}
return c;
return c;
}


Expand Down Expand Up @@ -712,30 +690,17 @@ static Char GetEscapedChar(void)
*/
static void GetStr(void)
{
Int i = 0, fetch;
Int i = 0;
Char c = PEEK_CURR_CHAR();

/* read all characters into 'Value' */
for ( i = 0; i < SAFE_VALUE_SIZE-1 && c != '"'
&& c != '\n' && c != '\377'; i++ ) {

fetch = 1;
/* handle escape sequences */
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
/* if next is another '\\' followed by '\n' it must be ignored */
while ( c == '\\' && PEEK_NEXT_CHAR() == '\n' ) {
c = GET_NEXT_CHAR(); // skip '\\'
c = GET_NEXT_CHAR(); // skip '\n'
}
if ( c == '\n' ) i--;
else if ( c == '\r' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' ) i--;
else {STATE(Value)[i] = '\r'; fetch = 0;}
} else {
STATE(Value)[i] = GetEscapedChar();
}
STATE(Value)[i] = GetEscapedChar();
}

/* put normal chars into 'Value' but only if there is room */
Expand All @@ -744,7 +709,7 @@ static void GetStr(void)
}

/* read the next character */
if (fetch) c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR();

}

Expand Down Expand Up @@ -988,8 +953,6 @@ void GetSymbol ( void )
break;

case '!': STATE(Symbol) = S_ILLEGAL; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '.' ) { STATE(Symbol) = S_BDOT; GET_NEXT_CHAR(); break; }
if ( c == '[' ) { STATE(Symbol) = S_BLBRACK; GET_NEXT_CHAR(); break; }
if ( c == '{' ) { STATE(Symbol) = S_BLBRACE; GET_NEXT_CHAR(); break; }
Expand All @@ -1003,11 +966,6 @@ void GetSymbol ( void )
case ',': STATE(Symbol) = S_COMMA; GET_NEXT_CHAR(); break;

case ':': STATE(Symbol) = S_COLON; c = GET_NEXT_CHAR();
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' )
{ c = GET_NEXT_CHAR(); }
}
if ( c == '=' ) { STATE(Symbol) = S_ASSIGN; c = GET_NEXT_CHAR(); break; }
break;

Expand All @@ -1019,21 +977,15 @@ void GetSymbol ( void )

case '=': STATE(Symbol) = S_EQ; GET_NEXT_CHAR(); break;
case '<': STATE(Symbol) = S_LT; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '=' ) { STATE(Symbol) = S_LE; c = GET_NEXT_CHAR(); break; }
if ( c == '>' ) { STATE(Symbol) = S_NE; c = GET_NEXT_CHAR(); break; }
break;
case '>': STATE(Symbol) = S_GT; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '=' ) { STATE(Symbol) = S_GE; c = GET_NEXT_CHAR(); break; }
break;

case '+': STATE(Symbol) = S_PLUS; GET_NEXT_CHAR(); break;
case '-': STATE(Symbol) = S_MINUS; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '>' ) { STATE(Symbol)=S_MAPTO; c = GET_NEXT_CHAR(); break; }
break;
case '*': STATE(Symbol) = S_MULT; GET_NEXT_CHAR(); break;
Expand Down
15 changes: 14 additions & 1 deletion tst/testinstall/linecontinuation.tst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gap> x:="foo\
# in triple quoted string
gap> x:="""haha\
> !""";
"haha\\\n!"
"haha!"

# break keywords and operators like :=, <=, >= etc. in the middle
gap> 1 m\
Expand All @@ -22,6 +22,19 @@ gap> x :\
> =1;
1

# inside range expressions
gap> [1.\
> .4];
[ 1 .. 4 ]

# inside triple dots
gap> {x..\
> .}->x;
function( x... ) ... end
gap> {x.\
> ..}->x;
function( x... ) ... end

# however, in comments, you cannot use line continuations:
gap> # 1234\
gap> 5;
Expand Down

0 comments on commit 9c704a9

Please sign in to comment.