Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move line continuation handling to io.c #2215

Merged
merged 3 commits into from
Apr 11, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 69 additions & 14 deletions src/io.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,14 +195,57 @@ Char GET_NEXT_CHAR(void)
}
else
STATE(In)++;

// handle line continuation, i.e., backslash followed by new line; and
// also the case when we run out of buffered data
while (*STATE(In) == '\\' || *STATE(In) == 0) {

// if we run out of data, get more, and try again
if (*STATE(In) == 0) {
GetLine();
continue;
}

// we have seen a backslash; so check now if it starts a
// line continuation, i.e., whether it is followed by a line terminator
if (STATE(In)[1] == '\n') {
// LF is the line terminator used in Unix and its relatives
STATE(In) += 2;
}
else if (STATE(In)[1] == '\r' && STATE(In)[2] == '\n') {
// CR+LF is the line terminator used by Windows
STATE(In) += 3;
}
else {
// if we see a backslash without a line terminator after it, stop
break;
}
}

return *STATE(In);
}

// Variant of GET_NEXT_CHAR that performs no line continuation handling:
// a backslash followed by a newline is returned as-is, one character at
// a time. SKIP_TO_END_OF_LINE uses it to step through the remainder of
// a line. Returns the character at the new input position.
static Char GET_NEXT_CHAR_NO_LC(void)
{
    if (STATE(In) != &IO()->Pushback) {
        // ordinary case: step to the next character
        STATE(In)++;
    }
    else {
        // we were reading the pushback slot; continue from IO()->RealIn
        STATE(In) = IO()->RealIn;
    }

    // a NUL character means no more data is available at the current
    // position, so ask GetLine for more
    if (*STATE(In) == '\0') {
        GetLine();
    }

    return *STATE(In);
}

Char PEEK_NEXT_CHAR(void)
{
assert(IS_CHAR_PUSHBACK_EMPTY());

// store the current character
IO()->Pushback = *STATE(In);

Expand All @@ -220,6 +263,14 @@ Char PEEK_CURR_CHAR(void)
return *STATE(In);
}

// Advance the input until the current character is '\n', '\r' or
// '\377' (the latter presumably marks end of input -- confirm against
// GetLine). Line continuations are NOT interpreted while skipping,
// because characters are fetched via GET_NEXT_CHAR_NO_LC. If the current
// character already is one of the stop characters, nothing is consumed.
void SKIP_TO_END_OF_LINE(void)
{
    Char ch = *STATE(In);
    for (;;) {
        if (ch == '\n' || ch == '\r' || ch == '\377') {
            return;
        }
        ch = GET_NEXT_CHAR_NO_LC();
    }
}


const Char * GetInputFilename(void)
{
GAP_ASSERT(IO()->Input);
Expand Down Expand Up @@ -1067,26 +1118,30 @@ static Int GetLine2 (
bptr++;

/* copy piece of input->sline into buffer and adjust counters */
UInt count = input->spos;
Char *ptr = (Char *)CHARS_STRING(input->sline) + count;
UInt len = GET_LEN_STRING(input->sline);
Char *bend = buffer + length - 2;
while (bptr < bend && count < len && *ptr != '\n' && *ptr != '\r') {
*bptr++ = *ptr++;
count++;
}
/* we also copy an end of line if there is one */
if (*ptr == '\n' || *ptr == '\r') {
*bptr++ = *ptr++;
count++;
Char *ptr = CSTR_STRING(input->sline) + input->spos;
const Char * const end = CSTR_STRING(input->sline) + GET_LEN_STRING(input->sline);
const Char * const bend = buffer + length - 2;
while (bptr < bend && ptr < end) {
Char c = *ptr++;

// ignore CR, so that a Windows CR+LF line terminator looks
// to us the same as a Unix LF line terminator
if (c == '\r')
continue;

*bptr++ = c;

// check for line end
if (c == '\n')
break;
}
*bptr = '\0';
input->spos = count;
input->spos = ptr - (Char *)CHARS_STRING(input->sline);

/* if input->stream is a string stream, we have to adjust the
position counter in the stream object as well */
if (input->isstringstream) {
ADDR_OBJ(input->stream)[1] = INTOBJ_INT(count);
ADDR_OBJ(input->stream)[1] = INTOBJ_INT(input->spos);
}
}
else {
Expand Down
3 changes: 3 additions & 0 deletions src/io.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ extern Char GET_NEXT_CHAR(void);
extern Char PEEK_NEXT_CHAR(void);
extern Char PEEK_CURR_CHAR(void);

// skip the rest of the current line, ignoring line continuations
// (used to handle comments)
extern void SKIP_TO_END_OF_LINE(void);

/****************************************************************************
**
Expand Down
98 changes: 24 additions & 74 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ void Match (
*/
static void GetIdent(void)
{
Int i, fetch;
Int i;
Int isQuoted;

/* initially it could be a keyword */
Expand All @@ -201,24 +201,13 @@ static void GetIdent(void)
Char c = PEEK_CURR_CHAR();
for ( i=0; IsIdent(c) || IsDigit(c) || c=='\\'; i++ ) {

fetch = 1;
/* handle escape sequences */
/* we ignore '\ newline' by decrementing i, except at the
very start of the identifier, when we cannot do that
so we recurse instead */
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' && i == 0 ) { GetSymbol(); return; }
else if ( c == '\r' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' ) {
if (i == 0) { GetSymbol(); return; }
else i--;
}
else {STATE(Value)[i] = '\r'; fetch = 0;}
}
else if ( c == '\n' && i < SAFE_VALUE_SIZE-1 ) i--;
else if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n';
if ( c == 'n' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\n';
else if ( c == 't' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\t';
else if ( c == 'r' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\r';
else if ( c == 'b' && i < SAFE_VALUE_SIZE-1 ) STATE(Value)[i] = '\b';
Expand All @@ -234,7 +223,7 @@ static void GetIdent(void)
}

/* read the next character */
if (fetch) c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR();

}

Expand Down Expand Up @@ -327,35 +316,24 @@ static void GetIdent(void)
** exponent digit.
**
*/
static Char GetCleanedChar( UInt *wasEscaped ) {
Char c = GET_NEXT_CHAR();
*wasEscaped = 0;
if (c == '\\') {
c = GET_NEXT_CHAR();
if ( c == '\n')
return GetCleanedChar(wasEscaped);
else if ( c == '\r' ) {
if ( PEEK_NEXT_CHAR() == '\n' ) {
GET_NEXT_CHAR(); // skip the \n
return GetCleanedChar(wasEscaped);
}
else {
static Char GetCleanedChar(UInt * wasEscaped)
{
Char c = GET_NEXT_CHAR();
*wasEscaped = 0;
if (c == '\\') {
c = GET_NEXT_CHAR();
*wasEscaped = 1;
return '\r';
}
}
else {
*wasEscaped = 1;
if ( c == 'n') return '\n';
else if ( c == 't') return '\t';
else if ( c == 'r') return '\r';
else if ( c == 'b') return '\b';
else if ( c == '>') return '\01';
else if ( c == '<') return '\02';
else if ( c == 'c') return '\03';
switch (c) {
case 'n': return '\n';
case 't': return '\t';
case 'r': return '\r';
case 'b': return '\b';
case '>': return '\01';
case '<': return '\02';
case 'c': return '\03';
}
}
}
return c;
return c;
}


Expand Down Expand Up @@ -712,30 +690,17 @@ static Char GetEscapedChar(void)
*/
static void GetStr(void)
{
Int i = 0, fetch;
Int i = 0;
Char c = PEEK_CURR_CHAR();

/* read all characters into 'Value' */
for ( i = 0; i < SAFE_VALUE_SIZE-1 && c != '"'
&& c != '\n' && c != '\377'; i++ ) {

fetch = 1;
/* handle escape sequences */
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
/* if next is another '\\' followed by '\n' it must be ignored */
while ( c == '\\' && PEEK_NEXT_CHAR() == '\n' ) {
c = GET_NEXT_CHAR(); // skip '\\'
c = GET_NEXT_CHAR(); // skip '\n'
}
if ( c == '\n' ) i--;
else if ( c == '\r' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' ) i--;
else {STATE(Value)[i] = '\r'; fetch = 0;}
} else {
STATE(Value)[i] = GetEscapedChar();
}
STATE(Value)[i] = GetEscapedChar();
}

/* put normal chars into 'Value' but only if there is room */
Expand All @@ -744,7 +709,7 @@ static void GetStr(void)
}

/* read the next character */
if (fetch) c = GET_NEXT_CHAR();
c = GET_NEXT_CHAR();

}

Expand Down Expand Up @@ -970,10 +935,8 @@ void GetSymbol ( void )

/* skip over <spaces>, <tabs>, <newlines> and comments */
while (c==' '||c=='\t'||c=='\n'||c=='\r'||c=='\f'||c=='#') {
if ( c == '#' ) {
while ( c != '\n' && c != '\r' && c != '\377' )
c = GET_NEXT_CHAR();
}
if ( c == '#' )
SKIP_TO_END_OF_LINE();
c = GET_NEXT_CHAR();
}

Expand All @@ -990,8 +953,6 @@ void GetSymbol ( void )
break;

case '!': STATE(Symbol) = S_ILLEGAL; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '.' ) { STATE(Symbol) = S_BDOT; GET_NEXT_CHAR(); break; }
if ( c == '[' ) { STATE(Symbol) = S_BLBRACK; GET_NEXT_CHAR(); break; }
if ( c == '{' ) { STATE(Symbol) = S_BLBRACE; GET_NEXT_CHAR(); break; }
Expand All @@ -1005,11 +966,6 @@ void GetSymbol ( void )
case ',': STATE(Symbol) = S_COMMA; GET_NEXT_CHAR(); break;

case ':': STATE(Symbol) = S_COLON; c = GET_NEXT_CHAR();
if ( c == '\\' ) {
c = GET_NEXT_CHAR();
if ( c == '\n' )
{ c = GET_NEXT_CHAR(); }
}
if ( c == '=' ) { STATE(Symbol) = S_ASSIGN; c = GET_NEXT_CHAR(); break; }
break;

Expand All @@ -1021,21 +977,15 @@ void GetSymbol ( void )

case '=': STATE(Symbol) = S_EQ; GET_NEXT_CHAR(); break;
case '<': STATE(Symbol) = S_LT; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '=' ) { STATE(Symbol) = S_LE; c = GET_NEXT_CHAR(); break; }
if ( c == '>' ) { STATE(Symbol) = S_NE; c = GET_NEXT_CHAR(); break; }
break;
case '>': STATE(Symbol) = S_GT; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '=' ) { STATE(Symbol) = S_GE; c = GET_NEXT_CHAR(); break; }
break;

case '+': STATE(Symbol) = S_PLUS; GET_NEXT_CHAR(); break;
case '-': STATE(Symbol) = S_MINUS; c = GET_NEXT_CHAR();
if ( c == '\\' ) { c = GET_NEXT_CHAR();
if ( c == '\n' ) { c = GET_NEXT_CHAR(); } }
if ( c == '>' ) { STATE(Symbol)=S_MAPTO; c = GET_NEXT_CHAR(); break; }
break;
case '*': STATE(Symbol) = S_MULT; GET_NEXT_CHAR(); break;
Expand Down
15 changes: 14 additions & 1 deletion tst/testinstall/linecontinuation.tst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ gap> x:="foo\
# in triple quoted string
gap> x:="""haha\
> !""";
"haha\\\n!"
"haha!"

# break keywords and operators like :=, <=, >= etc. in the middle
gap> 1 m\
Expand All @@ -22,6 +22,19 @@ gap> x :\
> =1;
1

# inside range expressions
gap> [1.\
> .4];
[ 1 .. 4 ]

# inside triple dots
gap> {x..\
> .}->x;
function( x... ) ... end
gap> {x.\
> ..}->x;
function( x... ) ... end

# however, in comments, you cannot use line continuations:
gap> # 1234\
gap> 5;
Expand Down