Skip to content

Commit

Permalink
WIP: add TOKENIZE_STREAM
Browse files Browse the repository at this point in the history
  • Loading branch information
fingolfin committed Oct 21, 2022
1 parent 76a5b6d commit 29e98f6
Show file tree
Hide file tree
Showing 7 changed files with 408 additions and 16 deletions.
190 changes: 190 additions & 0 deletions lib/tokenizer.g
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
# Lookup list indexed by the compressed scanner id (see COMPRESS_SCANNER_ID);
# each bound entry holds the name of the corresponding scanner symbol.
ID_TO_SYMBOL_LIST:=[];
# Record mapping a scanner symbol name (e.g. S_IDENT) to the text attribute
# that is printed in front of tokens of that kind.
SYMBOL_COLORS:=rec();
# Map a scanner symbol id to a small positive integer that can be used as a
# list index.  The id is split into its lowest 3 bits (id mod 8) and the
# remaining high part (id div 8).  A positive high part is replaced by
# PValuation(high, 2) + 1, i.e. one more than the exponent of the largest
# power of 2 dividing it; a high part of 0 is kept as 0.  The result is
#     low + 8 * high' + 1
# where the final "+ 1" makes the value usable as a 1-based list position.
COMPRESS_SCANNER_ID := function(id)
    local low, high;

    low := id mod 8;
    high := QuoInt(id, 8);

    if high > 0 then
        # compress the high part down to its 2-adic valuation (shifted by one
        # so that it cannot collide with the "high = 0" case)
        return low + 8 * (PValuation(high, 2) + 1) + 1;
    fi;

    return low + 8 * high + 1;
end;

# Walk over every symbol known to the scanner: register its name under its
# compressed id, and give it the "reset" text attribute as default colour
# (specific colours are assigned further down in this file).
for name in RecNames(SCANNER_SYMBOLS) do
    id := SCANNER_SYMBOLS.(name);
    #Print(name, ": ",id, " -> ", [ id mod 8, QuoInt(id,8), COMPRESS_SCANNER_ID(id)], "\n");
    ID_TO_SYMBOL_LIST[COMPRESS_SCANNER_ID(id)] := name;
    SYMBOL_COLORS.(name) := TextAttr.reset;
od;

# Translate a raw scanner symbol id back into its symbol name.
ID_TO_SYMBOL := function(id)
    return ID_TO_SYMBOL_LIST[COMPRESS_SCANNER_ID(id)];
end;

# Colour table: every symbol not listed here keeps the default
# TextAttr.reset assigned when SYMBOL_COLORS was populated.
# (Each symbol is listed exactly once; earlier duplicate assignments of
# S_ASSERT and S_MULT were redundant and have been dropped.)

# identifiers and keywords
SYMBOL_COLORS.S_IDENT := TextAttr.1;
SYMBOL_COLORS.S_UNBIND := TextAttr.1;
SYMBOL_COLORS.S_ISBOUND := TextAttr.1;
SYMBOL_COLORS.S_TRYNEXT := TextAttr.1;
SYMBOL_COLORS.S_INFO := TextAttr.1;
SYMBOL_COLORS.S_ASSERT := TextAttr.1;
SYMBOL_COLORS.S_READWRITE := TextAttr.1;
SYMBOL_COLORS.S_READONLY := TextAttr.1;
SYMBOL_COLORS.S_REC := TextAttr.1;
SYMBOL_COLORS.S_FUNCTION := TextAttr.1;
SYMBOL_COLORS.S_LOCAL := TextAttr.1;
SYMBOL_COLORS.S_END := TextAttr.1;
SYMBOL_COLORS.S_IF := TextAttr.1;
SYMBOL_COLORS.S_FOR := TextAttr.1;
SYMBOL_COLORS.S_WHILE := TextAttr.1;
SYMBOL_COLORS.S_REPEAT := TextAttr.1;
SYMBOL_COLORS.S_ATOMIC := TextAttr.1;
SYMBOL_COLORS.S_THEN := TextAttr.1;
SYMBOL_COLORS.S_ELIF := TextAttr.1;
SYMBOL_COLORS.S_ELSE := TextAttr.1;
SYMBOL_COLORS.S_FI := TextAttr.1;
SYMBOL_COLORS.S_DO := TextAttr.1;
SYMBOL_COLORS.S_OD := TextAttr.1;
SYMBOL_COLORS.S_UNTIL := TextAttr.1;
SYMBOL_COLORS.S_BREAK := TextAttr.1;
SYMBOL_COLORS.S_RETURN := TextAttr.1;
SYMBOL_COLORS.S_QUIT := TextAttr.1;
SYMBOL_COLORS.S_QQUIT := TextAttr.1;
SYMBOL_COLORS.S_CONTINUE := TextAttr.1;

# operators that are spelled as keywords share the keyword colour
SYMBOL_COLORS.S_MOD := TextAttr.1;
SYMBOL_COLORS.S_IN := TextAttr.1;
SYMBOL_COLORS.S_NOT := TextAttr.1;
SYMBOL_COLORS.S_AND := TextAttr.1;
SYMBOL_COLORS.S_OR := TextAttr.1;

# brackets, parens, ...
SYMBOL_COLORS.S_LBRACK := TextAttr.5;
SYMBOL_COLORS.S_LBRACE := TextAttr.5;
SYMBOL_COLORS.S_BLBRACK := TextAttr.5;
SYMBOL_COLORS.S_RBRACK := TextAttr.5;
SYMBOL_COLORS.S_RBRACE := TextAttr.5;
SYMBOL_COLORS.S_DOT := TextAttr.5;
SYMBOL_COLORS.S_BDOT := TextAttr.5;
SYMBOL_COLORS.S_LPAREN := TextAttr.5;
SYMBOL_COLORS.S_RPAREN := TextAttr.5;
SYMBOL_COLORS.S_COMMA := TextAttr.5;
SYMBOL_COLORS.S_DOTDOT := TextAttr.5;
SYMBOL_COLORS.S_COLON := TextAttr.5;
SYMBOL_COLORS.S_DOTDOTDOT := TextAttr.5;
SYMBOL_COLORS.S_SEMICOLON := TextAttr.5;
SYMBOL_COLORS.S_DUALSEMICOLON := TextAttr.5;


# constants
SYMBOL_COLORS.S_INT := TextAttr.4;
SYMBOL_COLORS.S_FLOAT := TextAttr.4;
SYMBOL_COLORS.S_TRUE := TextAttr.4;
SYMBOL_COLORS.S_FALSE := TextAttr.4;
SYMBOL_COLORS.S_CHAR := TextAttr.4;

# strings
SYMBOL_COLORS.S_STRING := TextAttr.3;

# operators
SYMBOL_COLORS.S_MULT := TextAttr.2;
SYMBOL_COLORS.S_DIV := TextAttr.2;
SYMBOL_COLORS.S_POW := TextAttr.2;
SYMBOL_COLORS.S_PLUS := TextAttr.2;
SYMBOL_COLORS.S_MINUS := TextAttr.2;
SYMBOL_COLORS.S_EQ := TextAttr.2;
SYMBOL_COLORS.S_LT := TextAttr.2;
SYMBOL_COLORS.S_GT := TextAttr.2;
SYMBOL_COLORS.S_NE := TextAttr.2;
SYMBOL_COLORS.S_LE := TextAttr.2;
SYMBOL_COLORS.S_GE := TextAttr.2;
SYMBOL_COLORS.S_ASSIGN := TextAttr.2;


# Return the text lying between two positions of a text given as a list of
# lines (without their line terminators).
#
#   lines      list of strings, one per line
#   startline  index of the line on which the range begins
#   startpos   number of characters of that line lying before the range
#   endline    index of the line on which the range ends
#   endpos     number of characters of the end line belonging to the range
#
# Within a single line this is lines[startline]{[startpos+1 .. endpos]}.
# Otherwise the rest of the start line, all lines strictly in between, and
# the first <endpos> characters of the end line are joined with '\n'.
# The result is always a newly created string.
ExtractRangeFromLines := function(lines, startline, startpos, endline, endpos)
    local text, line, row;

    line := lines[startline];
    if startline = endline then
        return line{[startpos+1 .. endpos]};
    fi;

    # tail of the first line
    text := line{[startpos+1 .. Length(line)]};
    Add(text, '\n');

    # complete lines strictly between the first and the last one
    row := startline + 1;
    while row < endline do
        Append(text, lines[row]);
        Add(text, '\n');
        row := row + 1;
    od;

    # head of the last line
    line := lines[endline];
    Append(text, line{[1 .. endpos]});
    return text;
end;


# Tokenize the GAP source text <str> with TOKENIZE_STREAM and print it back
# to the current output with colours chosen from SYMBOL_COLORS.
#
# TOKENIZE_STREAM's result is treated as a list of statements, each of which
# is a list of tokens; entries that are not lists are skipped.  For every
# token the trailing numeric symbol id is replaced by the symbol name.  For
# tokens that then have exactly 8 entries, entries 2..7 are used as three
# (line, column) pairs: the text between the first and second pair is printed
# with TextAttr.6 (presumably the whitespace/comments preceding the token --
# TODO confirm against TOKENIZE_STREAM), the text between the second and
# third pair is printed in the symbol's colour and appended to the token.
#
# Returns the (modified in place) list produced by TOKENIZE_STREAM.
#
# The argument <str> itself is NOT modified: the newline is appended to a
# copy, so immutable strings are accepted and repeated calls with the same
# string object do not accumulate trailing newlines.
TOKENIZE_STRING:=function(str)
    local res, stat, token, symbol, name, lines, text, sep1, sep2;

    # Concatenation returns a new string, leaving the caller's object alone
    str := Concatenation(str, "\n");

    # optional delimiters around each printed piece (useful for debugging)
    sep1 := "";
    sep2 := "";
    #sep1 := "<";
    #sep2 := ">";
    # Print("Input:\n", str, "\n");
    # Print("Output:\n");

    lines := SplitString(str, "\n");
    res := TOKENIZE_STREAM(InputTextString(str));
    for stat in res do
        if not IsList(stat) then continue; fi;
        for token in stat do
            if not IsList(token) then continue; fi;
            if token[1] = "ERROR" then
                Print("\nEncountered an error: ", token[2], "\n");
                continue;
            fi;

            # replace the numeric symbol id at the end by the symbol name
            symbol := Remove(token);
            name := ID_TO_SYMBOL(symbol);
            Add(token, name);

            # only tokens carrying full position information are printed
            if Length(token) <> 8 then continue; fi;
            if symbol = SCANNER_SYMBOLS.S_EOF then
                Print("\n\n-- EOF --\n");
                continue;
            fi;

            # text in front of the token
            text := ExtractRangeFromLines(lines, token[2], token[3], token[4], token[5]);
            if Length(text) > 0 then
                Print(TextAttr.6, sep1, text, sep2, TextAttr.reset);
                #Print(TextAttr.b6, sep1, text, sep2, TextAttr.reset);
            fi;

            # text of the token itself
            text := ExtractRangeFromLines(lines, token[4], token[5], token[6], token[7]);
            if Length(text) > 0 then
                Print(SYMBOL_COLORS.(name), sep1, text, sep2, TextAttr.reset);
            fi;
            Add(token, text);
        od;
    od;
    Print("\n");
    return res;
end;


# Switch off print formatting on stdout before running the demo calls below
# (presumably so that GAP's automatic line wrapping does not interfere with
# the coloured output -- TODO confirm).
SetPrintFormattingStatus("*stdout*", false);

# Ad-hoc demo inputs; each call prints the coloured text and stores the
# token list in the global variable l.

# a single complete statement
l:=TOKENIZE_STRING("1;");

# input without a terminating semicolon
l:=TOKENIZE_STRING("1");

# syntactically incomplete expression
l:=TOKENIZE_STRING("1-;");


l:=TOKENIZE_STRING("1+1;");

# whitespace between tokens
l:=TOKENIZE_STRING("123 + 456;");

# several statements on one line
l:=TOKENIZE_STRING("1+1; x:=y-3;");

l:=TOKENIZE_STRING("x:=0123 + 1234; xxxx+777777;");

# multi-line input including comments
l:=TOKENIZE_STRING("""
1+1; x:=y-3;
# This is a little test program
f := x -> x+1; # increment function
f(2);
""");
8 changes: 4 additions & 4 deletions src/intrprtr.c
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ void IntrBegin(IntrState * intr)
intr->StackObj = NEW_PLIST(T_PLIST, 64);

/* must be in immediate (non-ignoring, non-coding) mode */
GAP_ASSERT(intr->ignoring == 0);
// GAP_ASSERT(intr->ignoring == 0);
GAP_ASSERT(intr->coding == 0);

/* no return-statement was yet interpreted */
Expand All @@ -254,13 +254,13 @@ ExecStatus IntrEnd(IntrState * intr, BOOL error, Obj * result)
if ( ! error ) {

/* must be back in immediate (non-ignoring, non-coding) mode */
GAP_ASSERT(intr->ignoring == 0);
// GAP_ASSERT(intr->ignoring == 0);
GAP_ASSERT(intr->coding == 0);

/* and the stack must contain the result value (which may be void) */
GAP_ASSERT(LEN_PLIST(intr->StackObj) == 1);
// GAP_ASSERT(LEN_PLIST(intr->StackObj) == 1);
if (result)
*result = PopVoidObj(intr);
*result = (LEN_PLIST(intr->StackObj) == 0) ? 0 : PopVoidObj(intr);

return intr->returning;
}
Expand Down
44 changes: 43 additions & 1 deletion src/read.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
#endif


// Forward declaration: 'ReadCommand' is the shared implementation behind
// 'ReadEvalCommand' (justTokenize = FALSE) and 'ReadTokenizeCommand'
// (justTokenize = TRUE); it is defined further down in this file.
static ExecStatus ReadCommand(Obj context,
TypInputFile * input,
BOOL justTokenize,
Obj * evalResult,
BOOL * dualSemicolon);

/****************************************************************************
**
*S TRY_IF_NO_ERROR
Expand Down Expand Up @@ -2510,6 +2516,32 @@ ExecStatus ReadEvalCommand(Obj context,
TypInputFile * input,
Obj * evalResult,
BOOL * dualSemicolon)
{
return ReadCommand(context, input, FALSE, evalResult, dualSemicolon);
}


/****************************************************************************
**
*F ReadTokenizeCommand()
**
**  'ReadTokenizeCommand' is a thin wrapper around 'ReadCommand' which calls
**  it with <justTokenize> set to TRUE, passes <tokens> in the place of the
**  evaluation result, and passes a null pointer for <dualSemicolon>.
**
**  In this mode 'ReadCommand' reads '*tokens': if it is non-zero it is used
**  as the token list, otherwise a new plain list is allocated and stored
**  back into '*tokens'. Callers must therefore initialise '*tokens' (to 0
**  or to an existing list) before the call.
*/
ExecStatus ReadTokenizeCommand(Obj context, TypInputFile * input, Obj * tokens)
{
return ReadCommand(context, input, TRUE, tokens, 0);
}


/****************************************************************************
**
*F ReadCommand()
**
*/
static ExecStatus ReadCommand(Obj context,
TypInputFile * input,
BOOL justTokenize,
Obj * evalResult,
BOOL * dualSemicolon)
{
volatile ExecStatus status;
volatile Obj tilde;
Expand All @@ -2528,6 +2560,16 @@ ExecStatus ReadEvalCommand(Obj context,

ClearError();

if (justTokenize) {
// HACK / TODO: explain this
rs->intr.returning = STATUS_RETURN;
rs->intr.ignoring = 1;
if (*evalResult)
rs->s.tokens = *evalResult;
else
rs->s.tokens = *evalResult = NEW_PLIST(T_PLIST, 16);
}

/* get the first symbol from the input */
Match_(rs, rs->s.Symbol, "", 0);

Expand Down Expand Up @@ -2607,7 +2649,7 @@ ExecStatus ReadEvalCommand(Obj context,
*dualSemicolon = (rs->s.Symbol == S_DUALSEMICOLON);

// end the interpreter
status = IntrEnd(&rs->intr, rs->s.NrError > 0, evalResult);
status = IntrEnd(&rs->intr, rs->s.NrError > 0, justTokenize ? 0 : evalResult);

// restore the execution environment
SWITCH_TO_OLD_LVARS(oldLVars);
Expand Down
8 changes: 8 additions & 0 deletions src/read.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ ExecStatus ReadEvalCommand(Obj context,
BOOL * dualSemicolon);


/****************************************************************************
**
*F ReadTokenizeCommand()
**
**  Variant of 'ReadEvalCommand' that runs the reader in "just tokenize"
**  mode. <tokens> must point to an initialised 'Obj': either 0, in which
**  case a new plain list is allocated and stored there, or an existing
**  list which is then used as the token list.
*/
ExecStatus ReadTokenizeCommand(Obj context, TypInputFile * input, Obj * tokens);


/****************************************************************************
**
*F ReadEvalFile() . . . . . . . . . . . . . . . . . . . . . . . read a file
Expand Down
Loading

0 comments on commit 29e98f6

Please sign in to comment.