TJaMScannerImpl.mesa
Copyright © 1985 by Xerox Corporation. All rights reserved.
Original version by John Warnock, March 7, 1979
Paxton, 22-Jan-82 10:19:46
Maureen Stone February 14, 1984 4:04:32 pm PST
Doug Wyatt, March 23, 1985 6:14:16 pm PST
DIRECTORY
Atom USING [MakeAtomFromRefText],
Convert USING [Error, IntFromRope, RealFromRope],
IO USING [Backup, Close, EndOfStream, GetChar, RIS, STREAM],
RefText USING [InlineAppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope USING [Concat, FromRefText, ROPE],
TJaM USING [Array, AStore, CountToMark, ExecuteAtom, Frame, Mark, MarkRep, NewArray, NumberRep, PopMark, ProduceError, PushArray, PushAtom, PushInt, PushMark, PushNum, PushReal, PushRope];
TJaMScannerImpl: CEDAR PROGRAM
IMPORTS Atom, Convert, IO, RefText, Rope, TJaM
EXPORTS TJaM
= BEGIN OPEN TJaM;
ROPE: TYPE ~ Rope.ROPE;
STREAM: TYPE ~ IO.STREAM;
Class: TYPE ~ {other, space, plus, minus, dot, digit, letterB, letterE, name};
ClassArray: TYPE ~ ARRAY CHAR OF Class;
InitClassArray: PROC RETURNS[REF ClassArray] ~ {
class: REF ClassArray ~ NEW[ClassArray ← ALL[other]];
FOR char: CHAR IN CHAR DO
SELECT char FROM
'{, '}, '(, '), '", '% => class[char] ← other; -- delimiting characters
'\n, '\l, '\t, ' , ', => class[char] ← space; -- white space
'+ => class[char] ← plus; -- plus sign
'- => class[char] ← minus; -- minus sign
'. => class[char] ← dot; -- decimal point
IN['0..'9] => class[char] ← digit; -- digits
'b, 'B => class[char] ← letterB; -- marks octal numbers
'e, 'E => class[char] ← letterE; -- introduces exponent
IN['\041..'\176] => class[char] ← name; -- may occur in a name
ENDCASE;
ENDLOOP;
RETURN[class];
};
classFromChar: REF ClassArray ~ InitClassArray[];
State: TYPE ~ {
null, name, plus, minus, int, oct, dot, real1, realF, real2, real3, realE,
single, double -- these states terminate a token
};
ActionType: TYPE ~ PROC[state: State, text: REF TEXT, stream: STREAM]
RETURNS[quit: BOOLFALSE];
MapTokens: PROC[stream: STREAM, buffer: REF TEXT, action: ActionType]
RETURNS[quit: BOOLFALSE] ~ {
text: REF TEXT ← buffer;
char: CHAR; class: Class;
peeked: BOOLFALSE;
Cleanup: PROC ~ { IF peeked THEN { IO.Backup[stream, char]; peeked ← FALSE } };
UNTIL quit DO -- for each token
state: State ← null;
text.length ← 0;
WHILE state<single DO -- for each char
IF peeked THEN peeked ← FALSE
ELSE {
char ← IO.GetChar[stream ! IO.EndOfStream => EXIT];
class ← classFromChar[char];
};
{ -- block for EXITS below
SELECT state FROM
null => SELECT class FROM
other => state ← single;
space => GOTO Skip;
digit => state ← int;
plus => state ← plus;
minus => state ← minus;
dot => state ← dot;
ENDCASE => state ← name;
name => GOTO Test;
plus => SELECT class FROM
digit => state ← int;
dot => state ← real1;
ENDCASE => GOTO Test;
minus => SELECT class FROM
digit => state ← int;
minus => state ← double;
dot => state ← real1;
ENDCASE => GOTO Test;
int => SELECT class FROM
digit => NULL;
dot => state ← realF;
letterE => state ← real2;
letterB => state ← oct;
ENDCASE => GOTO Test;
oct => GOTO Test;
dot => SELECT class FROM
digit => state ← realF;
ENDCASE => GOTO Test;
real1 => SELECT class FROM
digit => state ← realF;
ENDCASE => GOTO Test;
realF => SELECT class FROM
digit => NULL;
letterE => state ← real2;
ENDCASE => GOTO Test;
real2 => SELECT class FROM
digit => state ← realE;
plus, minus => state ← real3;
ENDCASE => GOTO Test;
real3 => SELECT class FROM
digit => state ← realE;
ENDCASE => GOTO Test;
realE => SELECT class FROM
digit => NULL;
ENDCASE => GOTO Test;
ENDCASE;
EXITS
Skip => LOOP; -- skip white space, don't append to text
Test => SELECT class FROM -- test for end of token
other => { peeked ← TRUE; EXIT }; -- put back delimiting character and stop
space => EXIT; -- stop with current state, don't bother to put back space
ENDCASE => state ← name; -- turn token into a name and keep scanning
};
text ← RefText.InlineAppendChar[to: text, from: char];
ENDLOOP;
IF state=null THEN EXIT; -- normal EndOfStream
quit ← action[state: state, text: text, stream: stream ! UNWIND => Cleanup[]];
ENDLOOP;
IF peeked THEN Cleanup[];
};
GetToken: PROC[stream: STREAM, buffer: REF TEXT]
RETURNS[state: State, index: INT, text: REF TEXT] ~ {
stateT: State ← null; indexT: INT ← 0; textT: REF TEXTNIL;
action: ActionType ~ { stateT ← state; indexT ← index; textT ← text; RETURN[TRUE] };
[] ← MapTokens[stream, buffer, action];
RETURN[stateT, indexT, textT];
};
DoWithScratchText: PROC[action: PROC[REF TEXT], len: NAT ← 100] ~ {
scratch: REF TEXT ~ RefText.ObtainScratch[len];
action[scratch ! UNWIND => RefText.ReleaseScratch[scratch]];
RefText.ReleaseScratch[scratch];
};
StringProc: TYPE ~ PROC[stream: STREAM, put: PROC[CHAR]];
SyntaxError: ERROR ~ CODE;
PString: StringProc ~ {
DO char: CHAR ~ IO.GetChar[stream];
SELECT char FROM
') => EXIT;
'( => { put['(]; PString[stream, put]; put[')] };
'" => { put['"]; QString[stream, put]; put['"] };
ENDCASE => put[char];
ENDLOOP;
};
QString: StringProc ~ {
state: {null, esc1, esc2, esc3} ← null;
code: [0..377B];
DO char: CHAR ~ IO.GetChar[stream];
SELECT state FROM
null => SELECT char FROM
'" => EXIT;
'\\ => state ← esc1;
ENDCASE => put[char];
esc1 => SELECT char FROM
'n, 'N => { put['\n]; state ← null };
'r, 'R => { put['\r]; state ← null };
'l, 'L => { put['\l]; state ← null };
't, 'T => { put['\t]; state ← null };
'b, 'B => { put['\b]; state ← null };
'f, 'F => { put['\f]; state ← null };
'', '", '\\ => { put[char]; state ← null };
IN['0..'3] => { code ← char-'0; state ← esc2 };
ENDCASE => ERROR SyntaxError;
esc2 => SELECT char FROM
IN['0..'7] => { code ← code*10B+(char-'0); state ← esc3 };
ENDCASE => ERROR SyntaxError;
esc3 => SELECT char FROM
IN['0..'7] => { code ← code*10B+(char-'0); put[VAL[code]]; state ← null };
ENDCASE => ERROR SyntaxError;
ENDCASE => ERROR;
ENDLOOP;
};
RopeFromString: PROC[stream: STREAM, string: StringProc] RETURNS[rope: ROPENIL] ~ {
action: PROC[text: REF TEXT] ~ {
put: PROC[char: CHAR] ~ {
IF NOT text.length<NAT.LAST THEN {
rope ← Rope.Concat[rope, Rope.FromRefText[text]];
text.length ← 0;
};
text ← RefText.InlineAppendChar[text, char];
};
string[stream, put];
IF text.length#0 THEN rope ← Rope.Concat[rope, Rope.FromRefText[text]];
};
DoWithScratchText[action];
};
SkipString: PROC[stream: STREAM, string: StringProc] ~ {
put: PROC[char: CHAR] ~ { };
string[stream, put];
};
SkipJaMComment: PROC[stream: STREAM] ~ {
DO
char: CHAR ~ IO.GetChar[stream ! IO.EndOfStream => EXIT];
IF char='\n THEN EXIT;
ENDLOOP;
};
SkipCedarComment: PROC[stream: STREAM] ~ {
state: {null, dash} ← null;
DO
char: CHAR ~ IO.GetChar[stream ! IO.EndOfStream => EXIT];
SELECT state FROM
null => SELECT char FROM
'\n => EXIT;
'- => state ← dash;
ENDCASE;
dash => SELECT char FROM
'\n, '- => EXIT;
ENDCASE => state ← null;
ENDCASE;
ENDLOOP;
};
IntFromText: PROC[text: REF READONLY TEXT] RETURNS[INT] ~ {
RETURN[Convert.IntFromRope[RefText.TrustTextAsRope[text]]];
};
RealFromText: PROC[text: REF READONLY TEXT] RETURNS[REAL] ~ {
RETURN[Convert.RealFromRope[RefText.TrustTextAsRope[text]]];
};
NumFromText: PROC[text: REF READONLY TEXT] RETURNS[NumberRep] ~ {
RETURN[[int[IntFromText[text ! Convert.Error => IF reason=overflow THEN CONTINUE]]]];
RETURN[[real[RealFromText[text]]]];
};
arrayMark: Mark ~ NEW[MarkRep];
ExecuteStream: PUBLIC PROC[frame: Frame, stream: STREAM, closeAtEnd: BOOLTRUE] ~ {
arrayNest: INT ← 0;
tokenAction: ActionType ~ {
SELECT state FROM
single => SELECT text[0] FROM
'( => PushRope[frame, RopeFromString[stream, PString]];
'" => PushRope[frame, RopeFromString[stream, QString]];
'{ => {
arrayNest ← arrayNest+1;
PushMark[frame, arrayMark];
};
'} => IF arrayNest=0 THEN GOTO Name ELSE {
len: INT ~ CountToMark[frame];
array: Array ~ NewArray[len];
AStore[frame, array];
IF PopMark[frame]#arrayMark THEN ProduceError[bug];
PushArray[frame, array];
arrayNest ← arrayNest-1;
};
'% => SkipJaMComment[stream];
ENDCASE => GOTO Name;
double => SELECT text[0] FROM
'- => SELECT text[1] FROM
'- => SkipCedarComment[stream];
ENDCASE => GOTO Name;
ENDCASE => GOTO Name;
int => PushNum[frame, NumFromText[text]];
oct => PushInt[frame, IntFromText[text]];
realF, realE => PushReal[frame, RealFromText[text]];
ENDCASE => GOTO Name;
EXITS Name => {
atom: ATOM ~ Atom.MakeAtomFromRefText[text];
IF arrayNest>0 THEN PushAtom[frame, atom]
ELSE ExecuteAtom[frame, atom];
};
};
bufferAction: PROC[buffer: REF TEXT] ~ {
[] ← MapTokens[stream: stream, buffer: buffer, action: tokenAction !
IO.EndOfStream => GOTO EndOfStreamExit;
SyntaxError => GOTO SyntaxErrorExit;
];
IF arrayNest>0 THEN ProduceError[endOfStream];
EXITS
EndOfStreamExit => ProduceError[endOfStream];
SyntaxErrorExit => ProduceError[syntaxError];
};
DoWithScratchText[bufferAction];
IF closeAtEnd THEN IO.Close[stream];
};
ExecuteRope: PUBLIC PROC[frame: Frame, rope: ROPE] ~ {
stream: STREAM ~ IO.RIS[rope];
ExecuteStream[frame, stream];
};
LineComplete: PUBLIC PROC[rope: ROPE] RETURNS[BOOL] ~ {
stream: STREAM ~ IO.RIS[rope];
arrayNest: INT ← 0;
endOfStream: BOOLFALSE;
tokenAction: ActionType ~ {
SELECT state FROM
single => SELECT text[0] FROM
'( => SkipString[stream, PString];
'" => SkipString[stream, QString];
'{ => arrayNest ← arrayNest+1;
'} => IF arrayNest>0 THEN arrayNest ← arrayNest-1;
'% => SkipJaMComment[stream];
ENDCASE => NULL;
double => SELECT text[0] FROM
'- => SELECT text[1] FROM
'- => SkipCedarComment[stream];
ENDCASE => NULL;
ENDCASE => NULL;
ENDCASE => NULL;
};
bufferAction: PROC[buffer: REF TEXT] ~ {
[] ← MapTokens[stream: stream, buffer: buffer, action: tokenAction !
IO.EndOfStream => GOTO EndOfStreamExit;
SyntaxError => GOTO SyntaxErrorExit;
];
IF arrayNest>0 THEN endOfStream ← TRUE;
EXITS
EndOfStreamExit => endOfStream ← TRUE;
SyntaxErrorExit => NULL;
};
DoWithScratchText[bufferAction];
RETURN[NOT endOfStream];
};
END.