IPWrittenReaderImpl.mesa
Last edited by:
Doug Wyatt, March 9, 1984 4:59:23 pm PST
DIRECTORY
Convert USING [IntFromRope, RealFromRope],
IO USING [Backup, EndOfStream, GetChar, GetIndex, PutFR, STREAM, text],
IPBasic USING [Op, Rational],
IPReader,
IPWritten,
RefText USING [Append, Find, InlineAppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope USING [ROPE],
SymTab USING [Create, Fetch, Insert, Ref];
IPWrittenReaderImpl: CEDAR PROGRAM
IMPORTS Convert, IO, IPReader, IPWritten, RefText, SymTab
~ BEGIN OPEN IPReader, IPWritten;
ROPE: TYPE ~ Rope.ROPE;
STREAM: TYPE ~ IO.STREAM;
Op: TYPE ~ IPBasic.Op;
Rational: TYPE ~ IPBasic.Rational;
InvertEncodingTable: PROC[encode: EncodingTable] RETURNS[SymTab.Ref] = {
decode: SymTab.Ref ~ SymTab.Create[mod: 101, case: TRUE];
FOR op: Op IN Op DO
key: ROPE ~ encode[op];
IF key#NIL THEN {
opRef: REF Op ~ NEW[Op ← op];
IF decode.Insert[key: key, val: opRef] THEN NULL ELSE ERROR;
};
ENDLOOP;
RETURN[decode];
};
decode: SymTab.Ref ~ InvertEncodingTable[GetEncodingTable[]];
Decode: PROC[rope: ROPE] RETURNS[Op] ~ {
found: BOOL; val: REF;
[found: found, val: val] ← decode.Fetch[key: rope];
IF found THEN { opRef: REF Op ~ NARROW[val]; RETURN[opRef^] }
ELSE RETURN[nil];
};
writtenReader: Class ~ NEW[ClassRep ← [
encoding: $Written,
getToken: WrittenGetToken,
readInt: WrittenReadInt,
readReal: WrittenReadReal,
readRational: WrittenReadRational,
readVector: WrittenReadVector
]];
WrittenGetToken: PROC[reader: Reader, flushComments: BOOLTRUE] ~ {
stream: STREAM ~ reader.stream;
token: Token ← nullToken;
text: REF TEXTNIL;
DO
state: {begin, op, id, plus, minus, ast, int, den1, den2, den3, dot, frac, exp1, exp2, exp3, string, escape, com, com1, file, file1, ann, ann1} ← begin;
err: {ok, illegalChar, invalidRational, invalidReal, invalidEscape} ← ok;
char: CHAR; -- last character fetched
got: BOOLTRUE; -- true if got char from stream, false if char is fake space at end
esc: NAT; -- starting index of escape sequence
BeginEscape: PROC ~ { esc ← text.length-1 };
EndEscape: PROC ~ {
start: NAT ~ esc+1; stop: NAT ~ text.length-1;
val: CARDINAL ← 0;
IF stop<=start THEN err ← invalidEscape
ELSE FOR i: NAT IN[start..stop) DO c: CHAR ~ text[i];
IF c IN['0..'9] THEN val ← val*10+(c-'0) ELSE { err ← invalidEscape; EXIT };
IF val>255 THEN { val ← 0; err ← invalidEscape; EXIT };
ENDLOOP;
text[esc] ← 0C+val; text.length ← esc+1;
};
reader.index ← stream.GetIndex[];
DO -- skip white space
char ← stream.GetChar[! IO.EndOfStream => GOTO EndOfFile];
IF char IN['\041..'\176] THEN EXIT ELSE reader.index ← reader.index+1;
ENDLOOP;
text ← reader.buffer; text.length ← 0;
DO -- scan a token
text ← RefText.InlineAppendChar[text, char]; -- tentatively append char to token text
{
SELECT state FROM
begin => SELECT char FROM
IN['A..'Z] => { state ← op }; -- begin op
IN['a..'z] => { state ← id }; -- begin identifier
IN['0..'9] => { state ← int }; -- begin number
'{ => { token.op ← beginBody; GOTO SingleOp };
'} => { token.op ← endBody; GOTO SingleOp };
'[ => { token.op ← beginVec; GOTO SingleOp };
'] => { token.op ← endVec; GOTO SingleOp };
', => { token.op ← comma; GOTO SingleOp };
'< => { state ← string; GOTO Begin }; -- begin string
'+ => { state ← plus }; -- might begin number or insertfile
'- => { state ← minus }; -- might begin number or annotation
'* => { state ← ast }; -- might begin comment
'. => { state ← dot }; -- might begin real
ENDCASE => { err ← illegalChar; GOTO Stop };
op => SELECT char FROM
IN['A..'Z], IN['0..'9] => { }; -- extend op
IN['a..'z], '- => { state ← id }; -- switch to id
ENDCASE => { token.type ← op; GOTO Back }; -- op token
id => SELECT char FROM
IN['a..'z], IN['A..'Z], IN['0..'9], '- => { }; -- extend id
ENDCASE => { token.type ← identifier; GOTO Back };
plus => SELECT char FROM
IN['0..'9] => { state ← int }; -- first integer digit
'+ => { state ← file; GOTO Begin }; -- begin insertfile
ENDCASE => { err ← illegalChar; GOTO Back }; -- plus sign alone
minus => SELECT char FROM
IN['0..'9] => { state ← int }; -- first integer digit
'- => { state ← ann; GOTO Begin }; -- begin annotation
ENDCASE => { err ← illegalChar; GOTO Back }; -- minus sign alone
ast => SELECT char FROM
'* => { state ← com; GOTO Begin }; -- begin comment
ENDCASE => { err ← illegalChar; GOTO Back }; -- asterisk alone
int => SELECT char FROM
IN['0..'9] => { }; -- integer digit
'. => { state ← frac }; -- fraction follows
'/ => { state ← den1 }; -- denominator follows
'E, 'e => { state ← exp1 }; -- exponent follows
ENDCASE => { token.type ← integer; GOTO Back }; -- integer token
den1 => SELECT char FROM
IN['0..'9] => { state ← den3 }; -- first denominator digit
'+, '- => { state ← den2 }; -- denominator sign
ENDCASE => { err ← invalidRational; GOTO Back };
den2 => SELECT char FROM
IN['0..'9] => { state ← den3 }; -- first denominator digit (after sign)
ENDCASE => { err ← invalidRational; GOTO Back };
den3 => SELECT char FROM
IN['0..'9] => { }; -- denominator digit
ENDCASE => { token.type ← rational; GOTO Back }; -- rational token
dot => SELECT char FROM
IN['0..'9] => { state ← frac }; -- first fraction digit
ENDCASE => { err ← illegalChar; GOTO Back }; -- dot alone
frac => SELECT char FROM
IN['0..'9] => { }; -- fraction digit
'E, 'e => { state ← exp1 }; -- exponent follows
ENDCASE => { token.type ← real; GOTO Back }; -- real token (no exponent)
exp1 => SELECT char FROM
IN['0..'9] => { state ← exp3 }; -- first exponent digit
'+, '- => { state ← exp2 }; -- exponent sign
ENDCASE => { err ← invalidReal; GOTO Back };
exp2 => SELECT char FROM
IN['0..'9] => { state ← exp3 }; -- first exponent digit (after sign)
ENDCASE => { err ← invalidReal; GOTO Back };
exp3 => SELECT char FROM
IN['0..'9] => { }; -- exponent digit
ENDCASE => { token.type ← real; GOTO Back }; -- real token (with exponent)
string => SELECT char FROM
'> => { token.type ← string; GOTO Lop1 }; -- end string token
'# => { BeginEscape[]; state ← escape }; -- begin escape sequence
ENDCASE => { }; -- extend string
escape => SELECT char FROM
'# => { EndEscape[]; state ← string }; -- end escape sequence
ENDCASE => { }; -- extend escape sequence
com => SELECT char FROM
'* => { state ← com1 }; -- look for second *
ENDCASE => { }; -- extend comment
com1 => SELECT char FROM
'* => { token.type ← comment; GOTO Lop2 }; -- end comment
ENDCASE => { state ← com }; -- continue comment
file => SELECT char FROM
'+ => { state ← file1 }; -- look for second +
ENDCASE => { }; -- extend file name
file1 => SELECT char FROM
'+ => { token.type ← insertfile; GOTO Lop2 }; -- end insertfile
ENDCASE => { state ← file }; -- continue file name
ann => SELECT char FROM
'- => { state ← ann1 }; -- look for second -
ENDCASE => { }; -- extend annotation
ann1 => SELECT char FROM
'- => { token.type ← annotation; GOTO Lop2 }; -- end annotation
ENDCASE => { state ← ann }; -- continue annotation
ENDCASE => ERROR; -- unknown state
EXITS
Begin => text.length ← 0;
SingleOp => { token.type ← op; EXIT };
Stop => { EXIT };
Lop1 => { text.length ← text.length-1; EXIT };
Lop2 => { text.length ← text.length-2; EXIT };
Back => { IF got THEN stream.Backup[char]; text.length ← text.length-1; EXIT };
};
char ← stream.GetChar[!
IO.EndOfStream => IF got THEN { char ← ' ; got ← FALSE; CONTINUE }];
ENDLOOP;
reader.text ← text;
IF err#ok THEN ERROR Error[$illegalToken, SELECT err FROM
illegalChar => "Illegal character.",
invalidRational => "Invalid rational.",
invalidReal => "Invalid real number.",
invalidEscape => "Invalid escape sequence in string.",
ENDCASE => NIL];
SELECT token.type FROM
$nil => ERROR Error[$bug, "WrittenGetToken didn't set token type."];
$op => IF token.op=$nil THEN {
token.op ← Decode[RefText.TrustTextAsRope[text]];
IF token.op=$nil THEN ERROR Error[$illegalToken,
IO.PutFR["%g is not a primitive.", IO.text[text]]];
};
$comment, $annotation => IF flushComments THEN LOOP;
ENDCASE;
EXIT;
REPEAT
EndOfFile => token.type ← eof;
ENDLOOP;
reader.token ← token;
};
ParseInt: PROC[text: REF TEXT, start: NAT ← 0, len: NATNAT.LAST] RETURNS[x: INT] ~ {
s: NAT ~ MIN[start, text.length];
n: NAT ~ MIN[len, text.length-s];
IF s=0 AND n=text.length THEN x ← Convert.IntFromRope[RefText.TrustTextAsRope[text]]
ELSE {
scratch: REF TEXT ~ RefText.ObtainScratch[n];
{ ENABLE UNWIND => RefText.ReleaseScratch[scratch];
temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n];
x ← Convert.IntFromRope[RefText.TrustTextAsRope[temp]];
};
RefText.ReleaseScratch[scratch];
};
};
ParseReal: PROC[text: REF TEXT, start: NAT ← 0, len: NATNAT.LAST] RETURNS[x: REAL] ~ {
s: NAT ~ MIN[start, text.length];
n: NAT ~ MIN[len, text.length-s];
IF s=0 AND n=text.length THEN x ← Convert.RealFromRope[RefText.TrustTextAsRope[text]]
ELSE {
scratch: REF TEXT ~ RefText.ObtainScratch[n];
{ ENABLE UNWIND => RefText.ReleaseScratch[scratch];
temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n];
x ← Convert.RealFromRope[RefText.TrustTextAsRope[temp]];
};
RefText.ReleaseScratch[scratch];
};
};
WrittenReadInt: PROC[reader: Reader] RETURNS[INT] ~ {
SELECT reader.token.type FROM
$shortNumber => RETURN[reader.shortNumber];
$integer => RETURN[ParseInt[reader.text]];
ENDCASE => ERROR Error[$wrongType, "Token is not an integer."];
};
WrittenReadRational: PROC[reader: Reader] RETURNS[Rational] ~ {
text: REF TEXT ~ reader.text;
SELECT reader.token.type FROM
$rational => {
slash: NAT ~ RefText.Find[s1: text, s2: "/"];
{ ENABLE Error => IF code=$overflow THEN GOTO Overflow;
num: INT ~ ParseInt[text: text, len: slash];
den: INT ~ ParseInt[text: text, start: slash+1];
RETURN[[num: num, den: den]];
EXITS Overflow => Error[$overflow, "Number is too big for an INT."];
};
};
ENDCASE => ERROR Error[$wrongType, "Token is not a rational."];
};
WrittenReadReal: PROC[reader: Reader] RETURNS[REAL] ~ {
SELECT reader.token.type FROM
$integer, $real => RETURN[ParseReal[reader.text]];
ENDCASE => ERROR Error[$wrongType, "Token type is not $integer or $real."];
};
WrittenReadVector: PROC[reader: Reader] RETURNS[EncodedVector] ~ {
type: TokenType ~ reader.token.type;
SELECT reader.token.type FROM
$vector => ERROR Error[$unimplemented, "Can't handle $vector yet."];
ENDCASE => ERROR Error[$wrongType, "Token is not a vector."];
};
Register[writtenReader];
END.