IPWrittenReaderImpl.mesa
Copyright © 1984 Xerox Corporation. All rights reserved.
Doug Wyatt, August 24, 1984 10:46:00 am PDT
DIRECTORY
Convert USING [IntFromRope, RealFromRope],
IO USING [Backup, CreateStream, CreateStreamProcs, EndOfStream, Error, GetChar, GetIndex, PutFR, SetIndex, STREAM, StreamProcs, text],
IPBasic USING [Op, Rational],
IPReader USING [Class, ClassRep, Error, LargeVector, LargeVectorRep, nullToken, Reader, Register, StreamFromInputStream, Token, TokenType],
IPWritten USING [EncodingTable, GetEncodingTable],
RefText USING [Append, Find, InlineAppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope USING [ROPE],
SymTab USING [Create, Fetch, Insert, Ref];
IPWrittenReaderImpl: CEDAR PROGRAM
IMPORTS Convert, IO, IPReader, IPWritten, RefText, SymTab
~ BEGIN
ROPE: TYPE ~ Rope.ROPE;
STREAM: TYPE ~ IO.STREAM;
BYTE: TYPE ~ [0..256);
Op: TYPE ~ IPBasic.Op;
Rational: TYPE ~ IPBasic.Rational;
Reader: TYPE ~ IPReader.Reader;
Token: TYPE ~ IPReader.Token;
nullToken: Token ~ IPReader.nullToken;
InvertEncodingTable: PROC[encode: IPWritten.EncodingTable] RETURNS[SymTab.Ref] = {
decode: SymTab.Ref ~ SymTab.Create[mod: 101, case: TRUE];
FOR op: Op IN Op DO
key: ROPE ~ encode[op];
IF key#NIL THEN {
opRef: REF Op ~ NEW[Op ← op];
IF SymTab.Insert[x: decode, key: key, val: opRef] THEN NULL ELSE ERROR;
};
ENDLOOP;
RETURN[decode];
};
decode: SymTab.Ref ~ InvertEncodingTable[IPWritten.GetEncodingTable[]];
Decode: PROC[rope: ROPE] RETURNS[Op] ~ {
found: BOOL; val: REF;
[found: found, val: val] ← SymTab.Fetch[x: decode, key: rope];
IF found THEN { opRef: REF Op ~ NARROW[val]; RETURN[opRef^] }
ELSE RETURN[nil];
};
writtenReader: IPReader.Class ~ NEW[IPReader.ClassRep ← [
encoding: $Written,
getToken: WrittenGetToken,
readInt: WrittenReadInt,
readReal: WrittenReadReal,
readRational: WrittenReadRational,
readLargeVector: WrittenReadLargeVector
]];
GetVisibleChar: PROC[stream: STREAM] RETURNS[CHAR] ~ {
DO char: CHAR ~ IO.GetChar[stream];
IF char IN['\041..'\176] THEN RETURN[char];
ENDLOOP;
};
WrittenGetToken: PROC[reader: Reader, buffer: REF TEXT, flushComments: BOOL]
RETURNS[token: Token ← nullToken, text: REF TEXTNIL] ~ {
stream: STREAM ~ reader.stream;
DO
state: {begin, op, id, plus, minus, ast, int, den1, den2, den3, dot, frac, exp1, exp2, exp3, string, escape, vector, com, com1, file, file1, ann, ann1} ← begin;
err: {ok, illegalChar, invalidRational, invalidReal, invalidEscape, invalidVector} ← ok;
char: CHAR; -- last character fetched
got: BOOLTRUE; -- true if got char from stream, false if char is fake space at end
esc: NAT; -- starting index of escape sequence
BeginEscape: PROC ~ { esc ← text.length-1 };
EndEscape: PROC ~ {
start: NAT ~ esc+1; stop: NAT ~ text.length-1;
val: CARDINAL ← 0;
IF stop<=start THEN err ← invalidEscape
ELSE FOR i: NAT IN[start..stop) DO c: CHAR ~ text[i];
IF c IN['0..'9] THEN val ← val*10+(c-'0) ELSE { err ← invalidEscape; EXIT };
IF val>255 THEN { val ← 0; err ← invalidEscape; EXIT };
ENDLOOP;
text[esc] ← 0C+val; text.length ← esc+1;
};
token.index ← stream.GetIndex[];
DO -- skip white space
char ← stream.GetChar[! IO.EndOfStream => GOTO EndOfFile];
IF char IN['\041..'\176] THEN EXIT ELSE token.index ← token.index+1;
ENDLOOP;
IF (text ← buffer)=NIL THEN text ← reader.buffer; text.length ← 0;
DO -- scan a token
IF text#NIL THEN text ← RefText.InlineAppendChar[text, char]; -- tentatively append char
{
SELECT state FROM
begin => SELECT char FROM
IN['A..'Z] => { state ← op }; -- begin op
IN['a..'z] => { state ← id }; -- begin identifier
IN['0..'9] => { state ← int }; -- begin number
'{ => { token.op ← beginBody; GOTO SingleOp };
'} => { token.op ← endBody; GOTO SingleOp };
'[ => { token.op ← beginVec; GOTO SingleOp };
'] => { token.op ← endVec; GOTO SingleOp };
', => { token.op ← comma; GOTO SingleOp };
'< => { state ← string; GOTO Begin }; -- begin string
'( => { state ← vector; GOTO Skip }; -- begin largeVector
'+ => { state ← plus }; -- might begin number or insertfile
'- => { state ← minus }; -- might begin number or annotation
'* => { state ← ast }; -- might begin comment
'. => { state ← dot }; -- might begin real
ENDCASE => { err ← illegalChar; GOTO Stop };
op => SELECT char FROM
IN['A..'Z], IN['0..'9] => { }; -- extend op
IN['a..'z], '- => { state ← id }; -- switch to id
ENDCASE => { token.type ← op; GOTO Back }; -- op token
id => SELECT char FROM
IN['a..'z], IN['A..'Z], IN['0..'9], '- => { }; -- extend id
ENDCASE => { token.type ← identifier; GOTO Back };
plus => SELECT char FROM
IN['0..'9] => { state ← int }; -- first integer digit
'+ => { state ← file; GOTO Begin }; -- begin insertfile
ENDCASE => { err ← illegalChar; GOTO Back }; -- plus sign alone
minus => SELECT char FROM
IN['0..'9] => { state ← int }; -- first integer digit
'- => { state ← ann; GOTO Begin }; -- begin annotation
ENDCASE => { err ← illegalChar; GOTO Back }; -- minus sign alone
ast => SELECT char FROM
'* => { state ← com; GOTO Begin }; -- begin comment
ENDCASE => { err ← illegalChar; GOTO Back }; -- asterisk alone
int => SELECT char FROM
IN['0..'9] => { }; -- integer digit
'. => { state ← frac }; -- fraction follows
'/ => { state ← den1 }; -- denominator follows
'E, 'e => { state ← exp1 }; -- exponent follows
ENDCASE => { token.type ← integer; GOTO Back }; -- integer token
den1 => SELECT char FROM
IN['0..'9] => { state ← den3 }; -- first denominator digit
'+, '- => { state ← den2 }; -- denominator sign
ENDCASE => { err ← invalidRational; GOTO Back };
den2 => SELECT char FROM
IN['0..'9] => { state ← den3 }; -- first denominator digit (after sign)
ENDCASE => { err ← invalidRational; GOTO Back };
den3 => SELECT char FROM
IN['0..'9] => { }; -- denominator digit
ENDCASE => { token.type ← rational; GOTO Back }; -- rational token
dot => SELECT char FROM
IN['0..'9] => { state ← frac }; -- first fraction digit
ENDCASE => { err ← illegalChar; GOTO Back }; -- dot alone
frac => SELECT char FROM
IN['0..'9] => { }; -- fraction digit
'E, 'e => { state ← exp1 }; -- exponent follows
ENDCASE => { token.type ← real; GOTO Back }; -- real token (no exponent)
exp1 => SELECT char FROM
IN['0..'9] => { state ← exp3 }; -- first exponent digit
'+, '- => { state ← exp2 }; -- exponent sign
ENDCASE => { err ← invalidReal; GOTO Back };
exp2 => SELECT char FROM
IN['0..'9] => { state ← exp3 }; -- first exponent digit (after sign)
ENDCASE => { err ← invalidReal; GOTO Back };
exp3 => SELECT char FROM
IN['0..'9] => { }; -- exponent digit
ENDCASE => { token.type ← real; GOTO Back }; -- real token (with exponent)
string => SELECT char FROM
'> => { token.type ← string; GOTO Lop1 }; -- end string token
'# => { BeginEscape[]; state ← escape }; -- begin escape sequence
ENDCASE => { }; -- extend string
escape => SELECT char FROM
'# => { EndEscape[]; state ← string }; -- end escape sequence
ENDCASE => { }; -- extend escape sequence
vector => SELECT char FROM
') => { token.type ← largeVector; GOTO Stop }; -- end large vector
IN['0..'9], IN['A..'F], ' => { }; -- continue vector
ENDCASE => { err ← invalidVector }; -- note error and press on
com => SELECT char FROM
'* => { state ← com1 }; -- look for second *
ENDCASE => { }; -- extend comment
com1 => SELECT char FROM
'* => { token.type ← comment; GOTO Lop2 }; -- end comment
ENDCASE => { state ← com }; -- continue comment
file => SELECT char FROM
'+ => { state ← file1 }; -- look for second +
ENDCASE => { }; -- extend file name
file1 => SELECT char FROM
'+ => { token.type ← insertfile; GOTO Lop2 }; -- end insertfile
ENDCASE => { state ← file }; -- continue file name
ann => SELECT char FROM
'- => { state ← ann1 }; -- look for second -
ENDCASE => { }; -- extend annotation
ann1 => SELECT char FROM
'- => { token.type ← annotation; GOTO Lop2 }; -- end annotation
ENDCASE => { state ← ann }; -- continue annotation
ENDCASE => ERROR; -- unknown state
EXITS
SingleOp => { token.type ← op; EXIT };
Begin => text.length ← 0;
Skip => text ← NIL;
Stop => { EXIT };
Lop1 => { text.length ← text.length-1; EXIT };
Lop2 => { text.length ← text.length-2; EXIT };
Back => { IF got THEN stream.Backup[char]; text.length ← text.length-1; EXIT };
};
char ← IO.GetChar[stream !
IO.EndOfStream => IF got THEN { char ← ' ; got ← FALSE; CONTINUE }];
ENDLOOP;
IF err#ok THEN ERROR IPReader.Error[$illegalToken, SELECT err FROM
illegalChar => "Illegal character.",
invalidRational => "Invalid rational.",
invalidReal => "Invalid real number.",
invalidEscape => "Invalid escape sequence in string.",
invalidVector => "Invalid character in large vector.",
ENDCASE => NIL];
SELECT token.type FROM
$nil => ERROR IPReader.Error[$bug, "WrittenGetToken didn't set token type."];
$op => {
IF token.op=$nil THEN token.op ← Decode[RefText.TrustTextAsRope[text]];
IF token.op=$nil THEN ERROR IPReader.Error[$illegalToken,
IO.PutFR["\"%g\" is not a primitive.", IO.text[text]]];
};
$comment, $annotation => IF flushComments THEN LOOP;
ENDCASE;
EXIT;
REPEAT EndOfFile => token.type ← eof;
ENDLOOP;
IF buffer=NIL THEN text ← NIL;
};
ParseInt: PROC[text: REF TEXT, start: NAT ← 0, len: NATNAT.LAST] RETURNS[x: INT] ~ {
s: NAT ~ MIN[start, text.length];
n: NAT ~ MIN[len, text.length-s];
IF s=0 AND n=text.length THEN x ← Convert.IntFromRope[RefText.TrustTextAsRope[text]]
ELSE {
scratch: REF TEXT ~ RefText.ObtainScratch[n];
{ ENABLE UNWIND => RefText.ReleaseScratch[scratch];
temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n];
x ← Convert.IntFromRope[RefText.TrustTextAsRope[temp]];
};
RefText.ReleaseScratch[scratch];
};
};
ParseReal: PROC[text: REF TEXT, start: NAT ← 0, len: NATNAT.LAST] RETURNS[x: REAL] ~ {
s: NAT ~ MIN[start, text.length];
n: NAT ~ MIN[len, text.length-s];
IF s=0 AND n=text.length THEN x ← Convert.RealFromRope[RefText.TrustTextAsRope[text]]
ELSE {
scratch: REF TEXT ~ RefText.ObtainScratch[n];
{ ENABLE UNWIND => RefText.ReleaseScratch[scratch];
temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n];
x ← Convert.RealFromRope[RefText.TrustTextAsRope[temp]];
};
RefText.ReleaseScratch[scratch];
};
};
WrittenReadInt: PROC[reader: Reader, token: Token, text: REF TEXT]
RETURNS[INT] ~ {
SELECT token.type FROM
$shortNumber => RETURN[token.shortNumber];
$integer => RETURN[ParseInt[text]];
ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not an integer."];
};
WrittenReadRational: PROC[reader: Reader, token: Token, text: REF TEXT]
RETURNS[Rational] ~ {
SELECT token.type FROM
$rational => {
slash: NAT ~ RefText.Find[s1: text, s2: "/"];
{ ENABLE IPReader.Error => IF code=$overflow THEN GOTO Overflow;
num: INT ~ ParseInt[text: text, len: slash];
den: INT ~ ParseInt[text: text, start: slash+1];
RETURN[[num: num, den: den]];
EXITS Overflow => ERROR IPReader.Error[$overflow, "Number is too big for an INT."];
};
};
ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not a rational."];
};
WrittenReadReal: PROC[reader: Reader, token: Token, text: REF TEXT]
RETURNS[REAL] ~ {
SELECT token.type FROM
$integer, $real => RETURN[ParseReal[text]];
ENDCASE => ERROR IPReader.Error[$wrongType, "Token type is not $integer or $real."];
};
EndOfLargeVector: ERROR ~ CODE;
GetHexDigit: PROC[stream: STREAM] RETURNS[[0..16)] ~ {
DO char: CHAR ~ IO.GetChar[stream];
SELECT char FROM
IN['0..'9] => RETURN[char-'0];
IN['A..'F] => RETURN[char-'A+10];
') => ERROR EndOfLargeVector;
<='\040 => NULL;
ENDCASE => ERROR IPReader.Error[$illegalToken, "Illegal character in large vector."];
ENDLOOP;
};
GetHexByte: PROC[stream: STREAM] RETURNS[BYTE] ~ {
h1, h2: [0..16);
h1 ← GetHexDigit[stream];
h2 ← GetHexDigit[stream ! EndOfLargeVector => GOTO OddDigit];
RETURN[h1*16+h2];
EXITS OddDigit => ERROR IPReader.Error[$illegalToken, "Large vector ends in mid-byte."];
};
lvProcs: REF IO.StreamProcs ~ IO.CreateStreamProcs[
variety: $input, class: $InterpressLargeVectorInputWritten,
getChar: LVGetChar,
endOf: LVEndOf,
getIndex: LVGetIndex,
setIndex: LVSetIndex,
getLength: LVGetLength
];
LVData: TYPE ~ REF LVDataRep;
LVDataRep: TYPE ~ RECORD[
master: STREAM, -- stream on the master file
start: INT, -- index in master of first data byte
bytesPerElement: BYTE, -- data bytes per vector element
index: INT, -- current stream index
length: INT -- stream length (INT.LAST if not yet known)
];
CreateLVStream: PROC[master: STREAM, bytesPerElement: BYTE] RETURNS[STREAM] ~ {
start: INT ~ IO.GetIndex[master];
data: LVData ~ NEW[LVDataRep ← [master: master, start: start,
index: 0, length: INT.LAST, bytesPerElement: bytesPerElement]];
RETURN[IO.CreateStream[streamProcs: lvProcs, streamData: data]];
};
SetLength: PROC[data: LVData, length: INT] ~ {
data.length ← length;
IF (length MOD data.bytesPerElement)#0 THEN ERROR IPReader.Error[
$illegalToken, "Large vector length must be a multiple of bytesPerElement."];
};
LVGetChar: PROC[self: STREAM] RETURNS[CHAR] ~ {
data: LVData ~ NARROW[self.streamData];
IF data.index<data.length THEN {
byte: BYTE ~ GetHexByte[data.master ! EndOfLargeVector => GOTO End];
data.index ← data.index+1;
RETURN[VAL[byte]];
EXITS End => SetLength[data, data.index];
};
ERROR IO.EndOfStream[self];
};
LVEndOf: PROC[self: STREAM] RETURNS[BOOL] ~ {
data: LVData ~ NARROW[self.streamData];
IF data.index=data.length THEN RETURN[TRUE]
ELSE DO
char: CHAR ~ IO.GetChar[data.master];
SELECT char FROM
') => { SetLength[data, data.index]; RETURN[TRUE] };
IN['0..'9], IN['A..'F] => { IO.Backup[data.master, char]; RETURN[FALSE] };
IN['\041..'\176] => ERROR IPReader.Error[$illegalToken, "Illegal character in large vector."];
ENDCASE;
ENDLOOP;
};
LVGetIndex: PROC[self: STREAM] RETURNS[INT] ~ {
data: LVData ~ NARROW[self.streamData];
RETURN[data.index];
};
LVSetIndex: PROC[self: STREAM, index: INT] ~ {
data: LVData ~ NARROW[self.streamData];
IF index<0 THEN ERROR IO.Error[$BadIndex, self];
IF index>data.length THEN ERROR IO.EndOfStream[self];
IF index<data.index THEN { IO.SetIndex[data.master, data.start]; data.index ← 0 };
WHILE data.index<index DO
byte: BYTE ~ GetHexByte[data.master ! EndOfLargeVector => GOTO End];
data.index ← data.index+1;
REPEAT End => { SetLength[data, data.index]; ERROR IO.EndOfStream[self] };
ENDLOOP;
};
LVGetLength: PROC[self: STREAM] RETURNS[INT] ~ {
data: LVData ~ NARROW[self.streamData];
IF data.length=INT.LAST THEN {
mindex: INT ~ IO.GetIndex[data.master];
count: INT ← 0;
DO
[] ← GetHexByte[data.master ! EndOfLargeVector => EXIT];
count ← count+1;
ENDLOOP;
SetLength[data, data.index+count];
IO.SetIndex[data.master, mindex];
};
RETURN[data.length];
};
WrittenReadLargeVector: PROC[reader: Reader, token: Token]
RETURNS[IPReader.LargeVector] ~ {
SELECT token.type FROM
$largeVector => {
stream: STREAM ~ IPReader.StreamFromInputStream[reader.stream];
bytesPerElement: BYTE ← 0;
IO.SetIndex[stream, token.index];
IF IO.GetChar[stream]#'( THEN GOTO NoParen;
bytesPerElement ← GetHexByte[stream ! EndOfLargeVector => GOTO NoBytes];
RETURN[NEW[IPReader.LargeVectorRep ← [
source: CreateLVStream[stream, bytesPerElement],
bytesPerElement: bytesPerElement, type: $nil]]];
EXITS
NoParen => ERROR IPReader.Error[$illegalToken, "Expected a large vector token."];
NoBytes => ERROR IPReader.Error[$illegalToken, "Empty large vector."];
};
ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not a vector."];
};
IPReader.Register[writtenReader];
END.