IPWrittenReaderImpl.mesa
Copyright © 1984 Xerox Corporation. All rights reserved.
Doug Wyatt, August 24, 1984 10:46:00 am PDT
DIRECTORY
Convert USING [IntFromRope, RealFromRope],
IO USING [Backup, CreateStream, CreateStreamProcs, EndOfStream, Error, GetChar, GetIndex, PutFR, SetIndex, STREAM, StreamProcs, text],
IPBasic USING [Op, Rational],
IPReader USING [Class, ClassRep, Error, LargeVector, LargeVectorRep, nullToken, Reader, Register, StreamFromInputStream, Token, TokenType],
IPWritten USING [EncodingTable, GetEncodingTable],
RefText USING [Append, Find, InlineAppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope],
Rope USING [ROPE],
SymTab USING [Create, Fetch, Insert, Ref];
IPWrittenReaderImpl: CEDAR PROGRAM
IMPORTS Convert, IO, IPReader, IPWritten, RefText, SymTab
~ BEGIN
ROPE: TYPE ~ Rope.ROPE;
STREAM: TYPE ~ IO.STREAM;
BYTE: TYPE ~ [0..256);
Op: TYPE ~ IPBasic.Op;
Rational: TYPE ~ IPBasic.Rational;
Reader: TYPE ~ IPReader.Reader;
Token: TYPE ~ IPReader.Token;
nullToken: Token ~ IPReader.nullToken;
InvertEncodingTable:
PROC[encode: IPWritten.EncodingTable]
RETURNS[SymTab.Ref] = {
decode: SymTab.Ref ~ SymTab.Create[mod: 101, case: TRUE];
FOR op: Op
IN Op
DO
key: ROPE ~ encode[op];
IF key#
NIL
THEN {
opRef: REF Op ~ NEW[Op ← op];
IF SymTab.Insert[x: decode, key: key, val: opRef] THEN NULL ELSE ERROR;
};
ENDLOOP;
RETURN[decode];
};
decode: SymTab.Ref ~ InvertEncodingTable[IPWritten.GetEncodingTable[]];
Decode:
PROC[rope:
ROPE]
RETURNS[Op] ~ {
found: BOOL; val: REF;
[found: found, val: val] ← SymTab.Fetch[x: decode, key: rope];
IF found THEN { opRef: REF Op ~ NARROW[val]; RETURN[opRef^] }
ELSE RETURN[nil];
};
writtenReader: IPReader.Class ~
NEW[IPReader.ClassRep ← [
encoding: $Written,
getToken: WrittenGetToken,
readInt: WrittenReadInt,
readReal: WrittenReadReal,
readRational: WrittenReadRational,
readLargeVector: WrittenReadLargeVector
]];
GetVisibleChar: PROC[stream: STREAM] RETURNS[CHAR] ~ {
DO char: CHAR ~ IO.GetChar[stream];
IF char IN['\041..'\176] THEN RETURN[char];
ENDLOOP;
};
WrittenGetToken:
PROC[reader: Reader, buffer:
REF
TEXT, flushComments:
BOOL]
RETURNS[token: Token ← nullToken, text: REF TEXT ← NIL] ~ {
stream: STREAM ~ reader.stream;
DO
state: {begin, op, id, plus, minus, ast, int, den1, den2, den3, dot, frac, exp1, exp2, exp3, string, escape, vector, com, com1, file, file1, ann, ann1} ← begin;
err: {ok, illegalChar, invalidRational, invalidReal, invalidEscape, invalidVector} ← ok;
char: CHAR; -- last character fetched
got: BOOL ← TRUE; -- true if got char from stream, false if char is fake space at end
esc: NAT; -- starting index of escape sequence
BeginEscape: PROC ~ { esc ← text.length-1 };
EndEscape:
PROC ~ {
start: NAT ~ esc+1; stop: NAT ~ text.length-1;
val: CARDINAL ← 0;
IF stop<=start THEN err ← invalidEscape
ELSE
FOR i:
NAT
IN[start..stop)
DO c:
CHAR ~ text[i];
IF c IN['0..'9] THEN val ← val*10+(c-'0) ELSE { err ← invalidEscape; EXIT };
IF val>255 THEN { val ← 0; err ← invalidEscape; EXIT };
ENDLOOP;
text[esc] ← 0C+val; text.length ← esc+1;
};
token.index ← stream.GetIndex[];
DO
-- skip white space
char ← stream.GetChar[! IO.EndOfStream => GOTO EndOfFile];
IF char IN['\041..'\176] THEN EXIT ELSE token.index ← token.index+1;
ENDLOOP;
IF (text ← buffer)=NIL THEN text ← reader.buffer; text.length ← 0;
DO
-- scan a token
IF text#NIL THEN text ← RefText.InlineAppendChar[text, char]; -- tentatively append char
{
SELECT state
FROM
begin =>
SELECT char
FROM
IN['A..'Z] => { state ← op }; -- begin op
IN['a..'z] => { state ← id }; -- begin identifier
IN['0..'9] => { state ← int }; -- begin number
'{ => { token.op ← beginBody; GOTO SingleOp };
'} => { token.op ← endBody; GOTO SingleOp };
'[ => { token.op ← beginVec; GOTO SingleOp };
'] => { token.op ← endVec; GOTO SingleOp };
', => { token.op ← comma; GOTO SingleOp };
'< => { state ← string; GOTO Begin }; -- begin string
'( => { state ← vector; GOTO Skip }; -- begin largeVector
'+ => { state ← plus }; -- might begin number or insertfile
'- => { state ← minus }; -- might begin number or annotation
'* => { state ← ast }; -- might begin comment
'. => { state ← dot }; -- might begin real
ENDCASE => { err ← illegalChar; GOTO Stop };
op =>
SELECT char
FROM
IN['A..'Z], IN['0..'9] => { }; -- extend op
IN['a..'z], '- => { state ← id }; -- switch to id
ENDCASE => { token.type ← op; GOTO Back }; -- op token
id =>
SELECT char
FROM
IN['a..'z], IN['A..'Z], IN['0..'9], '- => { }; -- extend id
ENDCASE => { token.type ← identifier; GOTO Back };
plus =>
SELECT char
FROM
IN['0..'9] => { state ← int }; -- first integer digit
'+ => { state ← file; GOTO Begin }; -- begin insertfile
ENDCASE => { err ← illegalChar; GOTO Back }; -- plus sign alone
minus =>
SELECT char
FROM
IN['0..'9] => { state ← int }; -- first integer digit
'- => { state ← ann; GOTO Begin }; -- begin annotation
ENDCASE => { err ← illegalChar; GOTO Back }; -- minus sign alone
ast =>
SELECT char
FROM
'* => { state ← com; GOTO Begin }; -- begin comment
ENDCASE => { err ← illegalChar; GOTO Back }; -- asterisk alone
int =>
SELECT char
FROM
IN['0..'9] => { }; -- integer digit
'. => { state ← frac }; -- fraction follows
'/ => { state ← den1 }; -- denominator follows
'E, 'e => { state ← exp1 }; -- exponent follows
ENDCASE => { token.type ← integer; GOTO Back }; -- integer token
den1 =>
SELECT char
FROM
IN['0..'9] => { state ← den3 }; -- first denominator digit
'+, '- => { state ← den2 }; -- denominator sign
ENDCASE => { err ← invalidRational; GOTO Back };
den2 =>
SELECT char
FROM
IN['0..'9] => { state ← den3 }; -- first denominator digit (after sign)
ENDCASE => { err ← invalidRational; GOTO Back };
den3 =>
SELECT char
FROM
IN['0..'9] => { }; -- denominator digit
ENDCASE => { token.type ← rational; GOTO Back }; -- rational token
dot =>
SELECT char
FROM
IN['0..'9] => { state ← frac }; -- first fraction digit
ENDCASE => { err ← illegalChar; GOTO Back }; -- dot alone
frac =>
SELECT char
FROM
IN['0..'9] => { }; -- fraction digit
'E, 'e => { state ← exp1 }; -- exponent follows
ENDCASE => { token.type ← real; GOTO Back }; -- real token (no exponent)
exp1 =>
SELECT char
FROM
IN['0..'9] => { state ← exp3 }; -- first exponent digit
'+, '- => { state ← exp2 }; -- exponent sign
ENDCASE => { err ← invalidReal; GOTO Back };
exp2 =>
SELECT char
FROM
IN['0..'9] => { state ← exp3 }; -- first exponent digit (after sign)
ENDCASE => { err ← invalidReal; GOTO Back };
exp3 =>
SELECT char
FROM
IN['0..'9] => { }; -- exponent digit
ENDCASE => { token.type ← real; GOTO Back }; -- real token (with exponent)
string =>
SELECT char
FROM
'> => { token.type ← string; GOTO Lop1 }; -- end string token
'# => { BeginEscape[]; state ← escape }; -- begin escape sequence
ENDCASE => { }; -- extend string
escape =>
SELECT char
FROM
'# => { EndEscape[]; state ← string }; -- end escape sequence
ENDCASE => { }; -- extend escape sequence
vector =>
SELECT char
FROM
') => { token.type ← largeVector; GOTO Stop }; -- end large vector
IN['0..'9], IN['A..'F], ' => { }; -- continue vector
ENDCASE => { err ← invalidVector }; -- note error and press on
com =>
SELECT char
FROM
'* => { state ← com1 }; -- look for second *
ENDCASE => { }; -- extend comment
com1 =>
SELECT char
FROM
'* => { token.type ← comment; GOTO Lop2 }; -- end comment
ENDCASE => { state ← com }; -- continue comment
file =>
SELECT char
FROM
'+ => { state ← file1 }; -- look for second +
ENDCASE => { }; -- extend file name
file1 =>
SELECT char
FROM
'+ => { token.type ← insertfile; GOTO Lop2 }; -- end insertfile
ENDCASE => { state ← file }; -- continue file name
ann =>
SELECT char
FROM
'- => { state ← ann1 }; -- look for second -
ENDCASE => { }; -- extend annotation
ann1 =>
SELECT char
FROM
'- => { token.type ← annotation; GOTO Lop2 }; -- end annotation
ENDCASE => { state ← ann }; -- continue annotation
ENDCASE => ERROR; -- unknown state
EXITS
SingleOp => { token.type ← op; EXIT };
Begin => text.length ← 0;
Skip => text ← NIL;
Stop => { EXIT };
Lop1 => { text.length ← text.length-1; EXIT };
Lop2 => { text.length ← text.length-2; EXIT };
Back => { IF got THEN stream.Backup[char]; text.length ← text.length-1; EXIT };
};
char ←
IO.GetChar[stream !
IO.EndOfStream => IF got THEN { char ← ' ; got ← FALSE; CONTINUE }];
ENDLOOP;
IF err#ok
THEN
ERROR IPReader.Error[$illegalToken,
SELECT err
FROM
illegalChar => "Illegal character.",
invalidRational => "Invalid rational.",
invalidReal => "Invalid real number.",
invalidEscape => "Invalid escape sequence in string.",
invalidVector => "Invalid character in large vector.",
ENDCASE => NIL];
SELECT token.type
FROM
$nil => ERROR IPReader.Error[$bug, "WrittenGetToken didn't set token type."];
$op => {
IF token.op=$nil THEN token.op ← Decode[RefText.TrustTextAsRope[text]];
IF token.op=$nil
THEN
ERROR IPReader.Error[$illegalToken,
IO.PutFR["\"%g\" is not a primitive.", IO.text[text]]];
};
$comment, $annotation => IF flushComments THEN LOOP;
ENDCASE;
EXIT;
REPEAT EndOfFile => token.type ← eof;
ENDLOOP;
IF buffer=NIL THEN text ← NIL;
};
ParseInt:
PROC[text:
REF
TEXT, start:
NAT ← 0, len:
NAT ←
NAT.
LAST]
RETURNS[x:
INT] ~ {
s: NAT ~ MIN[start, text.length];
n: NAT ~ MIN[len, text.length-s];
IF s=0 AND n=text.length THEN x ← Convert.IntFromRope[RefText.TrustTextAsRope[text]]
ELSE {
scratch: REF TEXT ~ RefText.ObtainScratch[n];
{
ENABLE
UNWIND => RefText.ReleaseScratch[scratch];
temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n];
x ← Convert.IntFromRope[RefText.TrustTextAsRope[temp]];
};
RefText.ReleaseScratch[scratch];
};
};
ParseReal:
PROC[text:
REF
TEXT, start:
NAT ← 0, len:
NAT ←
NAT.
LAST]
RETURNS[x:
REAL] ~ {
s: NAT ~ MIN[start, text.length];
n: NAT ~ MIN[len, text.length-s];
IF s=0 AND n=text.length THEN x ← Convert.RealFromRope[RefText.TrustTextAsRope[text]]
ELSE {
scratch: REF TEXT ~ RefText.ObtainScratch[n];
{
ENABLE
UNWIND => RefText.ReleaseScratch[scratch];
temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n];
x ← Convert.RealFromRope[RefText.TrustTextAsRope[temp]];
};
RefText.ReleaseScratch[scratch];
};
};
WrittenReadInt:
PROC[reader: Reader, token: Token, text:
REF
TEXT]
RETURNS[INT] ~ {
SELECT token.type
FROM
$shortNumber => RETURN[token.shortNumber];
$integer => RETURN[ParseInt[text]];
ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not an integer."];
};
WrittenReadRational:
PROC[reader: Reader, token: Token, text:
REF
TEXT]
RETURNS[Rational] ~ {
SELECT token.type
FROM
$rational => {
slash: NAT ~ RefText.Find[s1: text, s2: "/"];
{
ENABLE IPReader.Error =>
IF code=$overflow
THEN
GOTO Overflow;
num: INT ~ ParseInt[text: text, len: slash];
den: INT ~ ParseInt[text: text, start: slash+1];
RETURN[[num: num, den: den]];
EXITS Overflow => ERROR IPReader.Error[$overflow, "Number is too big for an INT."];
};
};
ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not a rational."];
};
WrittenReadReal:
PROC[reader: Reader, token: Token, text:
REF
TEXT]
RETURNS[REAL] ~ {
SELECT token.type
FROM
$integer, $real => RETURN[ParseReal[text]];
ENDCASE => ERROR IPReader.Error[$wrongType, "Token type is not $integer or $real."];
};
EndOfLargeVector: ERROR ~ CODE;
GetHexDigit:
PROC[stream:
STREAM]
RETURNS[[0..16)] ~ {
DO char:
CHAR ~
IO.GetChar[stream];
SELECT char
FROM
IN['0..'9] => RETURN[char-'0];
IN['A..'F] => RETURN[char-'A+10];
') => ERROR EndOfLargeVector;
<='\040 => NULL;
ENDCASE => ERROR IPReader.Error[$illegalToken, "Illegal character in large vector."];
ENDLOOP;
};
GetHexByte:
PROC[stream:
STREAM]
RETURNS[
BYTE] ~ {
h1, h2: [0..16);
h1 ← GetHexDigit[stream];
h2 ← GetHexDigit[stream ! EndOfLargeVector => GOTO OddDigit];
RETURN[h1*16+h2];
EXITS OddDigit => ERROR IPReader.Error[$illegalToken, "Large vector ends in mid-byte."];
};
lvProcs:
REF
IO.StreamProcs ~
IO.CreateStreamProcs[
variety: $input, class: $InterpressLargeVectorInputWritten,
getChar: LVGetChar,
endOf: LVEndOf,
getIndex: LVGetIndex,
setIndex: LVSetIndex,
getLength: LVGetLength
];
LVData: TYPE ~ REF LVDataRep;
LVDataRep:
TYPE ~
RECORD[
master: STREAM, -- stream on the master file
start: INT, -- index in master of first data byte
bytesPerElement: BYTE, -- data bytes per vector element
index: INT, -- current stream index
length: INT -- stream length (INT.LAST if not yet known)
];
CreateLVStream:
PROC[master:
STREAM, bytesPerElement:
BYTE]
RETURNS[
STREAM] ~ {
start: INT ~ IO.GetIndex[master];
data: LVData ~
NEW[LVDataRep ← [master: master, start: start,
index: 0, length: INT.LAST, bytesPerElement: bytesPerElement]];
RETURN[IO.CreateStream[streamProcs: lvProcs, streamData: data]];
};
SetLength:
PROC[data: LVData, length:
INT] ~ {
data.length ← length;
IF (length
MOD data.bytesPerElement)#0
THEN
ERROR IPReader.Error[
$illegalToken, "Large vector length must be a multiple of bytesPerElement."];
};
LVGetChar:
PROC[self:
STREAM]
RETURNS[
CHAR] ~ {
data: LVData ~ NARROW[self.streamData];
IF data.index<data.length
THEN {
byte: BYTE ~ GetHexByte[data.master ! EndOfLargeVector => GOTO End];
data.index ← data.index+1;
RETURN[VAL[byte]];
EXITS End => SetLength[data, data.index];
};
ERROR IO.EndOfStream[self];
};
LVEndOf:
PROC[self:
STREAM]
RETURNS[
BOOL] ~ {
data: LVData ~ NARROW[self.streamData];
IF data.index=data.length THEN RETURN[TRUE]
ELSE
DO
char: CHAR ~ IO.GetChar[data.master];
SELECT char
FROM
') => { SetLength[data, data.index]; RETURN[TRUE] };
IN['0..'9], IN['A..'F] => { IO.Backup[data.master, char]; RETURN[FALSE] };
IN['\041..'\176] => ERROR IPReader.Error[$illegalToken, "Illegal character in large vector."];
ENDCASE;
ENDLOOP;
};
LVGetIndex:
PROC[self:
STREAM]
RETURNS[
INT] ~ {
data: LVData ~ NARROW[self.streamData];
RETURN[data.index];
};
LVSetIndex:
PROC[self:
STREAM, index:
INT] ~ {
data: LVData ~ NARROW[self.streamData];
IF index<0 THEN ERROR IO.Error[$BadIndex, self];
IF index>data.length THEN ERROR IO.EndOfStream[self];
IF index<data.index THEN { IO.SetIndex[data.master, data.start]; data.index ← 0 };
WHILE data.index<index
DO
byte: BYTE ~ GetHexByte[data.master ! EndOfLargeVector => GOTO End];
data.index ← data.index+1;
REPEAT End => { SetLength[data, data.index]; ERROR IO.EndOfStream[self] };
ENDLOOP;
};
LVGetLength:
PROC[self:
STREAM]
RETURNS[
INT] ~ {
data: LVData ~ NARROW[self.streamData];
IF data.length=
INT.
LAST
THEN {
mindex: INT ~ IO.GetIndex[data.master];
count: INT ← 0;
DO
[] ← GetHexByte[data.master ! EndOfLargeVector => EXIT];
count ← count+1;
ENDLOOP;
SetLength[data, data.index+count];
IO.SetIndex[data.master, mindex];
};
RETURN[data.length];
};
WrittenReadLargeVector:
PROC[reader: Reader, token: Token]
RETURNS[IPReader.LargeVector] ~ {
SELECT token.type
FROM
$largeVector => {
stream: STREAM ~ IPReader.StreamFromInputStream[reader.stream];
bytesPerElement: BYTE ← 0;
IO.SetIndex[stream, token.index];
IF IO.GetChar[stream]#'( THEN GOTO NoParen;
bytesPerElement ← GetHexByte[stream ! EndOfLargeVector => GOTO NoBytes];
RETURN[
NEW[IPReader.LargeVectorRep ← [
source: CreateLVStream[stream, bytesPerElement],
bytesPerElement: bytesPerElement, type: $nil]]];
EXITS
NoParen => ERROR IPReader.Error[$illegalToken, "Expected a large vector token."];
NoBytes => ERROR IPReader.Error[$illegalToken, "Empty large vector."];
};
ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not a vector."];
};
IPReader.Register[writtenReader];
END.