<> <> <> DIRECTORY Convert USING [IntFromRope, RealFromRope], IO USING [Backup, CreateStream, CreateStreamProcs, EndOfStream, Error, GetChar, GetIndex, PutFR, SetIndex, STREAM, StreamProcs, text], IPBasic USING [Op, Rational], IPReader USING [Class, ClassRep, Error, LargeVector, LargeVectorRep, nullToken, Reader, Register, StreamFromInputStream, Token, TokenType], IPWritten USING [EncodingTable, GetEncodingTable], RefText USING [Append, Find, InlineAppendChar, ObtainScratch, ReleaseScratch, TrustTextAsRope], Rope USING [ROPE], SymTab USING [Create, Fetch, Insert, Ref]; IPWrittenReaderImpl: CEDAR PROGRAM IMPORTS Convert, IO, IPReader, IPWritten, RefText, SymTab ~ BEGIN ROPE: TYPE ~ Rope.ROPE; STREAM: TYPE ~ IO.STREAM; BYTE: TYPE ~ [0..256); Op: TYPE ~ IPBasic.Op; Rational: TYPE ~ IPBasic.Rational; Reader: TYPE ~ IPReader.Reader; Token: TYPE ~ IPReader.Token; nullToken: Token ~ IPReader.nullToken; InvertEncodingTable: PROC[encode: IPWritten.EncodingTable] RETURNS[SymTab.Ref] = { decode: SymTab.Ref ~ SymTab.Create[mod: 101, case: TRUE]; FOR op: Op IN Op DO key: ROPE ~ encode[op]; IF key#NIL THEN { opRef: REF Op ~ NEW[Op _ op]; IF SymTab.Insert[x: decode, key: key, val: opRef] THEN NULL ELSE ERROR; }; ENDLOOP; RETURN[decode]; }; decode: SymTab.Ref ~ InvertEncodingTable[IPWritten.GetEncodingTable[]]; Decode: PROC[rope: ROPE] RETURNS[Op] ~ { found: BOOL; val: REF; [found: found, val: val] _ SymTab.Fetch[x: decode, key: rope]; IF found THEN { opRef: REF Op ~ NARROW[val]; RETURN[opRef^] } ELSE RETURN[nil]; }; writtenReader: IPReader.Class ~ NEW[IPReader.ClassRep _ [ encoding: $Written, getToken: WrittenGetToken, readInt: WrittenReadInt, readReal: WrittenReadReal, readRational: WrittenReadRational, readLargeVector: WrittenReadLargeVector ]]; <> <> <> <> <<};>> WrittenGetToken: PROC[reader: Reader, buffer: REF TEXT, flushComments: BOOL] RETURNS[token: Token _ nullToken, text: REF TEXT _ NIL] ~ { stream: STREAM ~ reader.stream; DO state: {begin, op, id, plus, minus, ast, int, den1, den2, den3, dot, frac, exp1, exp2, exp3, string, escape, vector, com, com1, file, file1, ann, ann1} _ begin; err: {ok, illegalChar, invalidRational, invalidReal, invalidEscape, invalidVector} _ ok; char: CHAR; -- last character fetched got: BOOL _ TRUE; -- true if got char from stream, false if char is fake space at end esc: NAT; -- starting index of escape sequence BeginEscape: PROC ~ { esc _ text.length-1 }; EndEscape: PROC ~ { start: NAT ~ esc+1; stop: NAT ~ text.length-1; val: CARDINAL _ 0; IF stop<=start THEN err _ invalidEscape ELSE FOR i: NAT IN[start..stop) DO c: CHAR ~ text[i]; IF c IN['0..'9] THEN val _ val*10+(c-'0) ELSE { err _ invalidEscape; EXIT }; IF val>255 THEN { val _ 0; err _ invalidEscape; EXIT }; ENDLOOP; text[esc] _ 0C+val; text.length _ esc+1; }; token.index _ stream.GetIndex[]; DO -- skip white space char _ stream.GetChar[! IO.EndOfStream => GOTO EndOfFile]; IF char IN['\041..'\176] THEN EXIT ELSE token.index _ token.index+1; ENDLOOP; IF (text _ buffer)=NIL THEN text _ reader.buffer; text.length _ 0; DO -- scan a token IF text#NIL THEN text _ RefText.InlineAppendChar[text, char]; -- tentatively append char { SELECT state FROM begin => SELECT char FROM IN['A..'Z] => { state _ op }; -- begin op IN['a..'z] => { state _ id }; -- begin identifier IN['0..'9] => { state _ int }; -- begin number '{ => { token.op _ beginBody; GOTO SingleOp }; '} => { token.op _ endBody; GOTO SingleOp }; '[ => { token.op _ beginVec; GOTO SingleOp }; '] => { token.op _ endVec; GOTO SingleOp }; ', => { token.op _ comma; GOTO SingleOp }; '< => { state _ string; GOTO Begin }; -- begin string '( => { state _ vector; GOTO Skip }; -- begin largeVector '+ => { state _ plus }; -- might begin number or insertfile '- => { state _ minus }; -- might begin number or annotation '* => { state _ ast }; -- might begin comment '. => { state _ dot }; -- might begin real ENDCASE => { err _ illegalChar; GOTO Stop }; op => SELECT char FROM IN['A..'Z], IN['0..'9] => { }; -- extend op IN['a..'z], '- => { state _ id }; -- switch to id ENDCASE => { token.type _ op; GOTO Back }; -- op token id => SELECT char FROM IN['a..'z], IN['A..'Z], IN['0..'9], '- => { }; -- extend id ENDCASE => { token.type _ identifier; GOTO Back }; plus => SELECT char FROM IN['0..'9] => { state _ int }; -- first integer digit '+ => { state _ file; GOTO Begin }; -- begin insertfile ENDCASE => { err _ illegalChar; GOTO Back }; -- plus sign alone minus => SELECT char FROM IN['0..'9] => { state _ int }; -- first integer digit '- => { state _ ann; GOTO Begin }; -- begin annotation ENDCASE => { err _ illegalChar; GOTO Back }; -- minus sign alone ast => SELECT char FROM '* => { state _ com; GOTO Begin }; -- begin comment ENDCASE => { err _ illegalChar; GOTO Back }; -- asterisk alone int => SELECT char FROM IN['0..'9] => { }; -- integer digit '. => { state _ frac }; -- fraction follows '/ => { state _ den1 }; -- denominator follows 'E, 'e => { state _ exp1 }; -- exponent follows ENDCASE => { token.type _ integer; GOTO Back }; -- integer token den1 => SELECT char FROM IN['0..'9] => { state _ den3 }; -- first denominator digit '+, '- => { state _ den2 }; -- denominator sign ENDCASE => { err _ invalidRational; GOTO Back }; den2 => SELECT char FROM IN['0..'9] => { state _ den3 }; -- first denominator digit (after sign) ENDCASE => { err _ invalidRational; GOTO Back }; den3 => SELECT char FROM IN['0..'9] => { }; -- denominator digit ENDCASE => { token.type _ rational; GOTO Back }; -- rational token dot => SELECT char FROM IN['0..'9] => { state _ frac }; -- first fraction digit ENDCASE => { err _ illegalChar; GOTO Back }; -- dot alone frac => SELECT char FROM IN['0..'9] => { }; -- fraction digit 'E, 'e => { state _ exp1 }; -- exponent follows ENDCASE => { token.type _ real; GOTO Back }; -- real token (no exponent) exp1 => SELECT char FROM IN['0..'9] => { state _ exp3 }; -- first exponent digit '+, '- => { state _ exp2 }; -- exponent sign ENDCASE => { err _ invalidReal; GOTO Back }; exp2 => SELECT char FROM IN['0..'9] => { state _ exp3 }; -- first exponent digit (after sign) ENDCASE => { err _ invalidReal; GOTO Back }; exp3 => SELECT char FROM IN['0..'9] => { }; -- exponent digit ENDCASE => { token.type _ real; GOTO Back }; -- real token (with exponent) string => SELECT char FROM '> => { token.type _ string; GOTO Lop1 }; -- end string token '# => { BeginEscape[]; state _ escape }; -- begin escape sequence ENDCASE => { }; -- extend string escape => SELECT char FROM '# => { EndEscape[]; state _ string }; -- end escape sequence ENDCASE => { }; -- extend escape sequence vector => SELECT char FROM ') => { token.type _ largeVector; GOTO Stop }; -- end large vector IN['0..'9], IN['A..'F], ' => { }; -- continue vector ENDCASE => { err _ invalidVector }; -- note error and press on com => SELECT char FROM '* => { state _ com1 }; -- look for second * ENDCASE => { }; -- extend comment com1 => SELECT char FROM '* => { token.type _ comment; GOTO Lop2 }; -- end comment ENDCASE => { state _ com }; -- continue comment file => SELECT char FROM '+ => { state _ file1 }; -- look for second + ENDCASE => { }; -- extend file name file1 => SELECT char FROM '+ => { token.type _ insertfile; GOTO Lop2 }; -- end insertfile ENDCASE => { state _ file }; -- continue file name ann => SELECT char FROM '- => { state _ ann1 }; -- look for second - ENDCASE => { }; -- extend annotation ann1 => SELECT char FROM '- => { token.type _ annotation; GOTO Lop2 }; -- end annotation ENDCASE => { state _ ann }; -- continue annotation ENDCASE => ERROR; -- unknown state EXITS SingleOp => { token.type _ op; EXIT }; Begin => text.length _ 0; Skip => text _ NIL; Stop => { EXIT }; Lop1 => { text.length _ text.length-1; EXIT }; Lop2 => { text.length _ text.length-2; EXIT }; Back => { IF got THEN stream.Backup[char]; text.length _ text.length-1; EXIT }; }; char _ IO.GetChar[stream ! IO.EndOfStream => IF got THEN { char _ ' ; got _ FALSE; CONTINUE }]; ENDLOOP; IF err#ok THEN ERROR IPReader.Error[$illegalToken, SELECT err FROM illegalChar => "Illegal character.", invalidRational => "Invalid rational.", invalidReal => "Invalid real number.", invalidEscape => "Invalid escape sequence in string.", invalidVector => "Invalid character in large vector.", ENDCASE => NIL]; SELECT token.type FROM $nil => ERROR IPReader.Error[$bug, "WrittenGetToken didn't set token type."]; $op => { IF token.op=$nil THEN token.op _ Decode[RefText.TrustTextAsRope[text]]; IF token.op=$nil THEN ERROR IPReader.Error[$illegalToken, IO.PutFR["\"%g\" is not a primitive.", IO.text[text]]]; }; $comment, $annotation => IF flushComments THEN LOOP; ENDCASE; EXIT; REPEAT EndOfFile => token.type _ eof; ENDLOOP; IF buffer=NIL THEN text _ NIL; }; ParseInt: PROC[text: REF TEXT, start: NAT _ 0, len: NAT _ NAT.LAST] RETURNS[x: INT] ~ { s: NAT ~ MIN[start, text.length]; n: NAT ~ MIN[len, text.length-s]; IF s=0 AND n=text.length THEN x _ Convert.IntFromRope[RefText.TrustTextAsRope[text]] ELSE { scratch: REF TEXT ~ RefText.ObtainScratch[n]; { ENABLE UNWIND => RefText.ReleaseScratch[scratch]; temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n]; x _ Convert.IntFromRope[RefText.TrustTextAsRope[temp]]; }; RefText.ReleaseScratch[scratch]; }; }; ParseReal: PROC[text: REF TEXT, start: NAT _ 0, len: NAT _ NAT.LAST] RETURNS[x: REAL] ~ { s: NAT ~ MIN[start, text.length]; n: NAT ~ MIN[len, text.length-s]; IF s=0 AND n=text.length THEN x _ Convert.RealFromRope[RefText.TrustTextAsRope[text]] ELSE { scratch: REF TEXT ~ RefText.ObtainScratch[n]; { ENABLE UNWIND => RefText.ReleaseScratch[scratch]; temp: REF TEXT ~ RefText.Append[to: scratch, from: text, start: s, len: n]; x _ Convert.RealFromRope[RefText.TrustTextAsRope[temp]]; }; RefText.ReleaseScratch[scratch]; }; }; WrittenReadInt: PROC[reader: Reader, token: Token, text: REF TEXT] RETURNS[INT] ~ { SELECT token.type FROM $shortNumber => RETURN[token.shortNumber]; $integer => RETURN[ParseInt[text]]; ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not an integer."]; }; WrittenReadRational: PROC[reader: Reader, token: Token, text: REF TEXT] RETURNS[Rational] ~ { SELECT token.type FROM $rational => { slash: NAT ~ RefText.Find[s1: text, s2: "/"]; { ENABLE IPReader.Error => IF code=$overflow THEN GOTO Overflow; num: INT ~ ParseInt[text: text, len: slash]; den: INT ~ ParseInt[text: text, start: slash+1]; RETURN[[num: num, den: den]]; EXITS Overflow => ERROR IPReader.Error[$overflow, "Number is too big for an INT."]; }; }; ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not a rational."]; }; WrittenReadReal: PROC[reader: Reader, token: Token, text: REF TEXT] RETURNS[REAL] ~ { SELECT token.type FROM $integer, $real => RETURN[ParseReal[text]]; ENDCASE => ERROR IPReader.Error[$wrongType, "Token type is not $integer or $real."]; }; EndOfLargeVector: ERROR ~ CODE; GetHexDigit: PROC[stream: STREAM] RETURNS[[0..16)] ~ { DO char: CHAR ~ IO.GetChar[stream]; SELECT char FROM IN['0..'9] => RETURN[char-'0]; IN['A..'F] => RETURN[char-'A+10]; ') => ERROR EndOfLargeVector; <='\040 => NULL; ENDCASE => ERROR IPReader.Error[$illegalToken, "Illegal character in large vector."]; ENDLOOP; }; GetHexByte: PROC[stream: STREAM] RETURNS[BYTE] ~ { h1, h2: [0..16); h1 _ GetHexDigit[stream]; h2 _ GetHexDigit[stream ! EndOfLargeVector => GOTO OddDigit]; RETURN[h1*16+h2]; EXITS OddDigit => ERROR IPReader.Error[$illegalToken, "Large vector ends in mid-byte."]; }; lvProcs: REF IO.StreamProcs ~ IO.CreateStreamProcs[ variety: $input, class: $InterpressLargeVectorInputWritten, getChar: LVGetChar, endOf: LVEndOf, getIndex: LVGetIndex, setIndex: LVSetIndex, getLength: LVGetLength ]; LVData: TYPE ~ REF LVDataRep; LVDataRep: TYPE ~ RECORD[ master: STREAM, -- stream on the master file start: INT, -- index in master of first data byte bytesPerElement: BYTE, -- data bytes per vector element index: INT, -- current stream index length: INT -- stream length (INT.LAST if not yet known) ]; CreateLVStream: PROC[master: STREAM, bytesPerElement: BYTE] RETURNS[STREAM] ~ { start: INT ~ IO.GetIndex[master]; data: LVData ~ NEW[LVDataRep _ [master: master, start: start, index: 0, length: INT.LAST, bytesPerElement: bytesPerElement]]; RETURN[IO.CreateStream[streamProcs: lvProcs, streamData: data]]; }; SetLength: PROC[data: LVData, length: INT] ~ { data.length _ length; IF (length MOD data.bytesPerElement)#0 THEN ERROR IPReader.Error[ $illegalToken, "Large vector length must be a multiple of bytesPerElement."]; }; LVGetChar: PROC[self: STREAM] RETURNS[CHAR] ~ { data: LVData ~ NARROW[self.streamData]; IF data.index GOTO End]; data.index _ data.index+1; RETURN[VAL[byte]]; EXITS End => SetLength[data, data.index]; }; ERROR IO.EndOfStream[self]; }; LVEndOf: PROC[self: STREAM] RETURNS[BOOL] ~ { data: LVData ~ NARROW[self.streamData]; IF data.index=data.length THEN RETURN[TRUE] ELSE DO char: CHAR ~ IO.GetChar[data.master]; SELECT char FROM ') => { SetLength[data, data.index]; RETURN[TRUE] }; IN['0..'9], IN['A..'F] => { IO.Backup[data.master, char]; RETURN[FALSE] }; IN['\041..'\176] => ERROR IPReader.Error[$illegalToken, "Illegal character in large vector."]; ENDCASE; ENDLOOP; }; LVGetIndex: PROC[self: STREAM] RETURNS[INT] ~ { data: LVData ~ NARROW[self.streamData]; RETURN[data.index]; }; LVSetIndex: PROC[self: STREAM, index: INT] ~ { data: LVData ~ NARROW[self.streamData]; IF index<0 THEN ERROR IO.Error[$BadIndex, self]; IF index>data.length THEN ERROR IO.EndOfStream[self]; IF index GOTO End]; data.index _ data.index+1; REPEAT End => { SetLength[data, data.index]; ERROR IO.EndOfStream[self] }; ENDLOOP; }; LVGetLength: PROC[self: STREAM] RETURNS[INT] ~ { data: LVData ~ NARROW[self.streamData]; IF data.length=INT.LAST THEN { mindex: INT ~ IO.GetIndex[data.master]; count: INT _ 0; DO [] _ GetHexByte[data.master ! EndOfLargeVector => EXIT]; count _ count+1; ENDLOOP; SetLength[data, data.index+count]; IO.SetIndex[data.master, mindex]; }; RETURN[data.length]; }; WrittenReadLargeVector: PROC[reader: Reader, token: Token] RETURNS[IPReader.LargeVector] ~ { SELECT token.type FROM $largeVector => { stream: STREAM ~ IPReader.StreamFromInputStream[reader.stream]; bytesPerElement: BYTE _ 0; IO.SetIndex[stream, token.index]; IF IO.GetChar[stream]#'( THEN GOTO NoParen; bytesPerElement _ GetHexByte[stream ! EndOfLargeVector => GOTO NoBytes]; RETURN[NEW[IPReader.LargeVectorRep _ [ source: CreateLVStream[stream, bytesPerElement], bytesPerElement: bytesPerElement, type: $nil]]]; EXITS NoParen => ERROR IPReader.Error[$illegalToken, "Expected a large vector token."]; NoBytes => ERROR IPReader.Error[$illegalToken, "Empty large vector."]; }; ENDCASE => ERROR IPReader.Error[$wrongType, "Token is not a vector."]; }; IPReader.Register[writtenReader]; END.