DIRECTORY Ascii USING [TAB, CR, SP, DEL], IO USING [Backup, EndOf, EndOfStream, GetChar, PeekChar, STREAM], Lex822, RefText USING [InlineAppendChar, ObtainScratch, ReleaseScratch], Rope USING [Cat, FromRefText, IsEmpty, ROPE]; Lex822Impl: CEDAR PROGRAM IMPORTS IO, RefText, Rope EXPORTS Lex822 = BEGIN OPEN Lex822; CharType: TYPE = {ctlType, crType, lwspType, specialType, atomType, illegalType}; charType: ARRAY CHAR OF CharType; InternalGetToken: PROC [s: IO.STREAM] RETURNS [token: Rope.ROPE, tokType: TokenType] = { state: {start, inWhite, inAtom, inDelimited, slurpOne} _ start; nestable: BOOL; nestingLevel: INT _ 0; openingDelim, closingDelim: CHAR; TokenProc: CharProc = { SELECT state FROM start => SELECT charType[char] FROM crType => IF NOT s.EndOf[] AND charType[s.PeekChar[]] = lwspType THEN { tokType _ whiteSpaceTok; state _ inWhite; } ELSE { tokType _ EOLTok; RETURN [TRUE, TRUE] }; lwspType => {tokType _ whiteSpaceTok; state _ inWhite}; specialType => SELECT char FROM '( => { state _ inDelimited; nestable _ TRUE; nestingLevel _ 1; openingDelim _ char; closingDelim _ '); tokType _ commentTok; }; '[ => { state _ inDelimited; nestable _ FALSE; openingDelim _ char; closingDelim _ ']; tokType _ domainLiteralTok; }; '" => { state _ inDelimited; nestable _ FALSE; openingDelim _ char; closingDelim _ '"; tokType _ quotedStringTok; }; ENDCASE => {tokType _ specialTok; RETURN [TRUE, TRUE]}; atomType => {tokType _ atomTok; state _ inAtom}; ENDCASE => {tokType _ errorTok; RETURN [TRUE, TRUE]}; inWhite => { IF char = Ascii.CR THEN { IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN RETURN [TRUE, FALSE]; } ELSE RETURN[charType[char] # lwspType, charType[char] = lwspType]; }; inAtom => RETURN[charType[char] # atomType, charType[char] = atomType]; inDelimited => { IF char = '\\ THEN state _ slurpOne ELSE IF char = Ascii.CR THEN { IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN { tokType _ errorTok; -- token ends too soon RETURN [TRUE, FALSE]; }; } ELSE IF char = closingDelim THEN { IF nestable THEN nestingLevel _ nestingLevel - 1; IF nestingLevel <= 0 THEN quit _ TRUE; } ELSE IF char = openingDelim THEN IF nestable THEN nestingLevel _ nestingLevel + 1 ELSE {tokType _ errorTok; RETURN [TRUE, FALSE]} -- tried to nest illegally }; slurpOne => state _ inDelimited; ENDCASE => ERROR; }; -- of TokenProc tokType _ errorTok; token _ GetSequence[s, TokenProc]; -- sets tokType as side effect }; LexToken: PUBLIC PROC [s: IO.STREAM] RETURNS [token, whiteSpace: Rope.ROPE, tokType: TokenType] = { whiteSpace _ NIL; DO [token, tokType] _ InternalGetToken[s]; SELECT tokType FROM whiteSpaceTok => whiteSpace _ whiteSpace.Cat[" "]; commentTok => whiteSpace _ whiteSpace.Cat[token]; ENDCASE => RETURN; ENDLOOP; }; LexFieldName: PUBLIC PROC [s: IO.STREAM] RETURNS [fieldName: Rope.ROPE, fieldNameOk: BOOL] = { FieldProc: CharProc = { IF char = ': THEN RETURN [TRUE, FALSE]; SELECT charType[char] FROM ctlType, lwspType, crType => RETURN [TRUE, FALSE]; ENDCASE => RETURN [FALSE, TRUE]; }; fieldName _ GetSequence[s, FieldProc]; IF fieldName.IsEmpty THEN fieldNameOk _ s.EndOf[] OR charType[s.PeekChar[]] = crType ELSE fieldNameOk _ TRUE; }; LexText: PUBLIC PROC [s: IO.STREAM] RETURNS [text: Rope.ROPE] = { lastWasCR: BOOL _ FALSE; TextProc: CharProc = { IF lastWasCR AND charType[char] # lwspType THEN { quit _ TRUE; include _ FALSE; } ELSE { quit _ FALSE; include _ TRUE; }; lastWasCR _ char = Ascii.CR; }; RETURN [GetSequence[s, TextProc]]; }; GetSequence: PUBLIC PROC [stream: IO.STREAM, charProc: CharProc] RETURNS [value: Rope.ROPE] = { buffer: REF TEXT _ RefText.ObtainScratch[512]; buffer.length _ 0; DO char: CHAR _ stream.GetChar[ ! IO.EndOfStream => EXIT]; quit, include: BOOLEAN; [quit, include] _ charProc[char]; IF include THEN buffer _ RefText.InlineAppendChar[buffer, char] ELSE IF quit THEN stream.Backup[char]; IF quit THEN EXIT; ENDLOOP; value _ Rope.FromRefText[buffer]; RefText.ReleaseScratch[buffer]; }; FOR c: CHAR IN [0C..37C] DO charType[c] _ ctlType; ENDLOOP; FOR c: CHAR IN [41C..176C] DO charType[c] _ atomType; ENDLOOP; charType[Ascii.CR] _ crType; charType[Ascii.TAB] _ lwspType; charType[Ascii.SP] _ lwspType; charType['"] _ specialType; charType['\\] _ specialType; charType['(] _ specialType; charType[')] _ specialType; charType['[] _ specialType; charType[']] _ specialType; charType['<] _ specialType; charType['>] _ specialType; charType['@] _ specialType; charType['.] _ specialType; charType[',] _ specialType; charType[':] _ specialType; charType[';] _ specialType; charType[Ascii.DEL] _ ctlType; FOR c: CHAR IN [200C..377C] DO charType[c] _ illegalType; ENDLOOP; END. ฮLex822Impl.mesa, Implementation of RFC 822 lexical analyzer. HGM, March 8, 1984 10:08:21 pm PST David Nichols, July 13, 1983 3:17 pm Last Edited by: Taft, February 5, 1984 11:53:10 am PST John Larson, March 9, 1987 6:01:34 pm PST Get a token of any type from s, including white space and comments. Get a normal token from s and return the text of the token, the white space and comments following the token, and the type of token that was found. Since the white space is returned separately, tokType will never be whiteSpaceTok or commentTok. Return the contents of a field as text, i.e. return the text that follows up to a newline not followed by white space. This is first char of next header. Use the char. สฎ˜headšœ<™