-- ArpaLex822Impl.mesa
-- Implementation of RFC 822 lexical analyzer.
-- HGM, March 8, 1984 10:08:21 pm PST
-- David Nichols, July 13, 1983 3:17 pm
-- Last Edited by: Taft, February 5, 1984 11:53:10 am PST
-- HGM, March 8, 1984 10:08:21 pm PST
-- John Larson, October 10, 1987 5:01:34 pm PDT

DIRECTORY
  Ascii USING [TAB, CR, SP, DEL],
  IO USING [Backup, EndOf, EndOfStream, GetChar, PeekChar, STREAM],
  ArpaLex822,
  RefText USING [InlineAppendChar, ObtainScratch, ReleaseScratch],
  Rope USING [Cat, FromRefText, IsEmpty, ROPE];

ArpaLex822Impl: CEDAR PROGRAM
  IMPORTS IO, RefText, Rope
  EXPORTS ArpaLex822 =
BEGIN OPEN ArpaLex822;

-- Lexical class of a single character. The table below is filled in by the
-- module start code at the bottom of this file.
CharType: TYPE = {ctlType, crType, lwspType, specialType, atomType, illegalType};
charType: ARRAY CHAR OF CharType;

InternalGetToken: PROC [s: IO.STREAM] RETURNS [token: Rope.ROPE, tokType: TokenType] = {
  -- Get a token of any type from s, including white space and comments.
  -- The text is collected by GetSequence; TokenProc is the per-character state
  -- machine and sets tokType as a side effect.
  -- If s is already at its end, TokenProc is never called and tokType stays errorTok.
  -- NOTE(review): several arms of TokenProc fall off the end without an explicit
  -- RETURN and so rely on the default result values declared for CharProc in the
  -- ArpaLex822 interface (not visible here; presumably quit = FALSE, include = TRUE).
  state: {start, inWhite, inAtom, inDelimited, slurpOne} _ start;
  nestable: BOOL;  -- TRUE only for comments, which may nest
  nestingLevel: INT _ 0;
  openingDelim, closingDelim: CHAR;

  TokenProc: CharProc = {
    SELECT state FROM
      start =>
        -- First character of the token decides the token type.
        SELECT charType[char] FROM
          crType =>
            -- A CR followed by linear white space is folded white space;
            -- any other CR is an end of line token.
            IF NOT s.EndOf[] AND charType[s.PeekChar[]] = lwspType THEN {
              tokType _ whiteSpaceTok;
              state _ inWhite;
              }
            ELSE {
              tokType _ EOLTok;
              RETURN [TRUE, TRUE]
              };
          lwspType => {tokType _ whiteSpaceTok; state _ inWhite};
          specialType =>
            SELECT char FROM
              '( => {
                state _ inDelimited;
                nestable _ TRUE;
                nestingLevel _ 1;
                openingDelim _ char;
                closingDelim _ ');
                tokType _ commentTok;
                };
              '[ => {
                state _ inDelimited;
                nestable _ FALSE;
                openingDelim _ char;
                closingDelim _ '];
                tokType _ domainLiteralTok;
                };
              '" => {
                state _ inDelimited;
                nestable _ FALSE;
                openingDelim _ char;
                closingDelim _ '";
                tokType _ quotedStringTok;
                };
              ENDCASE => {tokType _ specialTok; RETURN [TRUE, TRUE]};
          atomType => {tokType _ atomTok; state _ inAtom};
          ENDCASE => {tokType _ errorTok; RETURN [TRUE, TRUE]};

      inWhite => {
        IF char = Ascii.CR THEN {
          -- A CR that does not continue onto a folded line ends the white space
          -- and is left in the stream (quit without include).
          IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN
            RETURN [TRUE, FALSE];
          }
        ELSE
          RETURN[charType[char] # lwspType, charType[char] = lwspType];
        };

      inAtom => RETURN[charType[char] # atomType, charType[char] = atomType];

      inDelimited => {
        IF char = '\\ THEN
          -- Backslash quotes the next character, whatever it is.
          state _ slurpOne
        ELSE IF char = Ascii.CR THEN {
          IF s.EndOf[] OR charType[s.PeekChar[]] # lwspType THEN {
            tokType _ errorTok;  -- token ends too soon
            RETURN [TRUE, FALSE];
            };
          }
        ELSE IF char = closingDelim THEN {
          -- For non-nestable tokens nestingLevel is still 0, so the first
          -- closing delimiter ends the token.
          IF nestable THEN nestingLevel _ nestingLevel - 1;
          IF nestingLevel <= 0 THEN quit _ TRUE;
          }
        ELSE IF char = openingDelim THEN
          IF nestable THEN nestingLevel _ nestingLevel + 1
          ELSE {tokType _ errorTok; RETURN [TRUE, FALSE]}  -- tried to nest illegally
        };

      slurpOne => state _ inDelimited;

      ENDCASE => ERROR;
    };  -- of TokenProc

  tokType _ errorTok;
  token _ GetSequence[s, TokenProc];  -- sets tokType as side effect
  };

LexToken: PUBLIC PROC [s: IO.STREAM] RETURNS [token, whiteSpace: Rope.ROPE, tokType: TokenType] = {
  -- Get a normal token from s and return the text of the token, the white space
  -- and comments that were skipped before reaching it, and the type of token
  -- that was found. Since the white space is returned separately, tokType will
  -- never be whiteSpaceTok or commentTok.
  -- Each run of white space contributes a single blank to whiteSpace; each
  -- comment contributes its full text.
  whiteSpace _ NIL;
  DO
    [token, tokType] _ InternalGetToken[s];
    SELECT tokType FROM
      whiteSpaceTok => whiteSpace _ whiteSpace.Cat[" "];
      commentTok => whiteSpace _ whiteSpace.Cat[token];
      ENDCASE => RETURN;
    ENDLOOP;
  };

LexFieldName: PUBLIC PROC [s: IO.STREAM] RETURNS [fieldName: Rope.ROPE, fieldNameOk: BOOL] = {
  -- Collect characters up to, but not including, the first colon, control
  -- character, linear white space character or CR. The terminating character is
  -- left in the stream (GetSequence backs it up).
  -- An empty name is accepted only when the stream is at its end or the next
  -- character is a CR; any non-empty name is reported as ok.
  FieldProc: CharProc = {
    IF char = ': THEN RETURN [TRUE, FALSE];
    SELECT charType[char] FROM
      ctlType, lwspType, crType => RETURN [TRUE, FALSE];
      ENDCASE => RETURN [FALSE, TRUE];
    };

  fieldName _ GetSequence[s, FieldProc];
  IF fieldName.IsEmpty THEN
    fieldNameOk _ s.EndOf[] OR charType[s.PeekChar[]] = crType
  ELSE
    fieldNameOk _ TRUE;
  };

LexText: PUBLIC PROC [s: IO.STREAM] RETURNS [text: Rope.ROPE] = {
  -- Return the contents of a field as text, i.e. return the text that follows
  -- up to a newline not followed by white space. The CR characters seen along
  -- the way are part of the returned text.
  lastWasCR: BOOL _ FALSE;

  TextProc: CharProc = {
    IF lastWasCR AND charType[char] # lwspType THEN {
      -- This is first char of next header.
      quit _ TRUE;
      include _ FALSE;
      }
    ELSE {
      -- Use the char.
      quit _ FALSE;
      include _ TRUE;
      };
    lastWasCR _ char = Ascii.CR;
    };

  RETURN [GetSequence[s, TextProc]];
  };

GetSequence: PUBLIC PROC [stream: IO.STREAM, charProc: CharProc] RETURNS [value: Rope.ROPE] = {
  -- Read characters from stream, handing each one to charProc, until charProc
  -- asks to quit or the stream is exhausted, and return the collected text.
  -- For each character: include => it is appended to the result; quit without
  -- include => it is pushed back onto the stream; neither => it is dropped.
  buffer: REF TEXT _ RefText.ObtainScratch[512];
  buffer.length _ 0;
  DO
    char: CHAR _ stream.GetChar[ ! IO.EndOfStream => EXIT];
    quit, include: BOOLEAN;
    [quit, include] _ charProc[char];
    IF include THEN buffer _ RefText.InlineAppendChar[buffer, char]
    ELSE IF quit THEN stream.Backup[char];
    IF quit THEN EXIT;
    ENDLOOP;
  value _ Rope.FromRefText[buffer];
  RefText.ReleaseScratch[buffer];
  };

-- Module start code: build the character classification table.
-- Later assignments override the two range loops for individual characters.
-- NOTE(review): 40C is not covered by either range loop; it is classified only
-- if Ascii.SP is 40C, and likewise 177C only if Ascii.DEL is 177C. Confirm
-- against the Ascii interface.
FOR c: CHAR IN [0C..37C] DO
  charType[c] _ ctlType;
  ENDLOOP;
FOR c: CHAR IN [41C..176C] DO
  charType[c] _ atomType;
  ENDLOOP;
charType[Ascii.CR] _ crType;
charType[Ascii.TAB] _ lwspType;
charType[Ascii.SP] _ lwspType;
charType['"] _ specialType;
charType['\\] _ specialType;
charType['(] _ specialType;
charType[')] _ specialType;
charType['[] _ specialType;
charType[']] _ specialType;
charType['<] _ specialType;
charType['>] _ specialType;
charType['@] _ specialType;
charType['.] _ specialType;
charType[',] _ specialType;
charType[':] _ specialType;
charType[';] _ specialType;
charType[Ascii.DEL] _ ctlType;
FOR c: CHAR IN [200C..377C] DO
  charType[c] _ illegalType;
  ENDLOOP;

END.
Κ΄˜headšœ@™@Ibodyšœ"™"Lšœ$™$L™6Lšœ"™"Lšœ,™,code2šΟk ˜ Mš œœœœœœ˜Mšœœ1œ˜AM˜ M˜@Mšœœœ˜-——šœœ˜Mšœ˜Mšœ ˜Mšœœ ˜Mšœ œC˜QMšœ œœœ ˜!š Οnœœœœœ˜XM™CM˜?Mšœ œ˜Mšœœ˜Mšœœ˜!M˜šž œ˜šœ˜šœ œ˜#šœ ˜ šœœ œ#œ˜=M˜M˜M˜—šœ˜Mšœ˜Mšœœœ˜Mšœ˜——Mšœ7˜7šœœ˜˜M˜Mšœ œ˜M˜M˜M˜M˜M˜—˜M˜Mšœ œ˜M˜M˜M˜M˜—˜M˜Mšœ œ˜M˜Mšœ˜M˜M˜—Mšœœœœ˜7—Mšœ0˜0Mšœœœœ˜5—M˜šœ ˜ šœœœ˜šœ œ#˜6Mšœœœ˜—M˜—š˜Mšœ7˜=—M˜—M˜Mšœ œ7˜GM˜˜šœ ˜Mšœ˜—šœœœ˜šœ œ#œ˜8MšœΟc˜*Mšœœœ˜M˜—M˜—šœœœ˜"Mšœ œ!˜1Mšœœœ˜&M˜—šœœ˜ Mšœ œ ˜0Mš œœœœŸ˜J—M˜—M˜Mšœ ˜ M˜Mšœœ˜MšœŸ˜—M˜M˜M˜AM˜——šžœœœœœœœ˜cM™υMšœ œ˜š˜M˜'šœ ˜M˜2M˜1Mšœœ˜—Mšœ˜—M˜—šž œœœœœœœœ˜^šž œ˜Mš œ œœœœ˜'šœ˜Mšœ œœœ˜2Mšœœœ˜#——Mšœ&˜&Mšœœœ ˜TMšœœ˜M˜—šžœœœœœœ œ˜AM™vMšœ œœ˜M˜šžœ˜šœ œœ˜1Mšœ"™"Mšœœ˜ Mšœ œ˜M˜—šœ˜M™ Mšœœ˜ Mšœ œ˜M˜—Mšœœ˜M˜—M˜Mšœ˜"Mš˜—š ž œ œ œœœ˜_Mšœœœ˜.J˜š˜Mšœœ'œ˜7Mšœœ˜Mšœ!˜!Mšœ œ0˜?Mšœœœ˜&Mšœœœ˜Mšœ˜—Mšœ!˜!Mšœ˜Mšœ˜—šœœœ ˜M˜Mš˜—šœœœ ˜M˜Mš˜—Mšœœ ˜Mšœœ ˜Mšœœ ˜M˜M˜M˜M˜M˜M˜M˜M˜M˜M˜M˜M˜M˜M˜šœœœ˜M˜Mš˜—Mšœ˜——…—4ΰ