-- File: MailParserA.mesa -- Last edited by Levin: 29-Mar-82 9:16:14 DIRECTORY Ascii USING [CR, LF, SP, TAB], Inline USING [BITAND], MailParse USING [ endOfInput, endOfList, maxRecipientLength, NameInfo, NameType, ParseErrorCode], MailParsePrivate, Storage USING [FreePages, FreeString, Node, String], String USING [AppendChar, LowerCase, StringBoundsFault]; MailParserA: PROGRAM IMPORTS Inline, MailParsePrivate, Storage, String EXPORTS MailParse, MailParsePrivate = BEGIN OPEN Ascii, MailParse, MailParsePrivate; -- Miscellaneous -- ImpossibleString: ERROR = CODE; -- Exported Types, Procedures, and Signals -- ParseHandle: PUBLIC TYPE = MailParsePrivate.ParseHandle; ParseError: PUBLIC ERROR [code: ParseErrorCode] = CODE; GetFieldName: PUBLIC PROCEDURE [pH: ParseHandle, fieldNameOut: STRING] RETURNS [found: BOOLEAN] = BEGIN ch: CHARACTER; truncated: BOOLEAN _ FALSE; fieldNameOut.length _ 0; DO SELECT ch _ GetNaked[pH] FROM CR, endOfInput => IF fieldNameOut.length = 0 THEN RETURN[FALSE] ELSE EXIT; ': => IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated] ELSE RETURN[TRUE]; endOfList, IN [0C..10C], IN [12C..37C] => EXIT; ENDCASE => IF fieldNameOut.length = fieldNameOut.maxlength THEN truncated _ TRUE ELSE String.AppendChar[fieldNameOut, ch]; ENDLOOP; ERROR ParseError[badFieldName]; END; GetFieldBody: PUBLIC PROCEDURE [ pH: ParseHandle, fieldBodyOut: STRING, suppressWhiteSpace: BOOLEAN _ FALSE] = BEGIN ch: CHARACTER; truncated: BOOLEAN _ FALSE; spaceSeen: BOOLEAN _ TRUE; -- TRUE means ignore leading spaces RemoveTrailingSpace: PROCEDURE = INLINE BEGIN WHILE fieldBodyOut.length > 0 AND fieldBodyOut[fieldBodyOut.length - 1] = SP DO fieldBodyOut.length _ fieldBodyOut.length - 1; ENDLOOP; END; fieldBodyOut.length _ 0; IF fieldBodyOut.maxlength ~= 0 THEN BEGIN DO IF (ch _ GetNaked[pH]) = CR THEN ch _ CheckForFolding[pH]; SELECT ch FROM SP, TAB => BEGIN IF spaceSeen THEN LOOP; IF suppressWhiteSpace THEN {ch _ SP; spaceSeen _ TRUE}; END; endOfInput => GO TO Trouble; endOfList, CR => EXIT; ENDCASE => spaceSeen _ FALSE; IF fieldBodyOut.length = fieldBodyOut.maxlength THEN {truncated _ TRUE; EXIT}; fieldBodyOut[fieldBodyOut.length] _ ch; fieldBodyOut.length _ fieldBodyOut.length + 1; ENDLOOP; RemoveTrailingSpace[]; IF ~truncated THEN RETURN; END; DO -- faster loop for discarding IF (ch _ GetNaked[pH]) = CR THEN ch _ CheckForFolding[pH]; SELECT ch FROM CR, endOfList => EXIT; endOfInput => GO TO Trouble; ENDCASE => truncated _ TRUE; ENDLOOP; IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated]; EXITS Trouble => ERROR ParseError[badFieldBody]; END; SyntaxError: PUBLIC ERROR = CODE; ParseNameList: PUBLIC PROCEDURE [ pH: ParseHandle, process: PROCEDURE [STRING, STRING, STRING, NameInfo] RETURNS [BOOLEAN], write: PROCEDURE [CHARACTER] _ NIL, suppressWhiteSpace: BOOLEAN _ FALSE] = BEGIN error: BOOLEAN _ FALSE; CleanUp: PROCEDURE = BEGIN OPEN pH; DiscardLexemeList[pH, tagLexList]; tagLexList _ NIL; ResetCharacterStorage[pH]; ResetLexicalStorage[pH]; cantFinalize _ recordingChars _ FALSE; END; --main body of ParseNameList pH.write _ write; pH.process _ process; pH.suppressWhiteSpace _ suppressWhiteSpace; InitializeLexicalStorage[pH]; pH.cantFinalize _ pH.recordingChars _ TRUE; pH.nameOutput _ pH.scanningTag _ FALSE; pH.nameInfo _ [nesting: none, hasTag: FALSE, type: normal]; IF pH.simpleName = NIL THEN pH.simpleName _ Storage.String[maxRecipientLength]; IF pH.registry = NIL THEN pH.registry _ Storage.String[maxRecipientLength]; IF pH.arpaHost = NIL THEN pH.arpaHost _ Storage.String[maxRecipientLength]; ParseList[pH, endOfInput ! SyntaxError, EndOfInput => {error _ TRUE; CONTINUE}; UNWIND => CleanUp[]]; CleanUp[]; IF error THEN ERROR ParseError[badFieldBody]; END; -- Support procedures for ParseNameList -- ParseList: PUBLIC PROCEDURE [pH: ParseHandle, terminator: CHARACTER] = BEGIN OPEN pH; MSGdl: ERROR = CODE; outputThisLevel _ FALSE; DO -- the somewhat obscure coding of this (inner) loop is for speed lexType: LexemeType; char: CHARACTER _ GetNaked[pH]; IF Inline.BITAND[char, 177B] <= 100C THEN BEGIN IF char = CR THEN char _ CheckForFolding[pH]; SELECT char FROM SP, TAB => GO TO LexemePart; nameSeparator, CR, endOfList, endOfInput => BEGIN ProcessName[pH]; IF terminator = '; AND ~nameSeen THEN {backUp[]; ERROR MSGdl}; IF char ~= nameSeparator THEN IF nameInfo.nesting = none THEN EXIT ELSE GO TO Trouble; END; '@ => IF EmptyLexemeListOrSpace[pH] THEN AccumulateFileName[pH] ELSE GO TO LexemePart; '< => RecurForBrackets[pH, '<, '>]; ': => IF AccumulateTagOrPrelude[pH] = prelude THEN BEGIN nameSeen _ terminator ~= endOfInput; RecurForBrackets[pH, ':, '; ! MSGdl => BEGIN IF terminator ~= endOfInput THEN GO TO Trouble; BuildMSGdlAtom[pH]; -- do this before the UNWIND! CONTINUE END]; END; '>, '; => BEGIN IF char ~= terminator THEN GO TO Trouble; ProcessName[pH]; nameSeen _ TRUE; IF outputThisLevel AND write ~= NIL THEN write[char]; EXIT END; openComment => AccumulateComment[pH]; stringQuote => AccumulateQuotedString[pH]; closeComment => GO TO Trouble; ENDCASE => IF char < 040C THEN GO TO Trouble ELSE GO TO LexemePart; LOOP; EXITS LexemePart => lexType _ SELECT char FROM '. => dot, '@ => at, SP, TAB => space, ENDCASE => atom; END ELSE lexType _ atom; MaybeNewLexemeInline[pH, lexType]; WriteToStoreInline[pH, char]; ENDLOOP; EXITS Trouble => ERROR SyntaxError; END; ProcessName: PROCEDURE [pH: ParseHandle] = BEGIN OPEN pH; ParseName: PROCEDURE RETURNS [BOOLEAN] = -- INLINE -- BEGIN state: {initial, possibleHost, hostSeen, possibleRegistry} _ initial; InsertQualifier: PROCEDURE [lex: Lexeme, type: LexemeType, s: STRING] RETURNS [qL: Lexeme] = BEGIN qL _ MakeLexemeFromString[pH, type, s]; AddLexeme[pH, qL, lex]; AddLexeme[pH, MakeLexeme[pH, atom], lex]; END; GetQualifier: PROCEDURE [lex: Lexeme, s: STRING] = BEGIN lex _ lex.next; IF lex.type = space THEN lex _ lex.next; AppendLexemeValue[pH, s, lex]; END; AtomIsReallyAt: PROCEDURE [lex: Lexeme] RETURNS [BOOLEAN] = INLINE BEGIN RETURN[ lex.length = 2 AND lex.next.type = space AND lex.prev.type = space AND String.LowerCase[ReadFromStore[pH, lex.start]] = 'a AND String.LowerCase[ReadFromStore[pH, lex.start + 1]] = 't] END; IF LexemeCount[pH] = none THEN RETURN[FALSE]; nameSeen _ nameSeen OR ~EmptyLexemeListOrSpace[pH]; simpleName.length _ registry.length _ arpaHost.length _ 0; FOR lex: Lexeme _ lexHead.prev, lex.prev UNTIL lex.type = head DO IF lex.type = space THEN LOOP; SELECT state FROM initial => IF lex.type = atom THEN state _ possibleHost; possibleHost => BEGIN SELECT lex.type FROM at => NULL; dot => {dotLex _ lex; EXIT}; atom => IF AtomIsReallyAt[lex] THEN BEGIN lex.start _ lex.prev.start; lex.length _ lex.length + lex.next.length + lex.prev.length; lex.type _ at; DeleteLexeme[pH, lex.prev]; DeleteLexeme[pH, lex.next]; END ELSE EXIT; ENDCASE => EXIT; state _ hostSeen; atLex _ lex; END; hostSeen => IF lex.type = atom THEN state _ possibleRegistry ELSE EXIT; possibleRegistry => {IF lex.type = dot THEN dotLex _ lex; EXIT}; ENDCASE; ENDLOOP; IF atLex = NIL THEN atLex _ InsertQualifier[lexHead, at, " at "L] ELSE GetQualifier[atLex, arpaHost]; IF dotLex = NIL THEN dotLex _ InsertQualifier[atLex, dot, "."L] ELSE GetQualifier[dotLex, registry]; nameInfo.type _ AppendSublistToString[pH, simpleName, lexHead, dotLex]; nameInfo.hasTag _ tagLexList ~= NIL; RETURN[simpleName.length ~= 0 OR registry.length ~= 0 OR arpaHost.length ~= 0] END; IF scanningTag THEN ERROR SyntaxError; dotLex _ atLex _ NIL; TerminateLexeme[pH]; IF ParseName[ ! String.StringBoundsFault --[s] RETURNS [ns]-- => BEGIN ns _ Storage.String[s.maxlength + s.maxlength/2]; SELECT s FROM simpleName => simpleName _ ns; registry => registry _ ns; arpaHost => arpaHost _ ns; ENDCASE => ERROR ImpossibleString; Storage.FreeString[s]; RETRY END] AND process[simpleName, registry, arpaHost, nameInfo] THEN BEGIN IF write ~= NIL THEN OutputName[pH]; nameOutput _ outputThisLevel _ TRUE; END; IF tagLexList ~= NIL THEN BEGIN TruncateCharacterStorage[pH, tagLexList.start]; DiscardLexemeList[pH, tagLexList]; tagLexList _ NIL; END ELSE IF LexemeCount[pH] > none THEN TruncateCharacterStorage[pH, lexHead.next.start]; ResetLexicalStorage[pH]; END; AppendSublistToString: PUBLIC PROCEDURE [ pH: ParseHandle, s: STRING, first, last: Lexeme] RETURNS [nameType: NameType] = BEGIN lex: Lexeme _ first; nameType _ normal; DO SELECT lex.type FROM space, head => NULL; ENDCASE => BEGIN IF lex.prev.type = space AND s.length ~= 0 THEN {String.AppendChar[s, SP]; nameType _ multiAtom}; AppendLexemeValue[pH, s, lex]; IF nameType ~= multiAtom THEN SELECT s[0] FROM stringQuote => nameType _ quotedString; '@ => nameType _ file; ENDCASE => SELECT s[s.length-1] FROM ': => nameType _ msgDL; '^ => nameType _ publicDL; ENDCASE; END; IF (lex _ lex.next) = last THEN EXIT; ENDLOOP; END; -- Input Scanner -- EndOfInput: PUBLIC SIGNAL = CODE; Get: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = -- obtains next input character and smoothes over a few lexical quirks. This -- procedure deals with Arpa-standard line-folding, except that bare CR characters -- are not permitted. BEGIN OPEN pH; SELECT char _ nextChar[] FROM endOfList, endOfInput => SIGNAL EndOfInput; CR => char _ CheckForFoldingInline[pH]; ENDCASE; END; CheckForFolding: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = {RETURN[CheckForFoldingInline[pH]]}; CheckForFoldingInline: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = INLINE -- This procedure deals with Arpa-standard line-folding, except that bare CR -- characters are not permitted. BEGIN OPEN pH; lfSeen: BOOLEAN; char _ nextChar[]; IF (lfSeen _ (char = LF)) THEN char _ nextChar[]; SELECT char FROM SP, TAB => IF recordingChars THEN BEGIN MaybeNewLexeme[pH, space]; WriteToStore[pH, CR]; IF lfSeen THEN WriteToStore[pH, LF]; END; ENDCASE => {char _ CR; backUp[]}; END; -- Lexeme Storage -- ResetLexicalStorage: PUBLIC PROCEDURE [pH: ParseHandle] = -- flushes the lexeme table without destroying the backing storage behind it. {FlushLexemeList[pH]; pH.lexStart _ GetPosition[pH]; pH.curLexType _ null}; LexemeCount: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [Magnitude] = BEGIN RETURN[ SELECT pH.lexHead.next FROM pH.lexHead => none, pH.lexHead.prev => one, ENDCASE => many] END; EmptyLexemeListOrSpace: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [BOOLEAN] = BEGIN TerminateLexeme[pH]; SELECT LexemeCount[pH] FROM none => RETURN[TRUE]; one => RETURN[pH.lexHead.next.type = space] ENDCASE; RETURN[FALSE] END; MakeLexeme: PUBLIC PROCEDURE [ pH: ParseHandle, type: LexemeType, start: CharIndex _ 0, length: CARDINAL _ 0] RETURNS [lex: Lexeme] = -- bundles up the specified range of characters as a lexeme of the indicated type, -- and returns it. BEGIN IF (lex _ pH.freeLexHead) = NIL THEN lex _ Storage.Node[SIZE[Lex]] ELSE pH.freeLexHead _ lex.next; lex^ _ Lex[next: NIL, prev: NIL, type: type, length: length, start: start]; END; MakeLexemeFromString: PUBLIC PROCEDURE [pH: ParseHandle, type: LexemeType, s: STRING] RETURNS [lex: Lexeme] = -- bundles up the given string as a lexeme of the indicated type and returns it. BEGIN lex _ MakeLexeme[pH: pH, type: type, start: GetPosition[pH], length: s.length]; FOR i: CARDINAL IN [0..s.length) DO WriteToStore[pH, s[i]] ENDLOOP; END; AppendNewLexeme: PROCEDURE [pH: ParseHandle, type: LexemeType] = INLINE -- bundles up the current range of characters as a lexeme of the indicated type -- and appends it to the lexeme list. BEGIN pos: CharIndex = GetPosition[pH]; start: CharIndex = pH.lexStart; IF pos = start -- OR type = null -- THEN RETURN; AddLexeme[pH, MakeLexeme[pH: pH, type: type, start: start, length: pos - start]]; pH.lexStart _ pos; END; MaybeNewLexemeInline: PROCEDURE [pH: ParseHandle, new: LexemeType] = INLINE -- a character belonging to a lexeme of type 'new' has been found. This may extend -- a previous lexeme or begin a new one. MaybeNewLexeme decides which case -- applies and acts appropriately. BEGIN IF new = pH.curLexType THEN RETURN; AppendNewLexeme[pH, pH.curLexType]; pH.curLexType _ new; END; MaybeNewLexeme: PUBLIC PROCEDURE [pH: ParseHandle, new: LexemeType] = {MaybeNewLexemeInline[pH, new]}; AppendLexemeValue: PUBLIC PROCEDURE [pH: ParseHandle, s: STRING, lex: Lexeme] = -- appends the contents of the lexeme 'lex' to the string 's'. BEGIN FOR i: CARDINAL IN [lex.start..lex.start + lex.length) DO String.AppendChar[s, ReadFromStore[pH, i]]; ENDLOOP; END; RemoveLexemeList: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [head: Lexeme] = -- removes all lexemes except 'lexHead' from the list, returning the new 'head'. BEGIN IF (head _ pH.lexHead.next) = pH.lexHead THEN RETURN[NIL]; head.prev _ pH.lexHead.prev; pH.lexHead.prev.next _ head; pH.lexHead.next _ pH.lexHead.prev _ pH.lexHead; END; DiscardLexemeList: PUBLIC PROCEDURE [pH: ParseHandle, head: Lexeme] = -- releases all lexemes (including 'head'). BEGIN IF head = NIL THEN RETURN; head.prev.next _ pH.freeLexHead; pH.freeLexHead _ head; END; AddLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme, where: Lexeme _ NIL] = -- adds 'lex' to the lexeme list immediately before 'where'. If 'where' is -- defaulted, the lexeme goes at the end of the list. BEGIN IF where = NIL THEN where _ pH.lexHead; lex.next _ where; lex.prev _ where.prev; lex.prev.next _ where.prev _ lex; END; DeleteLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme] = -- removes 'lex' from the lexeme list. BEGIN IF lex.next ~= NIL THEN {lex.prev.next _ lex.next; lex.next.prev _ lex.prev}; lex.next _ pH.freeLexHead; pH.freeLexHead _ lex; END; -- Character Storage TruncateCharacterStorage: PUBLIC PROCEDURE [pH: ParseHandle, killFrom: CharIndex] = -- releases backing storage for buffered names. BEGIN OPEN pH; IF ~(killFrom = 0 AND bufferList.header.count = 0) THEN EnsureIndexInCache[pH, IF killFrom = 0 THEN 0 ELSE killFrom - 1]; DeleteSuccessorBuffers[cache]; cache.header.count _ killFrom - cache.header.first; IF preludeOut > killFrom THEN preludeOut _ killFrom; END; ReadFromStore: PUBLIC PROCEDURE [pH: ParseHandle, index: CharIndex] RETURNS [CHARACTER] = -- returns the character at position 'index' in backing storage. BEGIN EnsureIndexInCache[pH, index]; RETURN[pH.cache.chars[index - pH.cache.header.first]] END; WriteToStore: PUBLIC PROCEDURE [pH: ParseHandle, char: CHARACTER] = {WriteToStoreInline[pH, char]}; WriteToStoreInline: PROCEDURE [pH: ParseHandle, char: CHARACTER] = INLINE -- appends 'char' to backing storage. BEGIN b: CharBuffer _ pH.cache; UNTIL b.header.next = NIL DO b _ b.header.next ENDLOOP; IF b.header.count = maxCharsPerBuffer THEN { b.header.next _ AddBuffer[pH, b.header.first+maxCharsPerBuffer, b]; b _ b.header.next}; b.chars[b.header.count] _ char; b.header.count _ b.header.count + 1; END; GetPosition: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [CharIndex] = -- returns the character index of the next free character position in -- backing storage. BEGIN b: CharBuffer _ pH.cache; UNTIL b.header.next = NIL DO b _ b.header.next ENDLOOP; RETURN[b.header.first + b.header.count] END; EnsureIndexInCache: PROCEDURE [pH: ParseHandle, i: CharIndex] = INLINE BEGIN IF i ~IN [pH.cache.header.first..pH.cache.header.first+pH.cache.header.count) THEN LoadCache[pH, i]; END; DeleteSuccessorBuffers: PUBLIC PROCEDURE [b: CharBuffer] = BEGIN tb: CharBuffer _ b.header.next; b.header.next _ NIL; UNTIL (b _ tb) = NIL DO tb _ b.header.next; Storage.FreePages[b]; ENDLOOP; END; END.