-- File: MailParserA.mesa -- Last edited by Levin: 29-Mar-82 9:16:14 DIRECTORY Ascii USING [CR, LF, SP, TAB], Inline USING [BITAND], MailParse USING [ endOfInput, endOfList, maxRecipientLength, NameInfo, NameType, ParseErrorCode], MailParsePrivate, Storage USING [FreePages, FreeString, Node, String], String USING [AppendChar, LowerCase, StringBoundsFault]; MailParserA: PROGRAM IMPORTS Inline, MailParsePrivate, Storage, String EXPORTS MailParse, MailParsePrivate = BEGIN OPEN Ascii, MailParse, MailParsePrivate; -- Miscellaneous -- ImpossibleString: ERROR = CODE; -- Exported Types, Procedures, and Signals -- ParseHandle: PUBLIC TYPE = MailParsePrivate.ParseHandle; ParseError: PUBLIC ERROR [code: ParseErrorCode] = CODE; GetFieldName: PUBLIC PROCEDURE [pH: ParseHandle, fieldNameOut: STRING] RETURNS [found: BOOLEAN] = BEGIN ch: CHARACTER; truncated: BOOLEAN ← FALSE; fieldNameOut.length ← 0; DO SELECT ch ← GetNaked[pH] FROM CR, endOfInput => IF fieldNameOut.length = 0 THEN RETURN[FALSE] ELSE EXIT; ': => IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated] ELSE RETURN[TRUE]; endOfList, IN [0C..10C], IN [12C..37C] => EXIT; ENDCASE => IF fieldNameOut.length = fieldNameOut.maxlength THEN truncated ← TRUE ELSE String.AppendChar[fieldNameOut, ch]; ENDLOOP; ERROR ParseError[badFieldName]; END; GetFieldBody: PUBLIC PROCEDURE [ pH: ParseHandle, fieldBodyOut: STRING, suppressWhiteSpace: BOOLEAN ← FALSE] = BEGIN ch: CHARACTER; truncated: BOOLEAN ← FALSE; spaceSeen: BOOLEAN ← TRUE; -- TRUE means ignore leading spaces RemoveTrailingSpace: PROCEDURE = INLINE BEGIN WHILE fieldBodyOut.length > 0 AND fieldBodyOut[fieldBodyOut.length - 1] = SP DO fieldBodyOut.length ← fieldBodyOut.length - 1; ENDLOOP; END; fieldBodyOut.length ← 0; IF fieldBodyOut.maxlength ~= 0 THEN BEGIN DO IF (ch ← GetNaked[pH]) = CR THEN ch ← CheckForFolding[pH]; SELECT ch FROM SP, TAB => BEGIN IF spaceSeen THEN LOOP; IF suppressWhiteSpace THEN {ch ← SP; spaceSeen ← TRUE}; END; endOfInput => GO TO Trouble; endOfList, CR => EXIT; ENDCASE => spaceSeen ← FALSE; IF fieldBodyOut.length = fieldBodyOut.maxlength THEN {truncated ← TRUE; EXIT}; fieldBodyOut[fieldBodyOut.length] ← ch; fieldBodyOut.length ← fieldBodyOut.length + 1; ENDLOOP; RemoveTrailingSpace[]; IF ~truncated THEN RETURN; END; DO -- faster loop for discarding IF (ch ← GetNaked[pH]) = CR THEN ch ← CheckForFolding[pH]; SELECT ch FROM CR, endOfList => EXIT; endOfInput => GO TO Trouble; ENDCASE => truncated ← TRUE; ENDLOOP; IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated]; EXITS Trouble => ERROR ParseError[badFieldBody]; END; SyntaxError: PUBLIC ERROR = CODE; ParseNameList: PUBLIC PROCEDURE [ pH: ParseHandle, process: PROCEDURE [STRING, STRING, STRING, NameInfo] RETURNS [BOOLEAN], write: PROCEDURE [CHARACTER] ← NIL, suppressWhiteSpace: BOOLEAN ← FALSE] = BEGIN error: BOOLEAN ← FALSE; CleanUp: PROCEDURE = BEGIN OPEN pH; DiscardLexemeList[pH, tagLexList]; tagLexList ← NIL; ResetCharacterStorage[pH]; ResetLexicalStorage[pH]; cantFinalize ← recordingChars ← FALSE; END; --main body of ParseNameList pH.write ← write; pH.process ← process; pH.suppressWhiteSpace ← suppressWhiteSpace; InitializeLexicalStorage[pH]; pH.cantFinalize ← pH.recordingChars ← TRUE; pH.nameOutput ← pH.scanningTag ← FALSE; pH.nameInfo ← [nesting: none, hasTag: FALSE, type: normal]; IF pH.simpleName = NIL THEN pH.simpleName ← Storage.String[maxRecipientLength]; IF pH.registry = NIL THEN pH.registry ← Storage.String[maxRecipientLength]; IF pH.arpaHost = NIL THEN pH.arpaHost ← Storage.String[maxRecipientLength]; ParseList[pH, endOfInput ! SyntaxError, EndOfInput => {error ← TRUE; CONTINUE}; UNWIND => CleanUp[]]; CleanUp[]; IF error THEN ERROR ParseError[badFieldBody]; END; -- Support procedures for ParseNameList -- ParseList: PUBLIC PROCEDURE [pH: ParseHandle, terminator: CHARACTER] = BEGIN OPEN pH; MSGdl: ERROR = CODE; outputThisLevel ← FALSE; DO -- the somewhat obscure coding of this (inner) loop is for speed lexType: LexemeType; char: CHARACTER ← GetNaked[pH]; IF Inline.BITAND[char, 177B] <= 100C THEN BEGIN IF char = CR THEN char ← CheckForFolding[pH]; SELECT char FROM SP, TAB => GO TO LexemePart; nameSeparator, CR, endOfList, endOfInput => BEGIN ProcessName[pH]; IF terminator = '; AND ~nameSeen THEN {backUp[]; ERROR MSGdl}; IF char ~= nameSeparator THEN IF nameInfo.nesting = none THEN EXIT ELSE GO TO Trouble; END; '@ => IF EmptyLexemeListOrSpace[pH] THEN AccumulateFileName[pH] ELSE GO TO LexemePart; '< => RecurForBrackets[pH, '<, '>]; ': => IF AccumulateTagOrPrelude[pH] = prelude THEN BEGIN nameSeen ← terminator ~= endOfInput; RecurForBrackets[pH, ':, '; ! MSGdl => BEGIN IF terminator ~= endOfInput THEN GO TO Trouble; BuildMSGdlAtom[pH]; -- do this before the UNWIND! CONTINUE END]; END; '>, '; => BEGIN IF char ~= terminator THEN GO TO Trouble; ProcessName[pH]; nameSeen ← TRUE; IF outputThisLevel AND write ~= NIL THEN write[char]; EXIT END; openComment => AccumulateComment[pH]; stringQuote => AccumulateQuotedString[pH]; closeComment => GO TO Trouble; ENDCASE => IF char < 040C THEN GO TO Trouble ELSE GO TO LexemePart; LOOP; EXITS LexemePart => lexType ← SELECT char FROM '. => dot, '@ => at, SP, TAB => space, ENDCASE => atom; END ELSE lexType ← atom; MaybeNewLexemeInline[pH, lexType]; WriteToStoreInline[pH, char]; ENDLOOP; EXITS Trouble => ERROR SyntaxError; END; ProcessName: PROCEDURE [pH: ParseHandle] = BEGIN OPEN pH; ParseName: PROCEDURE RETURNS [BOOLEAN] = -- INLINE -- BEGIN state: {initial, possibleHost, hostSeen, possibleRegistry} ← initial; InsertQualifier: PROCEDURE [lex: Lexeme, type: LexemeType, s: STRING] RETURNS [qL: Lexeme] = BEGIN qL ← MakeLexemeFromString[pH, type, s]; AddLexeme[pH, qL, lex]; AddLexeme[pH, MakeLexeme[pH, atom], lex]; END; GetQualifier: PROCEDURE [lex: Lexeme, s: STRING] = BEGIN lex ← lex.next; IF lex.type = space THEN lex ← lex.next; AppendLexemeValue[pH, s, lex]; END; AtomIsReallyAt: PROCEDURE [lex: Lexeme] RETURNS [BOOLEAN] = INLINE BEGIN RETURN[ lex.length = 2 AND lex.next.type = space AND lex.prev.type = space AND String.LowerCase[ReadFromStore[pH, lex.start]] = 'a AND String.LowerCase[ReadFromStore[pH, lex.start + 1]] = 't] END; IF LexemeCount[pH] = none THEN RETURN[FALSE]; nameSeen ← nameSeen OR ~EmptyLexemeListOrSpace[pH]; simpleName.length ← registry.length ← arpaHost.length ← 0; FOR lex: Lexeme ← lexHead.prev, lex.prev UNTIL lex.type = head DO IF lex.type = space THEN LOOP; SELECT state FROM initial => IF lex.type = atom THEN state ← possibleHost; possibleHost => BEGIN SELECT lex.type FROM at => NULL; dot => {dotLex ← lex; EXIT}; atom => IF AtomIsReallyAt[lex] THEN BEGIN lex.start ← lex.prev.start; lex.length ← lex.length + lex.next.length + lex.prev.length; lex.type ← at; DeleteLexeme[pH, lex.prev]; DeleteLexeme[pH, lex.next]; END ELSE EXIT; ENDCASE => EXIT; state ← hostSeen; atLex ← lex; END; hostSeen => IF lex.type = atom THEN state ← possibleRegistry ELSE EXIT; possibleRegistry => {IF lex.type = dot THEN dotLex ← lex; EXIT}; ENDCASE; ENDLOOP; IF atLex = NIL THEN atLex ← InsertQualifier[lexHead, at, " at "L] ELSE GetQualifier[atLex, arpaHost]; IF dotLex = NIL THEN dotLex ← InsertQualifier[atLex, dot, "."L] ELSE GetQualifier[dotLex, registry]; nameInfo.type ← AppendSublistToString[pH, simpleName, lexHead, dotLex]; nameInfo.hasTag ← tagLexList ~= NIL; RETURN[simpleName.length ~= 0 OR registry.length ~= 0 OR arpaHost.length ~= 0] END; IF scanningTag THEN ERROR SyntaxError; dotLex ← atLex ← NIL; TerminateLexeme[pH]; IF ParseName[ ! String.StringBoundsFault --[s] RETURNS [ns]-- => BEGIN ns ← Storage.String[s.maxlength + s.maxlength/2]; SELECT s FROM simpleName => simpleName ← ns; registry => registry ← ns; arpaHost => arpaHost ← ns; ENDCASE => ERROR ImpossibleString; Storage.FreeString[s]; RETRY END] AND process[simpleName, registry, arpaHost, nameInfo] THEN BEGIN IF write ~= NIL THEN OutputName[pH]; nameOutput ← outputThisLevel ← TRUE; END; IF tagLexList ~= NIL THEN BEGIN TruncateCharacterStorage[pH, tagLexList.start]; DiscardLexemeList[pH, tagLexList]; tagLexList ← NIL; END ELSE IF LexemeCount[pH] > none THEN TruncateCharacterStorage[pH, lexHead.next.start]; ResetLexicalStorage[pH]; END; AppendSublistToString: PUBLIC PROCEDURE [ pH: ParseHandle, s: STRING, first, last: Lexeme] RETURNS [nameType: NameType] = BEGIN lex: Lexeme ← first; nameType ← normal; DO SELECT lex.type FROM space, head => NULL; ENDCASE => BEGIN IF lex.prev.type = space AND s.length ~= 0 THEN {String.AppendChar[s, SP]; nameType ← multiAtom}; AppendLexemeValue[pH, s, lex]; IF nameType ~= multiAtom THEN SELECT s[0] FROM stringQuote => nameType ← quotedString; '@ => nameType ← file; ENDCASE => SELECT s[s.length-1] FROM ': => nameType ← msgDL; '↑ => nameType ← publicDL; ENDCASE; END; IF (lex ← lex.next) = last THEN EXIT; ENDLOOP; END; -- Input Scanner -- EndOfInput: PUBLIC SIGNAL = CODE; Get: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = -- obtains next input character and smoothes over a few lexical quirks. This -- procedure deals with Arpa-standard line-folding, except that bare CR characters -- are not permitted. BEGIN OPEN pH; SELECT char ← nextChar[] FROM endOfList, endOfInput => SIGNAL EndOfInput; CR => char ← CheckForFoldingInline[pH]; ENDCASE; END; CheckForFolding: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = {RETURN[CheckForFoldingInline[pH]]}; CheckForFoldingInline: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = INLINE -- This procedure deals with Arpa-standard line-folding, except that bare CR -- characters are not permitted. BEGIN OPEN pH; lfSeen: BOOLEAN; char ← nextChar[]; IF (lfSeen ← (char = LF)) THEN char ← nextChar[]; SELECT char FROM SP, TAB => IF recordingChars THEN BEGIN MaybeNewLexeme[pH, space]; WriteToStore[pH, CR]; IF lfSeen THEN WriteToStore[pH, LF]; END; ENDCASE => {char ← CR; backUp[]}; END; -- Lexeme Storage -- ResetLexicalStorage: PUBLIC PROCEDURE [pH: ParseHandle] = -- flushes the lexeme table without destroying the backing storage behind it. {FlushLexemeList[pH]; pH.lexStart ← GetPosition[pH]; pH.curLexType ← null}; LexemeCount: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [Magnitude] = BEGIN RETURN[ SELECT pH.lexHead.next FROM pH.lexHead => none, pH.lexHead.prev => one, ENDCASE => many] END; EmptyLexemeListOrSpace: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [BOOLEAN] = BEGIN TerminateLexeme[pH]; SELECT LexemeCount[pH] FROM none => RETURN[TRUE]; one => RETURN[pH.lexHead.next.type = space] ENDCASE; RETURN[FALSE] END; MakeLexeme: PUBLIC PROCEDURE [ pH: ParseHandle, type: LexemeType, start: CharIndex ← 0, length: CARDINAL ← 0] RETURNS [lex: Lexeme] = -- bundles up the specified range of characters as a lexeme of the indicated type, -- and returns it. BEGIN IF (lex ← pH.freeLexHead) = NIL THEN lex ← Storage.Node[SIZE[Lex]] ELSE pH.freeLexHead ← lex.next; lex↑ ← Lex[next: NIL, prev: NIL, type: type, length: length, start: start]; END; MakeLexemeFromString: PUBLIC PROCEDURE [pH: ParseHandle, type: LexemeType, s: STRING] RETURNS [lex: Lexeme] = -- bundles up the given string as a lexeme of the indicated type and returns it. BEGIN lex ← MakeLexeme[pH: pH, type: type, start: GetPosition[pH], length: s.length]; FOR i: CARDINAL IN [0..s.length) DO WriteToStore[pH, s[i]] ENDLOOP; END; AppendNewLexeme: PROCEDURE [pH: ParseHandle, type: LexemeType] = INLINE -- bundles up the current range of characters as a lexeme of the indicated type -- and appends it to the lexeme list. BEGIN pos: CharIndex = GetPosition[pH]; start: CharIndex = pH.lexStart; IF pos = start -- OR type = null -- THEN RETURN; AddLexeme[pH, MakeLexeme[pH: pH, type: type, start: start, length: pos - start]]; pH.lexStart ← pos; END; MaybeNewLexemeInline: PROCEDURE [pH: ParseHandle, new: LexemeType] = INLINE -- a character belonging to a lexeme of type 'new' has been found. This may extend -- a previous lexeme or begin a new one. MaybeNewLexeme decides which case -- applies and acts appropriately. BEGIN IF new = pH.curLexType THEN RETURN; AppendNewLexeme[pH, pH.curLexType]; pH.curLexType ← new; END; MaybeNewLexeme: PUBLIC PROCEDURE [pH: ParseHandle, new: LexemeType] = {MaybeNewLexemeInline[pH, new]}; AppendLexemeValue: PUBLIC PROCEDURE [pH: ParseHandle, s: STRING, lex: Lexeme] = -- appends the contents of the lexeme 'lex' to the string 's'. BEGIN FOR i: CARDINAL IN [lex.start..lex.start + lex.length) DO String.AppendChar[s, ReadFromStore[pH, i]]; ENDLOOP; END; RemoveLexemeList: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [head: Lexeme] = -- removes all lexemes except 'lexHead' from the list, returning the new 'head'. BEGIN IF (head ← pH.lexHead.next) = pH.lexHead THEN RETURN[NIL]; head.prev ← pH.lexHead.prev; pH.lexHead.prev.next ← head; pH.lexHead.next ← pH.lexHead.prev ← pH.lexHead; END; DiscardLexemeList: PUBLIC PROCEDURE [pH: ParseHandle, head: Lexeme] = -- releases all lexemes (including 'head'). BEGIN IF head = NIL THEN RETURN; head.prev.next ← pH.freeLexHead; pH.freeLexHead ← head; END; AddLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme, where: Lexeme ← NIL] = -- adds 'lex' to the lexeme list immediately before 'where'. If 'where' is -- defaulted, the lexeme goes at the end of the list. BEGIN IF where = NIL THEN where ← pH.lexHead; lex.next ← where; lex.prev ← where.prev; lex.prev.next ← where.prev ← lex; END; DeleteLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme] = -- removes 'lex' from the lexeme list. BEGIN IF lex.next ~= NIL THEN {lex.prev.next ← lex.next; lex.next.prev ← lex.prev}; lex.next ← pH.freeLexHead; pH.freeLexHead ← lex; END; -- Character Storage TruncateCharacterStorage: PUBLIC PROCEDURE [pH: ParseHandle, killFrom: CharIndex] = -- releases backing storage for buffered names. BEGIN OPEN pH; IF ~(killFrom = 0 AND bufferList.header.count = 0) THEN EnsureIndexInCache[pH, IF killFrom = 0 THEN 0 ELSE killFrom - 1]; DeleteSuccessorBuffers[cache]; cache.header.count ← killFrom - cache.header.first; IF preludeOut > killFrom THEN preludeOut ← killFrom; END; ReadFromStore: PUBLIC PROCEDURE [pH: ParseHandle, index: CharIndex] RETURNS [CHARACTER] = -- returns the character at position 'index' in backing storage. BEGIN EnsureIndexInCache[pH, index]; RETURN[pH.cache.chars[index - pH.cache.header.first]] END; WriteToStore: PUBLIC PROCEDURE [pH: ParseHandle, char: CHARACTER] = {WriteToStoreInline[pH, char]}; WriteToStoreInline: PROCEDURE [pH: ParseHandle, char: CHARACTER] = INLINE -- appends 'char' to backing storage. BEGIN b: CharBuffer ← pH.cache; UNTIL b.header.next = NIL DO b ← b.header.next ENDLOOP; IF b.header.count = maxCharsPerBuffer THEN { b.header.next ← AddBuffer[pH, b.header.first+maxCharsPerBuffer, b]; b ← b.header.next}; b.chars[b.header.count] ← char; b.header.count ← b.header.count + 1; END; GetPosition: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [CharIndex] = -- returns the character index of the next free character position in -- backing storage. BEGIN b: CharBuffer ← pH.cache; UNTIL b.header.next = NIL DO b ← b.header.next ENDLOOP; RETURN[b.header.first + b.header.count] END; EnsureIndexInCache: PROCEDURE [pH: ParseHandle, i: CharIndex] = INLINE BEGIN IF i ~IN [pH.cache.header.first..pH.cache.header.first+pH.cache.header.count) THEN LoadCache[pH, i]; END; DeleteSuccessorBuffers: PUBLIC PROCEDURE [b: CharBuffer] = BEGIN tb: CharBuffer ← b.header.next; b.header.next ← NIL; UNTIL (b ← tb) = NIL DO tb ← b.header.next; Storage.FreePages[b]; ENDLOOP; END; END.