-- File: MailParserA.mesa
-- Last edited by Levin: 29-Mar-82  9:16:14

DIRECTORY
  Ascii USING [CR, LF, SP, TAB],
  Inline USING [BITAND],
  MailParse USING [
    endOfInput, endOfList, maxRecipientLength, NameInfo, NameType, ParseErrorCode],
  MailParsePrivate,
  Storage USING [FreePages, FreeString, Node, String],
  String USING [AppendChar, LowerCase, StringBoundsFault];

MailParserA: PROGRAM
  IMPORTS Inline, MailParsePrivate, Storage, String
  EXPORTS MailParse, MailParsePrivate =

  BEGIN OPEN Ascii, MailParse, MailParsePrivate;


  -- Miscellaneous --

  ImpossibleString: ERROR = CODE;


  -- Exported Types, Procedures, and Signals --
  
  ParseHandle: PUBLIC TYPE = MailParsePrivate.ParseHandle;

  ParseError: PUBLIC ERROR [code: ParseErrorCode] = CODE;

  GetFieldName: PUBLIC PROCEDURE [pH: ParseHandle, fieldNameOut: STRING]
    RETURNS [found: BOOLEAN] =
    BEGIN
    ch: CHARACTER;
    truncated: BOOLEAN ← FALSE;

    fieldNameOut.length ← 0;
    DO
      SELECT ch ← GetNaked[pH] FROM
	CR, endOfInput =>
	  IF fieldNameOut.length = 0 THEN RETURN[FALSE] ELSE EXIT;
	': =>
	  IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated]
	  ELSE RETURN[TRUE];
	endOfList, IN [0C..10C], IN [12C..37C] => EXIT;
	ENDCASE =>
	  IF fieldNameOut.length = fieldNameOut.maxlength THEN truncated ← TRUE
	  ELSE String.AppendChar[fieldNameOut, ch];
      ENDLOOP;
    ERROR ParseError[badFieldName];
    END;

  GetFieldBody: PUBLIC PROCEDURE [
    pH: ParseHandle, fieldBodyOut: STRING, suppressWhiteSpace: BOOLEAN ← FALSE] =
    BEGIN
    ch: CHARACTER;
    truncated: BOOLEAN ← FALSE;
    spaceSeen: BOOLEAN ← TRUE; -- TRUE means ignore leading spaces

    RemoveTrailingSpace: PROCEDURE = INLINE
      BEGIN
      WHILE fieldBodyOut.length > 0 AND fieldBodyOut[fieldBodyOut.length - 1] = SP DO
	fieldBodyOut.length ← fieldBodyOut.length - 1;
	ENDLOOP;
      END;

    fieldBodyOut.length ← 0;
    IF fieldBodyOut.maxlength ~= 0 THEN
      BEGIN
      DO
	IF (ch ← GetNaked[pH]) = CR THEN ch ← CheckForFolding[pH];
	SELECT ch FROM
	  SP, TAB =>
	    BEGIN
	    IF spaceSeen THEN LOOP;
	    IF suppressWhiteSpace THEN {ch ← SP; spaceSeen ← TRUE};
	    END;
	  endOfInput => GO TO Trouble;
	  endOfList, CR => EXIT;
	  ENDCASE => spaceSeen ← FALSE;
	IF fieldBodyOut.length = fieldBodyOut.maxlength THEN {truncated ← TRUE; EXIT};
	fieldBodyOut[fieldBodyOut.length] ← ch;
	fieldBodyOut.length ← fieldBodyOut.length + 1;
	ENDLOOP;
      RemoveTrailingSpace[];
      IF ~truncated THEN RETURN;
      END;
    DO  -- faster loop for discarding
      IF (ch ← GetNaked[pH]) = CR THEN ch ← CheckForFolding[pH];
      SELECT ch FROM
	CR, endOfList => EXIT;
	endOfInput => GO TO Trouble;
	ENDCASE => truncated ← TRUE;
      ENDLOOP;
    IF truncated AND pH.signalTruncation THEN ERROR ParseError[truncated];
    EXITS
      Trouble => ERROR ParseError[badFieldBody];
    END;

  SyntaxError: PUBLIC ERROR = CODE;

  ParseNameList: PUBLIC PROCEDURE [
    pH: ParseHandle,
    process: PROCEDURE [STRING, STRING, STRING, NameInfo] RETURNS [BOOLEAN],
    write: PROCEDURE [CHARACTER] ← NIL, suppressWhiteSpace: BOOLEAN ← FALSE] =
    BEGIN
    error: BOOLEAN ← FALSE;

    CleanUp: PROCEDURE =
      BEGIN OPEN pH;
      DiscardLexemeList[pH, tagLexList]; tagLexList ← NIL;
      ResetCharacterStorage[pH];
      ResetLexicalStorage[pH];
      cantFinalize ← recordingChars ← FALSE;
      END;

    --main body of ParseNameList

    pH.write ← write;
    pH.process ← process;
    pH.suppressWhiteSpace ← suppressWhiteSpace;
    InitializeLexicalStorage[pH];
    pH.cantFinalize ← pH.recordingChars ← TRUE;
    pH.nameOutput ← pH.scanningTag ← FALSE;
    pH.nameInfo ← [nesting: none, hasTag: FALSE, type: normal];
    IF pH.simpleName = NIL THEN pH.simpleName ← Storage.String[maxRecipientLength];
    IF pH.registry = NIL THEN pH.registry ← Storage.String[maxRecipientLength];
    IF pH.arpaHost = NIL THEN pH.arpaHost ← Storage.String[maxRecipientLength];
    ParseList[pH, endOfInput !
      SyntaxError, EndOfInput => {error ← TRUE; CONTINUE};
      UNWIND => CleanUp[]];
    CleanUp[];
    IF error THEN ERROR ParseError[badFieldBody];
    END;


  -- Support procedures for ParseNameList --

  ParseList: PUBLIC PROCEDURE [pH: ParseHandle, terminator: CHARACTER] =
    BEGIN OPEN pH;
    MSGdl: ERROR = CODE;
    outputThisLevel ← FALSE;
    DO  -- the somewhat obscure coding of this (inner) loop is for speed
      lexType: LexemeType;
      char: CHARACTER ← GetNaked[pH];
      IF Inline.BITAND[char, 177B] <= 100C THEN
	BEGIN
	IF char = CR THEN char ← CheckForFolding[pH];
	SELECT char FROM
	  SP, TAB => GO TO LexemePart;
	  nameSeparator, CR, endOfList, endOfInput =>
	    BEGIN
	    ProcessName[pH];
	    IF terminator = '; AND ~nameSeen THEN {backUp[]; ERROR MSGdl};
	    IF char ~= nameSeparator THEN
	      IF nameInfo.nesting = none THEN EXIT ELSE GO TO Trouble;
	    END;
	  '@ =>
	    IF EmptyLexemeListOrSpace[pH] THEN AccumulateFileName[pH]
	    ELSE GO TO LexemePart;
	  '< => RecurForBrackets[pH, '<, '>];
	  ': =>
	    IF AccumulateTagOrPrelude[pH] = prelude THEN
	      BEGIN
	      nameSeen ← terminator ~= endOfInput;
	      RecurForBrackets[pH, ':, '; ! MSGdl =>
		BEGIN
		IF terminator ~= endOfInput THEN GO TO Trouble;
		BuildMSGdlAtom[pH];  -- do this before the UNWIND!
		CONTINUE
		END];
	      END;
	  '>, '; =>
	    BEGIN
	    IF char ~= terminator THEN GO TO Trouble;
	    ProcessName[pH];
	    nameSeen ← TRUE;
	    IF outputThisLevel AND write ~= NIL THEN write[char];
	    EXIT
	    END;
	  openComment => AccumulateComment[pH];
	  stringQuote => AccumulateQuotedString[pH];
	  closeComment => GO TO Trouble;
	  ENDCASE => IF char < 040C THEN GO TO Trouble ELSE GO TO LexemePart;
	LOOP;
	EXITS
	  LexemePart =>
	    lexType ←
	      SELECT char FROM
		'. => dot,
		'@ => at,
		SP, TAB => space,
		ENDCASE => atom;
	END
      ELSE lexType ← atom;
      MaybeNewLexemeInline[pH, lexType];
      WriteToStoreInline[pH, char];
      ENDLOOP;
    EXITS Trouble => ERROR SyntaxError;
    END;

  ProcessName: PROCEDURE [pH: ParseHandle] =
    BEGIN OPEN pH;

    ParseName: PROCEDURE RETURNS [BOOLEAN] = -- INLINE --
      BEGIN
      state: {initial, possibleHost, hostSeen, possibleRegistry} ← initial;

      InsertQualifier: PROCEDURE [lex: Lexeme, type: LexemeType, s: STRING]
	RETURNS [qL: Lexeme] =
	BEGIN
	qL ← MakeLexemeFromString[pH, type, s];
	AddLexeme[pH, qL, lex];
	AddLexeme[pH, MakeLexeme[pH, atom], lex];
	END;

      GetQualifier: PROCEDURE [lex: Lexeme, s: STRING] =
	BEGIN
	lex ← lex.next;
	IF lex.type = space THEN lex ← lex.next;
	AppendLexemeValue[pH, s, lex];
	END;

      AtomIsReallyAt: PROCEDURE [lex: Lexeme] RETURNS [BOOLEAN] = INLINE
	BEGIN
	RETURN[
	  lex.length = 2 AND lex.next.type = space AND lex.prev.type = space AND
	  String.LowerCase[ReadFromStore[pH, lex.start]] = 'a AND
	  String.LowerCase[ReadFromStore[pH, lex.start + 1]] = 't]
	END;

      IF LexemeCount[pH] = none THEN RETURN[FALSE];
      nameSeen ← nameSeen OR ~EmptyLexemeListOrSpace[pH];
      simpleName.length ← registry.length ← arpaHost.length ← 0;
      FOR lex: Lexeme ← lexHead.prev, lex.prev UNTIL lex.type = head DO
	IF lex.type = space THEN LOOP;
	SELECT state FROM
	  initial => IF lex.type = atom THEN state ← possibleHost;
	  possibleHost =>
	    BEGIN
	    SELECT lex.type FROM
	      at => NULL;
	      dot => {dotLex ← lex; EXIT};
	      atom =>
		IF AtomIsReallyAt[lex] THEN
		  BEGIN
		  lex.start ← lex.prev.start;
		  lex.length ← lex.length + lex.next.length + lex.prev.length;
		  lex.type ← at;
		  DeleteLexeme[pH, lex.prev];
		  DeleteLexeme[pH, lex.next];
		  END
		ELSE EXIT;
	      ENDCASE => EXIT;
	    state ← hostSeen;
	    atLex ← lex;
	    END;
	  hostSeen => IF lex.type = atom THEN state ← possibleRegistry ELSE EXIT;
	  possibleRegistry => {IF lex.type = dot THEN dotLex ← lex; EXIT};
	  ENDCASE;
	ENDLOOP;
      IF atLex = NIL THEN atLex ← InsertQualifier[lexHead, at, " at "L]
      ELSE GetQualifier[atLex, arpaHost];
      IF dotLex = NIL THEN dotLex ← InsertQualifier[atLex, dot, "."L]
      ELSE GetQualifier[dotLex, registry];
      nameInfo.type ← AppendSublistToString[pH, simpleName, lexHead, dotLex];
      nameInfo.hasTag ← tagLexList ~= NIL;
      RETURN[simpleName.length ~= 0 OR registry.length ~= 0 OR arpaHost.length ~= 0]
      END;

    IF scanningTag THEN ERROR SyntaxError;
    dotLex ← atLex ← NIL;
    TerminateLexeme[pH];
    IF ParseName[ ! String.StringBoundsFault --[s] RETURNS [ns]-- =>
			BEGIN
			ns ← Storage.String[s.maxlength + s.maxlength/2];
			SELECT s FROM
			  simpleName => simpleName ← ns;
			  registry => registry ← ns;
			  arpaHost => arpaHost ← ns;
			  ENDCASE => ERROR ImpossibleString;
			Storage.FreeString[s];
			RETRY
			END]
      AND process[simpleName, registry, arpaHost, nameInfo] THEN
      BEGIN
      IF write ~= NIL THEN OutputName[pH];
      nameOutput ← outputThisLevel ← TRUE;
      END;
    IF tagLexList ~= NIL THEN
      BEGIN
      TruncateCharacterStorage[pH, tagLexList.start];
      DiscardLexemeList[pH, tagLexList];
      tagLexList ← NIL;
      END
    ELSE
      IF LexemeCount[pH] > none THEN TruncateCharacterStorage[pH, lexHead.next.start];
    ResetLexicalStorage[pH];
    END;

  AppendSublistToString: PUBLIC PROCEDURE [
    pH: ParseHandle, s: STRING, first, last: Lexeme]
    RETURNS [nameType: NameType] =
    BEGIN
    lex: Lexeme ← first;
    nameType ← normal;
    DO
      SELECT lex.type FROM
	space, head => NULL;
	ENDCASE =>
	  BEGIN
	  IF lex.prev.type = space AND s.length ~= 0 THEN
	    {String.AppendChar[s, SP]; nameType ← multiAtom};	
	  AppendLexemeValue[pH, s, lex];
	  IF nameType ~= multiAtom THEN
	    SELECT s[0] FROM
	      stringQuote => nameType ← quotedString;
	      '@ => nameType ← file;
	      ENDCASE =>
		SELECT s[s.length-1] FROM
		  ': => nameType ← msgDL;
		  '↑ => nameType ← publicDL;
		  ENDCASE;
	  END;
      IF (lex ← lex.next) = last THEN EXIT;
      ENDLOOP;
    END;


  -- Input Scanner --

  EndOfInput: PUBLIC SIGNAL = CODE;

  Get: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] =
    -- obtains next input character and smoothes over a few lexical quirks.  This
    -- procedure deals with Arpa-standard line-folding, except that bare CR characters
    -- are not permitted.
    BEGIN OPEN pH;
    SELECT char ← nextChar[] FROM
      endOfList, endOfInput => SIGNAL EndOfInput;
      CR => char ← CheckForFoldingInline[pH];
      ENDCASE;
    END;

  CheckForFolding: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] =
    {RETURN[CheckForFoldingInline[pH]]};

  CheckForFoldingInline: PROCEDURE [pH: ParseHandle] RETURNS [char: CHARACTER] = INLINE
    -- This procedure deals with Arpa-standard line-folding, except that bare CR
    -- characters are not permitted.
    BEGIN OPEN pH;
    lfSeen: BOOLEAN;
    char ← nextChar[];
    IF (lfSeen ← (char = LF)) THEN char ← nextChar[];
    SELECT char FROM
      SP, TAB =>
	IF recordingChars THEN
	  BEGIN
	  MaybeNewLexeme[pH, space];
	  WriteToStore[pH, CR];
	  IF lfSeen THEN WriteToStore[pH, LF];
	  END;
      ENDCASE => {char ← CR; backUp[]};
    END;


  -- Lexeme Storage --

  ResetLexicalStorage: PUBLIC PROCEDURE [pH: ParseHandle] =
    -- flushes the lexeme table without destroying the backing storage behind it.
    {FlushLexemeList[pH]; pH.lexStart ← GetPosition[pH]; pH.curLexType ← null};

  LexemeCount: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [Magnitude] =
    BEGIN
    RETURN[
      SELECT pH.lexHead.next FROM
	pH.lexHead => none,
	pH.lexHead.prev => one,
	ENDCASE => many]
    END;

  EmptyLexemeListOrSpace: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [BOOLEAN] =
    BEGIN
    TerminateLexeme[pH];
    SELECT LexemeCount[pH] FROM
      none => RETURN[TRUE];
      one => RETURN[pH.lexHead.next.type = space]
      ENDCASE;
    RETURN[FALSE]
    END;

  MakeLexeme: PUBLIC PROCEDURE [
    pH: ParseHandle, type: LexemeType, start: CharIndex ← 0, length: CARDINAL ← 0]
    RETURNS [lex: Lexeme] =
    -- bundles up the specified range of characters as a lexeme of the indicated type,
    -- and returns it.
    BEGIN
    IF (lex ← pH.freeLexHead) = NIL THEN lex ← Storage.Node[SIZE[Lex]]
    ELSE pH.freeLexHead ← lex.next;
    lex↑ ← Lex[next: NIL, prev: NIL, type: type, length: length, start: start];
    END;

  MakeLexemeFromString: PUBLIC PROCEDURE [pH: ParseHandle, type: LexemeType, s: STRING]
    RETURNS [lex: Lexeme] =
    -- bundles up the given string as a lexeme of the indicated type and returns it.
    BEGIN
    lex ← MakeLexeme[pH: pH, type: type, start: GetPosition[pH], length: s.length];
    FOR i: CARDINAL IN [0..s.length) DO WriteToStore[pH, s[i]] ENDLOOP;
    END;

  AppendNewLexeme: PROCEDURE [pH: ParseHandle, type: LexemeType] = INLINE
    -- bundles up the current range of characters as a lexeme of the indicated type
    -- and appends it to the lexeme list.
    BEGIN
    pos: CharIndex = GetPosition[pH];
    start: CharIndex = pH.lexStart;
    IF pos = start -- OR type = null -- THEN RETURN;
    AddLexeme[pH, MakeLexeme[pH: pH, type: type, start: start, length: pos - start]];
    pH.lexStart ← pos;
    END;

  MaybeNewLexemeInline: PROCEDURE [pH: ParseHandle, new: LexemeType] = INLINE
    -- a character belonging to a lexeme of type 'new' has been found.  This may extend
    -- a previous lexeme or begin a new one.  MaybeNewLexeme decides which case
    -- applies and acts appropriately.
    BEGIN
    IF new = pH.curLexType THEN RETURN;
    AppendNewLexeme[pH, pH.curLexType];
    pH.curLexType ← new;
    END;

  MaybeNewLexeme: PUBLIC PROCEDURE [pH: ParseHandle, new: LexemeType] =
    {MaybeNewLexemeInline[pH, new]};

  AppendLexemeValue: PUBLIC PROCEDURE [pH: ParseHandle, s: STRING, lex: Lexeme] =
    -- appends the contents of the lexeme 'lex' to the string 's'.
    BEGIN
    FOR i: CARDINAL IN [lex.start..lex.start + lex.length) DO
      String.AppendChar[s, ReadFromStore[pH, i]];
      ENDLOOP;
    END;

  RemoveLexemeList: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [head: Lexeme] =
    -- removes all lexemes except 'lexHead' from the list, returning the new 'head'.
    BEGIN
    IF (head ← pH.lexHead.next) = pH.lexHead THEN RETURN[NIL];
    head.prev ← pH.lexHead.prev;
    pH.lexHead.prev.next ← head;
    pH.lexHead.next ← pH.lexHead.prev ← pH.lexHead;
    END;

  DiscardLexemeList: PUBLIC PROCEDURE [pH: ParseHandle, head: Lexeme] =
    -- releases all lexemes (including 'head').
    BEGIN
    IF head = NIL THEN RETURN;
    head.prev.next ← pH.freeLexHead;
    pH.freeLexHead ← head;
    END;

  AddLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme, where: Lexeme ← NIL] =
    -- adds 'lex' to the lexeme list immediately before 'where'.  If 'where' is
    -- defaulted, the lexeme goes at the end of the list.
    BEGIN
    IF where = NIL THEN where ← pH.lexHead;
    lex.next ← where;
    lex.prev ← where.prev;
    lex.prev.next ← where.prev ← lex;
    END;

  DeleteLexeme: PUBLIC PROCEDURE [pH: ParseHandle, lex: Lexeme] =
    -- removes 'lex' from the lexeme list.
    BEGIN
    IF lex.next ~= NIL THEN {lex.prev.next ← lex.next; lex.next.prev ← lex.prev};
    lex.next ← pH.freeLexHead;
    pH.freeLexHead ← lex;
    END;


  -- Character Storage

  TruncateCharacterStorage: PUBLIC PROCEDURE [pH: ParseHandle, killFrom: CharIndex] =
    -- releases backing storage for buffered names.
    BEGIN OPEN pH;
    IF ~(killFrom = 0 AND bufferList.header.count = 0) THEN
      EnsureIndexInCache[pH, IF killFrom = 0 THEN 0 ELSE killFrom - 1];
    DeleteSuccessorBuffers[cache];
    cache.header.count ← killFrom - cache.header.first;
    IF preludeOut > killFrom THEN preludeOut ← killFrom;
    END;

  ReadFromStore: PUBLIC PROCEDURE [pH: ParseHandle, index: CharIndex]
    RETURNS [CHARACTER] =
    -- returns the character at position 'index' in backing storage.
    BEGIN
    EnsureIndexInCache[pH, index];
    RETURN[pH.cache.chars[index - pH.cache.header.first]]
    END;

  WriteToStore: PUBLIC PROCEDURE [pH: ParseHandle, char: CHARACTER] =
    {WriteToStoreInline[pH, char]};

  WriteToStoreInline: PROCEDURE [pH: ParseHandle, char: CHARACTER] = INLINE
    -- appends 'char' to backing storage.
    BEGIN
    b: CharBuffer ← pH.cache;
    UNTIL b.header.next = NIL DO b ← b.header.next ENDLOOP;
    IF b.header.count = maxCharsPerBuffer THEN {
      b.header.next ← AddBuffer[pH, b.header.first+maxCharsPerBuffer, b];
      b ← b.header.next};
    b.chars[b.header.count] ← char;
    b.header.count ← b.header.count + 1;
    END;

  GetPosition: PUBLIC PROCEDURE [pH: ParseHandle] RETURNS [CharIndex] =
    -- returns the character index of the next free character position in
    -- backing storage.
    BEGIN
    b: CharBuffer ← pH.cache;
    UNTIL b.header.next = NIL DO b ← b.header.next ENDLOOP;
    RETURN[b.header.first + b.header.count]
    END;

  EnsureIndexInCache: PROCEDURE [pH: ParseHandle, i: CharIndex] = INLINE
    BEGIN
    IF i ~IN [pH.cache.header.first..pH.cache.header.first+pH.cache.header.count) THEN
      LoadCache[pH, i];
    END;

  DeleteSuccessorBuffers: PUBLIC PROCEDURE [b: CharBuffer] =
    BEGIN
    tb: CharBuffer ← b.header.next;
    b.header.next ← NIL;
    UNTIL (b ← tb) = NIL DO
      tb ← b.header.next;
      Storage.FreePages[b];
      ENDLOOP;
    END;


  END.