-- file ArpaMTEvalImpl.mesa rewritten by PGS, October 21, 1987 10:20:55 pm PDT

-- ArpaMTEvalImpl.pgs
-- file ArpaMTEvalImpl.mesa rewritten by PGS, 14-Jun-83 15:33
-- last edit by Schmidt, June 11, 1982 3:12 pm
-- last modified by Satterthwaite, June 2, 1983 12:45 pm
 -- Last Edited by: Nichols, July 15, 1983 5:35 pm
 -- Last Edited by: HGM, March 3, 1984 10:46:50 pm PST
-- John Larson, October 10, 1987 5:12:05 pm PDT

-- pgs [defs: ArpaMTParseTable, bcd: ArpaMTParseData, grammar: ArpaMT] ← ArpaMTEvalImpl.mesa;

-- output: new version of ArpaMTEvalImpl.mesa, tables in ArpaMTParseData.bcd
-- interface on ArpaMTParseTable.mesa
-- log on PGS.Log, grammar on ArpaMT.Grammar,
-- errors on ArpaMTParseData.errlog

DIRECTORY
	List: TYPE USING [Nconc1, NthElement],
	ArpaMTMiscOps: TYPE USING [AToI, IToA, Substitute],
	ArpaMTP1: TYPE --P1-- USING [ActionStack, AddrSpecNode, DomainNode, DotWordsNode,
		InternalNode, LinkStack, MailboxNode, RouteAddrNode, RouteNode, TerminalNode, TValue,
		ValueStack],
	ArpaMTParseTable: TYPE ParseTable USING [ProdDataRef],
	Rope: TYPE USING [Cat, Equal, IsEmpty, Length, ROPE, SkipOver, Substr];
	
ArpaMTEvalImpl: CEDAR PROGRAM
		IMPORTS List, ArpaMTMiscOps, Rope
		EXPORTS ArpaMTP1 ~ {
	-- parse tree building
	OPEN P1~~ArpaMTP1, ArpaMTParseTable;

-- stores parse tree when done with parse

parseTree: REF ANY;

-- Set if the reduction routines had trouble and we should declare an invalid parse.

badSemantics: BOOLEAN;

-- local data base (supplied by parser)

v: P1.ValueStack;
l: P1.LinkStack;
q: P1.ActionStack;

prodData: ProdDataRef;

-- initialization/termination

AssignDescriptors: PUBLIC PROC[
	qd: P1.ActionStack,
	vd: P1.ValueStack, ld: P1.LinkStack,
	pp: ProdDataRef] ~ {
	q ← qd; v ← vd; l ← ld; prodData ← pp;};

EvalInit: PUBLIC PROC ~ {
	-- This routine should be called before a parse begins.  The parser will do this for you.
	parseTree ← NIL;
	badSemantics ← FALSE;
	};
	
-- Random

digits: Rope.ROPE ~ "0123456789";

-- Tree manipulation

CollectInternals: PROC [top, number: NAT] ~ {
	kids: P1.InternalNode;		-- LIST OF REF ANY
	
	kids ← LIST[v[top+number-1].t];
	FOR i: NAT DECREASING IN [top..top+number-1) DO
		kids ← CONS[v[i].t, kids];
		ENDLOOP;
	v[top].t ← kids;
	};



-- This procedure builds the parse tree.  It is called by the parser for each reduction to be
-- made.  In addition to building the tree, it does a few minor corrections for the extra
-- productions in the grammar.  In particular, a missing ";" is added to a group list, and extra
-- commas are removed from mailbox and address lists.

-- For dates, it does something else (more later).

-- The grammar here is really a pair of grammars, one for name lists and one for dates.
-- We put them together like this because it is pretty inconvenient to have two grammars
-- with pgs.  The name-dummy and date-dummy tokens are the first tokens returned by
-- the lexer in each case and are used to force the parser to parse a name list or a date.

ProcessQueue: PUBLIC PROC[qI, top: CARDINAL] ~ {
	FOR i: CARDINAL IN [0..qI) DO

		GetRule: PROC[n: CARDINAL] RETURNS [CARDINAL] ~ TRUSTED {
			RETURN [prodData[n].rule]};

		UnixProc1: PROC ~ {
			-- date-time ::= day month atom hour atom
			-- Unix date format.  Rearrange everything and fix up the year.
			time, date: LIST OF REF ANY;
			year: REF P1.TerminalNode ← NARROW[v[top+4].t];
			year.text ← year.text.Substr[year.text.Length[]-2, 2];
			time ← LIST[v[top+3].t];
			date ← LIST[v[top+2].t, v[top+1].t, year];
			v[top].t ← LIST[v[top].t, NEW[P1.TerminalNode ← [text~","]], date, time];
			};
			
		UnixProc2: PROC ~ {
			-- date-time ::= day month atom hour zone atom
			-- Unix date format.  Rearrange everything and fix up the year.
			time, date: LIST OF REF ANY;
			year: REF P1.TerminalNode ← NARROW[v[top+5].t];
			year.text ← year.text.Substr[year.text.Length[]-2, 2];
			time ← LIST[v[top+3].t, v[top+4].t];
			date ← LIST[v[top+2].t, v[top+1].t, year];
			v[top].t ← LIST[v[top].t, NEW[P1.TerminalNode ← [text~","]], date, time];
			};
			
		top ← top-q[i].tag.pLength+1;
		SELECT GetRule[q[i].transition] FROM

            0  => -- TABLE: ArpaMTParseData  TYPE: ParseTable  EXPORTS: SELF
				-- GOAL:  goal
				-- TERMINALS:
				--   name-dummy
				--   atom
				--   quoted-string
				--   domain-lit
				--   . , : ;
				--   @ < >
				--   eol
				--   error

				--   date-dummy
				--   ampm day month zone

				-- ALIASES:
				--   name-dummy nameDummy
				--   atom	atomTok
				--   quoted-string qStringTok
				--   domain-lit dLitTok
				--   . dotTok
				--   , commaTok
				--   : colonTok
				--   ; semiTok
				--   @ atSignTok
				--   <	lBracketTok
				--   > rBracketTok
				--   error	errorTok
				--   eol EOLTok

				--   date-dummy dateDummy
				--   ampm ampmTok
				--   day dayTok
				--   month monthTok
				--   zone zoneTok

				-- PRODUCTIONS:
				--   goal ::= name-dummy addr-list eol
					BEGIN
					CollectInternals[top+1, 2];		-- save the EOL, too.
					parseTree ← v[top+1].t;
					END;
           1  => -- goal ::= date-dummy date-time eol
					BEGIN
					CollectInternals[top+1, 2];		-- save the EOL, too.
					parseTree ← v[top+1].t;
					END;
           2  => -- address ::=
					v[top].t ← NIL;
           3  => -- address ::= mailbox
					NULL;
           4  => -- address ::= group
					NULL;
           5  => -- addr-list ::= address
					NULL;
           6  => -- addr-list ::= addr-list , address
					IF v[top+2].t # NIL THEN	-- only collect new address if non-empty
						CollectInternals[top, 3];
           7  => -- addr-spec ::= dot-words
					v[top].t ← NEW[P1.AddrSpecNode ← [localPart~v[top].t]];
           8  => -- addr-spec ::= addr-spec @ domain
					BEGIN
					t: REF P1.TerminalNode ← NARROW[v[top+1].t];
					d: REF P1.DomainNode ← NARROW[v[top+2].t];
					t.text ← "@";		-- make sure that we don't have "at"
					-- also zap white space from around the @
					t.whiteSpace ← NIL;
					WHILE d.domain # NIL DO
						d ← d.domain;
						ENDLOOP;
					d.subDomain.whiteSpace ← NIL;
					v[top].t ← NEW[P1.AddrSpecNode ← [localPart~v[top].t,
						atSign~t, domain~NARROW[v[top+2].t]]];
					END;
           9  => -- domain ::= sub-domain
					v[top].t ← NEW[P1.DomainNode ← [subDomain~NARROW[v[top].t]]];
          10  => -- domain ::= domain . sub-domain
					v[top].t ← NEW[P1.DomainNode ← [domain~NARROW[v[top].t],
						dot~NARROW[v[top+1].t], subDomain~NARROW[v[top+2].t]]];
          11  => -- dot-words ::= word
					v[top].t ← NEW[P1.DotWordsNode ← [word~NARROW[v[top].t]]];
          12  => -- dot-words ::= dot-words . word
					v[top].t ← NEW[P1.DotWordsNode ← [dotWords~NARROW[v[top].t],
						dot~NARROW[v[top+1].t], word~NARROW[v[top+2].t]]];
          13  => -- group ::= phrase : mailbox-list ;
					CollectInternals[top, 4];
          14  => -- group ::= phrase : mailbox-list
					BEGIN
					CollectInternals[top, 3];
					-- Add semicolon.
					v[top].t ← List.Nconc1[NARROW[v[top].t, LIST OF REF ANY],
						NEW[P1.TerminalNode ← [text~";"]]];
					END;
          15  => -- mailbox ::= addr-spec
					v[top].t ← NEW[P1.MailboxNode ← [address~v[top].t]];
          16  => -- mailbox ::= route-addr
					v[top].t ← NEW[P1.MailboxNode ← [address~v[top].t]];
          17  => -- mailbox ::= phrase route-addr
					v[top].t ← NEW[P1.MailboxNode ← [phrase~v[top].t, address~v[top+1].t]];
          18  => -- mailbox ::= @ dot-words
					BEGIN
					CollectInternals[top, 2];
					v[top].t ← NEW[P1.MailboxNode ← [address~v[top].t]];
					-- TODO: should hide structure of dl somehow.
					END;
          19  => -- mailbox-list ::=
					v[top].t ← NIL;
          20  => -- mailbox-list ::= mailbox
					NULL;
          21  => -- mailbox-list ::= mailbox-list ,
					NULL;		-- discard comma
          22  => -- mailbox-list ::= mailbox-list , mailbox
					IF v[top+2].t # NIL THEN	-- only collect new mailbox if non-empty
						CollectInternals[top, 3];
          23  => -- phrase ::= dot-words
					NULL;
          24  => -- phrase ::= phrase dot-words
					CollectInternals[top, 2];
          25  => -- route ::= @ domain
					v[top].t ← NEW[P1.RouteNode ← [atSign~NARROW[v[top].t],
						domain~NARROW[v[top+1].t]]];
          26  => -- route ::= route , @ domain
					v[top].t ← NEW[P1.RouteNode ← [route~NARROW[v[top].t],
						comma~NARROW[v[top+1].t], atSign~NARROW[v[top+2].t],
						domain~NARROW[v[top+3].t]]];
          27  => -- route-addr ::= < addr-spec >
					v[top].t ← NEW[P1.RouteAddrNode ← [lBrack~NARROW[v[top].t],
						addrSpec~NARROW[v[top+1].t], rBrack~NARROW[v[top+2].t]]];
          28  => -- route-addr ::= < route : addr-spec >
					v[top].t ← NEW[P1.RouteAddrNode ← [lBrack~NARROW[v[top].t],
						route~NARROW[v[top+1].t], colon~NARROW[v[top+2].t],
						addrSpec~NARROW[v[top+3].t], rBrack~NARROW[v[top+4].t]]];
          29  => -- sub-domain ::= atom
					NULL;
          30  => -- sub-domain ::= domain-lit
					NULL;
          31  => -- word ::= atom
					NULL;
          32  => -- word ::= quoted-string
					NULL;

          33  => -- date-time ::= date time
					CollectInternals[top, 2];
          34  => -- date-time ::= day date time
					BEGIN
					-- Add comma after day and treat like date-time ::= day , date time
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					t.text ← t.text.Substr[0, 3];
					v[top].t ← LIST[v[top].t, NEW[P1.TerminalNode ← [text~","]], v[top+1].t,
						v[top+2].t];
					END;
          35  => -- date-time ::= day , date time
					BEGIN
					-- Day is supposed to be in 3-letter abbreviated form, so truncate it.
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					t.text ← t.text.Substr[0, 3];
					CollectInternals[top, 4];
					END;
          36  => -- date-time ::= day , date , time
					BEGIN
					-- Remove comma after date and treat like date-time ::= day , date time
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					t.text ← t.text.Substr[0, 3];
					-- Cheat!  Rather than chase down the tree to tack on the white space, we just
					-- leave the comma node in.
					t ← NARROW[v[top+3].t];
					t.whiteSpace ← " ";
					t.text ← NIL;
					CollectInternals[top, 5];
					END;
          37  => -- date-time ::= day month atom hour atom
					UnixProc1[];		--COMPILER--
          38  => -- date-time ::= day month atom hour zone atom
					UnixProc2[];		--COMPILER--
          39  => -- date ::= atom month atom
					BEGIN
					-- Month should be truncated to three letters.  Atom at end should be year
					-- in two digit form.
					t: REF P1.TerminalNode ← NARROW[v[top+1].t];
					t.text ← t.text.Substr[0, 3];
					t ← NARROW[v[top+2].t];
					IF t.text.Length[] < 2 OR t.text.SkipOver[skip: digits] # t.text.Length[] THEN
						badSemantics ← TRUE
					ELSE
						t.text ← t.text.Substr[t.text.Length[]-2, 2];
					CollectInternals[top, 3];
					END;
          40  => -- date ::= atom month . atom
					BEGIN
					-- Discard dot and handle as above.
					t: REF P1.TerminalNode ← NARROW[v[top+1].t];
					t.text ← t.text.Substr[0, 3];
					t ← NARROW[v[top+3].t];
					IF t.whiteSpace.IsEmpty THEN t.whiteSpace ← " ";
					IF t.text.Length[] < 2 OR t.text.SkipOver[skip: digits] # t.text.Length[] THEN
						badSemantics ← TRUE
					ELSE
						t.text ← t.text.Substr[t.text.Length[]-2, 2];
					v[top].t ← LIST[v[top].t, v[top+1].t, v[top+3].t];
					END;
          41  => -- date ::= atom
					BEGIN
					-- Most likely, we have a date of the form dd-mmm-yy, so replace the hyphens
					-- with spaces.
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					t.text ← ArpaMTMiscOps.Substitute[t.text, '-, ' ];
					END;
          42  => -- time ::= hour zone
					CollectInternals[top, 2];
          43  => -- time ::= hour ampm zone
					BEGIN
					-- Flush ampm and adjust hour.
					t: REF P1.TerminalNode ←
						NARROW[NARROW[v[top].t, LIST OF REF ANY].first];
					ampm: REF P1.TerminalNode ← NARROW[v[top+1].t];
					hour: INT ← ArpaMTMiscOps.AToI[t.text];
					-- To quote Taft: "If hour is not 12 then AM leaves it alone and PM adds 12.
					-- But if hour is 12 then AM sets it to zero and PM leaves it alone.  The
					-- following code actually sets it to zero and then adds 12 in the 12 PM case."
					IF hour = 12 THEN
						hour ← 0;
					IF ampm.text.Equal["pm", FALSE] THEN
						hour ← hour + 12;
					t.text ← ArpaMTMiscOps.IToA[hour];
					IF t.text.Length < 2 THEN
						-- pad it
						t.text ← Rope.Cat["0", t.text];
					v[top].t ← LIST[v[top].t, v[top+2].t];
					END;
          44  => -- time ::= hour
					BEGIN
					-- This comes from Tenex/Twenex format times, in which the zone is separated
					-- from the hour by a hyphen.  Replace hyphens with spaces.
					t: REF P1.TerminalNode ← NARROW[List.NthElement[NARROW[v[top].t], -1]];

					t.text ← ArpaMTMiscOps.Substitute[t.text, '-, ' ];
					END;
          45  => -- hour ::= atom : atom
					BEGIN
					-- Make sure we have numbers, but don't force last atom to, as it may have
					-- zone mixed in.
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					t2: REF P1.TerminalNode ← NARROW[v[top+2].t];

					IF t.text.SkipOver[skip: digits] # t.text.Length[] THEN
						badSemantics ← TRUE;
					CollectInternals[top, 3];
					END;
          46  => -- hour ::= atom : atom : atom
					BEGIN
					-- Make sure we have numbers.
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					t2: REF P1.TerminalNode ← NARROW[v[top+2].t];
					t3: REF P1.TerminalNode ← NARROW[v[top+4].t];

					IF t.text.SkipOver[skip: digits] # t.text.Length[] OR
							t2.text.SkipOver[skip: digits] # t2.text.Length[] THEN
						badSemantics ← TRUE;
					CollectInternals[top, 5];
					END;
          47  => -- hour ::= atom
					BEGIN
					-- Time is in 4-digit form: hhmm.  Convert to hh:mm.  There may be junk
					-- after the 4 digits (e.g. -zone) which is preserved.
					t: REF P1.TerminalNode ← NARROW[v[top].t];
					r: Rope.ROPE ← t.text;
					
					IF r.SkipOver[skip: digits] = 4 AND ArpaMTMiscOps.AToI[r.Substr[0,2]] <= 23 AND
							ArpaMTMiscOps.AToI[r.Substr[2,2]] <= 59 THEN
						v[top].t ← LIST[NEW[P1.TerminalNode ← [whiteSpace~t.whiteSpace,
								text~r.Substr[0, 2]]],
							NEW[P1.TerminalNode ← [text~ ":"]],
							NEW[P1.TerminalNode ← [text~ r.Substr[start: 2]]]]
					ELSE {
						badSemantics ← TRUE;
						v[top].t ← LIST[v[top].t];
						};
					END;
			ENDCASE =>  ERROR;

		ENDLOOP};



ProcessError: PUBLIC PROC [top: CARDINAL, inputValue: P1.TValue] = {
	CollectInternals[1, top];
	parseTree ← LIST[v[1].t, inputValue];
	};

GetEvalResult: PUBLIC PROC RETURNS [tree: REF ANY, badSemantics: BOOL] = {
	RETURN [parseTree, ArpaMTEvalImpl.badSemantics];
	};

}.