-- adapted from the Mesa formatter's scanner

DIRECTORY
    Ascii: TYPE USING [NUL, DEL],
    Format USING [Char, Decimal],
    ParseDefs USING [Token],
    PrintingDefs: TYPE USING [outProc, OutCode],
    ParseInterface: TYPE USING [TableRef, HashIndex, TSymbol, VocabHashEntry,
    	EndMarker, tICON, tNAME, tSTRING, tDIV, tDIVASG],
    MStream USING [Handle],
    String USING [AppendLongDecimal, AppendLongNumber],
    Stream: TYPE USING [EndOfStream, GetChar],
    Storage: TYPE USING [CopyString, FreeString, AppendChar, AppendString];
    
Scanner: PROGRAM IMPORTS Format, PrintingDefs, String, Stream, Storage EXPORTS ParseDefs = {
    OPEN ParseInterface;
    
    -- this will allow us to put characters back into the input stream
    putBackQueueLimit: CARDINAL = 30;
    putBackStack: ARRAY [0..putBackQueueLimit) OF CHARACTER;
    putBackTop: CARDINAL;

    -- pointers to PGS generated hash table for token lookup
    hashTab: LONG POINTER TO ARRAY HashIndex OF VocabHashEntry;
    scanTab: LONG POINTER TO ARRAY CHARACTER [40C..177C] OF TSymbol;
    vocab: LONG STRING;
    vocabIndex: LONG POINTER TO ARRAY TSymbol OF CARDINAL;
    
    -- keeps a history of recent character to print on error
    -- the history array is a circular buffer
    historyLimit: CARDINAL = 20; -- number of character of history to keep
    iHistory: CARDINAL ← 0; -- position in circular history buffer
    history: ARRAY [0..historyLimit) OF CHARACTER;

    commentString: PUBLIC LONG STRING;
    stream: MStream.Handle ← NIL;  -- the input stream
    char: CHARACTER;  -- current (most recently scanned) character
    tEnded: BOOLEAN;	-- TRUE if the end of input file has been reached
    tPosition: CARDINAL;  -- character position in the input file
    buffer: LONG STRING;  -- token assembly area
    
    -- This should be a PUBLIC interface, scanner parameters
    -- variables for identifying tokens
    CharType: TYPE = {idStartAndContinue, idContinueOnly, digit,
    	nonIdChar, stringQuote, charQuote, firstCommentChar,
	secondCommentChar, underscoreChar, otherChar};
    charType: ARRAY CHARACTER OF CharType;
    
    PublicScanInit: PROCEDURE [] = {
	i: CHARACTER;
	FOR i IN CHARACTER DO charType[i] ← nonIdChar; ENDLOOP;
	FOR i IN [Ascii.NUL..' ] DO charType[i] ← otherChar; ENDLOOP;
	charType[Ascii.DEL] ← otherChar;
	FOR i IN ['a..'z] DO charType[i] ← idStartAndContinue; ENDLOOP;
	FOR i IN ['A..'Z] DO charType[i] ← idStartAndContinue; ENDLOOP;
	FOR i IN ['0..'9] DO charType[i] ← digit; ENDLOOP;
	charType['←] ←underscoreChar;
	charType[''] ← charQuote;
	charType['"] ← stringQuote;
	charType['/] ← firstCommentChar;
	charType['*] ← secondCommentChar;
        };

    NextChar: PROCEDURE = {
	IF putBackTop > 0 THEN {
	    putBackTop ← putBackTop - 1;
	    char ← putBackStack[putBackTop]; }
        ELSE IF tEnded THEN char ← Ascii.NUL
        ELSE {
            tPosition ← tPosition + 1;
            char ← Stream.GetChar[stream !
                Stream.EndOfStream => {
                    char ← Ascii.NUL;
                    tPosition ← tPosition - 1;
                    tEnded ← TRUE;
                    CONTINUE}];
	    history[iHistory] ← char;
	    Storage.AppendChar[@commentString, char];
	    iHistory ← (iHistory + 1) MOD historyLimit;}};

    Atom: PUBLIC PROC RETURNS [token: ParseDefs.Token] = {
        DO
            WHILE char IN [Ascii.NUL..' ] DO
                NextChar[];
                IF tEnded THEN GO TO EndFile;
                ENDLOOP;
            token.location ← tPosition;
            token.value ← NIL;
	    IF buffer # NIL THEN Storage.FreeString[buffer];
	    buffer ← Storage.CopyString[s: ""L, longer: 16];
            SELECT charType[char] FROM

                digit => {
		    n: CARDINAL;
		    radix, digit: CARDINAL;
		    IF char # '0 THEN
		        radix ← 10
		    ELSE
		    {
		        NextChar[]; -- skip '0 on octal and hex constants
			IF char = 'x OR char = 'X THEN
			{
			    radix ← 16;
			    NextChar[]; -- skip 'x on hex constants
			}
			ELSE
			    radix ← 8;
		    };
		    n ← 0;
                    DO
			IF char NOT IN ['0..'9]
			  AND char NOT IN ['a..'f]
			  AND char NOT IN ['A..'F]
			THEN EXIT;
			digit ← SELECT char FROM
			    IN ['0..'9] => char-'0,
			    IN ['a..'f] => char-'a,
			    IN ['A..'F] => char-'A
			    ENDCASE => 0;
			n ← n * radix + digit;
			NextChar[];
                        ENDLOOP;
		    -- skip past the "long" constant symbol (if there)
		    IF char = 'l OR char = 'L THEN NextChar[];
                    token.class ← tICON;
		    token.value ← Storage.CopyString[s: ""L, longer: 16];
		    IF n = 0 THEN radix ← 10;
		    SELECT radix FROM
		        10 => String.AppendLongDecimal[s: token.value, n: n];
		        8 => {
			    String.AppendLongNumber[s: token.value, n: n,
			        radix: 8];
			    Storage.AppendChar[@token.value, 'B]; };
		        16 => {
			    -- insure that the number begins with a digit [0..9]
			    -- later do this better (check first hex 'digit')
			    -- hex conversion does not seem to work
			    -- try this again later
			    --Storage.AppendChar[@token.value, '0];
			    --String.AppendLongNumber[s: token.value, n: n,
			    --    radix: 16];
			    --Storage.AppendChar[@token.value, 'H]; };
			    String.AppendLongNumber[s: token.value, n: n,
			        radix: 8];
			    Storage.AppendChar[@token.value, 'B]; };
		    ENDCASE;
                    GO TO GotNext};

                idStartAndContinue => {
                    j: CARDINAL;
                    DO
			Storage.AppendChar[@buffer, char];
                        NextChar[];
                        SELECT charType[char] FROM
                            idStartAndContinue, idContinueOnly, digit => NULL;
			    -- convert underscores to capital Xs
                            underscoreChar => char ← 'X;
                            ENDCASE => EXIT;
                        ENDLOOP;
                    j ← HashLookUp[buffer];
		    IF j # 0 THEN {token.class ← j; GO TO GotNext;};
		    token.class ← tNAME;
		    token.value ← Storage.CopyString[s: buffer, longer: 32];
                    GO TO GotNext};

                charQuote => {
		    ch: CHARACTER;
		    
                    NextChar[];
		    token.class ← tICON;
		    token.value ← Storage.CopyString[s: "ORD['"L, longer: 8];
		    IF char = '\\ THEN {
		        -- First copy the \ then the char following it
		        Storage.AppendChar[@token.value, char];
		        NextChar[];
			-- Then copy the character type letter
			ch ← char;
		        NextChar[];
		        -- In case of octal specified values copy the
		        -- next two characters.
		        -- This checks that octal specified characters
		        -- always use exactly three octal digits.  This is
		        -- required in Mesa but not in C.
			IF ch IN ['0..'7] THEN {
			    ch2, ch3: CHARACTER;
			    NextChar[]; ch2 ← char;
			    IF ch2 IN ['0..'7] THEN {
			        NextChar[]; ch3 ← char;
				IF ch3 IN ['0..'7] THEN {
				    NextChar[];
				    Storage.AppendChar[@token.value, ch];
				    Storage.AppendChar[@token.value, ch2];
				    ch ← ch3;  }
				ELSE {
				    Storage.AppendChar[@token.value, '0];
				    Storage.AppendChar[@token.value, ch];
				    ch ← ch2;  }  }
			    ELSE {
			        Storage.AppendChar[@token.value, '0];
				Storage.AppendChar[@token.value, '0];  };
		            };
		        Storage.AppendChar[@token.value, ch];  }
		    ELSE {
		        Storage.AppendChar[@token.value, char];
		        NextChar[];  };
		    Storage.AppendString[@token.value, "]"L];
		    IF char # '' THEN ScanError[char, tPosition];
		    GO TO GetNext;
		    };

                stringQuote => {
                    i: CARDINAL ← 0;
		    Storage.AppendString[@buffer, "StringToArray["""L];
                    NextChar[];
		    -- This should check that octal specified characters
		    -- always use exactly three octal digits.  This is
		    -- required in Mesa but not in C.
                    DO
                        IF tEnded THEN GO TO EOFEnd;
			IF charType[char] = stringQuote THEN GO TO QuoteEnd;
			Storage.AppendChar[@buffer, char];
			-- ADD CODE TO HANDLE \d and \dd correctly (add 0 or 00)
			NextChar[];
                        REPEAT
                            QuoteEnd => NULL;
                            EOFEnd => {ScanError[string, token.location]; char ← Ascii.NUL};
                        ENDLOOP;
		    Storage.AppendString[@buffer, " ""]"L];
		    buffer.text[buffer.length - 3] ← 0C;	-- NUL terminate the string
                    token.class ← tSTRING;
		    token.value ← Storage.CopyString[s: buffer, longer: 32];
		    GO TO GetNext;
		    };

                firstCommentChar => {
                    pChar: CHARACTER;
		    pChar ← char;
                    NextChar[];
                    IF charType[char] # secondCommentChar THEN {
		        IF char = '= THEN {
			    token.class ← tDIVASG;
			    GO TO GetNext; }
			ELSE {
			    token.class ← tDIV;
			    GO TO GotNext; } };
                    char ← Ascii.NUL;
                    DO
                        pChar ← char;
                        NextChar[];
                        IF tEnded THEN GO TO EndFile;
                        IF charType[char] = firstCommentChar
			    AND charType[pChar] = secondCommentChar
			        THEN EXIT;
                        ENDLOOP;
                    NextChar[];
                    };

                ENDCASE => {
                    j: CARDINAL;
                    DO
			Storage.AppendChar[@buffer, char];
                        NextChar[];
                        SELECT charType[char] FROM
			    -- ******** later recognize comments here ********
                            nonIdChar, firstCommentChar, secondCommentChar => NULL;
                            ENDCASE => EXIT;
                        ENDLOOP;
		    PutBackChar[char];
		    DO
			IF buffer.length = 1 THEN EXIT;
                        j ← HashLookUp[buffer];
			-- special hack because of PGS glitch
			IF buffer.length = 2 AND buffer.text[0] = '-
			    AND buffer.text[1] = '- THEN j ← HashLookUp["!!"L];
			IF j # 0 THEN {token.class ← j; GO TO GetNext;};
			PutBackChar[buffer.text[buffer.length-1]];
			buffer.length ← buffer.length - 1;
		        ENDLOOP;
		    token.class ← scanTab[buffer.text[0]];
		    token.value ← NIL;
                    GO TO GetNext};

            REPEAT
                GetNext => NextChar[];
                GotNext => NULL;
                EndFile => {
                    char ← Ascii.NUL;
                    token.class ← EndMarker;
                    token.location ← tPosition;
                    token.value ← NIL};
            ENDLOOP;
        RETURN};
	
    HashLookUp: PROCEDURE [string: LONG STRING] RETURNS [CARDINAL] = {
	j: CARDINAL;
	s1, s2: CARDINAL;
	h: HashIndex;
	first: CARDINAL ← ORD[string.text[0]];
	last: CARDINAL ← ORD[string.text[string.length - 1]];
        h ← ((first*128 - first) + last) MOD LAST[HashIndex] + 1;
        WHILE (j ← hashTab[h].symbol) # 0 DO
            IF vocabIndex[j] - (s2 ← vocabIndex[j - 1]) = string.length THEN
                FOR s1 IN [0..string.length) DO
                    IF string[s1] # vocab[s2] THEN EXIT;
                    s2 ← s2 + 1;
                    REPEAT
                        FINISHED => RETURN [j];
                    ENDLOOP;
            IF (h ← hashTab[h].link) = 0 THEN EXIT;
            ENDLOOP;
	RETURN[0];
	};
	
    PutBackChar: PROCEDURE [c: CHARACTER] = {
	IF putBackTop >= putBackQueueLimit THEN
	    ScanError[queueLimit, tPosition]
	ELSE {
	    putBackStack[putBackTop] ← c;
	    putBackTop ← putBackTop + 1;  };
	};

    -- initialization/finalization

    ScanInit: PUBLIC PROC [inputStream: MStream.Handle, table: ParseInterface.TableRef] = {
        stream ← inputStream;
        hashTab ← @table.scanTable.hashTab;
        scanTab ← @table.scanTable.scanTab;
        vocab ← LOOPHOLE[@table.scanTable.vocabBody];
        vocabIndex ← @table.scanTable.vocabIndex;
        tEnded ← FALSE;
	commentString ← Storage.CopyString[s: ""L, longer: 64];
        NextChar[];
        tPosition ← 0;
	iHistory ← 0;
	putBackTop ← 0;
	PublicScanInit[];
	buffer ← NIL;
        };
	
    ScanFinal: PUBLIC PROC [] = {
        IF buffer # NIL THEN Storage.FreeString[buffer];
	};

    -- error handling
    
    ErrorContext: PUBLIC PROCEDURE [] = {
	i: CARDINAL;
        FOR i IN [iHistory..historyLimit) DO
	    Format.Char[PrintingDefs.outProc↑, history[i]]; ENDLOOP;
        FOR i IN [0..iHistory) DO
	    Format.Char[PrintingDefs.outProc↑, history[i]]; ENDLOOP;
	};

    ScanError: PROCEDURE [code: {string, char, debug, queueLimit}, tokenIndex: CARDINAL] = {
	PrintingDefs.OutCode[
            SELECT code FROM
                string => "string unterminated or too long at ["L,
                char => "invalid character at ["L,
                queueLimit => "put back queue overflow at ["L,
                debug => "debug trace type = ["L,
                ENDCASE => "scan error at ["L
	    , 0];
	Format.Decimal[PrintingDefs.outProc↑, tokenIndex];
	PrintingDefs.OutCode["] around: "L, 0];
	ErrorContext[];
	PrintingDefs.OutCode["\n"L, 0];
        };

    }.