-- File: ArpaTokenImpl.mesa - last edit:
-- JAV                 17-Jun-87 14:48:34

-- Copyright (C) 1987 by Xerox Corporation. All rights reserved.

DIRECTORY
  Ascii USING [CR, NUL, SP, TAB],
  Heap: TYPE USING [systemZone],
  Inline USING [BITAND],
  String USING [
    AppendChar, CopyToNewString, InvalidNumber, Length,
    StringBoundsFault, StringToLongNumber, StringToNumber],
  ArpaToken;

ArpaTokenImpl: MONITOR IMPORTS Heap, Inline, String EXPORTS ArpaToken =

  BEGIN OPEN Ascii;

  UnterminatedQuote: PUBLIC SIGNAL = CODE;

  FilterState: TYPE = ArpaToken.FilterState;
  StandardFilterState: TYPE = ArpaToken.StandardFilterState;
  FilterProcType: TYPE = ArpaToken.FilterProcType;
  GetCharProcType: TYPE = ArpaToken.GetCharProcType;
  QuoteProcType: TYPE = ArpaToken.QuoteProcType;
  SkipMode: TYPE = ArpaToken.SkipMode;
  nonQuote: CHARACTER = ArpaToken.nonQuote;
  
  initialLength: CARDINAL = 100; --initial length for ArpaToken strings
  lengthIncrement: CARDINAL = 100; --amount of additional storage to allocate
  				   --on a bounds fault

  savedString: LONG STRING ← NIL;  -- keep one string of length initialLength
  
  GetString: ENTRY PROC [length: CARDINAL] RETURNS [LONG STRING] = {
    ENABLE UNWIND => NULL;
    IF length = initialLength AND savedString # NIL THEN {
      ls: LONG STRING ← savedString;
      savedString ← NIL;
      ls.length ← 0;
      RETURN[ls]};
    RETURN[Heap.systemZone.NEW[StringBody[length]]]};
  
  FreeString: ENTRY PROC [ls: LONG STRING] = {
    ENABLE UNWIND => NULL;
    IF ls # NIL AND ls.maxlength = initialLength AND savedString = NIL THEN {
      savedString ← ls; RETURN};
    Heap.systemZone.FREE[@ls]};
  
  Boolean: PUBLIC PROCEDURE [h: ArpaToken.Handle, signalOnError: BOOLEAN ← TRUE]
    RETURNS [true: BOOLEAN ← FALSE] =
    BEGIN
    temp: STRING ← [10];
    BEGIN
    InvalidChar: PROCEDURE [expected: CHARACTER] RETURNS [invalid: BOOLEAN] =
      BEGIN
      String.AppendChar[temp, h.break ← h.getChar[h]];
      invalid ← Inline.BITAND[h.break, 337B] # expected;
      END;
    Skip[h, NIL, WhiteSpace, TRUE];
    String.AppendChar[temp, h.break];
    SELECT Inline.BITAND[h.break, 337B] FROM
      'T =>
        BEGIN
        IF InvalidChar['R] OR InvalidChar['U] THEN GO TO invalid;
        IF InvalidChar['E] THEN GO TO invalid;  -- allow compiler to cross-jump
        RETURN[TRUE];
        END;
      'F =>
        BEGIN
        IF InvalidChar['A] OR InvalidChar['L] OR InvalidChar['S] THEN
          GO TO invalid;
        IF InvalidChar['E] THEN GO TO invalid;  -- allow compiler to cross-jump
        RETURN[FALSE];
        END;
      ENDCASE => GO TO invalid;
    EXITS invalid => IF signalOnError THEN SIGNAL SyntaxError[temp];
    END;
    END;

  -- MAS: Can we get rid of this and just use Inline.LowHalf[LongNumber]
  --      if HighHalf[] = 0 or -1?  Two issues: speed and equivalence of results.
  Number: PUBLIC PROC [
    h: ArpaToken.Handle, radix: CARDINAL, signalOnError: BOOLEAN ← TRUE]
    RETURNS [u: UNSPECIFIED] =
    BEGIN
    s: LONG STRING ← Filtered[h, NIL, NumberFilter];
    IF s = NIL THEN {IF signalOnError THEN SIGNAL SyntaxError[s]; RETURN[0]};
    u ← String.StringToNumber[
      s, radix !
      String.InvalidNumber => {
        IF signalOnError THEN SIGNAL SyntaxError[s]; u ← 0; CONTINUE};
      UNWIND => FreeString[s]];
    FreeString[s];
    END;

  Decimal: PUBLIC PROC [h: ArpaToken.Handle, signalOnError: BOOLEAN ← TRUE]
    RETURNS [i: INTEGER] = {i ← Number[h, 10, signalOnError]};
  Octal: PUBLIC PROC [h: ArpaToken.Handle, signalOnError: BOOLEAN ← TRUE]
    RETURNS [c: CARDINAL] = {c ← Number[h, 8, signalOnError]};

  LongNumber: PUBLIC PROC [
    h: ArpaToken.Handle, radix: CARDINAL, signalOnError: BOOLEAN ← TRUE]
    RETURNS [u: LONG UNSPECIFIED] =
    BEGIN
    s: LONG STRING ← Filtered[h, NIL, NumberFilter];
    IF s = NIL THEN {IF signalOnError THEN SIGNAL SyntaxError[s]; RETURN[0]};
    u ← String.StringToLongNumber[
      s, radix !
      String.InvalidNumber => {
        IF signalOnError THEN SIGNAL SyntaxError[s]; u ← 0; CONTINUE};
      UNWIND => FreeString[s]];
    FreeString[s];
    END;

  LongDecimal: PUBLIC PROC [h: ArpaToken.Handle, signalOnError: BOOLEAN ← TRUE]
    RETURNS [i: LONG INTEGER] = {i ← LongNumber[h, 10, signalOnError]};
  LongOctal: PUBLIC PROC [h: ArpaToken.Handle, signalOnError: BOOLEAN ← TRUE]
    RETURNS [c: LONG CARDINAL] = {c ← LongNumber[h, 8, signalOnError]};

  Item: PUBLIC PROCEDURE [h: ArpaToken.Handle, temporary: BOOLEAN ← TRUE]
    RETURNS [value: LONG STRING] =
    BEGIN
    value ← GetString[initialLength];
    BEGIN
    ENABLE {
      UNWIND => FreeString[value];
      String.StringBoundsFault =>
        BEGIN
	ns ← String.CopyToNewString[
	  s, Heap.systemZone, s.maxlength + lengthIncrement - s.length];
	FreeString[s];
	RESUME[value ← ns];
	END};
    Skip[h, NIL, WhiteSpace, TRUE];
    DO
      IF WhiteSpaceInline[h.break] OR h.break = NUL THEN EXIT;
      String.AppendChar[value, h.break];
      h.break ← h.getChar[h];
      ENDLOOP;
    IF value.length = 0 THEN {FreeString[value]; RETURN[NIL]};
    IF ~temporary THEN {
      old: LONG STRING ← value; 
      value ← String.CopyToNewString[old, Heap.systemZone]; 
      FreeString[old]};
    END;
    END;

  -- No default skip!

  ZeroData: PROCEDURE [data: FilterState] = INLINE
    BEGIN
    pu: LONG POINTER TO LONG UNSPECIFIED = LOOPHOLE[data];
    IF pu # NIL THEN pu↑ ← 0;
    END;

  Skip: PUBLIC PROCEDURE [
    h: ArpaToken.Handle, data: FilterState, filter: FilterProcType,
    skipInClass: BOOLEAN ← TRUE] =
    BEGIN
    ZeroData[data];
    WHILE (h.break ← h.getChar[h]) # NUL AND 
      filter[h.break, data] = skipInClass DO ENDLOOP;
    END;

  Filtered: PUBLIC PROCEDURE [
    h: ArpaToken.Handle, data: FilterState, filter: FilterProcType,
    skip: SkipMode ← whiteSpace, temporary: BOOLEAN ← TRUE]
    RETURNS [value: LONG STRING] =
    BEGIN
    value ← NIL;
    ZeroData[data];
    -- First handle (possible) skip
    DO
      IF (h.break ← h.getChar[h]) = NUL THEN RETURN;
      IF skip = whiteSpace AND WhiteSpaceInline[h.break] THEN LOOP;
      IF filter[h.break, data] THEN EXIT;
      IF skip = none OR skip = whiteSpace THEN RETURN;
      ENDLOOP;
    -- Then accumulate all inClass characters
    value ← GetString[initialLength];
    DO
      ENABLE {
        UNWIND => FreeString[value];
	String.StringBoundsFault =>
          BEGIN
	  ns ← String.CopyToNewString[
	    s, Heap.systemZone, s.maxlength + lengthIncrement - s.length];
	  FreeString[s];
	  RESUME[value ← ns];
	  END};
      String.AppendChar[value, h.break];
      IF (h.break ← h.getChar[h]) = NUL OR ~filter[h.break, data] THEN EXIT;
      ENDLOOP;
    IF ~temporary THEN {
      old: LONG STRING ← value;
      value ← String.CopyToNewString[old, Heap.systemZone];
      FreeString[old]};
    END;

  MaybeQuoted: PUBLIC PROCEDURE [
    h: ArpaToken.Handle, data: FilterState, filter: FilterProcType ← NonWhiteSpace,
    isQuote: QuoteProcType ← Quote, skip: SkipMode ← whiteSpace,
    temporary: BOOLEAN ← TRUE] RETURNS [value: LONG STRING] =
    BEGIN
    closeQuote: CHARACTER;
    QuoteFilter: FilterProcType = {
      IF c = NUL THEN {SIGNAL UnterminatedQuote; RETURN [FALSE]}
      ELSE RETURN[c # closeQuote]};
    ApplyFilter: FilterProcType ← filter;
    ZeroData[data];
    value ← NIL;
    -- First handle (possible) skip
    DO
      IF (h.break ← h.getChar[h]) = NUL THEN RETURN;
      IF skip = whiteSpace AND WhiteSpaceInline[h.break] THEN LOOP;
      IF (closeQuote ← isQuote[h.break]) # nonQuote THEN
        IF (h.break ← h.getChar[h]) = closeQuote THEN {
	  h.break ← h.getChar[h]; 
	  IF h.break = closeQuote THEN EXIT; -- doubling close quote is literal quote character
	  RETURN}
        ELSE {
	  IF h.break = NUL THEN {SIGNAL UnterminatedQuote; RETURN};
	  ApplyFilter ← QuoteFilter; EXIT};
      IF filter[h.break, data] THEN EXIT;
      IF skip = none OR skip = whiteSpace THEN RETURN;
      ENDLOOP;
    -- Then accumulate all inClass characters
    value ← GetString[initialLength];
    BEGIN
    ENABLE {
      UNWIND => FreeString[value];
      String.StringBoundsFault =>
        BEGIN
	ns ← String.CopyToNewString[
	  s, Heap.systemZone, s.maxlength + lengthIncrement - s.length];
	FreeString[s];
	RESUME[value ← ns];
	END};
    WHILE h.break # NUL DO
      String.AppendChar[value, h.break];
      IF ~ApplyFilter[h.break ← h.getChar[h], data] THEN 
        BEGIN
        IF ApplyFilter = QuoteFilter THEN {
	  h.break ← h.getChar[h]; -- get next character
	  IF h.break = closeQuote THEN LOOP}; -- of closeQuote, include character in token
	EXIT;
	END;
      ENDLOOP;
    IF ~temporary THEN {
      old: LONG STRING ← value;
      value ← String.CopyToNewString[old, Heap.systemZone];
      FreeString[old]};
    END;
    END;

  -- Standard filters

  Alphabetic: PUBLIC FilterProcType =
    BEGIN
    RETURN[SELECT c FROM IN ['a..'z], IN ['A..'Z] => TRUE, ENDCASE => FALSE];
    END;

  AlphaNumeric: PUBLIC FilterProcType =
    BEGIN
    RETURN[
      SELECT c FROM
        IN ['a..'z], IN ['A..'Z], IN ['0..'9] => TRUE,
        ENDCASE => FALSE];
    END;

  Delimited: PUBLIC FilterProcType =
    -- first non-blank char IS the delimiter. Must be used with skip = nonArpaToken
    BEGIN
    delimiter: LONG POINTER TO CHARACTER = LOOPHOLE[data];
    IF data = NIL THEN ERROR NilData;
    IF data[0] # 0 THEN RETURN[c # delimiter↑];
    IF ~WhiteSpaceInline[c] AND c # NUL THEN delimiter↑ ← c;
    RETURN[FALSE];
    END;

  uninitialized: CARDINAL = 0;
  inHost: CARDINAL = 1;
  startName: CARDINAL = 2;
  inName: CARDINAL = 3;

  FileName: PUBLIC FilterProcType =
    BEGIN
    IF data = NIL THEN ERROR NilData;
    SELECT c FROM
	IN ['a..'z], IN ['A..'Z], IN ['0..'9],  '>, '*, '!, ';, '#, '-,
          '., '$, '+ => 
	  IF data[0] = uninitialized OR data[0] = startName THEN 
	    data[0] ← inName;
        '< => 
	  IF data[0] = uninitialized OR data[0] = startName THEN 
	    data[0] ← inName 
	  ELSE RETURN[FALSE]; 
        '[ => 
	  IF data[0] = uninitialized THEN 
	    data[0] ← inHost 
	  ELSE RETURN[FALSE];
        '] => IF data[0] = inHost THEN data[0] ← startName ELSE RETURN[FALSE];
	ENDCASE => IF data[0] # inHost THEN RETURN[FALSE];
     RETURN[TRUE];
     END;
	
  Line: PUBLIC FilterProcType =
    BEGIN RETURN[SELECT c FROM CR, NUL => FALSE, ENDCASE => TRUE]; END;

  Numeric: PUBLIC FilterProcType =
    BEGIN RETURN[SELECT c FROM IN ['0..'9] => TRUE, ENDCASE => FALSE]; END;

  Switches: PUBLIC FilterProcType =  -- '~, '-, AlphaNumeric
    BEGIN
    RETURN[
      SELECT c FROM
        IN ['a..'z], IN ['A..'Z], IN ['0..'9], '~, '- => TRUE,
        ENDCASE => FALSE];
    END;

  NumberFilter: FilterProcType =
    BEGIN
    RETURN[
      SELECT c FROM
        IN ['0..'9], '+, '-, 'B, 'b, 'D, 'd => TRUE,
        ENDCASE => FALSE];
    END;

  NonWhiteSpace: PUBLIC FilterProcType = 
    BEGIN RETURN[c # NUL AND ~WhiteSpaceInline[c]]; END;

  WhiteSpace: PUBLIC FilterProcType = BEGIN RETURN[WhiteSpaceInline[c]]; END;

  WhiteSpaceInline: PROCEDURE [c: CHARACTER]
    RETURNS [ --isWhiteSpace:-- BOOLEAN] = INLINE
    BEGIN RETURN[SELECT c FROM SP, TAB, CR => TRUE, ENDCASE => FALSE]; END;

  Brackets: PUBLIC QuoteProcType =  -- () [] {} <>
    BEGIN
    RETURN[
      SELECT c FROM '( => '), '[ => '], '{ => '}, '< => '>, ENDCASE => nonQuote];
    END;

  Quote: PUBLIC QuoteProcType =  -- '"
    BEGIN RETURN[SELECT c FROM '', '" => c, ENDCASE => nonQuote]; END;


  -- Type conversion

  LSHandle: TYPE = LONG POINTER TO LSObject;
  LSObject: TYPE = MACHINE DEPENDENT RECORD [
    object: ArpaToken.Object, s: LONG STRING, i: CARDINAL];

  FreeStringHandle: PUBLIC PROCEDURE [h: ArpaToken.Handle]
    RETURNS [nil: ArpaToken.Handle ← NIL] = {Heap.systemZone.FREE[@h]};

  StringToHandle: PUBLIC PROCEDURE [s: LONG STRING, offset: CARDINAL ← 0]
    RETURNS [h: ArpaToken.Handle] =
    BEGIN
    lsh: LSHandle ← Heap.systemZone.NEW[
      LSObject ← [[getChar: StringGetChar, break: Ascii.NUL], s, offset]];
    h ← @lsh.object;
    END;

  FreeArpaTokenString: PUBLIC PROCEDURE [s: LONG STRING]
    RETURNS [nil: LONG STRING ← NIL] = {FreeString[s]};

  StringGetChar: ArpaToken.GetCharProcType =
    BEGIN
    lsh: LSHandle ← LOOPHOLE[h];
    IF lsh.i < String.Length[lsh.s] THEN {
      c ← lsh.s[lsh.i]; lsh.i ← lsh.i + 1}
    ELSE c ← Ascii.NUL;
    END;

  -- SIGNALs

  NilData: PUBLIC SIGNAL = CODE;
  SyntaxError: PUBLIC SIGNAL [s: LONG STRING] = CODE;

  END...