;;; ========================
;;; 3-LISP Lexical Analyser:
;;; ========================
;;; INIT-LEX Initializes all tables.
;;; ========
;;;
;;; SETUP-LEX Sets up functions for retrieving tokens from a given string
;;; ========= STRING. It makes available the following ancillary functions:
;;;
;;; CURRENT-TOKEN Returns the token currently being scanned.
;;; =============
;;;
;;; GET-TOKEN Also returns the current token being scanned but
;;; ========= moves past the token in the input. This is the normal
;;; function to be used in accessing tokens.
;;;
;;; NEXT-TOKEN Returns the 'look-ahead' token, the token just about to
;;; ========== be scanned. Useful for dispatching without actually
;;; reading the token.
(define INIT-LEX
(lambda []
(set *special-chars* [ #( #) #[ #] #$ #' #, #. #↑ #\ #{ #} ])
(set *white-space-chars* [ # cr ])
(set *delimiters* (append *white-space-chars* *special-chars*))
(set *digits* [ #0 #1 #2 #3 #4 #5 #6 #7 #8 #9 ])
(set *comment-begin* #;)
(set *comment-end* cr)
"ok"))
(define SETUP-LEX
(lambda [string]
(let [[current-tok ""]
[next-tok ""]]
(define GET-TOKEN
(lambda []
(let [[token-info (global-get-token string)]]
(set string (second token-info))
(set current-tok next-tok)
(set next-tok (first token-info))
current-tok)))
(define CURRENT-TOKEN
(lambda [] current-tok))
(define NEXT-TOKEN
(lambda [] next-tok))
(get-token))))
;;; Still to be deal with:
;;;
;;; -- strings
;;; -- charats
;;; -- character quotation (%)
;;; GLOBAL-GET-TOKEN takes a STRING and returns a rail of:
;;; ================ a) the prefix of the string corresponding to the
;;; first token in the string
;;; b) the rest of the string after the prefix
;;; In case there are no tokens left in the string, returns the
;;; handle 'eot (end of tokens).
;;; Used by GET-TOKEN (within INIT-LEX)
(define GLOBAL-GET-TOKEN
(lambda [string]
(add-to-token "" (strip-leading-whitespace string))))
;;; ADD-TO-TOKEN auxiliary function for GLOBAL-GET-TOKEN which
;;; ============ adds characters to a token found so far (SO-FAR)
;;; taking characters from a string STRING.
;;; The function uses information from global-variables:
;;; *special-chars* and *delimiters* defined by INIT-LEX
(define ADD-TO-TOKEN
(lambda [so-far string]
(cond [(string-null string)
[(if (string-null so-far) 'eot so-far) string]]
[(= (string-first string) *comment-begin*)
(add-to-token so-far (strip-comment string))]
[(and (string-null so-far)
(member (string-first string) *special-chars*))
[(substring 1 1 string) (string-rest string)]]
[(member (string-first string) *delimiters*)
[so-far string]]
[$true
(add-to-token (string-append so-far
(substring 1 1 string))
(string-rest string))])))
;;; STRIP-LEADING-WHITESPACE removes all leading white-space characters
;;; ======================== from a string. White-space is defined as
;;; any character in the global variable
;;; *white-space-chars* (defined by INIT-LEX).
(define STRIP-LEADING-WHITESPACE
(lambda [string]
(cond [(string-null string) string]
[(member (string-first string) *white-space-chars*)
(strip-leading-whitespace (string-rest string))]
[(= (string-first string) *comment-begin*)
(strip-leading-whitespace (strip-comment string))]
[$true
string])))
;;; STRIP-COMMENT Removes one full comment prefix from a string. A comment
;;; ============= is defined as any string of characters starting with the
;;; globally bound *comment-begin* character, up through and
;;; including *comment-end* (both defined by INIT-LEX).
(define STRIP-COMMENT
(lambda [string]
(cond [(string-null string) string]
[(= (string-first string) *comment-end*)
(string-rest string)]
[$true
(strip-comment (string-rest string))])))
;;; ===============================================================
;;; AUXILIARY STRING MANIPULATION ROUTINES
;;; ===============================================================
(define STRING-NULL
(lambda [string] (= string "")))
(define STRING-FIRST
(lambda [string] (nth-char 1 string)))
(define STRING-REST
(lambda [string] (substring 2 (string-length string) string)))
;;; ===============================================================
;;; TESTING and DEBUGGING ROUTINES
;;; ===============================================================
(define tokenize
(lambda [string]
(init-lex)
(setup-lex string)
(letrec [[collect (lambda []
(let [[tok (get-token)]]
(if (= tok 'eot)
[]
(cons tok (collect)))))]]
(collect))))