;;; ========================
;;; 3-LISP Lexical Analyser:
;;; ========================
;;; INIT-LEX Initializes all tables.
;;; ========
;;;
;;; TOKEN-LIST A type of object that embodies the tokenization of a
;;; ========== given string. Allows the following messages:
;;;
;;; CURRENT-TOKEN Returns the token currently being scanned.
;;; =============
;;;
;;; GET-TOKEN Also returns the current token being scanned but
;;; ========= moves past the token in the input. This is the normal
;;; function to be used in accessing tokens. It should be called
;;; once upon setting up a token-list object to initialize the lookahead token
;;; (next-tok).
;;;
;;; NEXT-TOKEN Returns the 'look-ahead' token, the token just about to
;;; ========== be scanned. Useful for dispatching without actually
;;; reading the token.
(define INIT-LEX
(lambda []
(set *special-chars* [ #( #) #[ #] #$ #' #, #. #↑ #\ #{ #} ])
(set *white-space-chars* [ # cr ])
(set *delimiters* (append *white-space-chars* *special-chars*))
(set *digits* [ #0 #1 #2 #3 #4 #5 #6 #7 #8 #9 ])
(set *comment-begin* #;)
(set *comment-end* cr)
"ok"))
;;; Assumes <3-lisp>course>utilities>objects.3-lisp
(define TOKEN-LIST
(object [string]
[[current-tok ""]
[next-tok ""]]
[GET-TOKEN
(lambda []
(let [[token-info (global-get-token string)]]
(set string (second token-info))
(set current-tok next-tok)
(set next-tok (first token-info))
current-tok))]
[CURRENT-TOKEN (lambda [] current-tok)]
[NEXT-TOKEN (lambda [] next-tok)]))
;;; Still to be dealt with:
;;;
;;; -- strings
;;; -- charats
;;; -- character quotation (%)
;;; GLOBAL-GET-TOKEN takes a STRING and returns a rail of:
;;; ================ a) the prefix of the string corresponding to the
;;; first token in the string
;;; b) the rest of the string after the prefix
;;; In case there are no tokens left in the string, returns the
;;; handle 'eot (end of tokens).
;;; Used by GET-TOKEN (within INIT-LEX)
(define GLOBAL-GET-TOKEN
(lambda [string]
(add-to-token "" (strip-leading-whitespace string))))
;;; ADD-TO-TOKEN auxiliary function for GLOBAL-GET-TOKEN which
;;; ============ adds characters to a token found so far (SO-FAR)
;;; taking characters from a string STRING.
;;; The function uses information from global-variables:
;;; *special-chars* and *delimiters* defined by INIT-LEX
(define ADD-TO-TOKEN
(lambda [so-far string]
(cond [(string-null string)
[(if (string-null so-far) 'eot so-far) string]]
[(and (string-null so-far)
(member (string-first string) *special-chars*))
[(substring 1 1 string) (string-rest string)]]
[(member (string-first string) *delimiters*)
[so-far string]]
[$true
(add-to-token (string-append so-far
(substring 1 1 string))
(string-rest string))])))
;;; STRIP-LEADING-WHITESPACE removes all leading white-space characters
;;; ======================== from a string. White-space is defined as
;;; any character in the global variable
;;; *white-space-chars* (defined by INIT-LEX).
(define STRIP-LEADING-WHITESPACE
(lambda [string]
(cond [(string-null string) string]
[(member (string-first string) *white-space-chars*)
(strip-leading-whitespace (string-rest string))]
[(= (string-first string) *comment-begin*)
(strip-leading-whitespace (strip-comment string))]
[$true
string])))
;;; STRIP-COMMENT Removes one full comment prefix from a string. A comment
;;; ============= is defined as any string of characters starting with the
;;; globally bound *comment-begin* character, up through and
;;; including *comment-end* (both defined by INIT-LEX).
(define STRIP-COMMENT
(lambda [string]
(cond [(string-null string) string]
[(= (string-first string) *comment-end*)
(string-rest string)]
[$true
(strip-comment (string-rest string))])))
;;; ===============================================================
;;; AUXILIARY STRING MANIPULATION ROUTINES
;;; ===============================================================
(define STRING-NULL
(lambda [string] (= string "")))
(define STRING-FIRST
(lambda [string] (nth-char 1 string)))
(define STRING-REST
(lambda [string] (substring 2 (string-length string) string)))
;;; TOKENIZE:
;;; =========
(define tokenize
(lambda [string]
(init-lex)
(let [[a (token-list string)]]
(get-token a)
(letrec [[collect (lambda []
(let [[tok (get-token a)]]
(if (= tok 'eot)
[]
(cons tok (collect)))))]]
(collect)))))