;;; ======================== ;;; 3-LISP Lexical Analyser: ;;; ======================== ;;; INIT-LEX Initializes all tables. ;;; ======== ;;; ;;; TOKEN-LIST A type of object that embodies the tokenization of a ;;; ========== given string. Allows the following messages: ;;; ;;; CURRENT-TOKEN Returns the token currently being scanned. ;;; ============= ;;; ;;; GET-TOKEN Also returns the current token being scanned but ;;; ========= moves past the token in the input. This is the normal ;;; function to be used in accessing tokens. It should be called ;;; once upon setting up a token-list object to initialize the lookahead token ;;; (next-tok). ;;; ;;; NEXT-TOKEN Returns the 'look-ahead' token, the token just about to ;;; ========== be scanned. Useful for dispatching without actually ;;; reading the token. (define INIT-LEX (lambda [] (set *special-chars* [ #( #) #[ #] #$ #' #, #. #^ #\ #{ #} ]) (set *white-space-chars* [ # cr ]) (set *delimiters* (append *white-space-chars* *special-chars*)) (set *digits* [ #0 #1 #2 #3 #4 #5 #6 #7 #8 #9 ]) (set *comment-begin* #;) (set *comment-end* cr) "ok")) ;;; Assumes <3-lisp>course>utilities>objects.3-lisp (define TOKEN-LIST (object [string] [[current-tok ""] [next-tok ""]] [GET-TOKEN (lambda [] (let [[token-info (global-get-token string)]] (set string (second token-info)) (set current-tok next-tok) (set next-tok (first token-info)) current-tok))] [CURRENT-TOKEN (lambda [] current-tok)] [NEXT-TOKEN (lambda [] next-tok)])) ;;; Still to be dealt with: ;;; ;;; -- strings ;;; -- charats ;;; -- character quotation (%) ;;; GLOBAL-GET-TOKEN takes a STRING and returns a rail of: ;;; ================ a) the prefix of the string corresponding to the ;;; first token in the string ;;; b) the rest of the string after the prefix ;;; In case there are no tokens left in the string, returns the ;;; handle 'eot (end of tokens). ;;; Used by GET-TOKEN (within INIT-LEX) (define GLOBAL-GET-TOKEN (lambda [string] (add-to-token "" (strip-leading-whitespace string)))) ;;; ADD-TO-TOKEN auxiliary function for GLOBAL-GET-TOKEN which ;;; ============ adds characters to a token found so far (SO-FAR) ;;; taking characters from a string STRING. ;;; The function uses information from global-variables: ;;; *special-chars* and *delimiters* defined by INIT-LEX (define ADD-TO-TOKEN (lambda [so-far string] (cond [(string-null string) [(if (string-null so-far) 'eot so-far) string]] [(and (string-null so-far) (member (string-first string) *special-chars*)) [(substring 1 1 string) (string-rest string)]] [(member (string-first string) *delimiters*) [so-far string]] [$true (add-to-token (string-append so-far (substring 1 1 string)) (string-rest string))]))) ;;; STRIP-LEADING-WHITESPACE removes all leading white-space characters ;;; ======================== from a string. White-space is defined as ;;; any character in the global variable ;;; *white-space-chars* (defined by INIT-LEX). (define STRIP-LEADING-WHITESPACE (lambda [string] (cond [(string-null string) string] [(member (string-first string) *white-space-chars*) (strip-leading-whitespace (string-rest string))] [(= (string-first string) *comment-begin*) (strip-leading-whitespace (strip-comment string))] [$true string]))) ;;; STRIP-COMMENT Removes one full comment prefix from a string. A comment ;;; ============= is defined as any string of characters starting with the ;;; globally bound *comment-begin* character, up through and ;;; including *comment-end* (both defined by INIT-LEX). (define STRIP-COMMENT (lambda [string] (cond [(string-null string) string] [(= (string-first string) *comment-end*) (string-rest string)] [$true (strip-comment (string-rest string))]))) ;;; =============================================================== ;;; AUXILIARY STRING MANIPULATION ROUTINES ;;; =============================================================== (define STRING-NULL (lambda [string] (= string ""))) (define STRING-FIRST (lambda [string] (nth-char 1 string))) (define STRING-REST (lambda [string] (substring 2 (string-length string) string))) ;;; TOKENIZE: ;;; ========= (define tokenize (lambda [string] (init-lex) (let [[a (token-list string)]] (get-token a) (letrec [[collect (lambda [] (let [[tok (get-token a)]] (if (= tok 'eot) [] (cons tok (collect)))))]] (collect)))))