;;; RECURSIVE DESCENT PARSER ;;; ;;; -- based on the version by Stuart Shieber ;;; -- Uses the DEFUN notation ;;; ;;; ======================================================================= (define PARSE (lambda [expression start-symbol grammar] (if (full-parse (parse-symbol expression start-symbol grammar)) "accepted" "rejected"))) (define FULL-PARSE (lambda [list-of-tails] (some null list-of-tails))) (define PARSE-SYMBOL (lambda [expression symbol grammar] (if (matches symbol (first expression) grammar) [(rest expression)] (mappend (lambda [rhs] (parse-frontier expression rhs grammar)) (rhsides symbol grammar))))) (define PARSE-FRONTIER (lambda [expression frontier grammar] (cond [(null frontier) [expression]] [(null expression) []] [$t (parse-all (parse-symbol expression (first frontier) grammar) (rest frontier) grammar)]))) (define PARSE-ALL (lambda [fragments frontier grammar] (mappend (lambda [fragment] (parse-frontier fragment frontier grammar)) fragments))) (define MATCHES (lambda [nonterm word grammar] (member [nonterm word] grammar))) (define RHSIDES (lambda [nonterm grammar] (cond [(null grammar) []] [(= nonterm (first (first grammar))) (cons (rest (first grammar)) (rhsides nonterm (rest grammar)))] [$true (rhsides nonterm (rest grammar))]))) ;;; ======================================================================= ;;; ;;; Modified version, that returns the parse: ;;; ;;; Basic idea is to have all the parsing routines return a sequence of ;;; parse/word-sequence pairs, where the parse is the parse so far and ;;; the word-sequence is the postfix of the input sequecne after the part ;;; that has been parsed so far. (define PARSE/ (lambda [expression start-symbol grammar] (let [[good-parses (filter (compose null second) first (parse-symbol/ expression start-symbol grammar))]] (if (null good-parses) "rejected" good-parses)))) (define PARSE-SYMBOL/ (lambda [expression symbol grammar] (if (matches symbol (first expression) grammar) [[[symbol (first expression)] (rest expression)]] (mappend (lambda [rhs] (parse-frontier/ symbol expression rhs grammar)) (rhsides symbol grammar))))) (define PARSE-FRONTIER/ (lambda [symbol expression frontier grammar] (cond [(null frontier) [[symbol expression]]] [(null expression) []] [$t (map (lambda [parse] [[symbol (first parse)] (second parse)]) (parse-all/ (parse-symbol/ expression (first frontier) grammar) (rest frontier) grammar))]))) (define PARSE-ALL/ (lambda [fragments frontier grammar] (mappend (lambda [fragment] (parse-frontier/ (first fragment) (second fragment) frontier grammar)) fragments))) ;;; ================================================================ ;;; Modified version takes functional and applies to constituents as they ;;; are found. ;;; An analysis is a rail of the form: [transduction tail] ;;; A transduction is a function to be applied to the transductions ;;; of the remaining constituents to be found, or (in the base case) ;;; any 3-LISP entity. The transductions are gotten from the grammar. ;;; The grammar format is a rail of rules, each rule of the form: ;;; ;;; [lhs transduction rhs] (define PARSE (lambda [string start-symbol grammar] (let [[good-parses (collect (compose null second) first (parse-symbol id (tokenize string) start-symbol grammar))]] (if (null good-parses) "rejected" good-parses)))) (define PARSE-SYMBOL (lambda [trans tail symbol grammar] (if (matches symbol (first tail)) ; If direct match, return single [[(trans (first tail)) (rest tail)]] ; good parse, transduced (map (lambda [analysis] [(trans (first analysis)) (second analysis)]) (mappend (lambda [rhs] ; otherwise trans analyses (parse-frontier (first rhs) ; obtainable by the recursive tail ; parsing calls (rest rhs) grammar)) (rhsides symbol grammar)))))) (define PARSE-FRONTIER (lambda [trans tail frontier grammar] (cond [(null frontier) [[trans tail]]] [(null tail) []] [$true (parse-all (parse-symbol trans tail (first frontier) grammar) (rest frontier) grammar)]))) (define PARSE-ALL (lambda [analyses frontier grammar] (mappend (lambda [analysis] (parse-frontier (first analysis) (second analysis) frontier grammar)) analyses))) (define MATCHES (lambda [nonterm word] (cond [(= nonterm 'atom-expression) (atom-expression word)] [(= nonterm 'numeral-expression) (numeral-expression word)] [(string word) (= nonterm word)] [$T $F]))) (define ATOM-EXPRESSION (lambda [word] (and (every-character (lambda [ch] (not (member ch *special-chars*))) word) (not (every-character (lambda [ch] (member ch *digits*)) word))))) (define NUMERAL-EXPRESSION (lambda [word] (every-character (lambda [ch] (member ch *digits*)) word))) (define RHSIDES (lambda [nonterm grammar] (collect (lambda [rule] (= nonterm (first rule))) rest grammar))) ;;; Test Cases: (parse-symbol [(lambda [x] x) ['arthur 'slept] ] 's montague-gram) (parse "arthur slept" 's *g*) (parse "arthur wielded excalibur" 's montague-gram) (parse-symbol [(lambda [x] x) ['arthur 'slept] ] 's *g*) ;;; ======================================================================= ;;; Sample grammars: (set *G* [['s (lambda [np] (lambda [vp] ['s np vp])) 'np 'vp] ['vp (lambda [v] (lambda [np] ['vp v np])) 'v 'np] ['vp (lambda [v] ['vp v]) 'v] ['v (lambda [lex] ['v lex]) "slept"] ['v (lambda [lex] ['v lex]) "loved"] ['v (lambda [lex] ['v lex]) "wielded"] ['np (lambda [lex] ['np lex]) "arthur"] ['np (lambda [lex] ['np lex]) "gwen"] ['np (lambda [lex] ['np lex]) "excalibur"] ]) (set montague-gram [['s (lambda [np] (lambda [vp] (pcons vp np))) 'np 'vp] ['vp (lambda [v] (lambda [np] (pcons v np))) 'v 'np] ['vp (lambda [v] v) 'v] ['v (lambda [lex] 'sleep/ ) "slept"] ['v (lambda [lex] 'love/ ) "loved"] ['v (lambda [lex] 'wield/ ) "wielded"] ['np (lambda [lex] 'arthur/ ) "arthur"] ['np (lambda [lex] 'gwen/ ) "gwen"] ['np (lambda [lex] 'excalibur/ ) "excalibur"] ]) (set *G2* [['s 'np 'vp] ['vp 'v1 'np] ['vp 'v0] ['v0 'slept] ['v0 'loved] ['v1 'loved] ['v1 'wielded] ['np 'arthur] ['np 'gwen] ['np 'excalibur]]) (set description-grammar [['expr (lambda [pair] ['expr pair]) 'pair] ['expr (lambda [rail] ['expr rail]) 'rail] ['expr (lambda [atom] ['expr ['atom atom]]) 'atom-expression] ['expr (lambda [numexpr] ['expr ['numeral numexpr]]) 'numeral-expression] ['pair (lambda [paren] (lambda [fun] (lambda [args] ['pair fun (cons 'pair-args args)]))) "(" 'expr 'pair-args] ['pair-args (lambda [paren] []) ")"] ['pair-args (lambda [first] (lambda [rest] (cons first rest))) 'expr 'pair-args] ['rail (lambda [bracket] (lambda [elements] (cons 'rail elements))) "[" 'rail-args] ['rail-args (lambda [bracket] []) "]"] ['rail-args (lambda [first] (lambda [rest] (cons first rest))) 'expr 'rail-args]]) (set recognition-grammar [['expr id 'pair] ['expr id 'rail] ['expr id 'atom-expression] ['pair (lambda [pren] (lambda [fun] (lambda [args] $true))) "(" 'expr 'pair-args] ['pair-args (lambda [pren] $true) ")"] ['pair-args (lambda [first] (lambda [rest] $true)) 'expr 'pair-args] ['rail (lambda [brak] (lambda [elements] $true)) "[" 'rail-args] ['rail-args (lambda [brak] $true) "]"] ['rail-args (lambda [first] (lambda [rest] $true)) 'expr 'rail-args]]) (set structure-grammar [['expr id 'pair] ['expr id 'rail] ['expr (lambda [tok] (atom-notated tok)) 'atom-expression] ['pair (lambda [pren] (lambda [fun] (lambda [args] (pcons fun (rcons . args))))) "(" 'expr 'pair-args] ['pair-args (lambda [pren] []) ")"] ['pair-args (lambda [first] (lambda [rest] (cons first rest))) 'expr 'pair-args] ['rail (lambda [brak] (lambda [elements] (rcons . elements))) "[" 'rail-args] ['rail-args (lambda [brak] []) "]"] ['rail-args (lambda [first] (lambda [rest] (cons first rest))) 'expr 'rail-args]]) (define NORM (lambda [exp] (normalize exp global standard-escape id))) (set process-grammar [['expr id 'pair] ['expr id 'rail] ['expr (lambda [tok] (binding (atom-notated tok) global)) 'atom-expression] ['expr (lambda [numexp] (intern numexp)) 'numeral-expression] ['pair (lambda [paren] (lambda [fun] (lambda [args] (norm (pcons fun (rcons . args)))))) "(" 'expr 'pair-args] ['pair-args (lambda [paren] []) ")"] ['pair-args (lambda [first] (lambda [rest] (cons first rest))) 'expr 'pair-args] ['rail (lambda [bracket] (lambda [elements] (norm (rcons . elements)))) "[" 'rail-args] ['rail-args (lambda [bracket] []) "]"] ['rail-args (lambda [first] (lambda [rest] (cons first rest))) 'expr 'rail-args]]) (parse "(lambda [a b] c d)" 'expr g3) ;;; The following example can be run using the recognition-grammar, ;;; the structure grammar, or the description grammar. However, the ;;; engendered computation barely fits in 3-LISP (on the Dorado). ;;; Be forewarned. (parse " ;;; This is a test of the top-down ;;; recursive descent parser (lambda [x y] (x [y])) ;;; end of test " 'expr description-grammar) ;;; =======================================================================