;;;         RECURSIVE DESCENT PARSER
;;;
;;; -- based on the version by Stuart Shieber
;;; -- Uses the DEFUN notation
;;; 
;;; =======================================================================

(define PARSE
   (lambda [expression start-symbol grammar]
      (if (full-parse (parse-symbol expression start-symbol grammar))
          "accepted"
          "rejected")))

(define FULL-PARSE
   (lambda [list-of-tails]
      (some null list-of-tails)))

(define PARSE-SYMBOL
   (lambda [expression symbol grammar]
      (if (matches symbol (first expression) grammar)
          [(rest expression)]
          (mappend (lambda [rhs] (parse-frontier expression rhs grammar))
                   (rhsides symbol grammar)))))

(define PARSE-FRONTIER
   (lambda [expression frontier grammar]
      (cond [(null frontier) [expression]]
            [(null expression) []]
            [$t (parse-all (parse-symbol expression (first frontier) grammar)
                           (rest frontier)
                           grammar)])))

(define PARSE-ALL
   (lambda [fragments frontier grammar]
      (mappend (lambda [fragment]
                  (parse-frontier fragment frontier grammar))
               fragments)))

(define MATCHES
   (lambda [nonterm word grammar]
      (member [nonterm word] grammar)))

(define RHSIDES
   (lambda [nonterm grammar]
      (cond [(null grammar) []]
            [(= nonterm (first (first grammar)))
             (cons (rest (first grammar))
                   (rhsides nonterm (rest grammar)))]
            [$true (rhsides nonterm (rest grammar))])))

;;; =======================================================================
;;; 
;;; Modified version, that returns the parse:
;;; 
;;; Basic idea is to have all the parsing routines return a sequence of
;;; parse/word-sequence pairs, where the parse is the parse so far and
;;; the word-sequence is the postfix of the input sequecne after the part
;;; that has been parsed so far. 

(define PARSE/
   (lambda [expression start-symbol grammar]
      (let [[good-parses
               (filter (compose null second)
                       first
                       (parse-symbol/ expression start-symbol grammar))]]
         (if (null good-parses)
             "rejected"
             good-parses))))

(define PARSE-SYMBOL/
   (lambda [expression symbol grammar]
      (if (matches symbol (first expression) grammar)
          [[[symbol (first expression)] (rest expression)]]
          (mappend (lambda [rhs]
                      (parse-frontier/ symbol expression rhs grammar))
                   (rhsides symbol grammar)))))

(define PARSE-FRONTIER/
   (lambda [symbol expression frontier grammar]
      (cond [(null frontier) [[symbol expression]]]
            [(null expression) []]
            [$t (map (lambda [parse]
                        [[symbol (first parse)]
                         (second parse)])
                     (parse-all/ (parse-symbol/ expression
                                                (first frontier)
                                                grammar)
                              (rest frontier)
                              grammar))])))

(define PARSE-ALL/
   (lambda [fragments frontier grammar]
      (mappend (lambda [fragment]
                  (parse-frontier/ (first fragment)
                                   (second fragment)
                                   frontier
                                   grammar))
               fragments)))

;;; ================================================================


;;; Modified version takes functional and applies to constituents as they
;;; are found.

;;; An analysis is a rail of the form: [transduction tail]

;;; A transduction is a function to be applied to the transductions
;;; of the remaining constituents to be found, or (in the base case)
;;; any 3-LISP entity.  The transductions are gotten from the grammar.

;;; The grammar format is a rail of rules, each rule of the form:
;;; 
;;;        [lhs transduction rhs]

(define PARSE
   (lambda [string start-symbol grammar]
      (let [[good-parses
               (collect (compose null second)
                        first
                        (parse-symbol id
                                      (tokenize string)
                                      start-symbol
                                      grammar))]]
          (if (null good-parses)
              "rejected"
              good-parses))))

(define PARSE-SYMBOL 
  (lambda [trans tail symbol grammar]
    (if (matches symbol (first tail))             ; If direct match, return single
        [[(trans (first tail)) (rest tail)]]      ; good parse, transduced
        (map (lambda [analysis]
                [(trans (first analysis))
                 (second analysis)])
             (mappend (lambda [rhs]                  ; otherwise trans analyses
                         (parse-frontier (first rhs) ; obtainable by the recursive
                                         tail        ; parsing calls
                                         (rest rhs)
                                         grammar))
                      (rhsides symbol grammar))))))

(define PARSE-FRONTIER
   (lambda [trans tail frontier grammar]
      (cond [(null frontier) [[trans tail]]]
            [(null tail) []]
            [$true (parse-all (parse-symbol trans tail (first frontier) grammar)
                              (rest frontier)
                              grammar)])))

(define PARSE-ALL
   (lambda [analyses frontier grammar]
      (mappend (lambda [analysis]
                  (parse-frontier (first analysis)
                                  (second analysis)
                                  frontier
                                  grammar))
               analyses)))

(define MATCHES 
   (lambda [nonterm word]
      (cond [(= nonterm 'atom-expression) (atom-expression word)]
            [(= nonterm 'numeral-expression) (numeral-expression word)]
            [(string word) (= nonterm word)]
            [$T $F])))

(define ATOM-EXPRESSION
   (lambda [word]
      (and (every-character
               (lambda [ch] (not (member ch *special-chars*)))
               word)
           (not (every-character
                   (lambda [ch] (member ch *digits*))
                   word)))))

(define NUMERAL-EXPRESSION
   (lambda [word]
      (every-character
          (lambda [ch] (member ch *digits*))
          word)))

(define RHSIDES
   (lambda [nonterm grammar]
      (collect (lambda [rule] (= nonterm (first rule)))
               rest
               grammar)))

;;; Test Cases:

(parse-symbol [(lambda [x] x) ['arthur 'slept] ] 's montague-gram)
(parse "arthur slept" 's *g*)
(parse "arthur wielded excalibur" 's montague-gram)
(parse-symbol [(lambda [x] x) ['arthur 'slept] ] 's *g*)


;;; =======================================================================

;;; Sample grammars:

(set *G*
     [['s (lambda [np] (lambda [vp] ['s np vp])) 'np 'vp]
      ['vp (lambda [v] (lambda [np] ['vp v np])) 'v 'np]
      ['vp (lambda [v] ['vp v]) 'v]
      ['v (lambda [lex] ['v lex]) "slept"]
      ['v (lambda [lex] ['v lex]) "loved"]
      ['v (lambda [lex] ['v lex]) "wielded"]
      ['np (lambda [lex] ['np lex]) "arthur"]
      ['np (lambda [lex] ['np lex]) "gwen"]
      ['np (lambda [lex] ['np lex]) "excalibur"]
     ])

(set montague-gram
     [['s (lambda [np] (lambda [vp] (pcons vp np))) 'np 'vp]
      ['vp (lambda [v] (lambda [np] (pcons v np))) 'v 'np]
      ['vp (lambda [v] v) 'v]
      ['v (lambda [lex] 'sleep/ ) "slept"]
      ['v (lambda [lex] 'love/ ) "loved"]
      ['v (lambda [lex] 'wield/ ) "wielded"]
      ['np (lambda [lex] 'arthur/ ) "arthur"]
      ['np (lambda [lex] 'gwen/ ) "gwen"]
      ['np (lambda [lex] 'excalibur/ ) "excalibur"]
     ])

(set *G2*
     [['s 'np 'vp]
      ['vp 'v1 'np]
      ['vp 'v0]
      ['v0 'slept]
      ['v0 'loved]
      ['v1 'loved]
      ['v1 'wielded]
      ['np 'arthur]
      ['np 'gwen]
      ['np 'excalibur]])

(set description-grammar
     [['expr (lambda [pair] ['expr pair])
             'pair] 
      ['expr (lambda [rail] ['expr rail])
             'rail] 
      ['expr (lambda [atom] ['expr ['atom atom]])
             'atom-expression] 
      ['expr (lambda [numexpr] ['expr ['numeral numexpr]])
             'numeral-expression]
      ['pair (lambda [paren]
                (lambda [fun]
                   (lambda [args] ['pair fun (cons 'pair-args args)])))
             "(" 'expr 'pair-args]
      ['pair-args (lambda [paren] [])
                  ")"]
      ['pair-args (lambda [first]
                     (lambda [rest] (cons first rest)))
                  'expr 'pair-args]
      ['rail (lambda [bracket]
                (lambda [elements] (cons 'rail elements)))
             "[" 'rail-args]
      ['rail-args (lambda [bracket] [])
                  "]"]
      ['rail-args (lambda [first]
                     (lambda [rest] (cons first rest)))
                  'expr 'rail-args]])

(set recognition-grammar
     [['expr id 'pair] 
      ['expr id 'rail] 
      ['expr id 'atom-expression]
      ['pair (lambda [pren] (lambda [fun] (lambda [args] $true)))
             "(" 'expr 'pair-args]
      ['pair-args (lambda [pren] $true) ")"]
      ['pair-args (lambda [first] (lambda [rest] $true))
                  'expr 'pair-args]
      ['rail (lambda [brak] (lambda [elements] $true))
             "[" 'rail-args]
      ['rail-args (lambda [brak] $true) "]"]
      ['rail-args (lambda [first] (lambda [rest] $true))
                  'expr 'rail-args]])

(set structure-grammar
     [['expr id 'pair] 
      ['expr id 'rail] 
      ['expr (lambda [tok] (atom-notated tok)) 'atom-expression]
      ['pair (lambda [pren] (lambda [fun] (lambda [args]
                                             (pcons fun (rcons . args)))))
             "(" 'expr 'pair-args]
      ['pair-args (lambda [pren] []) ")"]
      ['pair-args (lambda [first] (lambda [rest] (cons first rest)))
                  'expr 'pair-args]
      ['rail (lambda [brak] (lambda [elements] (rcons . elements)))
             "[" 'rail-args]
      ['rail-args (lambda [brak] []) "]"]
      ['rail-args (lambda [first] (lambda [rest] (cons first rest)))
                  'expr 'rail-args]])

(define NORM
   (lambda [exp]
      (normalize exp global standard-escape id)))

(set process-grammar
     [['expr id 'pair] 
      ['expr id 'rail] 
      ['expr
       (lambda [tok] (binding (atom-notated tok) global))
       'atom-expression]
      ['expr
       (lambda [numexp] (intern numexp))
       'numeral-expression]
      ['pair
       (lambda [paren]
          (lambda [fun]
             (lambda [args]
                (norm (pcons fun (rcons . args))))))
       "(" 'expr 'pair-args]
      ['pair-args
       (lambda [paren] [])
       ")"]
      ['pair-args
       (lambda [first]
          (lambda [rest]
             (cons first rest)))
       'expr 'pair-args]
      ['rail
       (lambda [bracket]
          (lambda [elements]
             (norm (rcons . elements))))
       "[" 'rail-args]
      ['rail-args
       (lambda [bracket] [])
       "]"]
      ['rail-args
       (lambda [first]
          (lambda [rest]
             (cons first rest)))
       'expr 'rail-args]])



(parse "(lambda [a b] c d)" 'expr g3)

;;; The following example can be run using the recognition-grammar,
;;; the structure grammar, or the description grammar.  However, the
;;; engendered computation barely fits in 3-LISP (on the Dorado).
;;; Be forewarned.

(parse "

;;; This is a test of the top-down
;;; recursive descent parser

(lambda [x y]
   (x [y]))

;;; end of test


"

'expr description-grammar)


;;; =======================================================================