aboutsummaryrefslogtreecommitdiff
path: root/argot.lisp
blob: f79e2edca3951db7965e05ca32a9c042a4f0e671 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
;;;; argot.lisp

(in-package #:argot)


(define-condition invalid-rule-def (error)
  ((rule :reader rule :initarg :rule))
  (:report (lambda (e stream)
             (format stream "The rule ~s is malformed." (car (rule e))))))

(defun nonterminal? (lhs)
  (and (symbolp lhs)
       (let* ((name (symbol-name lhs))
              (length (length name)))
         (and (<= 3 length)
              (eql #\< (elt name 0))
              (eql #\> (elt name (1- length)))))))

(defun var? (var)
  (and var (symbolp var) (not (keywordp var))))

(defun pattern? (pat)
  "Every pattern PAT is either a nonterminal symbol, or a pattern
expression. Such expressions look like (OP . ARGS) where OP is one of
:SEQ :? :* :+ :OR :OR= := :*= :?= :SEQ= :@ :EOF :ITEM"
  (or (nonterminal? pat)
      (and (consp pat)
           (destructuring-bind (op . more) pat
             (case op
               ((:seq :? :* :+ :or)
                (every #'pattern? more))
               ((:or= := :*= :+= :?= :seq=)
                (not (null more)))
               (:@ (and (var? (first more))
                        (not (third more))
                        (pattern? (second more))))
               (:{} (and (car more)
                         (symbolp (car more))
                         (endp (cdr more))))
               ((:item :eof) (endp more)))))))

(defun var-pattern? (pat)
  "VAR-PATTERN? checks that a pattern is a var pattern (:@ VAR PATTERN)
and it returns VAR in that case."
  (and (consp pat)
       (third pat)
       (destructuring-bind (at var . _) pat
         (declare (ignore _))
         (and (eq at :@) (var? var) var))))

(defun collect-vars (pat)
  "Collects all of the variables from the var patterns in PAT and returns them."
  (if-let (var (var-pattern? pat))
    (list var)
    (when (consp pat)
      (append (collect-vars (car pat))
              (collect-vars (cdr pat))))))

(defun parse-rule-def (ruledef)
  (unless (consp ruledef)
    (error 'invalid-rule-def :rule ruledef))
  
  (destructuring-bind (lhs . options) ruledef
    (let ((pattern (getf options :match))
          (check (getf options :if))
          (action (getf options :then))
          (doc (getf options :doc)))
      (unless (and (nonterminal? lhs) (pattern? pattern))
        (error 'invalid-rule-def :rule ruledef))
      (list lhs pattern (collect-vars pattern) check action doc))))


(defun function-form-p (s)
  (or (functionp s)
      (and (consp s)
           (eq 'cl:function (first s))
           (symbolp (second s))
           (endp (cddr s)))))

(defmacro deflanguage (name (&key (documentation "")) &body ruledefs)
  (let ((bindings-var (gensym "BINDINGS")))
    (labels ((collect-let-bindings (lhs vars)
               "Create let-bindings forms for the body of the :action."
               (loop :for var :in vars
                     :collect `(,var (cdr (assoc ',var ,bindings-var))) :into bs
                     :finally (return (cons (list lhs `(cdr (assoc ',lhs ,bindings-var))) bs))))
             (expand-functional-argument (initarg lhs bindings action vars)
               (cond ((and action (symbolp action))
                      (list initarg `(lambda (,bindings-var)
                                       (let ,bindings
                                         (declare (ignorable ,lhs ,@vars))
                                         (,action ,lhs)))))
                     ((function-form-p action)
                      (list initarg `(lambda (,bindings-var)
                                       (let ,bindings
                                         (declare (ignorable ,lhs ,@vars))
                                         (funcall ,action ,lhs)))))
                     ((consp action)
                      (list initarg `(lambda (,bindings-var)
                                       (let ,bindings
                                         (declare (ignorable ,lhs ,@vars))
                                         ,action))))))
             (rule-includer (name lhs pattern &optional check action bindings vars)
               `(include-rule
                 ,name
                 (make-rule
                  :lhs ',lhs
                  :pattern ',pattern
                  ,@(expand-functional-argument :check lhs bindings check vars)
                  ,@(expand-functional-argument :action lhs bindings action vars)))))
      (let* ((parsed-rules
               (loop :for ruledef :in ruledefs
                     :collect (handler-case (parse-rule-def ruledef)
                                (invalid-rule-def (e) (invoke-debugger e)))))
             (rule-adder-forms
               (loop :for (lhs pattern vars check action) :in parsed-rules
                     :for bindings := (collect-let-bindings lhs vars)
                     :collect (rule-includer name lhs pattern check action bindings vars)))
             (docstring
               (with-output-to-string (*standard-output*)
                 (princ documentation)
                 (terpri) (terpri)
                 (loop :for (lhs pattern . more) :in parsed-rules
                       :do (write-rule-doc lhs pattern)
                           (terpri))
                 (terpri)
                 (princ "KEY: ") (terpri)
                 (princ "::TOKEN::  Any ole token") (terpri)
                 (princ "::EOF::    Explicitly match the end of the input") (terpri)
                 (princ "{GRAMMAR}  Parse a sublist of tokens with GRAMMAR") (terpri)
                 (princ "(a|b|..)   One of the alternavites a b ...") (terpri)
                 (princ "PATTERN+   One or more PATTERN") (terpri)
                 (princ "PATTERN*   Zero or more PATTERN") (terpri)
                 (princ "<RULE>     A nonterminal symbol - naming a parse rule") (terpri)
                 (princ "[OPT]      Zero or one of OPT"))))
       `(progn
          (defvar ,name nil)
          (setf ,name (make-instance 'grammar
                                     :documentation ,documentation
                                     :start-rule ',(first (first ruledefs))))
          ,@rule-adder-forms

          (defmacro ,name (&body tokens)
            ,docstring
            (argot:parse ,name tokens)))))))