;;;; argot.lisp (in-package #:argot) (defconstant grammar-property 'grammar-property "Symbol in the ARGOT package used to store grammars on symbols' property lists.") (define-condition invalid-rule-def (error) ((rule :reader rule :initarg :rule)) (:report (lambda (e stream) (format stream "The rule ~s is malformed." (car (rule e)))))) (defun nonterminal? (lhs) (and (symbolp lhs) (let* ((name (symbol-name lhs)) (length (length name))) (and (<= 3 length) (eql #\< (elt name 0)) (eql #\> (elt name (1- length))))))) (defun var? (var) (and var (symbolp var) (not (keywordp var)))) (defun pattern? (pat) "Every pattern PAT is either a nonterminal symbol, or a pattern expression. Such expressions look like (OP . ARGS) where OP is one of :SEQ :? :* :+ :OR :OR= := :*= :?= :SEQ= :@ :EOF :ITEM" (or (nonterminal? pat) (and (consp pat) (destructuring-bind (op . more) pat (case op ((:seq :? :* :+ :or) (every #'pattern? more)) ((:or= := :*= :+= :?= :seq=) (not (null more))) (:@ (and (var? (first more)) (not (third more)) (pattern? (second more)))) (:{} (and (car more) (symbolp (car more)) (endp (cdr more)))) ((:item :eof) (endp more))))))) (defun var-pattern? (pat) "VAR-PATTERN? checks that a pattern is a var pattern (:@ VAR PATTERN) and it returns VAR in that case." (and (consp pat) (third pat) (destructuring-bind (at var . _) pat (declare (ignore _)) (and (eq at :@) (var? var) var)))) (defun collect-vars (pat) "Collects all of the variables from the var patterns in PAT and returns them." (if-let (var (var-pattern? pat)) (list var) (when (consp pat) (append (collect-vars (car pat)) (collect-vars (cdr pat)))))) (defun parse-rule-def (ruledef) (unless (consp ruledef) (error 'invalid-rule-def :rule ruledef)) (destructuring-bind (lhs . options) ruledef (let ((pattern (getf options :match)) (check (getf options :if)) (action (getf options :then)) (note (getf options :note))) (unless (and (nonterminal? lhs) (pattern? pattern)) (error 'invalid-rule-def :rule ruledef)) (list lhs pattern (collect-vars pattern) check action note)))) (defun function-form-p (s) (or (functionp s) (and (consp s) (eq 'cl:function (first s)) (symbolp (second s)) (endp (cddr s))))) (defmacro deflanguage (name (&key (documentation "") literals=) &body ruledefs) (let ((bindings-var (gensym "BINDINGS"))) (labels ((collect-let-bindings (lhs vars) "Create let-bindings forms for the body of the :action." (loop :for var :in vars :collect `(,var (cdr (assoc ',var ,bindings-var))) :into bs :finally (return (cons (list lhs `(cdr (assoc ',lhs ,bindings-var))) bs)))) (expand-functional-argument (initarg lhs bindings action vars) (cond ((and action (symbolp action)) (list initarg `(lambda (,bindings-var) (let ,bindings (declare (ignorable ,lhs ,@vars)) (,action ,lhs))))) ((function-form-p action) (list initarg `(lambda (,bindings-var) (let ,bindings (declare (ignorable ,lhs ,@vars)) (funcall ,action ,lhs))))) ((consp action) (list initarg `(lambda (,bindings-var) (let ,bindings (declare (ignorable ,lhs ,@vars)) ,action)))))) (rule-includer (name lhs pattern &optional check action bindings vars note) `(include-rule (get ',name 'argot::grammar-property) (make-rule :lhs ',lhs :pattern ',pattern :note ,note ,@(expand-functional-argument :check lhs bindings check vars) ,@(expand-functional-argument :action lhs bindings action vars))))) (let* ((parsed-rules (loop :for ruledef :in ruledefs :collect (handler-case (parse-rule-def ruledef) (invalid-rule-def (e) (invoke-debugger e))))) (rule-adder-forms (loop :for (lhs pattern vars check action note) :in parsed-rules :for bindings := (collect-let-bindings lhs vars) :collect (rule-includer name lhs pattern check action bindings vars note))) (docstring (with-output-to-string (*standard-output*) (princ documentation) (terpri) (terpri) (loop :for (lhs pattern . more) :in parsed-rules :do (write-rule-doc lhs pattern) (terpri)) (princ "------------------------------------------") (terpri) (princ "ADDITIONAL NOTES:") (terpri) (loop :for (lhs _p _v _c _a note) :in parsed-rules :when note :do (format *standard-output* "~15a ~a~%" (rule-name-string lhs) note)) (princ "------------------------------------------") (terpri) (princ "KEY: ") (terpri) (princ "token Any ole token") (terpri) (princ "eof Explicitly match the end of the input") (terpri) (princ "{LANGUAGE} Parse a sublist of tokens with LANGUAGE") (terpri) (princ "(A|B|...) One of the alternavites a b ...") (terpri) (princ "PATTERN+ One or more PATTERN") (terpri) (princ "PATTERN* Zero or more PATTERN") (terpri) (princ "OPT? Zero or one of OPT")))) `(progn (setf (get ',name 'argot::grammar-property) (make-instance 'grammar :documentation ,documentation :start-rule ',(first (first ruledefs)) ,@(when literals= `(:literal-comparator ,literals=)))) ,@rule-adder-forms (defmacro ,name (&body tokens) ,docstring (argot:parse (get ',name 'argot::grammar-property) tokens)))))))