;;;; argot.lisp (in-package #:argot) (defstruct rule lhs pattern action check) (define-condition invalid-rule-def (error) ((rule :reader rule :initarg :rule))) (defun include-rule (grammar rule) (setf (gethash (rule-lhs rule) grammar) rule)) (defun nonterminal? (lhs) (and (symbolp lhs) (let* ((name (symbol-name lhs)) (length (length name))) (and (<= 3 length) (eql #\< (elt name 0)) (eql #\> (elt name (1- length))))))) (defun var? (var) (and var (symbolp var) (not (keywordp var)))) (defun pattern? (pat) "Every pattern PAT is either a nonterminal symbol, or a pattern expression. Such expressions look like (OP . ARGS) where OP is one of :SEQ :? :* :+ :OR :OR= := :*= :?= :SEQ= :@ :EOF :ITEM" (or (nonterminal? pat) (and (consp pat) (destructuring-bind (op . more) pat (case op ((:seq :? :* :+ :or) (every #'pattern? more)) ((:or= := :*= :+= :?= :seq=) (not (null more))) (:@ (and (var? (first more)) (not (third more)) (pattern? (second more)))) ((:item :eof) (endp more))))))) (defun var-pattern? (pat) "VAR-PATTERN? checks that a pattern is a var pattern (:@ VAR PATTERN) and it returns VAR in that case." (and (consp pat) (third pat) (destructuring-bind (at var . _) pat (declare (ignore _)) (and (eq at :@) (var? var) var)))) (defun collect-vars (pat) "Collects all of the variables from the var patterns in PAT and returns them." (if-let (var (var-pattern? pat)) (list var) (when (consp pat) (append (collect-vars (car pat)) (collect-vars (cdr pat)))))) (defun parse-rule-def (ruledef) (handler-case (ematch ruledef ((guard (list lhs :-> pattern) (and (nonterminal? lhs) (pattern? pattern))) (list lhs pattern (collect-vars pattern) nil nil)) ((guard (list lhs :-> pattern :? check) (and (nonterminal? lhs) (pattern? pattern))) (list lhs pattern (collect-vars pattern) check nil)) ((guard (list lhs :-> pattern :=> action) (and (nonterminal? lhs) (pattern? pattern))) (list lhs pattern (collect-vars pattern) nil action)) ((guard (list lhs :-> pattern :=> action :? check) (and (nonterminal? lhs) (pattern? pattern))) (list lhs pattern (collect-vars pattern) check action)) ((guard (list lhs :-> pattern :? check :=> action) (and (nonterminal? lhs) (pattern? pattern))) (list lhs pattern (collect-vars pattern) check action))) (trivia::match-error () (error 'invalid-rule-def :rule ruledef)))) (defun function-form-p (s) (or (functionp s) (and (consp s) (eq 'cl:function (first s)) (symbolp (second s)) (endp (cddr s))))) (defmacro defgrammar (name (&key (documentation "")) &body ruledefs) (let ((bindings-var (gensym "BINDINGS"))) (labels ((collect-let-bindings (lhs vars) "Create let-bindings forms for the body of the :action." (loop :for var :in vars :collect `(,var (cdr (assoc ',var ,bindings-var))) :into bs :finally (return (cons (list lhs `(cdr (assoc ',lhs ,bindings-var))) bs)))) (expand-functional-argument (initarg lhs bindings action ) (cond ((and action (symbolp action)) (list initarg `(lambda (,bindings-var) (let ,bindings (,action ,lhs))))) ((function-form-p action) (list initarg `(lambda (,bindings-var) (let ,bindings (funcall ,action ,lhs))))) ((consp action) (list initarg `(lambda (,bindings-var) (let ,bindings (declare (ignore ,lhs)) ,action)))))) (rule-includer (name lhs pattern &optional check action bindings) `(include-rule ,name (make-rule :lhs ',lhs :pattern ',pattern ,@(expand-functional-argument :check lhs bindings check) ,@(expand-functional-argument :action lhs bindings action))))) `(progn (defvar ,name nil ,documentation) (setf ,name (make-hash-table :test #'eq)) ,@(loop :for ruledef :in ruledefs :for (lhs pattern vars check action) := (parse-rule-def ruledef) :for bindings := (collect-let-bindings lhs vars) :collect (rule-includer name lhs pattern check action bindings)) ,name)))) (defvar *position*) (defvar *grammar*) (defvar *tokens*) (defvar *bindings*) (defun succeed (result) (values result t)) (defun fail () (values nil nil)) (defparameter +eof+ #()) (defun next-token () (if (< *position* (length *tokens*)) (prog1 (elt *tokens* *position*) (incf *position*)) +eof+)) (defun update-bindings (var value) (setf *bindings* (acons var value *bindings*))) (defmacro try-parse ((&optional (result (gensym))) parse-expression &optional (success-clause `(succeed ,result)) (fail-clause '(fail))) (alexandria:with-gensyms (successp pos) `(let ((,pos *position*)) (multiple-value-bind (,result ,successp) ,parse-expression (if ,successp ,success-clause (progn (setf *position* ,pos) ,fail-clause)))))) (defmacro if-parse ((&optional (result (gensym))) parse-expression &optional (success-clause `(succeed ,result)) (fail-clause '(fail))) (alexandria:with-gensyms (successp) `(multiple-value-bind (,result ,successp) ,parse-expression (if ,successp ,success-clause ,fail-clause)))) (defun parse-sequence (patterns) "Parse each member of PATTERNS and succeed with a list of results. If any pattern fails the whole parse fails." (loop :for pattern :in patterns :for (result successp) := (multiple-value-list (parse-pattern pattern)) :if successp :collect result :into results :else :return (fail) :finally (return (succeed results)))) (defun parse-optional (pattern) (try-parse (res) (parse-pattern pattern) (succeed res) (succeed nil))) (defun parse-kleene (pattern) (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern))) :while successp :collect res :into results :finally (return-from parse-kleene (succeed results)))) (defun parse-one-or-more (pattern) (if-parse (first) (parse-pattern pattern) (if-parse (rest) (parse-kleene pattern) (succeed (cons first rest))))) (defun parse-alernatives (patterns) (if (endp patterns) (fail) (try-parse (result) (parse-pattern (first patterns)) (succeed result) (parse-alernatives (rest patterns))))) (defun literals->patterns (literals) (loop :for l :in literals :collect (list := l))) (defun literals-equal? (a b) (eq a b)) (defun parse-literal (literal) (if (literals-equal? (next-token) literal) (succeed literal) (fail))) (defun parse-literal-alternatives (literals) (parse-alernatives (literals->patterns literals))) (defun parse-kleene-literal (literal) (parse-kleene (list := literal))) (defun parse-one-or-more-literal (literal) (parse-one-or-more (list := literal))) (defun parse-optional-literal (literal) (parse-optional (list := literal))) (defun parse-literal-sequence (literals) (parse-sequence (literals->patterns literals))) (defun parse-binding-pattern (var pattern) (if-parse (result) (parse-pattern pattern) (progn (update-bindings var result) (succeed result)))) (defun parse-item () (let ((token (next-token))) (if (not (eq token +eof+)) (succeed token) (fail)))) (defun parse-eof () (if (eq +eof+ (next-token)) (succeed nil) (fail))) (defun parse-pattern (pattern) (if (nonterminal? pattern) (parse-rule pattern) (destructuring-bind (op . args) pattern (case op (:seq (parse-sequence args)) (:? (parse-optional (first args))) (:* (parse-kleene (first args))) (:+ (parse-one-or-more (first args))) (:or (parse-alernatives args)) (:or= (parse-literal-alternatives args)) (:= (parse-literal (first args))) (:*= (parse-kleene-literal (first args))) (:+= (parse-one-or-more-literal (first args))) (:?= (parse-optional-literal (first args))) (:seq= (parse-literal-sequence args)) (:@ (parse-binding-pattern (first args) (second args))) (:item (parse-item)) (:eof (parse-eof)))))) (defun handle-post-rule (rule result) (with-slots (lhs check action) rule (let ((bindings (acons lhs result *bindings*))) (when check (unless (funcall check bindings) (return-from handle-post-rule (fail)))) (if action (succeed (funcall action bindings)) (succeed result))))) (defun parse-rule (nonterminal) (if-let (rule (gethash nonterminal *grammar*)) (let ((*bindings* nil)) (if-parse (result) (parse-pattern (rule-pattern rule)) (handle-post-rule rule result))) (fail))) (define-condition argot-parse-error (error) ((position :initarg :position) (token :initarg :token) (input :initarg :input))) (defun parse (grammar rule tokens) (let ((*position* 0) (*grammar* grammar) (*tokens* (coerce tokens 'vector))) (multiple-value-bind (result successp) (parse-rule rule) (unless successp (error 'argot-parse-error :position *position* :input *tokens* :token (elt *tokens* (1- *position*)))) result)))