From a2f6e93519695cc145539049af60d3e41ec52fad Mon Sep 17 00:00:00 2001 From: colin <colin@cicadas.surf> Date: Tue, 18 Jul 2023 18:04:41 -0700 Subject: Initial commit --- argot.lisp | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) create mode 100644 argot.lisp (limited to 'argot.lisp') diff --git a/argot.lisp b/argot.lisp new file mode 100644 index 0000000..1f3d5d2 --- /dev/null +++ b/argot.lisp @@ -0,0 +1,232 @@ +;;;; argot.lisp + +(in-package #:argot) + +(defstruct rule lhs pattern action) + +(define-condition invalid-rule-def (error) + ((rule :reader rule :initarg :rule))) + +(defun include-rule (grammar rule) + (setf (gethash (rule-lhs rule) grammar) rule)) + +(defun nonterminal? (lhs) + (and (symbolp lhs) + (let* ((name (symbol-name lhs)) + (length (length name))) + (and (<= 3 length) + (eql #\< (elt name 0)) + (eql #\> (elt name (1- length))))))) + +(defun var? (var) + (and var (symbolp var) (not (keywordp var)))) + +(defun pattern? (pat) + "Every pattern PAT is either a nonterminal symbol, or a pattern +expression. Such expressions look like (OP . ARGS) where OP is one of +:SEQ :? :* :+ :OR :OR= := :*= :?= :SEQ= :@ :EOF" + (or (nonterminal? pat) + (and (consp pat) + (destructuring-bind (op . more) pat + (case op + ((:seq :? :* :+ :or) + (every #'pattern? more)) + ((:or= := :*= :+= :?= :seq=) + (not (null more))) + (:@ (and (var? (first more)) + (not (third more)) + (pattern? (second more)))) + (:eof (endp more))))))) + +(defun var-pattern? (pat) + "VAR-PATTERN? checks that a pattern is a var pattern (:@ VAR PATTERN) +and it returns VAR in that case." + (and (consp pat) + (third pat) + (destructuring-bind (at var . _) pat + (declare (ignore _)) + (and (eq at :@) (var? var) var)))) + +(defun collect-vars (pat) + "Collects all of the variables from the var patterns in PAT and returns them." + (if-let (var (var-pattern? pat)) + (list var) + (when (consp pat) + (append (collect-vars (car pat)) + (collect-vars (cdr pat)))))) + +(defun parse-rule-def (ruledef) + (handler-case + (ematch ruledef + ((guard (list lhs :-> pattern) + (and (nonterminal? lhs) (pattern? pattern))) + (list lhs pattern (collect-vars pattern) nil)) + + ((guard (list lhs :-> pattern :=> action) + (and (nonterminal? lhs) (pattern? pattern))) + (list lhs pattern (collect-vars pattern) action))) + (trivia::match-error () + (error 'invalid-rule-def :rule ruledef)))) + +(defmacro defgrammar (name (&key (documentation "")) &body ruledefs) + (let ((bindings-var (gensym "BINDINGS"))) + `(progn + (defvar ,name nil ,documentation) + (setf ,name (make-hash-table :test #'eq)) + ,@(loop :for ruledef :in ruledefs + :for (lhs pattern vars action) := (parse-rule-def ruledef) + :for bindings + := (loop :for var :in vars + :collect `(,var (cdr (assoc ',var ,bindings-var)))) + :collect `(include-rule + ,name + (make-rule + :lhs ',lhs + :pattern ',pattern + ,@(when action + (list + :action `(lambda (,bindings-var) (let ,bindings ,action))))))) + ,name))) + +(defvar *position*) +(defvar *grammar*) +(defvar *tokens*) +(defvar *bindings*) + +(defun succeed (result) + (values result t)) + +(defun fail () + (values nil nil)) + +(defparameter +eof+ #()) + +(defun next-token () + (if (< *position* (length *tokens*)) + (prog1 (elt *tokens* *position*) + (incf *position*)) + +eof+)) + +(defun update-bindings (var value) + (setf *bindings* (acons var value *bindings*))) + +(defmacro try-parse + ((&optional (result (gensym))) + parse-expression + &optional + (success-clause `(succeed ,result)) + (fail-clause '(fail))) + (alexandria:with-gensyms (successp pos) + `(let ((,pos *position*)) + (multiple-value-bind (,result ,successp) ,parse-expression + (if ,successp ,success-clause + (progn + (setf *position* ,pos) + ,fail-clause)))))) + +(defun parse-sequence (patterns) + "Parse each member of PATTERNS and succeed with a list of results. If +any pattern fails the whole parse fails." + (try-parse + () (loop :for pattern :in patterns + :for (result successp) + := (multiple-value-list (parse-pattern pattern)) + :if successp + :collect result :into results + :else :return (fail) + :finally (return (succeed results))))) + +(defun parse-optional (pattern) + (try-parse + (res) (parse-pattern pattern) + (succeed res) + (succeed nil))) + +(defun parse-kleene (pattern) + (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern))) + :while successp :collect res :into results + :finally (return-from parse-kleene (succeed results)))) + +(defun parse-one-or-more (pattern) + (try-parse (first) (parse-pattern pattern) + (try-parse (rest) (parse-kleene pattern) + (succeed (cons first rest))))) + +(defun parse-alernatives (patterns) + (if (endp patterns) (fail) + (try-parse (result) (parse-pattern (first patterns)) + (succeed result) + (parse-alernatives (rest patterns))))) + +(defun literals->patterns (literals) + (loop :for l :in literals :collect (list := l))) + +(defun literals-equal? (a b) (eq a b)) + +(defun parse-literal (literal) + (if (literals-equal? (next-token) literal) + (succeed literal) + (fail))) + +(defun parse-literal-alternatives (literals) + (parse-alernatives (literals->patterns literals))) + +(defun parse-kleene-literal (literal) + (parse-kleene (list := literal))) + +(defun parse-one-or-more-literal (literal) + (parse-one-or-more (list := literal))) + +(defun parse-optional-literal (literal) + (parse-optional (list := literal))) + +(defun parse-literal-sequence (literals) + (parse-sequence (literals->patterns literals))) + +(defun parse-binding-pattern (var pattern) + (try-parse + (result) (parse-pattern pattern) + (progn + (update-bindings var result) + (succeed result)))) + +(defun parse-eof () + (if (eq +eof+ (next-token)) + (succeed nil) + (fail))) + +(defun parse-pattern (pattern) + (try-parse () + (if (nonterminal? pattern) + (parse-rule pattern) + (destructuring-bind (op . args) pattern + (case op + (:seq (parse-sequence args)) + (:? (parse-optional (first args))) + (:* (parse-kleene (first args))) + (:+ (parse-one-or-more (first args))) + (:or (parse-alernatives args)) + (:or= (parse-literal-alternatives args)) + (:= (parse-literal (first args))) + (:*= (parse-kleene-literal (first args))) + (:+= (parse-one-or-more-literal (first args))) + (:?= (parse-optional-literal (first args))) + (:seq= (parse-literal-sequence args)) + (:@ (parse-binding-pattern (first args) (second args))) + (:eof (parse-eof))))))) + +(defun parse-rule (nonterminal) + (if-let (rule (gethash nonterminal *grammar*)) + (try-parse (result) (parse-pattern (rule-pattern rule)) + (if (rule-action rule) + (succeed (funcall (rule-action rule))) + (succeed result))) + (fail))) + + +(defun parse (grammar rule tokens) + (let ((*position* 0) + (*grammar* grammar) + (*tokens* tokens) + (*bindings* nil)) + (parse-rule rule))) -- cgit v1.2.3