From a2f6e93519695cc145539049af60d3e41ec52fad Mon Sep 17 00:00:00 2001
From: colin <colin@cicadas.surf>
Date: Tue, 18 Jul 2023 18:04:41 -0700
Subject: Initial commit

---
 argot.lisp | 232 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 argot.lisp

(limited to 'argot.lisp')

diff --git a/argot.lisp b/argot.lisp
new file mode 100644
index 0000000..1f3d5d2
--- /dev/null
+++ b/argot.lisp
@@ -0,0 +1,232 @@
+;;;; argot.lisp
+
+(in-package #:argot)
+
+(defstruct rule lhs pattern action)
+
+(define-condition invalid-rule-def (error)
+  ((rule :reader rule :initarg :rule)))
+
+(defun include-rule (grammar rule)
+  (setf (gethash (rule-lhs rule) grammar) rule))
+
+(defun nonterminal? (lhs)
+  (and (symbolp lhs)
+       (let* ((name (symbol-name lhs))
+              (length (length name)))
+         (and (<= 3 length)
+              (eql #\< (elt name 0))
+              (eql #\> (elt name (1- length)))))))
+
+(defun var? (var)
+  (and var (symbolp var) (not (keywordp var))))
+
+(defun pattern? (pat)
+  "Every pattern PAT is either a nonterminal symbol, or a pattern
+expression. Such expressions look like (OP . ARGS) where OP is one of
+:SEQ :? :* :+ :OR :OR= := :*= :?= :SEQ= :@ :EOF"
+  (or (nonterminal? pat)
+      (and (consp pat)
+           (destructuring-bind (op . more) pat
+             (case op
+               ((:seq :? :* :+ :or)
+                (every #'pattern? more))
+               ((:or= := :*= :+= :?= :seq=)
+                (not (null more)))
+               (:@ (and (var? (first more))
+                        (not (third more))
+                        (pattern? (second more))))
+               (:eof (endp more)))))))
+
+(defun var-pattern? (pat)
+  "VAR-PATTERN? checks that a pattern is a var pattern (:@ VAR PATTERN)
+and it returns VAR in that case."
+  (and (consp pat)
+       (third pat)
+       (destructuring-bind (at var . _) pat
+         (declare (ignore _))
+         (and (eq at :@) (var? var) var))))
+
+(defun collect-vars (pat)
+  "Collects all of the variables from the var patterns in PAT and returns them."
+  (if-let (var (var-pattern? pat))
+    (list var)
+    (when (consp pat)
+      (append (collect-vars (car pat))
+              (collect-vars (cdr pat))))))
+
+(defun parse-rule-def (ruledef)
+  (handler-case 
+      (ematch ruledef
+        ((guard (list lhs :-> pattern)
+                       (and (nonterminal? lhs) (pattern? pattern)))
+         (list lhs pattern (collect-vars pattern) nil))
+        
+        ((guard (list lhs :-> pattern :=> action)
+                       (and (nonterminal? lhs) (pattern? pattern)))
+         (list lhs pattern (collect-vars pattern) action)))
+    (trivia::match-error ()
+      (error 'invalid-rule-def :rule ruledef))))
+
+(defmacro defgrammar (name (&key (documentation "")) &body ruledefs)
+  (let ((bindings-var (gensym "BINDINGS")))
+    `(progn
+       (defvar ,name nil ,documentation)
+       (setf ,name (make-hash-table :test #'eq))
+       ,@(loop :for ruledef :in ruledefs
+               :for (lhs pattern vars action) := (parse-rule-def ruledef)
+               :for bindings
+                 := (loop :for var :in vars
+                          :collect `(,var (cdr (assoc ',var ,bindings-var))))
+               :collect `(include-rule
+                          ,name
+                          (make-rule
+                           :lhs ',lhs
+                           :pattern ',pattern
+                           ,@(when action
+                               (list 
+                                :action `(lambda (,bindings-var) (let ,bindings ,action)))))))
+       ,name)))
+
+(defvar *position*)
+(defvar *grammar*)
+(defvar *tokens*)
+(defvar *bindings*)
+
+(defun succeed (result)
+  (values result t))
+
+(defun fail ()
+  (values nil nil))
+
+(defparameter +eof+ #())
+
+(defun next-token ()
+  (if (< *position* (length *tokens*))
+      (prog1 (elt *tokens* *position*)
+        (incf *position*))
+      +eof+))
+
+(defun update-bindings (var value)
+  (setf *bindings* (acons var value *bindings*)))
+
+(defmacro try-parse
+    ((&optional (result (gensym)))
+     parse-expression
+     &optional
+       (success-clause `(succeed ,result))
+       (fail-clause '(fail)))
+  (alexandria:with-gensyms (successp pos)
+    `(let ((,pos *position*))
+       (multiple-value-bind (,result ,successp) ,parse-expression
+         (if ,successp ,success-clause
+             (progn
+               (setf *position* ,pos)
+               ,fail-clause))))))
+
+(defun parse-sequence (patterns)
+  "Parse each member of PATTERNS and succeed with a list of results. If
+any pattern fails the whole parse fails."
+  (try-parse
+      () (loop :for pattern :in patterns
+               :for (result successp)
+                 := (multiple-value-list (parse-pattern pattern))
+               :if successp
+                 :collect result :into results
+               :else :return (fail)
+               :finally (return (succeed results)))))
+
+(defun parse-optional (pattern)
+  (try-parse
+      (res) (parse-pattern pattern)
+      (succeed res)
+      (succeed nil)))
+
+(defun parse-kleene (pattern)
+  (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern)))
+        :while successp :collect res :into results
+        :finally (return-from parse-kleene (succeed results))))
+
+(defun parse-one-or-more (pattern)
+  (try-parse (first) (parse-pattern pattern)
+      (try-parse (rest) (parse-kleene pattern)
+          (succeed (cons first rest)))))
+
+(defun parse-alernatives (patterns)
+  (if (endp patterns) (fail)
+      (try-parse (result) (parse-pattern (first patterns))
+          (succeed result)
+          (parse-alernatives (rest patterns)))))
+
+(defun literals->patterns (literals)
+  (loop :for l :in literals :collect (list := l)))
+
+(defun literals-equal? (a b) (eq a b))
+
+(defun parse-literal (literal)
+  (if (literals-equal? (next-token) literal)
+      (succeed literal)
+      (fail)))
+
+(defun parse-literal-alternatives (literals)
+  (parse-alernatives (literals->patterns literals)))
+
+(defun parse-kleene-literal (literal)
+  (parse-kleene (list := literal)))
+
+(defun parse-one-or-more-literal (literal)
+  (parse-one-or-more (list := literal)))
+
+(defun parse-optional-literal (literal)
+  (parse-optional (list := literal)))
+
+(defun parse-literal-sequence (literals)
+  (parse-sequence (literals->patterns literals)))
+
+(defun parse-binding-pattern (var pattern)
+  (try-parse
+      (result) (parse-pattern pattern)
+      (progn
+        (update-bindings var result)
+        (succeed result))))
+
+(defun parse-eof ()
+  (if (eq +eof+ (next-token))
+      (succeed nil)
+      (fail)))
+
+(defun parse-pattern (pattern)
+  (try-parse ()
+             (if (nonterminal? pattern)
+                 (parse-rule pattern)
+                 (destructuring-bind (op . args) pattern
+                   (case op
+                     (:seq (parse-sequence args))
+                     (:? (parse-optional (first args)))
+                     (:* (parse-kleene (first args)))
+                     (:+ (parse-one-or-more (first args)))
+                     (:or (parse-alernatives args))
+                     (:or= (parse-literal-alternatives args))
+                     (:= (parse-literal (first args)))
+                     (:*= (parse-kleene-literal (first args)))
+                     (:+= (parse-one-or-more-literal (first args)))
+                     (:?= (parse-optional-literal (first args)))
+                     (:seq= (parse-literal-sequence args))
+                     (:@ (parse-binding-pattern (first args) (second args)))
+                     (:eof (parse-eof)))))))
+
+(defun parse-rule (nonterminal)
+  (if-let (rule (gethash nonterminal *grammar*))
+    (try-parse (result) (parse-pattern (rule-pattern rule))
+        (if (rule-action rule)
+            (succeed (funcall (rule-action rule)))
+            (succeed result)))
+    (fail)))
+
+
+(defun parse (grammar rule tokens)
+  (let ((*position* 0)
+        (*grammar* grammar)
+        (*tokens* tokens)
+        (*bindings* nil))
+    (parse-rule rule)))
-- 
cgit v1.2.3