aboutsummaryrefslogtreecommitdiff
path: root/grammars.lisp
diff options
context:
space:
mode:
authorcolin <colin@cicadas.surf>2023-08-02 07:08:30 -0700
committercolin <colin@cicadas.surf>2023-08-02 07:08:30 -0700
commit483867e966e5b009be1892cc3f81de0b5885ba17 (patch)
tree5bfbddaed41768289b2125b9053d994edc8ced4a /grammars.lisp
parent4fdd13fd1544117f05482c260033db5759db652a (diff)
Moving code around
Diffstat (limited to 'grammars.lisp')
-rw-r--r--grammars.lisp245
1 files changed, 245 insertions, 0 deletions
diff --git a/grammars.lisp b/grammars.lisp
new file mode 100644
index 0000000..38f1c2f
--- /dev/null
+++ b/grammars.lisp
@@ -0,0 +1,245 @@
+;;;; grammars.lisp
+
+(in-package #:argot)
+
+;;; GRAMMAR & RULES
+
+(defstruct rule lhs pattern action check)
+
+(defclass grammar ()
+ ((rules
+ :accessor grammar-rules
+ :initform (make-hash-table :test #'eq))
+ (start
+ :accessor start-rule
+ :initarg :start-rule
+ :initform (error "START-RULE is required")
+ :type symbol)
+ (documentation
+ :reader grammar-documentation
+ :initarg :documentation
+ :initform "")))
+
+(defun include-rule (grammar rule)
+ (setf (gethash (rule-lhs rule) (grammar-rules grammar)) rule))
+
+
+;;; PARSING
+
+(defvar *position*)
+(defvar *grammar*)
+(defvar *tokens*)
+(defvar *bindings*)
+
+(defun succeed (result)
+ (values result t))
+
+(defun fail ()
+ (values nil nil))
+
+(defparameter +eof+ #())
+
+(defun next-token ()
+ (if (< *position* (length *tokens*))
+ (prog1 (elt *tokens* *position*)
+ (incf *position*))
+ +eof+))
+
+(defun update-bindings (var value)
+ (setf *bindings* (acons var value *bindings*)))
+
+(defmacro try-parse
+ ((&optional (result (gensym)))
+ parse-expression
+ &optional
+ (success-clause `(succeed ,result))
+ (fail-clause '(fail)))
+ (alexandria:with-gensyms (successp pos)
+ `(let ((,pos *position*))
+ (multiple-value-bind (,result ,successp) ,parse-expression
+ (if ,successp ,success-clause
+ (progn
+ (setf *position* ,pos)
+ ,fail-clause))))))
+
+(defmacro if-parse
+ ((&optional (result (gensym)))
+ parse-expression
+ &optional
+ (success-clause `(succeed ,result))
+ (fail-clause '(fail)))
+ (alexandria:with-gensyms (successp)
+ `(multiple-value-bind (,result ,successp) ,parse-expression
+ (if ,successp ,success-clause
+ ,fail-clause))))
+
+
+(defun parse-sequence (patterns)
+ "Parse each member of PATTERNS and succeed with a list of results. If
+any pattern fails the whole parse fails."
+ (loop :for pattern :in patterns
+ :for (result successp)
+ := (multiple-value-list (parse-pattern pattern))
+ :if successp
+ :collect result :into results
+ :else :return (fail)
+ :finally (return (succeed results))))
+
+(defun parse-optional (pattern)
+ (try-parse
+ (res) (parse-pattern pattern)
+ (succeed res)
+ (succeed nil)))
+
+(defun parse-kleene (pattern)
+ (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern)))
+ :while successp :collect res :into results
+ :finally (return-from parse-kleene (succeed results))))
+
+(defun parse-one-or-more (pattern)
+ (if-parse (first) (parse-pattern pattern)
+ (if-parse (rest) (parse-kleene pattern)
+ (succeed (cons first rest)))))
+
+(defun parse-alternatives (patterns)
+ (if (endp patterns) (fail)
+ (try-parse (result) (parse-pattern (first patterns))
+ (succeed result)
+ (parse-alternatives (rest patterns)))))
+
+(defun literals->patterns (literals)
+ (loop :for l :in literals :collect (list := l)))
+
+(defun literals-equal? (a b) (eq a b))
+
+(defun parse-literal (literal)
+ (if (literals-equal? (next-token) literal)
+ (succeed literal)
+ (fail)))
+
+(defun parse-literal-alternatives (literals)
+ (parse-alternatives (literals->patterns literals)))
+
+(defun parse-kleene-literal (literal)
+ (parse-kleene (list := literal)))
+
+(defun parse-one-or-more-literal (literal)
+ (parse-one-or-more (list := literal)))
+
+(defun parse-optional-literal (literal)
+ (parse-optional (list := literal)))
+
+(defun parse-literal-sequence (literals)
+ (parse-sequence (literals->patterns literals)))
+
+(defun parse-binding-pattern (var pattern)
+ (if-parse
+ (result) (parse-pattern pattern)
+ (progn
+ (update-bindings var result)
+ (succeed result))))
+
+(defun parse-item ()
+ (let ((token (next-token)))
+ (if (not (eq token +eof+))
+ (succeed token)
+ (fail))))
+
+(defun parse-eof ()
+ (if (eq +eof+ (next-token))
+ (succeed nil)
+ (fail)))
+
+(defun find-grammar (name)
+ (and (symbolp name)
+ (boundp name)
+ (let ((val (symbol-value name)))
+ (and (typep val 'grammar)
+ val))))
+
+(defun parse-grammar-pattern (language)
+ (if-parse (subtokens) (parse-item)
+ (if (listp subtokens)
+ (if-let (grammar (find-grammar language))
+ (let ((*grammar* grammar)
+ (*position* 0)
+ (*tokens* (coerce subtokens 'vector)))
+ (parse-rule (start-rule grammar)))
+ (error "No grammar called ~s" language))
+ (fail))))
+
+(defun parse-pattern (pattern)
+ (if (nonterminal? pattern)
+ (parse-rule pattern)
+ (destructuring-bind (op . args) pattern
+ (case op
+ (:seq (parse-sequence args))
+ (:? (parse-optional (first args)))
+ (:* (parse-kleene (first args)))
+ (:+ (parse-one-or-more (first args)))
+ (:or (parse-alternatives args))
+ (:or= (parse-literal-alternatives args))
+ (:= (parse-literal (first args)))
+ (:*= (parse-kleene-literal (first args)))
+ (:+= (parse-one-or-more-literal (first args)))
+ (:?= (parse-optional-literal (first args)))
+ (:seq= (parse-literal-sequence args))
+ (:@ (parse-binding-pattern (first args) (second args)))
+ (:{} (parse-grammar-pattern (first args)))
+ (:item (parse-item))
+ (:eof (parse-eof))))))
+
+(defun handle-post-rule (rule result)
+ (with-slots (lhs check action) rule
+ (let ((bindings (acons lhs result *bindings*)))
+ (when check
+ (unless (funcall check bindings)
+ (return-from handle-post-rule (fail))))
+ (if action
+ (succeed (funcall action bindings))
+ (succeed result)))))
+
+(defun parse-rule (nonterminal)
+ (if-let (rule (gethash nonterminal (grammar-rules *grammar*)))
+ (let ((*bindings* nil))
+ (if-parse (result) (parse-pattern (rule-pattern rule))
+ (handle-post-rule rule result)))
+ (fail)))
+
+
+(define-condition argot-parse-error (error)
+ ((position :initarg :position)
+ (input :initarg :input))
+ (:report (lambda (err stream)
+ (with-slots (position input) err
+ (let ((pos (max 0 (- position 2))))
+ (format
+ stream
+ "Parse failed at position ~a, around:~%~a"
+ pos (print-seq-range-pointing-at-center input pos 4)))))))
+
+(defun listslice (seq start stop)
+ (coerce (alexandria-2:subseq* seq start stop) 'list))
+
+(defun print-seq-range-pointing-at-center (items position radius)
+ (let* ((front
+ (format nil "... ~{~s~^ ~}" (listslice items (max 0 (- position radius)) position )))
+ (line
+ (format nil "~a ~{~s~^ ~} ..."
+ front (listslice items position (+ position radius 1)))))
+ (concatenate
+ 'string
+ line (list #\Newline)
+ (loop :repeat (1+ (length front)) :collect #\Space)
+ "^" (list #\Newline))))
+
+(defun parse (grammar tokens)
+ (let ((*position* 0)
+ (*grammar* grammar)
+ (*tokens* (coerce tokens 'vector)))
+ (multiple-value-bind (result successp) (parse-rule (start-rule *grammar*))
+ (unless successp
+ (error 'argot-parse-error
+ :position *position*
+ :input *tokens*))
+ result)))