From 483867e966e5b009be1892cc3f81de0b5885ba17 Mon Sep 17 00:00:00 2001 From: colin Date: Wed, 2 Aug 2023 07:08:30 -0700 Subject: Moving code around --- grammars.lisp | 245 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 245 insertions(+) create mode 100644 grammars.lisp (limited to 'grammars.lisp') diff --git a/grammars.lisp b/grammars.lisp new file mode 100644 index 0000000..38f1c2f --- /dev/null +++ b/grammars.lisp @@ -0,0 +1,245 @@ +;;;; grammars.lisp + +(in-package #:argot) + +;;; GRAMMAR & RULES + +(defstruct rule lhs pattern action check) + +(defclass grammar () + ((rules + :accessor grammar-rules + :initform (make-hash-table :test #'eq)) + (start + :accessor start-rule + :initarg :start-rule + :initform (error "START-RULE is required") + :type symbol) + (documentation + :reader grammar-documentation + :initarg :documentation + :initform ""))) + +(defun include-rule (grammar rule) + (setf (gethash (rule-lhs rule) (grammar-rules grammar)) rule)) + + +;;; PARSING + +(defvar *position*) +(defvar *grammar*) +(defvar *tokens*) +(defvar *bindings*) + +(defun succeed (result) + (values result t)) + +(defun fail () + (values nil nil)) + +(defparameter +eof+ #()) + +(defun next-token () + (if (< *position* (length *tokens*)) + (prog1 (elt *tokens* *position*) + (incf *position*)) + +eof+)) + +(defun update-bindings (var value) + (setf *bindings* (acons var value *bindings*))) + +(defmacro try-parse + ((&optional (result (gensym))) + parse-expression + &optional + (success-clause `(succeed ,result)) + (fail-clause '(fail))) + (alexandria:with-gensyms (successp pos) + `(let ((,pos *position*)) + (multiple-value-bind (,result ,successp) ,parse-expression + (if ,successp ,success-clause + (progn + (setf *position* ,pos) + ,fail-clause)))))) + +(defmacro if-parse + ((&optional (result (gensym))) + parse-expression + &optional + (success-clause `(succeed ,result)) + (fail-clause '(fail))) + (alexandria:with-gensyms (successp) + `(multiple-value-bind (,result ,successp) ,parse-expression + (if ,successp ,success-clause + ,fail-clause)))) + + +(defun parse-sequence (patterns) + "Parse each member of PATTERNS and succeed with a list of results. If +any pattern fails the whole parse fails." + (loop :for pattern :in patterns + :for (result successp) + := (multiple-value-list (parse-pattern pattern)) + :if successp + :collect result :into results + :else :return (fail) + :finally (return (succeed results)))) + +(defun parse-optional (pattern) + (try-parse + (res) (parse-pattern pattern) + (succeed res) + (succeed nil))) + +(defun parse-kleene (pattern) + (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern))) + :while successp :collect res :into results + :finally (return-from parse-kleene (succeed results)))) + +(defun parse-one-or-more (pattern) + (if-parse (first) (parse-pattern pattern) + (if-parse (rest) (parse-kleene pattern) + (succeed (cons first rest))))) + +(defun parse-alternatives (patterns) + (if (endp patterns) (fail) + (try-parse (result) (parse-pattern (first patterns)) + (succeed result) + (parse-alternatives (rest patterns))))) + +(defun literals->patterns (literals) + (loop :for l :in literals :collect (list := l))) + +(defun literals-equal? (a b) (eq a b)) + +(defun parse-literal (literal) + (if (literals-equal? (next-token) literal) + (succeed literal) + (fail))) + +(defun parse-literal-alternatives (literals) + (parse-alternatives (literals->patterns literals))) + +(defun parse-kleene-literal (literal) + (parse-kleene (list := literal))) + +(defun parse-one-or-more-literal (literal) + (parse-one-or-more (list := literal))) + +(defun parse-optional-literal (literal) + (parse-optional (list := literal))) + +(defun parse-literal-sequence (literals) + (parse-sequence (literals->patterns literals))) + +(defun parse-binding-pattern (var pattern) + (if-parse + (result) (parse-pattern pattern) + (progn + (update-bindings var result) + (succeed result)))) + +(defun parse-item () + (let ((token (next-token))) + (if (not (eq token +eof+)) + (succeed token) + (fail)))) + +(defun parse-eof () + (if (eq +eof+ (next-token)) + (succeed nil) + (fail))) + +(defun find-grammar (name) + (and (symbolp name) + (boundp name) + (let ((val (symbol-value name))) + (and (typep val 'grammar) + val)))) + +(defun parse-grammar-pattern (language) + (if-parse (subtokens) (parse-item) + (if (listp subtokens) + (if-let (grammar (find-grammar language)) + (let ((*grammar* grammar) + (*position* 0) + (*tokens* (coerce subtokens 'vector))) + (parse-rule (start-rule grammar))) + (error "No grammar called ~s" language)) + (fail)))) + +(defun parse-pattern (pattern) + (if (nonterminal? pattern) + (parse-rule pattern) + (destructuring-bind (op . args) pattern + (case op + (:seq (parse-sequence args)) + (:? (parse-optional (first args))) + (:* (parse-kleene (first args))) + (:+ (parse-one-or-more (first args))) + (:or (parse-alternatives args)) + (:or= (parse-literal-alternatives args)) + (:= (parse-literal (first args))) + (:*= (parse-kleene-literal (first args))) + (:+= (parse-one-or-more-literal (first args))) + (:?= (parse-optional-literal (first args))) + (:seq= (parse-literal-sequence args)) + (:@ (parse-binding-pattern (first args) (second args))) + (:{} (parse-grammar-pattern (first args))) + (:item (parse-item)) + (:eof (parse-eof)))))) + +(defun handle-post-rule (rule result) + (with-slots (lhs check action) rule + (let ((bindings (acons lhs result *bindings*))) + (when check + (unless (funcall check bindings) + (return-from handle-post-rule (fail)))) + (if action + (succeed (funcall action bindings)) + (succeed result))))) + +(defun parse-rule (nonterminal) + (if-let (rule (gethash nonterminal (grammar-rules *grammar*))) + (let ((*bindings* nil)) + (if-parse (result) (parse-pattern (rule-pattern rule)) + (handle-post-rule rule result))) + (fail))) + + +(define-condition argot-parse-error (error) + ((position :initarg :position) + (input :initarg :input)) + (:report (lambda (err stream) + (with-slots (position input) err + (let ((pos (max 0 (- position 2)))) + (format + stream + "Parse failed at position ~a, around:~%~a" + pos (print-seq-range-pointing-at-center input pos 4))))))) + +(defun listslice (seq start stop) + (coerce (alexandria-2:subseq* seq start stop) 'list)) + +(defun print-seq-range-pointing-at-center (items position radius) + (let* ((front + (format nil "... ~{~s~^ ~}" (listslice items (max 0 (- position radius)) position ))) + (line + (format nil "~a ~{~s~^ ~} ..." + front (listslice items position (+ position radius 1))))) + (concatenate + 'string + line (list #\Newline) + (loop :repeat (1+ (length front)) :collect #\Space) + "^" (list #\Newline)))) + +(defun parse (grammar tokens) + (let ((*position* 0) + (*grammar* grammar) + (*tokens* (coerce tokens 'vector))) + (multiple-value-bind (result successp) (parse-rule (start-rule *grammar*)) + (unless successp + (error 'argot-parse-error + :position *position* + :input *tokens*)) + result))) -- cgit v1.2.3