;;;; grammars.lisp (in-package #:argot) ;;; GRAMMAR & RULES (defstruct rule lhs pattern action check note) (defclass grammar () ((rules :accessor grammar-rules :initform (make-hash-table :test #'eq)) (start :accessor start-rule :initarg :start-rule :initform (error "START-RULE is required") :type symbol) (literal-comparator :accessor literal-comparator :initarg :literal-comparator :initform #'equalp :type function :documentation "The function used to compare literals when using this grammar") (documentation :reader grammar-documentation :initarg :documentation :initform ""))) (defun include-rule (grammar rule) (setf (gethash (rule-lhs rule) (grammar-rules grammar)) rule)) ;;; PARSING (defvar *position*) (defvar *grammar*) (defvar *tokens*) (defvar *bindings*) (defvar *literals-equal-p* #'equalp) (defun succeed (result) (values result t)) (defun fail () (values nil nil)) (defparameter +eof+ #()) (defun next-token () (if (< *position* (length *tokens*)) (prog1 (elt *tokens* *position*) (incf *position*)) +eof+)) (defun update-bindings (var value) (setf *bindings* (acons var value *bindings*))) (defmacro try-parse ((&optional (result (gensym))) parse-expression &optional (success-clause `(succeed ,result)) (fail-clause '(fail))) (alexandria:with-gensyms (successp pos) `(let ((,pos *position*)) (multiple-value-bind (,result ,successp) ,parse-expression (if ,successp ,success-clause (progn (setf *position* ,pos) ,fail-clause)))))) (defmacro if-parse ((&optional (result (gensym))) parse-expression &optional (success-clause `(succeed ,result)) (fail-clause '(fail))) (alexandria:with-gensyms (successp) `(multiple-value-bind (,result ,successp) ,parse-expression (if ,successp ,success-clause ,fail-clause)))) (defun parse-sequence (patterns) "Parse each member of PATTERNS and succeed with a list of results. If any pattern fails the whole parse fails." (loop :for pattern :in patterns :for (result successp) := (multiple-value-list (parse-pattern pattern)) :if successp :collect result :into results :else :return (fail) :finally (return (succeed results)))) (defun parse-optional (pattern) (try-parse (res) (parse-pattern pattern) (succeed res) (succeed nil))) (defun parse-kleene (pattern) (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern))) :while successp :collect res :into results :finally (return-from parse-kleene (succeed results)))) (defun parse-one-or-more (pattern) (if-parse (first) (parse-pattern pattern) (if-parse (rest) (parse-kleene pattern) (succeed (cons first rest))))) (defun parse-alternatives (patterns) (if (endp patterns) (fail) (try-parse (result) (parse-pattern (first patterns)) (succeed result) (parse-alternatives (rest patterns))))) (defun literals->patterns (literals) (loop :for l :in literals :collect (list := l))) (defun literals-equal? (a b) (funcall *literals-equal-p* a b)) (defun parse-literal (literal) (if (literals-equal? (next-token) literal) (succeed literal) (fail))) (defun parse-literal-alternatives (literals) (parse-alternatives (literals->patterns literals))) (defun parse-kleene-literal (literal) (parse-kleene (list := literal))) (defun parse-one-or-more-literal (literal) (parse-one-or-more (list := literal))) (defun parse-optional-literal (literal) (parse-optional (list := literal))) (defun parse-literal-sequence (literals) (parse-sequence (literals->patterns literals))) (defun parse-binding-pattern (var pattern) (if-parse (result) (parse-pattern pattern) (progn (update-bindings var result) (succeed result)))) (defun parse-item () (let ((token (next-token))) (if (not (eq token +eof+)) (succeed token) (fail)))) (defun parse-eof () (if (eq +eof+ (next-token)) (succeed nil) (fail))) (defun find-grammar (name) (and (symbolp name) (get name 'argot::grammar-property))) (defun parse-grammar-pattern (language) (if-parse (subtokens) (parse-item) (if (listp subtokens) (if-let (grammar (find-grammar language)) (let ((*grammar* grammar) (*position* 0) (*tokens* (coerce subtokens 'vector)) (*literals-equal-p* (literal-comparator grammar))) (parse-rule (start-rule grammar))) (error "No grammar called ~s" language)) (fail)))) (defun parse-pattern (pattern) (if (nonterminal? pattern) (parse-rule pattern) (destructuring-bind (op . args) pattern (case op (:seq (parse-sequence args)) (:? (parse-optional (first args))) (:* (parse-kleene (first args))) (:+ (parse-one-or-more (first args))) (:or (parse-alternatives args)) (:or= (parse-literal-alternatives args)) (:= (parse-literal (first args))) (:*= (parse-kleene-literal (first args))) (:+= (parse-one-or-more-literal (first args))) (:?= (parse-optional-literal (first args))) (:seq= (parse-literal-sequence args)) (:@ (parse-binding-pattern (first args) (second args))) (:{} (parse-grammar-pattern (first args))) (:item (parse-item)) (:eof (parse-eof)))))) (defun handle-post-rule (rule result) (with-slots (lhs check action) rule (let ((bindings (acons lhs result *bindings*))) (when check (unless (funcall check bindings) (return-from handle-post-rule (fail)))) (if action (succeed (funcall action bindings)) (succeed result))))) (defun parse-rule (nonterminal) (if-let (rule (gethash nonterminal (grammar-rules *grammar*))) (let ((*bindings* nil)) (if-parse (result) (parse-pattern (rule-pattern rule)) (handle-post-rule rule result))) (fail))) (define-condition argot-parse-error (error) ((position :initarg :position) (input :initarg :input)) (:report (lambda (err stream) (with-slots (position input) err (let ((pos (max 0 (- position 2)))) (format stream "Parse failed at position ~a, around:~%~a" pos (print-seq-range-pointing-at-center input pos 4))))))) (defun listslice (seq start stop) (coerce (alexandria-2:subseq* seq start stop) 'list)) (defun print-seq-range-pointing-at-center (items position radius) (let* ((front (format nil "... ~{~s~^ ~}" (listslice items (max 0 (- position radius)) position ))) (line (format nil "~a ~{~s~^ ~} ..." front (listslice items position (+ position radius 1))))) (concatenate 'string line (list #\Newline) (loop :repeat (1+ (length front)) :collect #\Space) "^" (list #\Newline)))) (defun parse (grammar tokens) (let ((*position* 0) (*grammar* grammar) (*tokens* (coerce tokens 'vector)) (*literals-equal-p* (literal-comparator grammar))) (multiple-value-bind (result successp) (parse-rule (start-rule *grammar*)) (unless successp (error 'argot-parse-error :position *position* :input *tokens*)) result)))