From 483867e966e5b009be1892cc3f81de0b5885ba17 Mon Sep 17 00:00:00 2001 From: colin Date: Wed, 2 Aug 2023 07:08:30 -0700 Subject: Moving code around --- argot.asd | 2 + argot.lisp | 249 ++-------------------------------------------------------- grammars.lisp | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 255 insertions(+), 241 deletions(-) create mode 100644 grammars.lisp diff --git a/argot.asd b/argot.asd index 1658d65..6d6aa61 100644 --- a/argot.asd +++ b/argot.asd @@ -8,4 +8,6 @@ :serial t :depends-on (#:trivia #:alexandria) :components ((:file "package") + ;(:file "grammars") + ;(:file "deflanguage") (:file "argot"))) diff --git a/argot.lisp b/argot.lisp index ec5f9b8..2218dd5 100644 --- a/argot.lisp +++ b/argot.lisp @@ -2,30 +2,12 @@ (in-package #:argot) -(defstruct rule lhs pattern action check) - -(defclass grammar () - ((rules - :accessor grammar-rules - :initform (make-hash-table :test #'eq)) - (start - :accessor start-rule - :initarg :start-rule - :initform (error "START-RULE is required") - :type symbol) - (documentation - :reader grammar-documentation - :initarg :documentation - :initform ""))) (define-condition invalid-rule-def (error) ((rule :reader rule :initarg :rule)) (:report (lambda (e stream) (format stream "The rule ~s is malformed." (car (rule e)))))) -(defun include-rule (grammar rule) - (setf (gethash (rule-lhs rule) (grammar-rules grammar)) rule)) - (defun nonterminal? (lhs) (and (symbolp lhs) (let* ((name (symbol-name lhs)) @@ -113,28 +95,30 @@ and it returns VAR in that case." (loop :for var :in vars :collect `(,var (cdr (assoc ',var ,bindings-var))) :into bs :finally (return (cons (list lhs `(cdr (assoc ',lhs ,bindings-var))) bs)))) - (expand-functional-argument (initarg lhs bindings action ) + (expand-functional-argument (initarg lhs bindings action vars) (cond ((and action (symbolp action)) (list initarg `(lambda (,bindings-var) (let ,bindings + (declare (ignorable ,lhs ,@vars)) (,action ,lhs))))) ((function-form-p action) (list initarg `(lambda (,bindings-var) (let ,bindings + (declare (ignorable ,lhs ,@vars)) (funcall ,action ,lhs))))) ((consp action) (list initarg `(lambda (,bindings-var) (let ,bindings - (declare (ignorable ,lhs)) + (declare (ignorable ,lhs ,@vars)) ,action)))))) - (rule-includer (name lhs pattern &optional check action bindings) + (rule-includer (name lhs pattern &optional check action bindings vars) `(include-rule ,name (make-rule :lhs ',lhs :pattern ',pattern - ,@(expand-functional-argument :check lhs bindings check) - ,@(expand-functional-argument :action lhs bindings action))))) + ,@(expand-functional-argument :check lhs bindings check vars) + ,@(expand-functional-argument :action lhs bindings action vars))))) `(progn (defvar ,name nil) (setf ,name (make-instance 'grammar @@ -145,228 +129,11 @@ and it returns VAR in that case." :for (lhs pattern vars check action) := (handler-case (parse-rule-def ruledef) (invalid-rule-def (e) (invoke-debugger e))) :for bindings := (collect-let-bindings lhs vars) - :collect (rule-includer name lhs pattern check action bindings)) + :collect (rule-includer name lhs pattern check action bindings vars)) (defmacro ,name (&body tokens) (argot:parse ,name tokens)))))) -(defvar *position*) -(defvar *grammar*) -(defvar *tokens*) -(defvar *bindings*) - -(defun succeed (result) - (values result t)) - -(defun fail () - (values nil nil)) - -(defparameter +eof+ #()) - -(defun next-token () - (if (< *position* (length *tokens*)) - (prog1 (elt *tokens* *position*) - (incf *position*)) - +eof+)) - -(defun update-bindings (var value) - (setf *bindings* (acons var value *bindings*))) - -(defmacro try-parse - ((&optional (result (gensym))) - parse-expression - &optional - (success-clause `(succeed ,result)) - (fail-clause '(fail))) - (alexandria:with-gensyms (successp pos) - `(let ((,pos *position*)) - (multiple-value-bind (,result ,successp) ,parse-expression - (if ,successp ,success-clause - (progn - (setf *position* ,pos) - ,fail-clause)))))) - -(defmacro if-parse - ((&optional (result (gensym))) - parse-expression - &optional - (success-clause `(succeed ,result)) - (fail-clause '(fail))) - (alexandria:with-gensyms (successp) - `(multiple-value-bind (,result ,successp) ,parse-expression - (if ,successp ,success-clause - ,fail-clause)))) - - -(defun parse-sequence (patterns) - "Parse each member of PATTERNS and succeed with a list of results. If -any pattern fails the whole parse fails." - (loop :for pattern :in patterns - :for (result successp) - := (multiple-value-list (parse-pattern pattern)) - :if successp - :collect result :into results - :else :return (fail) - :finally (return (succeed results)))) - -(defun parse-optional (pattern) - (try-parse - (res) (parse-pattern pattern) - (succeed res) - (succeed nil))) - -(defun parse-kleene (pattern) - (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern))) - :while successp :collect res :into results - :finally (return-from parse-kleene (succeed results)))) - -(defun parse-one-or-more (pattern) - (if-parse (first) (parse-pattern pattern) - (if-parse (rest) (parse-kleene pattern) - (succeed (cons first rest))))) - -(defun parse-alternatives (patterns) - (if (endp patterns) (fail) - (try-parse (result) (parse-pattern (first patterns)) - (succeed result) - (parse-alternatives (rest patterns))))) - -(defun literals->patterns (literals) - (loop :for l :in literals :collect (list := l))) - -(defun literals-equal? (a b) (eq a b)) - -(defun parse-literal (literal) - (if (literals-equal? (next-token) literal) - (succeed literal) - (fail))) - -(defun parse-literal-alternatives (literals) - (parse-alternatives (literals->patterns literals))) - -(defun parse-kleene-literal (literal) - (parse-kleene (list := literal))) - -(defun parse-one-or-more-literal (literal) - (parse-one-or-more (list := literal))) - -(defun parse-optional-literal (literal) - (parse-optional (list := literal))) - -(defun parse-literal-sequence (literals) - (parse-sequence (literals->patterns literals))) - -(defun parse-binding-pattern (var pattern) - (if-parse - (result) (parse-pattern pattern) - (progn - (update-bindings var result) - (succeed result)))) - -(defun parse-item () - (let ((token (next-token))) - (if (not (eq token +eof+)) - (succeed token) - (fail)))) - -(defun parse-eof () - (if (eq +eof+ (next-token)) - (succeed nil) - (fail))) - -(defun find-grammar (name) - (and (symbolp name) - (boundp name) - (let ((val (symbol-value name))) - (and (typep val 'grammar) - val)))) - -(defun parse-grammar-pattern (language) - (if-parse (subtokens) (parse-item) - (if (listp subtokens) - (if-let (grammar (find-grammar language)) - (let ((*grammar* grammar) - (*position* 0) - (*tokens* (coerce subtokens 'vector))) - (parse-rule (start-rule grammar))) - (error "No grammar called ~s" language)) - (fail)))) - -(defun parse-pattern (pattern) - (if (nonterminal? pattern) - (parse-rule pattern) - (destructuring-bind (op . args) pattern - (case op - (:seq (parse-sequence args)) - (:? (parse-optional (first args))) - (:* (parse-kleene (first args))) - (:+ (parse-one-or-more (first args))) - (:or (parse-alternatives args)) - (:or= (parse-literal-alternatives args)) - (:= (parse-literal (first args))) - (:*= (parse-kleene-literal (first args))) - (:+= (parse-one-or-more-literal (first args))) - (:?= (parse-optional-literal (first args))) - (:seq= (parse-literal-sequence args)) - (:@ (parse-binding-pattern (first args) (second args))) - (:{} (parse-grammar-pattern (first args))) - (:item (parse-item)) - (:eof (parse-eof)))))) - -(defun handle-post-rule (rule result) - (with-slots (lhs check action) rule - (let ((bindings (acons lhs result *bindings*))) - (when check - (unless (funcall check bindings) - (return-from handle-post-rule (fail)))) - (if action - (succeed (funcall action bindings)) - (succeed result))))) - -(defun parse-rule (nonterminal) - (if-let (rule (gethash nonterminal (grammar-rules *grammar*))) - (let ((*bindings* nil)) - (if-parse (result) (parse-pattern (rule-pattern rule)) - (handle-post-rule rule result))) - (fail))) - - -(define-condition argot-parse-error (error) - ((position :initarg :position) - (input :initarg :input)) - (:report (lambda (err stream) - (with-slots (position input) err - (let ((pos (max 0 (- position 2)))) - (format - stream - "Parse failed at position ~a, around:~%~a" - pos (print-seq-range-pointing-at-center input pos 4))))))) - -(defun listslice (seq start stop) - (coerce (alexandria-2:subseq* seq start stop) 'list)) - -(defun print-seq-range-pointing-at-center (items position radius) - (let* ((front - (format nil "... ~{~s~^ ~}" (listslice items (max 0 (- position radius)) position ))) - (line - (format nil "~a ~{~s~^ ~} ..." - front (listslice items position (+ position radius 1))))) - (concatenate - 'string - line (list #\Newline) - (loop :repeat (1+ (length front)) :collect #\Space) - "^" (list #\Newline)))) - -(defun parse (grammar tokens) - (let ((*position* 0) - (*grammar* grammar) - (*tokens* (coerce tokens 'vector))) - (multiple-value-bind (result successp) (parse-rule (start-rule *grammar*)) - (unless successp - (error 'argot-parse-error - :position *position* - :input *tokens*)) - result))) diff --git a/grammars.lisp b/grammars.lisp new file mode 100644 index 0000000..38f1c2f --- /dev/null +++ b/grammars.lisp @@ -0,0 +1,245 @@ +;;;; grammars.lisp + +(in-package #:argot) + +;;; GRAMMAR & RULES + +(defstruct rule lhs pattern action check) + +(defclass grammar () + ((rules + :accessor grammar-rules + :initform (make-hash-table :test #'eq)) + (start + :accessor start-rule + :initarg :start-rule + :initform (error "START-RULE is required") + :type symbol) + (documentation + :reader grammar-documentation + :initarg :documentation + :initform ""))) + +(defun include-rule (grammar rule) + (setf (gethash (rule-lhs rule) (grammar-rules grammar)) rule)) + + +;;; PARSING + +(defvar *position*) +(defvar *grammar*) +(defvar *tokens*) +(defvar *bindings*) + +(defun succeed (result) + (values result t)) + +(defun fail () + (values nil nil)) + +(defparameter +eof+ #()) + +(defun next-token () + (if (< *position* (length *tokens*)) + (prog1 (elt *tokens* *position*) + (incf *position*)) + +eof+)) + +(defun update-bindings (var value) + (setf *bindings* (acons var value *bindings*))) + +(defmacro try-parse + ((&optional (result (gensym))) + parse-expression + &optional + (success-clause `(succeed ,result)) + (fail-clause '(fail))) + (alexandria:with-gensyms (successp pos) + `(let ((,pos *position*)) + (multiple-value-bind (,result ,successp) ,parse-expression + (if ,successp ,success-clause + (progn + (setf *position* ,pos) + ,fail-clause)))))) + +(defmacro if-parse + ((&optional (result (gensym))) + parse-expression + &optional + (success-clause `(succeed ,result)) + (fail-clause '(fail))) + (alexandria:with-gensyms (successp) + `(multiple-value-bind (,result ,successp) ,parse-expression + (if ,successp ,success-clause + ,fail-clause)))) + + +(defun parse-sequence (patterns) + "Parse each member of PATTERNS and succeed with a list of results. If +any pattern fails the whole parse fails." + (loop :for pattern :in patterns + :for (result successp) + := (multiple-value-list (parse-pattern pattern)) + :if successp + :collect result :into results + :else :return (fail) + :finally (return (succeed results)))) + +(defun parse-optional (pattern) + (try-parse + (res) (parse-pattern pattern) + (succeed res) + (succeed nil))) + +(defun parse-kleene (pattern) + (loop :for (res successp) := (multiple-value-list (try-parse () (parse-pattern pattern))) + :while successp :collect res :into results + :finally (return-from parse-kleene (succeed results)))) + +(defun parse-one-or-more (pattern) + (if-parse (first) (parse-pattern pattern) + (if-parse (rest) (parse-kleene pattern) + (succeed (cons first rest))))) + +(defun parse-alternatives (patterns) + (if (endp patterns) (fail) + (try-parse (result) (parse-pattern (first patterns)) + (succeed result) + (parse-alternatives (rest patterns))))) + +(defun literals->patterns (literals) + (loop :for l :in literals :collect (list := l))) + +(defun literals-equal? (a b) (eq a b)) + +(defun parse-literal (literal) + (if (literals-equal? (next-token) literal) + (succeed literal) + (fail))) + +(defun parse-literal-alternatives (literals) + (parse-alternatives (literals->patterns literals))) + +(defun parse-kleene-literal (literal) + (parse-kleene (list := literal))) + +(defun parse-one-or-more-literal (literal) + (parse-one-or-more (list := literal))) + +(defun parse-optional-literal (literal) + (parse-optional (list := literal))) + +(defun parse-literal-sequence (literals) + (parse-sequence (literals->patterns literals))) + +(defun parse-binding-pattern (var pattern) + (if-parse + (result) (parse-pattern pattern) + (progn + (update-bindings var result) + (succeed result)))) + +(defun parse-item () + (let ((token (next-token))) + (if (not (eq token +eof+)) + (succeed token) + (fail)))) + +(defun parse-eof () + (if (eq +eof+ (next-token)) + (succeed nil) + (fail))) + +(defun find-grammar (name) + (and (symbolp name) + (boundp name) + (let ((val (symbol-value name))) + (and (typep val 'grammar) + val)))) + +(defun parse-grammar-pattern (language) + (if-parse (subtokens) (parse-item) + (if (listp subtokens) + (if-let (grammar (find-grammar language)) + (let ((*grammar* grammar) + (*position* 0) + (*tokens* (coerce subtokens 'vector))) + (parse-rule (start-rule grammar))) + (error "No grammar called ~s" language)) + (fail)))) + +(defun parse-pattern (pattern) + (if (nonterminal? pattern) + (parse-rule pattern) + (destructuring-bind (op . args) pattern + (case op + (:seq (parse-sequence args)) + (:? (parse-optional (first args))) + (:* (parse-kleene (first args))) + (:+ (parse-one-or-more (first args))) + (:or (parse-alternatives args)) + (:or= (parse-literal-alternatives args)) + (:= (parse-literal (first args))) + (:*= (parse-kleene-literal (first args))) + (:+= (parse-one-or-more-literal (first args))) + (:?= (parse-optional-literal (first args))) + (:seq= (parse-literal-sequence args)) + (:@ (parse-binding-pattern (first args) (second args))) + (:{} (parse-grammar-pattern (first args))) + (:item (parse-item)) + (:eof (parse-eof)))))) + +(defun handle-post-rule (rule result) + (with-slots (lhs check action) rule + (let ((bindings (acons lhs result *bindings*))) + (when check + (unless (funcall check bindings) + (return-from handle-post-rule (fail)))) + (if action + (succeed (funcall action bindings)) + (succeed result))))) + +(defun parse-rule (nonterminal) + (if-let (rule (gethash nonterminal (grammar-rules *grammar*))) + (let ((*bindings* nil)) + (if-parse (result) (parse-pattern (rule-pattern rule)) + (handle-post-rule rule result))) + (fail))) + + +(define-condition argot-parse-error (error) + ((position :initarg :position) + (input :initarg :input)) + (:report (lambda (err stream) + (with-slots (position input) err + (let ((pos (max 0 (- position 2)))) + (format + stream + "Parse failed at position ~a, around:~%~a" + pos (print-seq-range-pointing-at-center input pos 4))))))) + +(defun listslice (seq start stop) + (coerce (alexandria-2:subseq* seq start stop) 'list)) + +(defun print-seq-range-pointing-at-center (items position radius) + (let* ((front + (format nil "... ~{~s~^ ~}" (listslice items (max 0 (- position radius)) position ))) + (line + (format nil "~a ~{~s~^ ~} ..." + front (listslice items position (+ position radius 1))))) + (concatenate + 'string + line (list #\Newline) + (loop :repeat (1+ (length front)) :collect #\Space) + "^" (list #\Newline)))) + +(defun parse (grammar tokens) + (let ((*position* 0) + (*grammar* grammar) + (*tokens* (coerce tokens 'vector))) + (multiple-value-bind (result successp) (parse-rule (start-rule *grammar*)) + (unless successp + (error 'argot-parse-error + :position *position* + :input *tokens*)) + result))) -- cgit v1.2.3