From 597e64a252b3d000ac5fd925e2b48d4e7716cb18 Mon Sep 17 00:00:00 2001 From: colin Date: Fri, 31 May 2024 13:04:10 -0700 Subject: Initial commit; instr fetching script and dump --- fetch-wasm-instrs.lisp | 107 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) create mode 100644 fetch-wasm-instrs.lisp (limited to 'fetch-wasm-instrs.lisp') diff --git a/fetch-wasm-instrs.lisp b/fetch-wasm-instrs.lisp new file mode 100644 index 0000000..4735214 --- /dev/null +++ b/fetch-wasm-instrs.lisp @@ -0,0 +1,107 @@ +(ql:quickload '(#:dexador #:cl-ppcre #:flatbind)) + +(defpackage #:fetch-wasm-instrs + (:use #:cl) + (:import-from #:flatbind #:do>)) + +(in-package #:fetch-wasm-instrs) + + +(defvar +source-url+ + "https://raw.githubusercontent.com/WebAssembly/spec/main/document/core/appendix/index-instructions.py" + + "Incredibly obnoxiously, the only textual up-to-date table of all the instructions exists in a fucking python file.") + +(defun fetch-source-file () + (dexador:get +source-url+)) + +(defparameter +start-instrs-pattern+ "INSTRUCTIONS = [ +") + +(defparameter +end-instrs-pattern+ " +]") + +(defun trim-junk (str) + (string-trim '(#\space #\tab #\newline #\return #\( #\) #\comma) str)) + +(defun parse-instruction (str) + (let ((str (trim-junk str))) + (ppcre:split + ", ?" + (subseq str #.(length "Instruction(") (- (length str) 2))))) + +(defun excise-instructions (source) + (let* ((start-pos (+ (length +start-instrs-pattern+) + (search +start-instrs-pattern+ source))) + (end-pos (search +end-instrs-pattern+ source :start2 start-pos))) + (mapcar #'parse-instruction + (ppcre:split "\\n" (subseq source start-pos end-pos))))) + +(defun parse-hex (str) + (do> + (ok groups) := (ppcre:scan-to-strings "hex{([A-F0-9]+)}" str) + :when (and ok (vectorp groups)) + hex := (elt groups 0) + (parse-integer hex :radix 16))) + +(defun parse-code (str) + (remove-if-not #'identity (mapcar #'parse-hex (ppcre:split "~" str)))) + +(defun dropchar? (c) (find c "\\'{}")) + +(defun clean-string (str &optional extra) + (remove-if + (if extra + (lambda (c) (or (dropchar? c) (find c extra))) + #'dropchar?) + (ppcre:regex-replace "\\\\K" (subseq str 1) ""))) + +(defun parse-instr (str) + (when (not (string-equal "None" str)) + (destructuring-bind + (first . rest) + (loop :for part :in (ppcre:split "~" (clean-string str)) + :collect (ppcre:regex-replace "ast" part "*")) + (cons (string-downcase + (ppcre:regex-replace + "^(LOCAL|GLOBAL|TABLE|MEMORY|DATA|ELEM|REF)" + first + "\\1.")) + rest)))) + +(defun parse-type-list (str) + (loop :for part :in (ppcre:split "~" (clean-string str "[]_^")) + :collect (alexandria:make-keyword + (ppcre:regex-replace "ast" part "*")))) + +(defun parse-instr-type (str) + (when str + (mapcar #'parse-type-list (ppcre:split " to " (clean-string str))))) + +(defun make-instr (instr code type) + (list :instr instr + :code code + :type type)) + +(defun get-all-instructions () + (do> + str :when= (fetch-source-file) + entries := (excise-instructions str) + (loop :for (namestr codestr typestr . more) :in entries + :for instr := (parse-instr namestr) + :for code := (parse-code codestr) + :for type := (parse-instr-type typestr) + :when instr + :collect (make-instr instr code type)))) + +(format t "Fetching all instructions from~% ~a~%" +) +(let ((instrs (get-all-instructions))) + (format t "Writing to disk in ./instrs.sexp~%") + (with-open-file (out "instrs.sexp" + :direction :output + :if-exists :supersede + :if-does-not-exist :create) + (write instrs :stream out))) +(format t "DONE~%") + +(uiop:quit) -- cgit v1.2.3