summaryrefslogtreecommitdiff
path: root/fetch-wasm-instrs.lisp
blob: e2cfaecd2c179bbe431c064c096ff6abe1dfb6fa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
(ql:quickload '(#:dexador #:cl-ppcre #:flatbind))

(defpackage #:fetch-wasm-instrs
  (:use #:cl)
  (:import-from #:flatbind #:do>))

(in-package #:fetch-wasm-instrs)


(defvar +source-url+
  "https://raw.githubusercontent.com/WebAssembly/spec/main/document/core/appendix/index-instructions.py"

  "Incredibly obnoxiously, the only textual up-to-date table of all the instructions exists in a fucking python file.")

(defun fetch-source-file ()
  (dexador:get +source-url+))

(defparameter +start-instrs-pattern+ "INSTRUCTIONS = [
")

(defparameter +end-instrs-pattern+ "
]")

(defun trim-junk (str)
  (string-trim '(#\space #\tab #\newline #\return #\( #\) #\comma) str))

(defun parse-instruction (str)
  (let ((str (trim-junk str)))
   (ppcre:split
    ", ?"
    (subseq str #.(length "Instruction(") (- (length str) 1)))))

(defun excise-instructions (source)
  (let* ((start-pos (+ (length +start-instrs-pattern+)
                       (search +start-instrs-pattern+ source)))
         (end-pos (search +end-instrs-pattern+ source :start2 start-pos)))
    (mapcar #'parse-instruction
            (ppcre:split "\\n" (subseq source start-pos end-pos)))))

(defun parse-hex (str)
  (do>
    (ok groups) := (ppcre:scan-to-strings "hex{([A-F0-9]+)}" str)
    :when (and ok (vectorp groups))
    hex := (elt groups 0)
    (parse-integer hex :radix 16)))

(defun parse-code (str)
  (remove-if-not #'identity (mapcar #'parse-hex (ppcre:split "~" str))))

(defun dropchar? (c) (find c "\\'{}"))

(defun clean-string (str &optional extra)
  (remove-if
   (if extra
       (lambda (c) (or (dropchar? c) (find c extra)))
       #'dropchar?)
   (ppcre:regex-replace "\\\\K" (subseq str 1) "")))

(defun parse-instr (str)
  (when (not (string-equal "None" str))
    (destructuring-bind
        (first . rest)
        (loop :for part :in (ppcre:split "~" (clean-string str))
              :collect (ppcre:regex-replace "ast" part "*"))
      (cons (string-downcase
             (ppcre:regex-replace
              "^(LOCAL|GLOBAL|TABLE|MEMORY|DATA|ELEM|REF)"
              first
              "\\1."))
            rest))))

(defun parse-type-list (str)
  (loop :for part :in (ppcre:split "~" (clean-string str "[]_^"))
        :collect (alexandria:make-keyword
                  (ppcre:regex-replace "ast" part "*"))))

(defun parse-instr-type (str)
  (when str
    (mapcar #'parse-type-list (ppcre:split " to " (clean-string str)))))

(defun make-instr (instr code type)
  (list :instr instr
        :code code
        :type type))

(defun get-all-instructions ()
  (do>
    str :when= (fetch-source-file)
    entries := (excise-instructions str)
    (loop :for (namestr codestr typestr . more) :in entries
          :for instr := (parse-instr namestr)
          :for code := (parse-code codestr)
          :for type := (parse-instr-type typestr)
          :when instr
            :collect (make-instr instr code type))))

(format t "Fetching all instructions from~%    ~a~%"
        +source-url+)
(let ((instrs (get-all-instructions)))
  (format t "Writing to disk in ./src/raw-instrs.lisp~%")
  (with-open-file (out "src/raw-instrs.lisp"
                       :direction :output
                       :if-exists :supersede
                       :if-does-not-exist :create)
    (princ "
(defpackage #:sarcasm.raw-instrs
  (:use :cl)
  (:export #:raw-instrs))

(in-package #:sarcasm.raw-instrs)
(defparameter raw-instrs
  '" out)
    (write instrs :stream out)
    (princ ")"  out)))
(format t "DONE~%")

(uiop:quit)