Login
7 branches 0 tags
Ben (X13/Arch) Optimized static allocation size 3b55356 2 years ago 968 Commits
nujel / stdlib / compiler / backend_bytecode / disassembler.nuj
;;; Nujel - Copyright (C) 2020-2021 - Benjamin Vincent Schulenburg
;;; This project uses the MIT license, a copy should be included under /LICENSE
;;;
;;; Contains the disassembler, which is probably not that important to most
;;; people, but invaluable when working on compiler internals

(def disassemble (let*
                   (defn disassemble/length (op)
                         "Return the length in bytes of a bytecode operation and all its arguments"
                         (case op
                               ((#$0 #$1 #$3 #$C #$D #$11 #$12 #$13 #$14 #$15 #$16 #$17 #$18 #$19 #$1B #$1C #$1E #$1F #$20 #$21 #$22 #$23 #$24 #$25 #$26 #$27 #$28 #$29 #$2A #$2B #$2C) 1)
                               ((#$2 #$4 #$5 #$7 #$E #$1A) 2)
                               ((#$6 #$8 #$9 #$A #$B #$F #$10 ) 3)
                               (otherwise (throw (list :unknown-op "This op needs its length specified for disassembly to work" op (current-lambda))))))

                   (defn bytecode/nil-catcher (error) (if (= (car error) :type-error) #nil (throw error)))

                   (defn bytecode-arr->val (a i)
                         "Read a bytecode encoded value in A at I and return it"
                         (try bytecode/nil-catcher (bytecode-op->val (ref a i) (ref a (+ 1 i)) (ref a (+ 2 i)))))

                   (defn bytecode-arr->sym (a i)
                         "Read a bytecode encoded symbol in A at I and return it"
                         (try bytecode/nil-catcher (bytecode-op->sym (ref a i) (ref a (+ 1 i)) (ref a (+ 2 i)))))

                   (defn bytecode-op->offset (a b)
                         "Turn two bytecode ops encoding an offset into the integer representation"
                         (def t (bit-or (bit-shift-left (bytecode-op->int a) 8)
                                        (bytecode-op->int b)   )   )
                         (if-not (> t 32768) t
                                 (- (- 65536 t))))

                   (defn bytecode-arr->offset (a i)
                         "Read a bytecode encoded offset in A at I and return it as a signed integer"
                         (bytecode-op->offset (ref a i) (ref a (+ 1 i))))

                   (defn disassemble/maybe-quote (v)
                         "Quotes symbols but just passes through every other value"
                         (cond ((symbol? v) (list 'quote v))
                               ((bytecode-array? v) (disassemble/bytecode-array v))
                               (#t v)))

                   (defn disassemble/op (a i literals)
                         "Disassemble a single bytecode op in A at I and return it as an s-expression, that could be applied to eval"
                         (case (ref a i)
                               (#$0  '($nop))
                               (#$1  '($ret))
                               (#$2  `($push/int/byte ~(bytecode-op->int (ref a (+ i 1)))))
                               (#$3  '($add/int))
                               (#$4  `($apply ~(bytecode-op->int (ref a (+ i 1)))))
                               (#$5  `($set/val ~(disassemble/maybe-quote (ref literals (bytecode-op->int (ref a (+ i 1)))))))
                               (#$6  `($push/val/ext ~(ref literals (bit-or (bytecode-op->int (ref a (+ i 1)))
                                                                            (bit-shift-left (bytecode-op->int (ref a (+ i 2))) 8)))))
                               (#$7  `($def/val ~(disassemble/maybe-quote (ref literals (bytecode-op->int (ref a (+ i 1)))))))
                               (#$8  `($def/val/ext ~(ref literals (bit-or (bytecode-op->int (ref a (+ i 1)))
                                                                           (bit-shift-left (bytecode-op->int (ref a (+ i 2))) 8)))))
                               (#$9  `($jmp*  ~(bytecode-arr->offset a (+ i 1))))
                               (#$A  `($jt*   ~(bytecode-arr->offset a (+ i 1))))
                               (#$B  `($jf*   ~(bytecode-arr->offset a (+ i 1))))
                               (#$C  '($dup))
                               (#$D  '($drop))
                               (#$E  `($get/val ~(disassemble/maybe-quote (ref literals (bytecode-op->int (ref a (+ i 1)))))))
                               (#$F  `($get/val/ext ~(ref literals (bit-or (bytecode-op->int (ref a (+ i 1)))
                                                                           (bit-shift-left (bytecode-op->int (ref a (+ i 2))) 8)))))
                               (#$10 `($set/val/ext ~(ref literals (bit-or (bytecode-op->int (ref a (+ i 1)))
                                                                           (bit-shift-left (bytecode-op->int (ref a (+ i 2))) 8)))))
                               (#$11 '($car))
                               (#$12 '($cdr))
                               (#$13 '($closure/push))
                               (#$14 '($cons))
                               (#$15 '($let))
                               (#$16 '($closure/pop))
                               (#$17 '($fn/dynamic))
                               (#$18 '($macro/dynamic))
                               (#$19 `($try      ~(bytecode-arr->offset a (+ i 1))))
                               (#$1A `($push/val ~(disassemble/maybe-quote (ref literals (bytecode-op->int (ref a (+ i 1)))))))
                               (#$1B '($push/true))
                               (#$1C '($push/false))
                               (#$1D '($eval))
                               (#$1E '($<))
                               (#$1F '($<=))
                               (#$20 '($=))
                               (#$21 '($>=))
                               (#$22 '($>))
                               (#$23 '($inc/int))
                               (#$24 '($push/nil))
                               (#$25 '($add))
                               (#$26 '($sub))
                               (#$27 '($mul))
                               (#$28 '($div))
                               (#$29 '($rem))
                               (#$2A '($zero?))
                               (#$2B '($ref))
                               (#$2C '($cadr))
                               (otherwise :unknown-op)))

                   (defn disassemble/array (a i literals)
                         "Disassemble all bytecode operations in the plain array A starting at I, turning it into an assembler S-Expression and return it as a dotted pair, with the car containing the offset and the cdr containing the S-Expression"
                         (def ret #nil)
                         (while (< i (array/length a))
                           (cons! (cons i (disassemble/op a i literals)) ret)
                           (set! i (+ i (disassemble/length (ref a i)))))
                         (nreverse ret))

                   (defn disassemble/bytecode-array (code)
                         "Disassemble the bytecode array CODE, turning it into a list of dotted pairs, with the car containing the offset and the cdr containing assembler S-Expressions,"
                         (disassemble/array (bytecode-arr->arr code) 0 (bytecode-literals code)))

                   (defn disassemble/to-string (bc)
                         (disassemble/bytecode-array bc))

                   (defn disassemble (bc)
                         "Disassemble the bytecode array CODE, turning it into a list of dotted pairs, with the car containing the offset and the cdr containing assembler S-Expressions,"
                         :cat :bytecode
                         :internal
                         (case (type-of bc)
                               ((:lambda :macro) (disassemble/to-string (closure/code bc)))
                               (:bytecode-array  (disassemble/to-string bc))
                               (otherwise (throw (list :type-error "Can't disassemble that" bc (current-lambda))))))))