Login
7 branches 0 tags
Ben (X13/Arch) WIP a25bc3e 3 years ago 651 Commits
nujel / stdlib / compiler / disassembler.nuj
;; Nujel - Copyright (C) 2020-2021 - Benjamin Vincent Schulenburg
;; This project uses the MIT license, a copy should be included under /LICENSE
;;
;; Contains the disassembler, which is probably not that important to most
;; people, but invaluable when working on compiler internals

[defn disassemble/length [op]
  "Return the length in bytes of a bytecode operation and all its arguments"
  [case op
    [[#$0 #$1 #$3 #$C #$D #$11 #$12 #$13 #$14 #$15 #$16 #$1B #$1C #$1E #$1F #$20 #$21 #$22 #$24] 1]
    [[#$2 #$7 #$1A] 2]
    [[#$9 #$A #$B #$18 #$19] 3]
    [[#$5 #$E #$F #$10 #$23] 4]
    [[#$4 #$8] 5]
    [[#$25 #$26] 13]
    [otherwise [throw [list :unknown-op "This op needs its length specified for disassembly to work" op [current-lambda]]]]]]

[defn bytecode/nil-catcher [error] [if [== [car error] :type-error] #nil [throw error]]]

[defn bytecode-op->val [a b c]
  "Turn three bytecode ops representing an encoded value into an actual value"
  [index->val [logior [ash [bytecode-op->int a] 16]
                      [ash [bytecode-op->int b]  8]
                      [bytecode-op->int c]]]]

[defn bytecode-arr->val [a i]
  "Read a bytecode encoded value in A at I and return it"
  [try bytecode/nil-catcher [bytecode-op->val [ref a i] [ref a [+ 1 i]] [ref a [+ 2 i]]]]]

[defn bytecode-op->sym [a b c]
  "Turn three bytecode ops representing an encoded symbol into an actual symbol"
  [index->sym [logior [ash [bytecode-op->int a] 16]
                      [ash [bytecode-op->int b]  8]
                      [bytecode-op->int c]]]]

[defn bytecode-arr->sym [a i]
  "Read a bytecode encoded symbol in A at I and return it"
  [try bytecode/nil-catcher [bytecode-op->sym [ref a i] [ref a [+ 1 i]] [ref a [+ 2 i]]]]]

[defn bytecode-op->offset [a b]
  "Turn two bytecode ops encoding an offset into the integer representation"
  [def t [logior [ash [bytecode-op->int a] 8]
                 [bytecode-op->int b]   ]   ]
  [if-not [> t 32768] t
          [- [- 65536 t]]]]

[defn bytecode-arr->offset [a i]
  "Read a bytecode encoded offset in A at I and return it as a signed integer"
  [bytecode-op->offset [ref a i] [ref a [+ 1 i]]]]

[defn disassemble/op [a i]
  "Disassemble a single bytecode op in A at I and return it as an s-expression, that could be applied to eval"
  [case [ref a i]
    [#$0  '[$nop]]
    [#$1  '[$ret]]
    [#$2  `[$push/int/byte ~[bytecode-op->int [ref a [+ i 1]]]]]
    [#$3  '[$add/int]]
    [#$4  `[$apply/new ~[bytecode-op->int [ref a [+ i 1]]]
                       ~[bytecode-arr->val a [+ i 2]]]]
    [#$5  `[$push/lval ~[bytecode-arr->val a [+ i 1]]]]

    [#$7  `[$apply/dynamic/new ~[bytecode-op->int [ref a [+ i 1]]]]]
    [#$8  `[$apply ~[bytecode-op->int [ref a [+ i 1]]]
                   ~[bytecode-arr->val a [+ i 2]]]]
    [#$9  `[$jmp*  ~[bytecode-arr->offset a [+ i 1]]]]
    [#$A  `[$jt*   ~[bytecode-arr->offset a [+ i 1]]]]
    [#$B  `[$jf*   ~[bytecode-arr->offset a [+ i 1]]]]
    [#$C  '[$dup]]
    [#$D  '[$drop]]
    [#$E  `[$def ~[bytecode-arr->sym a [+ i 1]]]]
    [#$F  `[$set ~[bytecode-arr->sym a [+ i 1]]]]
    [#$10 `[$get ~[bytecode-arr->sym a [+ i 1]]]]
    [#$11 '[$car]]
    [#$12 '[$cdr]]
    [#$13 '[$closure/push]]
    [#$14 '[$cons]]
    [#$15 '[$let]]
    [#$16 '[$closure/pop]]

    [#$18 `[$try/old ~[bytecode-arr->offset a [+ i 1]]]]
    [#$19 `[$try ~[bytecode-arr->offset a [+ i 1]]]]
    [#$1A `[$apply/dynamic ~[bytecode-op->int [ref a [+ i 1]]]]]
    [#$1B `[$roots/push]]
    [#$1C `[$roots/pop]]

    [#$1E `[$<]]
    [#$1F `[$<=]]
    [#$20 `[$==]]
    [#$21 `[$>=]]
    [#$22 `[$>]]
    [#$23 `[$push/symbol ~[bytecode-arr->sym a [+ i 1]]]]
    [#$24 `[$push/nil]]
    [#$25 `[$fn     ~[bytecode-arr->val a [+ i 1]]
                    ~[bytecode-arr->val a [+ i 4]]
                    ~[bytecode-arr->val a [+ i 7]]
                    ~[bytecode-arr->val a [+ i 10]]]]
    [#$26 `[$macro* ~[bytecode-arr->val a [+ i 1]]
                    ~[bytecode-arr->val a [+ i 4]]
                    ~[bytecode-arr->val a [+ i 7]]
                    ~[bytecode-arr->val a [+ i 10]]]]
    [otherwise :unknown-op]]]

[defn disassemble/array [a i]
  "Disassemble all bytecode operations in the plain array A starting at I, turning it into an assembler S-Expression and return it as a dotted pair, with the car containing the offset and the cdr containing the S-Expression"
  [def ret #nil]
  [while [< i [array/length a]]
    [set! ret [cons [cons i [disassemble/op a i]] ret]]
    [set! i [+ i [disassemble/length [ref a i]]]]]
  [nreverse ret]]

[defn disassemble/bytecode-array [code]
  "Disassemble the bytecode array CODE, turning it into a list of dotted pairs, with the car containing the offset and the cdr containing assembler S-Expressions,"
  [disassemble/array [bytecode-arr->arr code] 0]]

[defn disassemble/print [bc]
  [for-in [a [disassemble/bytecode-array bc]]
          [println [cat [ansi-blue [string/pad-start [string [car a]] 6]]
                        " - "
                        [cdr a]]]]]

[defn disassemble [bc]
  "Disassemble the bytecode array CODE, turning it into a list of dotted pairs, with the car containing the offset and the cdr containing assembler S-Expressions,"
  [case [type-of bc]
    [[:lambda :macro] [disassemble/print [ref [closure bc] :code]]]
    [:bytecode-array  [disassemble/print bc]]
    [otherwise [throw [list :type-error "Can't disassemble that" bc [current-lambda]]]]]]