Login
7 branches 0 tags
Ben (X13/Arch) Experiment with letting Claude write a Nujel reader in Nujel dfea60a 10 days ago 1257 Commits
nujel / tests / reader.nuj
(import (green red) :ansi)

(def nujel-reader
  (let*
    (def Comment (gensym))

    (defn delimiter? (c)
          (or (<= c 32)
              (= c #\;)
              (= c #\()
              (= c #\))
              (= c #\[)
              (= c #\])
              (= c #\{)
              (= c #\})
              (= c #\#)
              (= c #\')
              (= c #\`)
              (= c #\~)
              (= c #\")))

    (defn closing-bracket? (c)
          (or (= c #\)) (= c #\]) (= c #\})))

    (defn digit? (c)
          (and (>= c #\0) (<= c #\9)))

    (defn numeric-separator? (c)
          (or (= c #\_) (= c #\,)))

    (defn char->digit (c)
          (cond ((and (>= c #\0) (<= c #\9)) (- c #\0))
                ((and (>= c #\a) (<= c #\z)) (+ (- c #\a) 10))
                ((and (>= c #\A) (<= c #\Z)) (+ (- c #\A) 10))
                (#t -1)))

    (defn hex-char->value (c)
          (cond ((and (>= c #\0) (<= c #\9)) (- c #\0))
                ((and (>= c #\A) (<= c #\F)) (+ (- c #\A) 10))
                (#t -1)))

    (defn read-buffer (buf start end)
          (def result (buffer/allocate 256))
          (def i start)
          (def j 0)
          (while (< i end)
            (def c (ref buf i))
            (when (or (<= c 32) (delimiter? c))
              (return (cons (:cut result 0 j) (- i 1))))
            (def high (hex-char->value c))
            (when (< high 0)
              (exception :read-error "Invalid hex char in buffer literal" c))
            (set! i (inc/int i))
            (when (>= i end)
              (return (cons (:cut result 0 j) (- i 1))))
            (def c2 (ref buf i))
            (when (or (<= c2 32) (delimiter? c2))
              (exception :read-error "Odd number of hex digits in buffer literal"))
            (def low (hex-char->value c2))
            (when (< low 0)
              (exception :read-error "Invalid hex char in buffer literal" c2))
            (set! result j (bit-or (bit-shift-left high 4) low))
            (set! j (inc j))
            (set! i (inc/int i)))
          (cons (:cut result 0 j) (- i 1)))

    (defn read-number-base (buf start end base)
          (def i start)
          (def result 0)
          (while (< i end)
            (def c (ref buf i))
            (when (or (<= c 32) (delimiter? c) (= c #\.))
              (return (cons result (- i 1))))
            (def digit (char->digit c))
            (cond ((and (>= digit 0) (< digit base))
                   (set! result (+ (* result base) digit)))
                  ((not (numeric-separator? c))
                   (exception :read-error "Invalid digit in number" c)))
            (set! i (inc/int i)))
          (cons result (- i 1)))

    (defn count-digits (n)
          (if (zero? n)
              1
              (do (def count 0)
                  (def v n)
                  (while (> v 0)
                    (set! count (inc count))
                    (set! v (div/int v 10)))
                  count)))

    (defn read-number (buf start end base)
          (def i start)
          (def negative #f)
          (when (and (< i end) (= (ref buf i) #\-))
            (set! negative #t)
            (set! i (inc/int i)))
          (def int-result (read-number-base buf i end base))
          (def value (car int-result))
          (set! i (+ (cdr int-result) 1))
          (if (and (< i end) (= (ref buf i) #\.))
              (do
                (set! i (inc/int i))
                (def leading-zeroes 0)
                (while (and (< i end) (= (ref buf i) #\0))
                  (set! leading-zeroes (inc leading-zeroes))
                  (set! i (inc/int i)))
                (def mant-result (read-number-base buf i end base))
                (def mantissa (car mant-result))
                (set! i (+ (cdr mant-result) 1))
                (def total-digits (+ leading-zeroes (count-digits mantissa)))
                (def float-val
                     (if (zero? mantissa)
                         (+ 0.0 value)
                         (+ value (* mantissa (pow 10.0 (- total-digits))))))
                (cons (if negative (- float-val) float-val) (- i 1)))
              (cons (if negative (- value) value) (- i 1))))

    (defn new-symbol (buf start end)
          (:symbol (:cut buf start end)))

    (defn new-keyword (buf start end)
          (:keyword (:cut buf start end)))

    (defn read-symbol-path (buf start end ref-val as-keyword)
          ; as-keyword: if true, first segment is keyword; subsequent are always keywords
          (defn make-segment (s e)
                (if (or as-keyword (not (nil? ref-val)))
                    (new-keyword buf s e)
                    (new-symbol buf s e)))
          (defn finish (seg pos)
                (cons (if (nil? ref-val) seg (list 'ref ref-val seg)) pos))
          (def i start)
          (while (< i end)
            (def c (ref buf i))
            (cond ((= c #\.)
                   (def seg (make-segment start i))
                   (def combined (if (nil? ref-val) seg (list 'ref ref-val seg)))
                   (return (read-symbol-path buf (inc i) end combined #f)))
                  ((= c #\:)
                   (def nc (if (< (inc i) end) (ref buf (inc i)) 0))
                   (if (or (delimiter? nc) (= nc 0))
                       (return (finish (new-keyword buf start i) i))
                       (exception :read-error "Colon in middle of symbol")))
                  ((delimiter? c)
                   (return (finish (make-segment start i) (- i 1)))))
            (set! i (inc/int i)))
          (finish (make-segment start i) i))

    (defn read-symbol (buf start end)
          (read-symbol-path buf start end #nil #f))

    (defn read-keyword (buf start end)
          (read-symbol-path buf start end #nil #t))

    (defn escape-char (c)
          (case c
                (#\0 0)
                (#\a 7)
                (#\b 8)
                (#\t 9)
                (#\n 10)
                (#\v 11)
                (#\f 12)
                (#\r 13)
                (#\e 27)
                (#\" 34)
                (#\\ 92)
                (otherwise (exception :read-error "Unknown escape character" c))))

    (defn read-string (buf start end)
          (def result (buffer/allocate 256))
          (def i start)
          (def j 0)
          (while (< i end)
            (def c (ref buf i))
            (cond ((= c #\\)
                   (set! i (inc/int i))
                   (when (>= i end)
                     (exception :read-error "Unexpected end in string escape"))
                   (def ec (escape-char (ref buf i)))
                   (set! result j ec)
                   (set! j (inc j)))
                  ((= c #\")
                   (return (cons (:string (:cut result 0 j)) i)))
                  (#t
                   (set! result j c)
                   (set! j (inc j))))
            (set! i (inc/int i)))
          (exception :read-error "Unterminated string"))

    (defn read-comment (buf start end)
          (def i start)
          (while (< i end)
            (when (= (ref buf i) 10)
              (return (cons Comment i)))
            (set! i (inc/int i)))
          (cons Comment i))

    (defn read-ml-comment (buf start end)
          (def i start)
          (def l 0)
          (def c 0)
          (def depth 1)
          (while (< i end)
            (set! l c)
            (set! c (ref buf i))
            (cond ((and (= l #\#) (= c #\|))
                   (set! depth (inc depth))
                   (set! c 0))
                  ((and (= l #\|) (= c #\#))
                   (set! depth (dec depth))
                   (when (zero? depth)
                     (return (cons Comment i)))
                   (set! c 0)))
            (set! i (inc/int i)))
          (cons Comment i))

    (defn read-character (buf start end)
          (when (>= start end) (exception :read-error "Empty character literal"))
          (def c0 (ref buf start))
          (def c1 (if (< (inc start) end) (ref buf (inc start)) 0))
          (def i (inc start))
          (while (and (< i end)
                      (not (delimiter? (ref buf i)))
                      (> (ref buf i) 32))
            (set! i (inc/int i)))
          (def result
               (cond ((and (= c0 #\B) (= c1 #\a)) 8)   ; Backspace
                     ((and (= c0 #\T) (= c1 #\a)) 9)   ; Tab
                     ((and (= c0 #\L) (= c1 #\i)) 10)  ; Linefeed
                     ((and (= c0 #\R) (= c1 #\e)) 13)  ; Return
                     ((and (= c0 #\l) (= c1 #\f)) 10)
                     ((and (= c0 #\c) (= c1 #\r)) 13)
                     (#t c0)))
          (cons result (- i 1)))

    (defn read-literal-collection (buf start end constructor)
          (if (and (< (inc start) end) (= (ref buf (inc start)) #\())
              (let ((t (read-list buf (+ start 2) end #\) 1))
                    (items (car t)))
                (cons (apply constructor (if (nil? items) (list #nil) items))
                      (cdr t)))
              (cons Comment (inc start))))

    (defn read-special (buf start end)
          (when (>= start end) (return (cons Comment start)))
          (def c (ref buf start))
          (case c
                (#\! (read-comment buf start end))
                (#\| (read-ml-comment buf start end))
                (#\; (cons Comment (cdr (read-value buf (inc start) end))))
                (#\\ (read-character buf (inc start) end))
                (#\x (read-number buf (inc start) end 16))
                (#\d (read-number buf (inc start) end 10))
                (#\o (read-number buf (inc start) end 8))
                (#\b (read-number buf (inc start) end 2))
                (#\m (read-buffer buf (inc start) end))
                (#\# (read-literal-collection buf start end array/new))
                (#\@ (read-literal-collection buf start end tree/new))
                (#\t (def s (read-symbol buf start end))
                 (if (or (= (car s) 't) (= (car s) 'true))
                     (cons #t (cdr s))
                     (exception :read-error "Unknown #t special value" s)))
                (#\f (def s (read-symbol buf start end))
                 (if (or (= (car s) 'f) (= (car s) 'false))
                     (cons #f (cdr s))
                     (exception :read-error "Unknown #f special value" s)))
                (#\n (def s (read-symbol buf start end))
                 (if (or (= (car s) 'n) (= (car s) 'nil))
                     (cons #nil (cdr s))
                     (exception :read-error "Unknown #n special value" s)))
                (otherwise (cons Comment (inc start)))))

    (defn read-quote (buf start end sym)
          (def t (read-value buf start end))
          (cons (list sym (car t)) (cdr t)))

    (defn nreverse-dotted (lst final-cdr)
          (def result final-cdr)
          (while (pair? lst)
            (set! result (cons (car lst) result))
            (set! lst (cdr lst)))
          result)

    (defn finalize-list (ret is-dotted dotted-cdr)
          (cond (is-dotted (nreverse-dotted ret dotted-cdr))
                ((nil? ret) (cons #nil #nil))
                (#t (nreverse ret))))

    (defn read-list (buf start end terminator depth)
          (def ret #nil)
          (def dotted-cdr #nil)
          (def is-dotted #f)
          (def i start)
          (while (< i end)
            (def c (ref buf i))
            (cond ((<= c 32) #t)
                  ((= c #\#) (def t (read-special buf (inc i) end))
                   (when-not (= (car t) Comment) (set! ret (cons (car t) ret)))
                   (set! i (cdr t)))
                  ((= c #\;) (set! i (cdr (read-comment buf (inc i) end))))
                  ((or (= c terminator) (closing-bracket? c))
                   (if (zero? depth)
                       (exception :read-error "Unmatching bracket")
                       (return (cons (finalize-list ret is-dotted dotted-cdr) i))))
                  ((and (= c #\.)
                        (or (>= (inc i) end)
                            (let ((nc (ref buf (inc i))))
                              (or (<= nc 32) (delimiter? nc)))))
                   (when (nil? ret)
                     (exception :read-error "Missing car in dotted pair"))
                   (set! i (inc/int i))
                   (while (and (< i end) (<= (ref buf i) 32))
                     (set! i (inc/int i)))
                   (def t (read-value buf i end))
                   (set! dotted-cdr (car t))
                   (set! is-dotted #t)
                   (set! i (cdr t)))
                  (#t (def t (read-value buf i end))
                      (when-not (= (car t) Comment)
                        (set! ret (cons (car t) ret)))
                      (set! i (cdr t))))
            (set! i (inc/int i)))
          (cons (finalize-list ret is-dotted dotted-cdr) i))

    (defn wrap-collection (t sym)
          (def items (car t))
          (cons (cons sym (if (nil? items) (list #nil) items)) (cdr t)))

    (defn read-value (buf start end)
              (def i start)
              (while (and (< i end) (<= (ref buf i) 32))
                (set! i (inc/int i)))
              (when (>= i end) (return (cons Comment i)))
              (def c (ref buf i))
              (case c
                    (#\# (read-special buf (inc i) end))
                    (#\; (read-comment buf (inc i) end))
                    (#\( (read-list buf (inc i) end #\) 1))
                    (#\[ (wrap-collection (read-list buf (inc i) end #\] 1) 'array/new))
                    (#\{ (wrap-collection (read-list buf (inc i) end #\} 1) 'tree/new))
                    (#\" (read-string buf (inc i) end))
                    (#\' (read-quote buf (inc i) end 'quote))
                    (#\` (read-quote buf (inc i) end 'quasiquote))
                    (#\~ (if (and (< (inc i) end) (= (ref buf (inc i)) #\@))
                             (read-quote buf (+ i 2) end 'unquote-splicing)
                             (read-quote buf (inc i) end 'unquote)))
                    (#\: (read-keyword buf (inc i) end))
                    (otherwise
                     (if (or (digit? c)
                             (and (= c #\-)
                                  (< (inc i) end)
                                  (digit? (ref buf (inc i)))))
                         (read-number buf i end 10)
                         (read-symbol buf i end)))))

    (defn nujel-reader (buf)
          (car (read-list buf 0 (:length buf) #\) 0)))))

(defn test-reader (file)
      (def raw (slurp file))
      (def l (nujel-reader raw))
      (def t (read raw))
      (pfmtln "-- {file} -- ")
      (pfmtln "{:?} - {:?}" l (map l :type-of))
      (pfmtln "{:?} - {:?}" t (map t :type-of))
      (if (equal? l t)
          (pfmtln "-- {} --" (green "Both readers return an identical result"))
          (do (pfmtln "-- {} --" (red "ERROR"))
              (def i 0)
              (def max-i (min (:length l) (:length t)))
              (while (< i max-i)
                (def li (ref l i))
                (def ti (ref t i))
                (when-not (equal? li ti)
                  (pfmtln "  Item {}: L={:?} T={:?}" i li ti)
                  (when (and (pair? li) (pair? ti))
                    (pfmtln "    L-car: {:?} L-cdr: {:?}" (car li) (cdr li))
                    (pfmtln "    T-car: {:?} T-cdr: {:?}" (car ti) (cdr ti))
                    (when (pair? (cadr li))
                      (pfmtln "    L-cadr types: {:?}" (map (cadr li) :type-of))
                      (pfmtln "    T-cadr types: {:?}" (map (cadr ti) :type-of)))))
                (set! i (inc i))))))

(for-each (directory/read-recursive "tests/reader") test-reader)