summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Thompson <dthompson@vistahigherlearning.com>2022-08-12 23:19:54 -0400
committerDavid Thompson <dthompson@vistahigherlearning.com>2022-09-22 17:12:26 -0400
commit1259d6fb38873b82e10e634b910f9bc4036c8ad9 (patch)
treedf3536d73cabf88ea6bd63f8466db0737066a4f1
parentcbed819a84f88d08e1f415ae5f006cdc44626eee (diff)
Step 7: Heap Allocation
This one took me a looooong time to figure out.
-rw-r--r--compiler.scm245
1 files changed, 216 insertions, 29 deletions
diff --git a/compiler.scm b/compiler.scm
index 4305f2c..ab76e20 100644
--- a/compiler.scm
+++ b/compiler.scm
@@ -15,17 +15,21 @@
(define boolean-shift 7)
(define boolean-tag 31)
(define empty-list 47) ;; #b00101111
+;; 3 bit tags for heap allocated values.
+(define pair-tag 1)
+(define vector-tag 2)
+(define string-tag 3)
(define (immediate-rep x)
- (cond
- ((integer? x)
- (ash x fixnum-shift))
- ((char? x)
- (logior (ash (char->integer x) char-shift) char-tag))
- ((boolean? x)
- (logior (ash (if x 1 0) boolean-shift) boolean-tag))
- ((null? x)
- empty-list)))
+ (cond
+ ((integer? x)
+ (ash x fixnum-shift))
+ ((char? x)
+ (logior (ash (char->integer x) char-shift) char-tag))
+ ((boolean? x)
+ (logior (ash (if x 1 0) boolean-shift) boolean-tag))
+ ((null? x)
+ empty-list)))
(define (immediate? x)
(or (integer? x)
@@ -43,18 +47,15 @@
(define (primcall? x)
(and (pair? x)
(memq (first x)
- '(add1
- sub1
- integer->char
- char->integer
- zero?
- null?
- integer?
- char?
- boolean?
+ '(add1 sub1
+ integer->char char->integer
+ zero? null? integer? char? boolean?
+ - *
= < <= > >=
- eq? char=?))))
+ eq? char=?
+ cons car cdr
+ make-vector vector vector-length vector-ref vector-set!
+ make-string string string-length string-ref string-set!))))
(define (if? x)
(and (pair? x)
@@ -69,6 +70,9 @@
(define (primcall-operand2 x)
(third x))
+(define (primcall-operand3 x)
+ (fourth x))
+
(define (lookup name env)
(or (assq-ref env name)
(error "unbound variable:" name)))
@@ -105,12 +109,17 @@
(format #f "$~a" value))
(define rax (register "rax"))
+(define rbx (register "rbx"))
(define rsp (register "rsp"))
+(define rsi (register "rsi"))
(define al (register "al"))
(define (offset n register)
(format #f "~a(~a)" n register))
+(define (register-offset base-register index-register)
+ (format #f "(~a, ~a)" base-register index-register))
+
(define (emit template-string . args)
(display " ")
(apply format #t template-string args)
@@ -119,9 +128,15 @@
(define (emit-mov src dest)
(emit "mov ~a, ~a" src dest))
+(define (emit-movb src dest)
+ (emit "movb ~a, ~a" src dest))
+
(define (emit-and mask dest)
(emit "and ~a, ~a" mask dest))
+(define (emit-andq mask dest)
+ (emit "andq ~a, ~a" mask dest))
+
(define (emit-or addend dest)
(emit "or ~a, ~a" addend dest))
@@ -257,6 +272,167 @@
(emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env))
((char=?)
(emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env))
+ ((cons)
+ (emit-expr (primcall-operand2 x) si env) ; eval cdr
+ (emit-mov rax (offset si rsp)) ; save car to stack
+ (emit-expr (primcall-operand1 x) (- si wordsize) env) ; eval car
+ (emit-mov rax (offset 0 rsi)) ; move car onto heap
+ (emit-mov (offset si rsp) rax) ; copy cdr from the stack
+ (emit-mov rax (offset wordsize rsi)) ; move cdr onto heap
+ (emit-mov rsi rax) ; heap pointer is the value returned
+ (emit-or (immediate pair-tag) rax) ; set tag
+ (emit-add (immediate (* wordsize 2)) rsi)) ; bump heap pointer
+ ((car)
+ (emit-expr (primcall-operand1 x) si env)
+ ;; We have to untag the pair to get the pointer to the 'car'.
+ ;; The pair tag is 1 so simply subtracting 1 gets us the pointer.
+ (emit-mov (offset -1 rax) rax))
+ ((cdr)
+ (emit-expr (primcall-operand1 x) si env)
+ ;; Again, the value is the pointer incremented by 1, so to get to
+ ;; the cdr we need to jump ahead one word minus 1 byte.
+ (emit-mov (offset (- wordsize 1) rax) rax))
+ ((make-vector)
+ (emit-expr (primcall-operand1 x) si env)
+ ;; Wouldn't it be better to save the length untagged so that
+ ;; vector-ref and vector-set! don't have untag it and
+ ;; vector-length just needs to retag it?
+ (emit-mov rax (offset 0 rsi)) ; save length onto heap
+ (emit-mov rax rbx) ; save length in another register
+ (emit-mov rsi rax) ; copy heap pointer
+ (emit-or (immediate vector-tag) rax) ; set tag
+ ;; Align to next two-word object boundary. I had to add an
+ ;; additional shift instruction compared to what the paper did to
+ ;; accommodate the difference in word size (the paper uses 4 byte
+ ;; words, I'm using 8 byte words) which makes me wonder: For a 64
+ ;; bit compiler, should the fixnum tag be 3 bits instead of 2?
+ (emit-sal (immediate 1) rbx)
+ (emit-add (immediate (* 2 wordsize)) rbx)
+ (emit-and (immediate (- (* 2 wordsize))) rbx)
+ (emit-add rbx rsi)) ; bump heap pointer by length of vector
+ ((vector)
+ (let ((items (cdr x)))
+ ;; Eval all vector items and save them to stack locations.
+ ;; It's important that we eval all items first, and not copy to
+ ;; the heap as we go, because any sub-expression that also does
+ ;; heap allocation will corrupt the heap space we think we have
+ ;; all to ourselves here.
+ (let loop ((items items)
+ (si si))
+ (unless (null? items)
+ (emit-expr (car items) si env) ; eval item
+ (emit-mov rax (offset si rsp)) ; save to stack
+ (loop (cdr items) (- si wordsize))))
+ ;; Save length onto heap (tagged as immediate int)
+ (emit-mov (immediate (ash (length items) 2)) rax)
+ (emit-mov rax (offset 0 rsi))
+ ;; Copy items from the stack to the vector.
+ (let loop ((items items)
+ (si si)
+ (heap-offset wordsize))
+ (unless (null? items)
+ (emit-mov (offset si rsp) rax) ; copy from stack
+ (emit-mov rax (offset heap-offset rsi)) ; save to heap
+ (loop (cdr items) (- si wordsize) (+ heap-offset wordsize))))
+ (emit-mov rsi rax) ; copy heap pointer
+ (emit-or (immediate vector-tag) rax) ; set tag
+ ;; Align heap pointer to next available 2 word boundary.
+ (emit-add (immediate (logand (* (+ (length items) 2) wordsize)
+ (- (* 2 wordsize))))
+ rsi)))
+ ((vector-length)
+ (emit-expr (primcall-operand1 x) si env) ; get vector pointer
+ (emit-sub (immediate vector-tag) rax) ; untag vector
+ (emit-mov (offset 0 rax) rax)) ; the first word contains the length
+ ((vector-ref)
+ (emit-expr (primcall-operand2 x) si env) ; get index arg
+ (emit-shr (immediate fixnum-shift) rax) ; untag it
+ (emit-add (immediate 1) rax) ; first word is the length so skip over it
+ (emit-imul (immediate wordsize) rax) ; scale by word size
+ (emit-mov rax rbx) ; save index to another register
+ (emit-expr (primcall-operand1 x) si env) ; get vector pointer
+ (emit-sub (immediate vector-tag) rax) ; untag vector
+ (emit-mov (register-offset rbx rax) rax)) ; get element
+ ((vector-set!)
+ (emit-expr (primcall-operand1 x) si env) ; get vector pointer
+ (emit-sub (immediate vector-tag) rax) ; untag vector
+ (emit-mov rax rbx) ; save vector
+ (emit-expr (primcall-operand2 x) si env) ; get index
+ (emit-shr (immediate fixnum-shift) rax) ; untag it
+ (emit-add (immediate 1) rax) ; first word is the length so skip over it
+ (emit-imul (immediate wordsize) rax) ; scale by word size
+ (emit-add rax rbx) ; advance pointer to element being set
+ (emit-expr (primcall-operand3 x) si env) ; get value
+ (emit-mov rax (offset 0 rbx)))
+ ((make-string)
+ (emit-expr (primcall-operand1 x) si env)
+ (emit-mov rax (offset 0 rsi)) ; save length onto heap
+ (emit-mov rax rbx) ; save length in another register
+ (emit-mov rsi rax) ; write to heap
+ (emit-or (immediate string-tag) rax) ; set tag
+ (emit-shr (immediate fixnum-shift) rbx) ; untag length
+ ;; Align to next two-word object boundary, keeping in mind that
+ ;; we need one additional word to store the length. Since we're
+ ;; only storing ASCII characters in this simple compiler, we only
+ ;; need 1 byte per character.
+ (emit-add (immediate (- (* 3 wordsize) 1)) rbx)
+ (emit-and (immediate (- (* 2 wordsize))) rbx)
+ (emit-add rbx rsi)) ; bump heap pointer by length of string
+ ((string)
+ (let ((chars (cdr x)))
+ ;; Save length onto heap (tagged as immediate int)
+ (emit-mov (immediate (ash (length chars) 2)) rax)
+ (emit-mov rax (offset 0 rsi))
+ ;; Add chars to string, one byte per item since we're only
+ ;; covering the ASCII character set.
+ (let loop ((chars chars)
+ (heap-offset 1))
+ (unless (null? chars)
+ (emit-expr (first chars) si env) ; eval arg (should be char)
+ (emit-shr (immediate char-shift) rax) ; untag char
+ (emit-andq (immediate -255) (offset heap-offset rsi)) ; clear LSB
+ (emit-or rax (offset heap-offset rsi)) ; set char
+ (loop (cdr chars) (+ heap-offset 1))))
+ (emit-mov rsi rax) ; copy heap pointer
+ (emit-or (immediate string-tag) rax) ; set tag
+ ;; Align heap pointer to next available 2 word boundary.
+ (emit-add (immediate (logand (+ (length chars) (- (* 3 wordsize) 1))
+ (- (* 2 wordsize))))
+ rsi)))
+ ((string-length)
+ (emit-expr (primcall-operand1 x) si env) ; get string pointer
+ (emit-sub (immediate string-tag) rax) ; untag string
+ (emit-mov (offset 0 rax) rax)) ; the first word contains the length
+ ((string-ref)
+ (emit-expr (primcall-operand2 x) si env) ; get index arg
+ (emit-shr (immediate fixnum-shift) rax) ; untag it
+ ;; The first word of a string contains the length, however we
+ ;; don't want to advance the pointer by a word because each
+ ;; character is only a single byte. Instead, we advance the
+ ;; pointer by a single byte so that the character we want to
+ ;; access is in the least significant bit (LSB) section of the
+ ;; word. From there, its a simple matter of masking out
+ ;; everything but the LSB to isolate the character.
+ (emit-add (immediate 1) rax)
+ (emit-mov rax rbx) ; save index to another register
+ (emit-expr (primcall-operand1 x) si env) ; get string pointer
+ (emit-sub (immediate string-tag) rax) ; untag string
+ (emit-mov (register-offset rbx rax) rax) ; get char into LSB position
+ (emit-and (immediate 255) rax) ; clear out everything but the LSB
+ (emit-sal (immediate char-shift) rax) ; tag char
+ (emit-or (immediate char-tag) rax))
+ ((string-set!)
+ (emit-expr (primcall-operand1 x) si env) ; get string pointer
+ (emit-sub (immediate string-tag) rax) ; untag string
+ (emit-mov rax rbx) ; save string pointer
+ (emit-expr (primcall-operand2 x) si env) ; get index arg
+ (emit-shr (immediate fixnum-shift) rax) ; untag it
+ (emit-add (immediate 1) rax)
+ (emit-add rax rbx) ; get char into LSB position
+ (emit-expr (primcall-operand3 x) si env) ; get value
+ (emit-shr (immediate char-shift) rax) ; untag char
+ (emit-andq (immediate -255) (offset 0 rbx)) ; clear LSB
+ (emit-or rax (offset 0 rbx))) ; copy char
(else
(error "unknown primcall op" (primcall-op x)))))
@@ -308,7 +484,7 @@
(emit "ret")))
(define (compile-and-run x)
- (with-output-to-file "scheme_entry.S"
+ (with-output-to-file "scheme_entry.s"
(lambda ()
(display ".text
.p2align 4
@@ -317,17 +493,21 @@
scheme_entry:
")
(compile-program x)))
- (and (zero? (system* "gcc" "-c" "scheme_entry.S"))
- (zero? (system* "gcc" "-c" "test.c"))
- (zero? (system* "gcc" "-o" "test" "scheme_entry.o" "test.o"))
- (let* ((pipe (open-pipe* OPEN_READ "./test"))
- (output (read-line pipe)))
- (close pipe)
- output)))
+ (unless (zero? (system* "gcc" "-c" "scheme_entry.s"))
+ (error "failed to compile scheme_entry.s"))
+ (unless (zero? (system* "gcc" "-c" "test.c"))
+ (error "failed to compile test.c"))
+ (unless (zero? (system* "gcc" "-o" "test" "scheme_entry.o" "test.o"))
+ (error "failed to link program"))
+ (let* ((pipe (open-pipe* OPEN_READ "./test"))
+ (output (read-line pipe)))
+ (close pipe)
+ output))
(define (test-case x expected-output)
(let ((result (compile-and-run x)))
- (if (string=? result expected-output)
+ (if (and (not (eof-object? result))
+ (string=? result expected-output))
#t
(begin
(display "expected: ")
@@ -383,4 +563,11 @@ scheme_entry:
(test-case '(let ((x 1)) x) "1")
(test-case '(let ((x 1) (y 2)) (+ x y)) "3")
(test-case '(if #t 1 2) "1")
- (test-case '(if #f 1 2) "2"))
+ (test-case '(if #f 1 2) "2")
+ (test-case '(car (cons 10 20)) "10")
+ (test-case '(cdr (cons 10 20)) "20")
+ (test-case '(car (cdr (cons 1 (cons 2 '())))) "2")
+ (test-case '(vector-length (make-vector 3)) "3")
+ (test-case '(vector-ref (vector 1 2 3) 1) "2")
+ (test-case '(string-length (make-string 5)) "5")
+ (test-case '(string-ref (string #\a #\b #\c) 1) "b"))