summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Thompson <dthompson@vistahigherlearning.com>2022-08-19 08:22:54 -0400
committerDavid Thompson <dthompson@vistahigherlearning.com>2022-09-22 17:12:26 -0400
commit18203a946df9fc565acee20f4ee6199cc677e0ea (patch)
treef3db386c1b7c64e012774718a2f593d892c15773
parent0f78dded117af2e416357ca857015b2a13ae113f (diff)
Step 9: Closures
This was a big one! Took me forever to wrap my head around the implementation but it's so cool now that it works!
-rw-r--r--compiler.scm332
1 files changed, 252 insertions, 80 deletions
diff --git a/compiler.scm b/compiler.scm
index f890276..01bc40e 100644
--- a/compiler.scm
+++ b/compiler.scm
@@ -1,7 +1,8 @@
(use-modules (ice-9 format)
(ice-9 popen)
(ice-9 rdelim)
- (srfi srfi-1))
+ (srfi srfi-1)
+ (srfi srfi-11))
;; Assuming a 64 bit intel machine here.
(define wordsize 8)
@@ -19,6 +20,7 @@
(define pair-tag 1)
(define vector-tag 2)
(define string-tag 3)
+(define closure-tag 6)
(define (immediate-rep x)
(cond
@@ -40,13 +42,15 @@
(define (variable? x)
(symbol? x))
+(define (op-eq? lst op)
+ (and (pair? lst)
+ (eq? (first lst) op)))
+
(define (if? x)
- (and (pair? x)
- (eq? (first x) 'if)))
+ (op-eq? x 'if))
(define (let? x)
- (and (pair? x)
- (eq? (first x) 'let)))
+ (op-eq? x 'let))
(define (primcall? x)
(and (pair? x)
@@ -62,16 +66,19 @@
make-string string string-length string-ref string-set!))))
(define (labels? x)
- (and (pair? x)
- (eq? (first x) 'labels)))
+ (op-eq? x 'labels))
(define (code? x)
- (and (pair? x)
- (eq? (first x) 'code)))
+ (op-eq? x 'code))
-(define (labelcall? x)
- (and (pair? x)
- (eq? (first x) 'labelcall)))
+(define (closure? x)
+ (op-eq? x 'closure))
+
+(define (funcall? x)
+ (op-eq? x 'funcall))
+
+(define (lambda? x)
+ (op-eq? x 'lambda))
(define (primcall-op x)
(first x))
@@ -87,7 +94,7 @@
(define (lookup name env)
(or (assq-ref env name)
- (error "unbound variable:" name)))
+ (error "unbound variable:" name env)))
(define (extend-env name si-or-label env)
(cons (cons name si-or-label) env))
@@ -113,18 +120,25 @@
(define (alternate x)
(fourth x))
-(define (lvar x)
+(define (funcall-proc x)
(second x))
-(define (arguments x)
+(define (funcall-arguments x)
(drop x 2))
(define unique-counter (make-parameter 0))
-(define (unique-label)
+(define (unique-number)
(let ((n (unique-counter)))
(unique-counter (+ n 1))
- (format #f "L~a" n)))
+ n))
+
+(define (unique-lvar)
+ (string->symbol
+ (format #f "f~a" (unique-number))))
+
+(define (unique-label)
+ (format #f "L~a" (unique-number)))
(define (register name)
(format #f "%~a" name))
@@ -132,10 +146,12 @@
(define (immediate value)
(format #f "$~a" value))
-(define rax (register "rax"))
-(define rbx (register "rbx"))
-(define rsp (register "rsp"))
-(define rsi (register "rsi"))
+(define rax (register "rax")) ; default register for storing values
+(define rbx (register "rbx")) ; extra storage
+(define rdi (register "rdi")) ; closure pointers
+(define rsp (register "rsp")) ; heap
+(define rsi (register "rsi")) ; stack
+(define rip (register "rip")) ; instruction pointer
(define al (register "al"))
(define (offset n register)
@@ -206,6 +222,9 @@
(define (emit-call label)
(emit "call ~a" label))
+(define (emit-lea src dest)
+ (emit "lea ~a, ~a" src dest))
+
(define (emit-ret)
(emit "ret"))
@@ -489,18 +508,100 @@
(emit-expr altern si env)
(emit-label L1)))
-(define (emit-code label vars body env)
+(define (emit-code label vars free-vars body env)
(emit-label label)
+ ;; Extend environment to include all procedure variables
(let loop ((vars vars)
- (env env)
- (si (- wordsize)))
+ (si (- wordsize))
+ (env env))
(if (null? vars)
- (begin
- (emit-expr body si env)
- (emit-ret))
+ ;; Extend environment to include all free variables and
+ ;; emit code to initialize their value from the current
+ ;; closure pointer.
+ (let free-loop ((free-vars free-vars)
+ (ci wordsize)
+ (si si)
+ (env env))
+ (if (null? free-vars)
+ ;; All variable setup is complete, we can finally
+ ;; emit the body of the procedure.
+ (begin
+ (emit-expr body si env)
+ (emit-ret))
+ (begin
+ (emit-mov (offset ci rdi) rax) ; get value from closure
+ (emit-mov rax (offset si rsp)) ; push it to stack
+ (free-loop (cdr free-vars)
+ (+ ci wordsize)
+ (- si wordsize)
+ (extend-env (first free-vars) si env)))))
(loop (cdr vars)
- (extend-env (first vars) si env)
- (- si wordsize)))))
+ (- si wordsize)
+ (extend-env (first vars) si env)))))
+
+(define (emit-funcall proc args si env)
+ (emit-mov rdi (offset si rsp)) ; save current closure pointer
+ (let loop ((args args)
+ ;; Skip a stack space to use as the return point.
+ (si* (- si (* wordsize 2))))
+ (if (null? args)
+ (let ((stack-start si))
+ (emit-expr proc si env) ; eval closure
+ (emit-sub (immediate closure-tag) rax) ; untag it to get pointer
+ (emit-mov rax rdi) ; store pointer in destination register
+ (unless (zero? stack-start)
+ (emit-add (immediate stack-start) rsp)) ; move stack pointer
+ (emit-call (string-append "*" (offset 0 rdi)))
+ (unless (zero? stack-start)
+ (emit-sub (immediate stack-start) rsp)) ; restore stack pointer
+ (emit-mov (offset (- si wordsize) rsp) rdi)) ; restore closure pointer
+ (begin
+ (emit-expr (first args) si* env)
+ (emit-mov rax (offset si* rsp))
+ (loop (cdr args)
+ (- si* wordsize))))))
+
+(define (emit-closure lvar vars si env)
+ (let ((label (lookup lvar env)))
+ (emit-lea (offset label rip) rax) ; first word of closure points to label
+ (emit-mov rax (offset 0 rsi)) ; first element of closure is label pointer
+ (let loop ((vars vars)
+ (i wordsize))
+ (if (null? vars)
+ (begin
+ (emit-mov rsi rax) ; capture heap pointer
+ (emit-or (immediate closure-tag) rax) ; set tag
+ ;; Align heap pointer to nearest 2 word boundary for next
+ ;; heap allocation.
+ (emit-add (immediate (logand (+ i wordsize)
+ (- (* 2 wordsize))))
+ rsi))
+ (let ((var-offset (lookup (first vars) env)))
+ (emit-mov (offset var-offset rsp) rax) ; copy value of free variable
+ (emit-mov rax (offset i rsi)) ; save it to closure
+ (loop (cdr vars)
+ (+ i wordsize)))))))
+
+(define (emit-expr x si env)
+ (cond
+ ((immediate? x)
+ (emit-mov (immediate (immediate-rep x)) rax))
+ ((variable? x)
+ (emit-mov (offset (lookup x env) rsp) rax))
+ ((if? x)
+ (emit-if (test x) (consequent x) (alternate x) si env))
+ ((let? x)
+ (emit-let (bindings x) (body x) si env))
+ ((primcall? x)
+ (emit-primitive-call x si env))
+ ((labels? x)
+ (emit-labels (bindings x) (body x) si env))
+ ((closure? x)
+ (emit-closure (second x) (cddr x) si env))
+ ((funcall? x)
+ (emit-funcall (lvar x) (arguments x) si env))
+ (else
+ (error "unknown expression" x))))
(define (emit-labels lvars body-exp si env)
(let* ((lvars* (map (lambda (lvar)
@@ -515,64 +616,129 @@
(let ((label (first lvar))
(code (third lvar)))
(if (code? code)
- (emit-code label (bindings code) (body code) env*)
+ (emit-code label (second code) (third code) (fourth code) env*)
(error "expected a code expression" code))))
lvars*)
(emit-label "scheme_entry")
(emit-expr body-exp si env*)
(emit-ret)))
-(define (emit-labelcall lvar args si env)
- (let ((label (lookup lvar env)))
- (let loop ((args args)
- (si* (- si wordsize)))
- (if (null? args)
- ;; The stack pointer needs to be advanced to one word below
- ;; the return point of the upcoming call. Our stack index
- ;; value starts at (- wordsize). The 'call' instruction
- ;; automatically increments the stack pointer by a word.
- ;; So, if we incremented by the stack index, we'd be moving
- ;; too far. We need to advance by one less word. In the
- ;; case where nothing has been allocated to the stack before
- ;; the call, it means not incrementing the stack pointer at
- ;; all! Took me awhile to wrap my head around this!
- (let ((stack-start (+ si wordsize)))
- (unless (zero? stack-start)
- (emit-add (immediate stack-start) rsp))
- (emit-call label)
- (unless (zero? stack-start)
- (emit-sub (immediate stack-start) rsp)))
- (begin
- (emit-expr (first args) si* env)
- (emit-mov rax (offset si* rsp))
- (loop (cdr args)
- (- si* wordsize)))))))
-
-(define (emit-expr x si env)
+;; Perform free variable analysis and transform 'lambda' forms into
+;; closure/funcall forms and generate top-level labels for all
+;; procedures.
+(define (free-variables x)
+ (define (add-variables vars more-vars)
+ (fold (lambda (new-var prev)
+ (if (memq new-var vars)
+ prev
+ (cons new-var prev)))
+ vars more-vars))
+ (let loop ((vars (second x))
+ (x (third x)))
+ (cond
+ ((immediate? x) '())
+ ((variable? x)
+ (if (memq x vars)
+ '()
+ (list x)))
+ ((if? x)
+ (delete-duplicates
+ (append (loop vars (test x))
+ (loop vars (consequent x))
+ (loop vars (alternate x)))))
+ ((let? x)
+ (loop (add-variables vars (map lhs (bindings x))) (body x)))
+ ((primcall? x)
+ (append-map (lambda (operand)
+ (loop vars operand))
+ (cdr x)))
+ ((lambda? x)
+ (loop (add-variables vars (second x)) (third x)))
+ ((pair? x)
+ (append-map (lambda (arg)
+ (loop vars arg))
+ x)))))
+
+(define (annotate-free-variables x)
(cond
- ((immediate? x)
- (emit-mov (immediate (immediate-rep x)) rax))
- ((variable? x)
- (emit-mov (offset (lookup x env) rsp) rax))
+ ((immediate? x) x)
+ ((variable? x) x)
((if? x)
- (emit-if (test x) (consequent x) (alternate x) si env))
+ `(if ,(annotate-free-variables (test x))
+ ,(annotate-free-variables (consequent x))
+ ,(annotate-free-variables (alternate x))))
((let? x)
- (emit-let (bindings x) (body x) si env))
+ `(let ,(map (lambda (binding)
+ (list (lhs binding)
+ (annotate-free-variables (rhs binding))))
+ (bindings x))
+ ,(annotate-free-variables (body x))))
((primcall? x)
- (emit-primitive-call x si env))
- ((labels? x)
- (emit-labels (bindings x) (body x) si env))
- ((labelcall? x)
- (emit-labelcall (lvar x) (arguments x) si env))
- (else
- (error "unknown expression" x))))
+ (cons (primcall-op x)
+ (map annotate-free-variables (cdr x))))
+ ((lambda? x)
+ `(lambda ,(second x)
+ ,(free-variables x)
+ ,(annotate-free-variables (third x))))
+ ((pair? x)
+ `(funcall ,@(map annotate-free-variables x)))))
+
+(define (replace-lambdas-with-closures-and-funcalls x)
+ (define (iter x)
+ (cond
+ ((immediate? x) (values x '()))
+ ((variable? x) (values x '()))
+ ((if? x)
+ (let-values (((test* labels0) (iter (test x)))
+ ((consequent* labels1) (iter (consequent x)))
+ ((alternate* labels2) (iter (alternate x))))
+ (values `(if ,test* ,consequent* ,alternate*)
+ (append labels0 labels1 labels2))))
+ ((let? x)
+ (let-values (((bindings*)
+ (map (lambda (binding)
+ (let-values (((rhs* labels) (iter (rhs binding))))
+ (list (list (lhs binding) rhs*) labels)))
+ (bindings x)))
+ ((body* body-labels) (iter (body x))))
+ (values `(let ,(map first bindings*)
+ ,body*)
+ (append (concatenate (map second bindings*))
+ body-labels))))
+ ((primcall? x)
+ (let ((operands (map (lambda (operand)
+ (let-values (((operand* labels) (iter operand)))
+ (list operand* labels)))
+ (cdr x))))
+ (values (cons (primcall-op x)
+ (map first operands))
+ (concatenate (map second operands)))))
+ ((funcall? x)
+ (let ((args (map (lambda (arg)
+ (let-values (((arg* labels) (iter arg)))
+ (list arg* labels)))
+ (cdr x))))
+ (values `(funcall ,@(map first args))
+ (concatenate (map second args)))))
+ ((lambda? x)
+ (let-values (((body* labels) (iter (fourth x))))
+ (let ((name (unique-lvar)))
+ (values `(closure ,name ,@(third x))
+ (cons (list name
+ `(code ,(second x) ,(third x) ,body*))
+ labels)))))))
+ (let-values (((new-x labels) (iter x)))
+ `(labels ,labels ,new-x)))
+
+(define (expand x)
+ (parameterize ((unique-counter 0))
+ (replace-lambdas-with-closures-and-funcalls
+ (annotate-free-variables x))))
(define (compile-program x)
- (parameterize ((unique-counter 0))
- (let ((x* (if (labels? x)
- x
- `(labels () ,x))))
- (emit-expr x* (- wordsize) '()))))
+ (let ((x* (expand x)))
+ (parameterize ((unique-counter 0))
+ (emit-labels (second x*) (third x*) (- wordsize) '()))))
(define (compile-and-run x)
(with-output-to-file "scheme_entry.s"
@@ -661,8 +827,14 @@
(test-case '(vector-ref (vector 1 2 3) 1) "2")
(test-case '(string-length (make-string 5)) "5")
(test-case '(string-ref (string #\a #\b #\c) 1) "b")
- (test-case '(labels ((perimeter (code (length width)
- (+ (* length 2)
- (* width 2)))))
- (labelcall perimeter 4 3))
- "14"))
+ ;; procedure with no free variables and multiple args
+ (test-case '(let ((perimeter (lambda (length width)
+ (+ (* length 2)
+ (* width 2)))))
+ (perimeter 4 3))
+ "14")
+ ;; closures!
+ (test-case '(let ((x 5))
+ (let ((f (lambda (y) (+ x y))))
+ (f 4)))
+ "9"))