From 18203a946df9fc565acee20f4ee6199cc677e0ea Mon Sep 17 00:00:00 2001 From: David Thompson Date: Fri, 19 Aug 2022 08:22:54 -0400 Subject: Step 9: Closures This was a big one! Took me forever to wrap my head around the implementation but it's so cool now that it works! --- compiler.scm | 332 +++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 252 insertions(+), 80 deletions(-) (limited to 'compiler.scm') diff --git a/compiler.scm b/compiler.scm index f890276..01bc40e 100644 --- a/compiler.scm +++ b/compiler.scm @@ -1,7 +1,8 @@ (use-modules (ice-9 format) (ice-9 popen) (ice-9 rdelim) - (srfi srfi-1)) + (srfi srfi-1) + (srfi srfi-11)) ;; Assuming a 64 bit intel machine here. (define wordsize 8) @@ -19,6 +20,7 @@ (define pair-tag 1) (define vector-tag 2) (define string-tag 3) +(define closure-tag 6) (define (immediate-rep x) (cond @@ -40,13 +42,15 @@ (define (variable? x) (symbol? x)) +(define (op-eq? lst op) + (and (pair? lst) + (eq? (first lst) op))) + (define (if? x) - (and (pair? x) - (eq? (first x) 'if))) + (op-eq? x 'if)) (define (let? x) - (and (pair? x) - (eq? (first x) 'let))) + (op-eq? x 'let)) (define (primcall? x) (and (pair? x) @@ -62,16 +66,19 @@ make-string string string-length string-ref string-set!)))) (define (labels? x) - (and (pair? x) - (eq? (first x) 'labels))) + (op-eq? x 'labels)) (define (code? x) - (and (pair? x) - (eq? (first x) 'code))) + (op-eq? x 'code)) -(define (labelcall? x) - (and (pair? x) - (eq? (first x) 'labelcall))) +(define (closure? x) + (op-eq? x 'closure)) + +(define (funcall? x) + (op-eq? x 'funcall)) + +(define (lambda? x) + (op-eq? x 'lambda)) (define (primcall-op x) (first x)) @@ -87,7 +94,7 @@ (define (lookup name env) (or (assq-ref env name) - (error "unbound variable:" name))) + (error "unbound variable:" name env))) (define (extend-env name si-or-label env) (cons (cons name si-or-label) env)) @@ -113,18 +120,25 @@ (define (alternate x) (fourth x)) -(define (lvar x) +(define (funcall-proc x) (second x)) -(define (arguments x) +(define (funcall-arguments x) (drop x 2)) (define unique-counter (make-parameter 0)) -(define (unique-label) +(define (unique-number) (let ((n (unique-counter))) (unique-counter (+ n 1)) - (format #f "L~a" n))) + n)) + +(define (unique-lvar) + (string->symbol + (format #f "f~a" (unique-number)))) + +(define (unique-label) + (format #f "L~a" (unique-number))) (define (register name) (format #f "%~a" name)) @@ -132,10 +146,12 @@ (define (immediate value) (format #f "$~a" value)) -(define rax (register "rax")) -(define rbx (register "rbx")) -(define rsp (register "rsp")) -(define rsi (register "rsi")) +(define rax (register "rax")) ; default register for storing values +(define rbx (register "rbx")) ; extra storage +(define rdi (register "rdi")) ; closure pointers +(define rsp (register "rsp")) ; heap +(define rsi (register "rsi")) ; stack +(define rip (register "rip")) ; instruction pointer (define al (register "al")) (define (offset n register) @@ -206,6 +222,9 @@ (define (emit-call label) (emit "call ~a" label)) +(define (emit-lea src dest) + (emit "lea ~a, ~a" src dest)) + (define (emit-ret) (emit "ret")) @@ -489,18 +508,100 @@ (emit-expr altern si env) (emit-label L1))) -(define (emit-code label vars body env) +(define (emit-code label vars free-vars body env) (emit-label label) + ;; Extend environment to include all procedure variables (let loop ((vars vars) - (env env) - (si (- wordsize))) + (si (- wordsize)) + (env env)) (if (null? vars) - (begin - (emit-expr body si env) - (emit-ret)) + ;; Extend environment to include all free variables and + ;; emit code to initialize their value from the current + ;; closure pointer. + (let free-loop ((free-vars free-vars) + (ci wordsize) + (si si) + (env env)) + (if (null? free-vars) + ;; All variable setup is complete, we can finally + ;; emit the body of the procedure. + (begin + (emit-expr body si env) + (emit-ret)) + (begin + (emit-mov (offset ci rdi) rax) ; get value from closure + (emit-mov rax (offset si rsp)) ; push it to stack + (free-loop (cdr free-vars) + (+ ci wordsize) + (- si wordsize) + (extend-env (first free-vars) si env))))) (loop (cdr vars) - (extend-env (first vars) si env) - (- si wordsize))))) + (- si wordsize) + (extend-env (first vars) si env))))) + +(define (emit-funcall proc args si env) + (emit-mov rdi (offset si rsp)) ; save current closure pointer + (let loop ((args args) + ;; Skip a stack space to use as the return point. + (si* (- si (* wordsize 2)))) + (if (null? args) + (let ((stack-start si)) + (emit-expr proc si env) ; eval closure + (emit-sub (immediate closure-tag) rax) ; untag it to get pointer + (emit-mov rax rdi) ; store pointer in destination register + (unless (zero? stack-start) + (emit-add (immediate stack-start) rsp)) ; move stack pointer + (emit-call (string-append "*" (offset 0 rdi))) + (unless (zero? stack-start) + (emit-sub (immediate stack-start) rsp)) ; restore stack pointer + (emit-mov (offset (- si wordsize) rsp) rdi)) ; restore closure pointer + (begin + (emit-expr (first args) si* env) + (emit-mov rax (offset si* rsp)) + (loop (cdr args) + (- si* wordsize)))))) + +(define (emit-closure lvar vars si env) + (let ((label (lookup lvar env))) + (emit-lea (offset label rip) rax) ; first word of closure points to label + (emit-mov rax (offset 0 rsi)) ; first element of closure is label pointer + (let loop ((vars vars) + (i wordsize)) + (if (null? vars) + (begin + (emit-mov rsi rax) ; capture heap pointer + (emit-or (immediate closure-tag) rax) ; set tag + ;; Align heap pointer to nearest 2 word boundary for next + ;; heap allocation. + (emit-add (immediate (logand (+ i wordsize) + (- (* 2 wordsize)))) + rsi)) + (let ((var-offset (lookup (first vars) env))) + (emit-mov (offset var-offset rsp) rax) ; copy value of free variable + (emit-mov rax (offset i rsi)) ; save it to closure + (loop (cdr vars) + (+ i wordsize))))))) + +(define (emit-expr x si env) + (cond + ((immediate? x) + (emit-mov (immediate (immediate-rep x)) rax)) + ((variable? x) + (emit-mov (offset (lookup x env) rsp) rax)) + ((if? x) + (emit-if (test x) (consequent x) (alternate x) si env)) + ((let? x) + (emit-let (bindings x) (body x) si env)) + ((primcall? x) + (emit-primitive-call x si env)) + ((labels? x) + (emit-labels (bindings x) (body x) si env)) + ((closure? x) + (emit-closure (second x) (cddr x) si env)) + ((funcall? x) + (emit-funcall (lvar x) (arguments x) si env)) + (else + (error "unknown expression" x)))) (define (emit-labels lvars body-exp si env) (let* ((lvars* (map (lambda (lvar) @@ -515,64 +616,129 @@ (let ((label (first lvar)) (code (third lvar))) (if (code? code) - (emit-code label (bindings code) (body code) env*) + (emit-code label (second code) (third code) (fourth code) env*) (error "expected a code expression" code)))) lvars*) (emit-label "scheme_entry") (emit-expr body-exp si env*) (emit-ret))) -(define (emit-labelcall lvar args si env) - (let ((label (lookup lvar env))) - (let loop ((args args) - (si* (- si wordsize))) - (if (null? args) - ;; The stack pointer needs to be advanced to one word below - ;; the return point of the upcoming call. Our stack index - ;; value starts at (- wordsize). The 'call' instruction - ;; automatically increments the stack pointer by a word. - ;; So, if we incremented by the stack index, we'd be moving - ;; too far. We need to advance by one less word. In the - ;; case where nothing has been allocated to the stack before - ;; the call, it means not incrementing the stack pointer at - ;; all! Took me awhile to wrap my head around this! - (let ((stack-start (+ si wordsize))) - (unless (zero? stack-start) - (emit-add (immediate stack-start) rsp)) - (emit-call label) - (unless (zero? stack-start) - (emit-sub (immediate stack-start) rsp))) - (begin - (emit-expr (first args) si* env) - (emit-mov rax (offset si* rsp)) - (loop (cdr args) - (- si* wordsize))))))) - -(define (emit-expr x si env) +;; Perform free variable analysis and transform 'lambda' forms into +;; closure/funcall forms and generate top-level labels for all +;; procedures. +(define (free-variables x) + (define (add-variables vars more-vars) + (fold (lambda (new-var prev) + (if (memq new-var vars) + prev + (cons new-var prev))) + vars more-vars)) + (let loop ((vars (second x)) + (x (third x))) + (cond + ((immediate? x) '()) + ((variable? x) + (if (memq x vars) + '() + (list x))) + ((if? x) + (delete-duplicates + (append (loop vars (test x)) + (loop vars (consequent x)) + (loop vars (alternate x))))) + ((let? x) + (loop (add-variables vars (map lhs (bindings x))) (body x))) + ((primcall? x) + (append-map (lambda (operand) + (loop vars operand)) + (cdr x))) + ((lambda? x) + (loop (add-variables vars (second x)) (third x))) + ((pair? x) + (append-map (lambda (arg) + (loop vars arg)) + x))))) + +(define (annotate-free-variables x) (cond - ((immediate? x) - (emit-mov (immediate (immediate-rep x)) rax)) - ((variable? x) - (emit-mov (offset (lookup x env) rsp) rax)) + ((immediate? x) x) + ((variable? x) x) ((if? x) - (emit-if (test x) (consequent x) (alternate x) si env)) + `(if ,(annotate-free-variables (test x)) + ,(annotate-free-variables (consequent x)) + ,(annotate-free-variables (alternate x)))) ((let? x) - (emit-let (bindings x) (body x) si env)) + `(let ,(map (lambda (binding) + (list (lhs binding) + (annotate-free-variables (rhs binding)))) + (bindings x)) + ,(annotate-free-variables (body x)))) ((primcall? x) - (emit-primitive-call x si env)) - ((labels? x) - (emit-labels (bindings x) (body x) si env)) - ((labelcall? x) - (emit-labelcall (lvar x) (arguments x) si env)) - (else - (error "unknown expression" x)))) + (cons (primcall-op x) + (map annotate-free-variables (cdr x)))) + ((lambda? x) + `(lambda ,(second x) + ,(free-variables x) + ,(annotate-free-variables (third x)))) + ((pair? x) + `(funcall ,@(map annotate-free-variables x))))) + +(define (replace-lambdas-with-closures-and-funcalls x) + (define (iter x) + (cond + ((immediate? x) (values x '())) + ((variable? x) (values x '())) + ((if? x) + (let-values (((test* labels0) (iter (test x))) + ((consequent* labels1) (iter (consequent x))) + ((alternate* labels2) (iter (alternate x)))) + (values `(if ,test* ,consequent* ,alternate*) + (append labels0 labels1 labels2)))) + ((let? x) + (let-values (((bindings*) + (map (lambda (binding) + (let-values (((rhs* labels) (iter (rhs binding)))) + (list (list (lhs binding) rhs*) labels))) + (bindings x))) + ((body* body-labels) (iter (body x)))) + (values `(let ,(map first bindings*) + ,body*) + (append (concatenate (map second bindings*)) + body-labels)))) + ((primcall? x) + (let ((operands (map (lambda (operand) + (let-values (((operand* labels) (iter operand))) + (list operand* labels))) + (cdr x)))) + (values (cons (primcall-op x) + (map first operands)) + (concatenate (map second operands))))) + ((funcall? x) + (let ((args (map (lambda (arg) + (let-values (((arg* labels) (iter arg))) + (list arg* labels))) + (cdr x)))) + (values `(funcall ,@(map first args)) + (concatenate (map second args))))) + ((lambda? x) + (let-values (((body* labels) (iter (fourth x)))) + (let ((name (unique-lvar))) + (values `(closure ,name ,@(third x)) + (cons (list name + `(code ,(second x) ,(third x) ,body*)) + labels))))))) + (let-values (((new-x labels) (iter x))) + `(labels ,labels ,new-x))) + +(define (expand x) + (parameterize ((unique-counter 0)) + (replace-lambdas-with-closures-and-funcalls + (annotate-free-variables x)))) (define (compile-program x) - (parameterize ((unique-counter 0)) - (let ((x* (if (labels? x) - x - `(labels () ,x)))) - (emit-expr x* (- wordsize) '())))) + (let ((x* (expand x))) + (parameterize ((unique-counter 0)) + (emit-labels (second x*) (third x*) (- wordsize) '())))) (define (compile-and-run x) (with-output-to-file "scheme_entry.s" @@ -661,8 +827,14 @@ (test-case '(vector-ref (vector 1 2 3) 1) "2") (test-case '(string-length (make-string 5)) "5") (test-case '(string-ref (string #\a #\b #\c) 1) "b") - (test-case '(labels ((perimeter (code (length width) - (+ (* length 2) - (* width 2))))) - (labelcall perimeter 4 3)) - "14")) + ;; procedure with no free variables and multiple args + (test-case '(let ((perimeter (lambda (length width) + (+ (* length 2) + (* width 2))))) + (perimeter 4 3)) + "14") + ;; closures! + (test-case '(let ((x 5)) + (let ((f (lambda (y) (+ x y)))) + (f 4))) + "9")) -- cgit v1.2.3