diff options
author | David Thompson <dthompson@vistahigherlearning.com> | 2022-11-29 21:03:00 -0500 |
---|---|---|
committer | David Thompson <dthompson@vistahigherlearning.com> | 2022-11-29 21:03:00 -0500 |
commit | 0ca9cc9f1adc07a1831a8711be77844d853f84c1 (patch) | |
tree | da8827a3e58f70b90af1949f72e1e95b51c3f172 | |
parent | 628f86e51a7500de4e174e73e33606b8aceca86b (diff) |
Step 13: Extending the syntax.
-rw-r--r-- | compiler.scm | 571 |
1 files changed, 463 insertions, 108 deletions
diff --git a/compiler.scm b/compiler.scm index 3a147a2..bcf8813 100644 --- a/compiler.scm +++ b/compiler.scm @@ -36,9 +36,39 @@ (define (if? x) (op-eq? x 'if)) +(define (cond? x) + (op-eq? x 'cond)) + +(define (case? x) + (op-eq? x 'case)) + +(define (when? x) + (op-eq? x 'when)) + +(define (unless? x) + (op-eq? x 'unless)) + +(define (or? x) + (op-eq? x 'or)) + +(define (and? x) + (op-eq? x 'and)) + +(define (define? x) + (op-eq? x 'define)) + (define (let? x) (op-eq? x 'let)) +(define (let*? x) + (op-eq? x 'let*)) + +(define (letrec? x) + (op-eq? x 'letrec)) + +(define (letrec*? x) + (op-eq? x 'letrec*)) + (define (primcall? x) (and (pair? x) (memq (first x) @@ -53,6 +83,9 @@ make-string string string-length string-ref string-set! constant-init constant-ref)))) +(define (tagged-primcall? x) + (op-eq? x 'primcall)) + (define (labels? x) (op-eq? x 'labels)) @@ -86,15 +119,21 @@ (define (primcall-operands x) (cdr x)) -(define (primcall-operand1 x) +(define (tagged-primcall-op x) (second x)) -(define (primcall-operand2 x) +(define (tagged-primcall-operands x) + (cddr x)) + +(define (tagged-primcall-operand1 x) (third x)) -(define (primcall-operand3 x) +(define (tagged-primcall-operand2 x) (fourth x)) +(define (tagged-primcall-operand3 x) + (fifth x)) + (define (lookup name env) (or (assq-ref env name) (error "unbound variable:" name env))) @@ -139,7 +178,7 @@ (second x)) (define (lambda-body x) - (third x)) + (cddr x)) (define (funcall-proc x) (second x)) @@ -317,22 +356,22 @@ (emit-or (immediate boolean-tag) rax)) (define (emit-primitive-call x si env) - (case (primcall-op x) + (case (tagged-primcall-op x) ((add1) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) (emit-add (immediate (immediate-rep 1)) rax)) ((sub1) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) (emit-sub (immediate (immediate-rep 1)) rax)) ((integer->char) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) (emit-sal (immediate (- char-shift fixnum-shift)) rax) (emit-or (immediate char-tag) rax)) ((char->integer) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) (emit-shr (immediate (- char-shift fixnum-shift)) rax)) ((zero?) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) ;; Since the tag of fixnums is 0, we can skip an 'andl' ;; instruction that would apply the mask to the immediate ;; value. @@ -342,55 +381,69 @@ (emit-sal (immediate boolean-shift) rax) (emit-or (immediate boolean-tag) rax)) ((null?) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) (emit-cmp (immediate empty-list) rax) (emit-mov (immediate 0) rax) (emit-sete al) (emit-sal (immediate boolean-shift) rax) (emit-or (immediate boolean-tag) rax)) ((integer?) - (emit-tag-check (primcall-operand1 x) fixnum-mask fixnum-tag si env)) + (emit-tag-check (tagged-primcall-operand1 x) fixnum-mask fixnum-tag si env)) ((char?) - (emit-tag-check (primcall-operand1 x) char-mask char-tag si env)) + (emit-tag-check (tagged-primcall-operand1 x) char-mask char-tag si env)) ((boolean?) - (emit-tag-check (primcall-operand1 x) boolean-mask boolean-tag si env)) + (emit-tag-check (tagged-primcall-operand1 x) boolean-mask boolean-tag si env)) ((+) - (emit-expr (primcall-operand2 x) si env) + (emit-expr (tagged-primcall-operand2 x) si env) (emit-mov rax (offset si rsp)) - (emit-expr (primcall-operand1 x) (- si wordsize) env) + (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env) (emit-add (offset si rsp) rax)) ((-) - (emit-expr (primcall-operand2 x) si env) + (emit-expr (tagged-primcall-operand2 x) si env) (emit-mov rax (offset si rsp)) - (emit-expr (primcall-operand1 x) (- si wordsize) env) + (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env) (emit-sub (offset si rsp) rax)) ((*) - (emit-expr (primcall-operand2 x) si env) + (emit-expr (tagged-primcall-operand2 x) si env) (emit-mov rax (offset si rsp)) - (emit-expr (primcall-operand1 x) (- si wordsize) env) + (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env) (emit-imul (offset si rsp) rax) ;; When two fixnums (which have 2 tag bits) are multiplied, the ;; relevant bits for the result are now 4 bytes to the left, so ;; we have to shift back 2 bytes. (emit-shr (immediate fixnum-shift) rax)) ((=) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-sete si env)) ((<) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setl si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-setl si env)) ((<=) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setle si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-setle si env)) ((>) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setg si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-setg si env)) ((>=) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setge si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-setge si env)) ((eq?) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-sete si env)) ((char=?) - (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env)) + (emit-comparison (tagged-primcall-operand1 x) + (tagged-primcall-operand2 x) + emit-sete si env)) ((cons) - (emit-expr (primcall-operand2 x) si env) ; eval cdr + (emit-expr (tagged-primcall-operand2 x) si env) ; eval cdr (emit-mov rax (offset si rsp)) ; save car to stack - (emit-expr (primcall-operand1 x) (- si wordsize) env) ; eval car + (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env) ; eval car (emit-mov rax (offset 0 rsi)) ; move car onto heap (emit-mov (offset si rsp) rax) ; copy cdr from the stack (emit-mov rax (offset wordsize rsi)) ; move cdr onto heap @@ -398,17 +451,17 @@ (emit-or (immediate pair-tag) rax) ; set tag (emit-add (immediate (* wordsize 2)) rsi)) ; bump heap pointer ((car) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) ;; We have to untag the pair to get the pointer to the 'car'. ;; The pair tag is 1 so simply subtracting 1 gets us the pointer. (emit-mov (offset -1 rax) rax)) ((cdr) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) ;; Again, the value is the pointer incremented by 1, so to get to ;; the cdr we need to jump ahead one word minus 1 byte. (emit-mov (offset (- wordsize 1) rax) rax)) ((make-vector) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) ;; Wouldn't it be better to save the length untagged so that ;; vector-ref and vector-set! don't have untag it and ;; vector-length just needs to retag it? @@ -426,7 +479,7 @@ (emit-and (immediate (- (* 2 wordsize))) rbx) (emit-add rbx rsi)) ; bump heap pointer by length of vector ((vector) - (let ((items (cdr x))) + (let ((items (tagged-primcall-operands x))) ;; Eval all vector items and save them to stack locations. ;; It's important that we eval all items first, and not copy to ;; the heap as we go, because any sub-expression that also does @@ -456,31 +509,31 @@ (- (* 2 wordsize)))) rsi))) ((vector-length) - (emit-expr (primcall-operand1 x) si env) ; get vector pointer + (emit-expr (tagged-primcall-operand1 x) si env) ; get vector pointer (emit-sub (immediate vector-tag) rax) ; untag vector (emit-mov (offset 0 rax) rax)) ; the first word contains the length ((vector-ref) - (emit-expr (primcall-operand2 x) si env) ; get index arg + (emit-expr (tagged-primcall-operand2 x) si env) ; get index arg (emit-shr (immediate fixnum-shift) rax) ; untag it (emit-add (immediate 1) rax) ; first word is the length so skip over it (emit-imul (immediate wordsize) rax) ; scale by word size (emit-mov rax rbx) ; save index to another register - (emit-expr (primcall-operand1 x) si env) ; get vector pointer + (emit-expr (tagged-primcall-operand1 x) si env) ; get vector pointer (emit-sub (immediate vector-tag) rax) ; untag vector (emit-mov (register-offset rbx rax) rax)) ; get element ((vector-set!) - (emit-expr (primcall-operand1 x) si env) ; get vector pointer + (emit-expr (tagged-primcall-operand1 x) si env) ; get vector pointer (emit-sub (immediate vector-tag) rax) ; untag vector (emit-mov rax rbx) ; save vector - (emit-expr (primcall-operand2 x) si env) ; get index + (emit-expr (tagged-primcall-operand2 x) si env) ; get index (emit-shr (immediate fixnum-shift) rax) ; untag it (emit-add (immediate 1) rax) ; first word is the length so skip over it (emit-imul (immediate wordsize) rax) ; scale by word size (emit-add rax rbx) ; advance pointer to element being set - (emit-expr (primcall-operand3 x) si env) ; get value + (emit-expr (tagged-primcall-operand3 x) si env) ; get value (emit-mov rax (offset 0 rbx))) ((make-string) - (emit-expr (primcall-operand1 x) si env) + (emit-expr (tagged-primcall-operand1 x) si env) (emit-mov rax (offset 0 rsi)) ; save length onto heap (emit-mov rax rbx) ; save length in another register (emit-mov rsi rax) ; write to heap @@ -494,7 +547,7 @@ (emit-and (immediate (- (* 2 wordsize))) rbx) (emit-add rbx rsi)) ; bump heap pointer by length of string ((string) - (let ((chars (cdr x))) + (let ((chars (tagged-primcall-operands x))) ;; Save length onto heap (tagged as immediate int) (emit-mov (immediate (ash (length chars) 2)) rax) (emit-mov rax (offset 0 rsi)) @@ -515,11 +568,11 @@ (- (* 2 wordsize)))) rsi))) ((string-length) - (emit-expr (primcall-operand1 x) si env) ; get string pointer + (emit-expr (tagged-primcall-operand1 x) si env) ; get string pointer (emit-sub (immediate string-tag) rax) ; untag string (emit-mov (offset 0 rax) rax)) ; the first word contains the length ((string-ref) - (emit-expr (primcall-operand2 x) si env) ; get index arg + (emit-expr (tagged-primcall-operand2 x) si env) ; get index arg (emit-shr (immediate fixnum-shift) rax) ; untag it ;; The first word of a string contains the length, however we ;; don't want to advance the pointer by a word because each @@ -530,31 +583,31 @@ ;; everything but the LSB to isolate the character. (emit-add (immediate 1) rax) (emit-mov rax rbx) ; save index to another register - (emit-expr (primcall-operand1 x) si env) ; get string pointer + (emit-expr (tagged-primcall-operand1 x) si env) ; get string pointer (emit-sub (immediate string-tag) rax) ; untag string (emit-mov (register-offset rbx rax) rax) ; get char into LSB position (emit-and (immediate 255) rax) ; clear out everything but the LSB (emit-sal (immediate char-shift) rax) ; tag char (emit-or (immediate char-tag) rax)) ((string-set!) - (emit-expr (primcall-operand1 x) si env) ; get string pointer + (emit-expr (tagged-primcall-operand1 x) si env) ; get string pointer (emit-sub (immediate string-tag) rax) ; untag string (emit-mov rax rbx) ; save string pointer - (emit-expr (primcall-operand2 x) si env) ; get index arg + (emit-expr (tagged-primcall-operand2 x) si env) ; get index arg (emit-shr (immediate fixnum-shift) rax) ; untag it (emit-add (immediate 1) rax) (emit-add rax rbx) ; get char into LSB position - (emit-expr (primcall-operand3 x) si env) ; get value + (emit-expr (tagged-primcall-operand3 x) si env) ; get value (emit-shr (immediate char-shift) rax) ; untag char (emit-andq (immediate -255) (offset 0 rbx)) ; clear LSB (emit-or rax (offset 0 rbx))) ; copy char ((constant-init) - (let ((label (lookup (primcall-operand1 x) env))) - (emit-expr (primcall-operand2 x) si env) + (let ((label (lookup (tagged-primcall-operand1 x) env))) + (emit-expr (tagged-primcall-operand2 x) si env) (emit-lea (offset label rip) rbx) (emit-mov rax (offset 0 rbx)))) ((constant-ref) - (let ((label (lookup (primcall-operand1 x) env))) + (let ((label (lookup (tagged-primcall-operand1 x) env))) (emit-lea (offset label rip) rax) (emit-mov (offset 0 rax) rax))) (else @@ -704,7 +757,7 @@ (emit-if (test x) (consequent x) (alternate x) si env)) ((let? x) (emit-let (let-bindings x) (let-body x) si env)) - ((primcall? x) + ((tagged-primcall? x) (emit-primitive-call x si env)) ((labels? x) (emit-labels (label-bindings x) (label-body x) si env)) @@ -715,7 +768,7 @@ ((tail-call? x) (emit-tail-call (tail-call-proc x) (tail-call-arguments x) si env)) (else - (error "unknown expression" x)))) + (error "unknown expression in native code generation" x)))) (define (emit-labels lvars body-exps si env) (let* ((lvars* (map (lambda (lvar) @@ -776,12 +829,18 @@ (append-map (lambda (y) (loop (add-variables vars (map lhs (let-bindings x))) y)) (let-body x))) - ((primcall? x) + ((tagged-primcall? x) (append-map (lambda (operand) (loop vars operand)) - (cdr x))) + (tagged-primcall-operands x))) ((lambda? x) - (loop (add-variables vars (lambda-args x)) (lambda-body x))) + (append-map (lambda (y) + (loop (add-variables vars (lambda-args x)) y)) + (lambda-body x))) + ((funcall? x) + (append-map (lambda (operand) + (loop vars operand)) + (cdr x))) ((pair? x) (append-map (lambda (arg) (loop vars arg)) @@ -805,15 +864,17 @@ (annotate-free-variables (rhs binding)))) (let-bindings x)) ,@(map annotate-free-variables (let-body x)))) - ((primcall? x) - (cons (primcall-op x) - (map annotate-free-variables (cdr x)))) + ((tagged-primcall? x) + `(primcall ,(tagged-primcall-op x) + ,@(map annotate-free-variables (tagged-primcall-operands x)))) + ((funcall? x) + `(funcall ,@(map annotate-free-variables (cdr x)))) ((lambda? x) `(lambda ,(lambda-args x) ,(free-variables x) - ,(annotate-free-variables (lambda-body x)))) - ((pair? x) - `(funcall ,@(map annotate-free-variables x))))) + ,@(map annotate-free-variables (lambda-body x)))) + (else + (error "unknown form in free variable annotation" x)))) ;; Determine if var is mutated (via set!) within an expression. (define (mutable? var x) @@ -841,17 +902,19 @@ (any (lambda (y) (mutable? var y)) (let-body x)))))) - ((primcall? x) + ((tagged-primcall? x) (any (lambda (arg) (mutable? var arg)) - (cdr x))) + (tagged-primcall-operands x))) ((lambda? x) (let ((shadowed? (any (lambda (arg) (eq? arg var)) (lambda-args x)))) (if shadowed? #f - (mutable? var (lambda-body x))))) + (any (lambda (y) + (mutable? var y)) + (lambda-body x))))) ((set? x) (eq? (second x) var)) ((pair? x) @@ -864,7 +927,7 @@ ((variable? x) ;; Mutable variable references must be unboxed. (if (memq x mutable-vars) - `(vector-ref ,x 0) + `(primcall vector-ref ,x 0) x)) ((if? x) `(if ,(box-mutable-variables (test x) mutable-vars) @@ -874,19 +937,21 @@ (let* ((bindings (let-bindings x)) (mutable-bindings ;; Find all mutable bindings. - (filter (lambda (binding) - (any (lambda (y) - (mutable? (lhs binding) y)) - (let-body x))) - bindings))) + (filter-map (lambda (binding) + (and (any (lambda (y) + (mutable? (lhs binding) y)) + (let-body x)) + (lhs binding))) + bindings))) `(let ,(map (lambda (binding) (let ((var (lhs binding))) (list var ;; Use a 1 element vector to box mutable ;; variables. (if (memq var mutable-bindings) - `(vector ,(box-mutable-variables (rhs binding) - mutable-vars)) + `(primcall vector + ,(box-mutable-variables (rhs binding) + mutable-vars)) (box-mutable-variables (rhs binding) mutable-vars))))) bindings) @@ -905,7 +970,7 @@ memo) (delq var memo))) mutable-vars - bindings))) + mutable-bindings))) (map (lambda (y) (box-mutable-variables y mutable-vars*)) (let-body x)))))) @@ -929,45 +994,52 @@ ;; element vectors bound to the original names. (let ,(filter-map (lambda (arg) (and (not (eq? (car arg) (cdr arg))) - (list (car arg) `(vector ,(cdr arg))))) + (list (car arg) + `(primcall vector ,(cdr arg))))) args*) - ,(box-mutable-variables (lambda-body x) - (fold (lambda (arg memo) - (if (eq? (car arg) (cdr arg)) - memo - (cons (car arg) memo))) - mutable-vars* - args*)))))) - ((primcall? x) - (cons (primcall-op x) - (map (lambda (arg) - (box-mutable-variables arg mutable-vars)) - (primcall-operands x)))) + ,@(let ((mutable-vars* + (fold (lambda (arg memo) + (if (eq? (car arg) (cdr arg)) + memo + (cons (car arg) memo))) + mutable-vars* + args*))) + (map (lambda (y) + (box-mutable-variables y mutable-vars*)) + (lambda-body x))))))) + ((tagged-primcall? x) + `(primcall ,(tagged-primcall-op x) + ,@(map (lambda (arg) + (box-mutable-variables arg mutable-vars)) + (tagged-primcall-operands x)))) + ((funcall? x) + `(funcall ,@(map (lambda (y) + (box-mutable-variables y mutable-vars)) + (cdr x)))) ((set? x) ;; Calls to set! are simply transformed to calls to vector-set! to ;; modify the item inside the box. - `(vector-set! ,(second x) 0 ,(third x))) - ((pair? x) - (map (lambda (y) - (box-mutable-variables y mutable-vars)) - x)))) + `(primcall vector-set! ,(second x) 0 + ,(box-mutable-variables (third x) mutable-vars))) + (else + (error "unknown form in mutable variable boxing pass" x)))) ;; Transforms a quote form into code. (define (expand-quote x) (cond ((immediate? x) x) ((string? x) - `(string ,@(string->list x))) + `(primcall string ,@(string->list x))) ((vector? x) - `(vector ,@(map expand-quote (vector->list x)))) + `(primcall vector ,@(map expand-quote (vector->list x)))) ((pair? x) - `(cons ,(expand-quote (car x)) - ,(expand-quote (cdr x)))) + `(primcall cons ,(expand-quote (car x)) + ,(expand-quote (cdr x)))) ((null? x) '()) ;; The compiler doesn't support symbols yet... (else - (error "unknown quoted thing" x)))) + (error "unknown quoted expression" x)))) ;; Extracts certain forms to global program labels. Lambdas are ;; converted to closures and given a label. Quoted expressions are @@ -989,9 +1061,12 @@ (if (immediate? quoted) (values quoted '() '()) (let ((const (unique-variable))) - (values `(constant-ref ,const) + (values `(primcall constant-ref ,const) (list (list const '(datum))) - (list `(constant-init ,const ,(expand-quote (quote-data x))))))))) + (list `(primcall constant-init + ,const + ,(expand-quote + (quote-data x))))))))) ((variable? x) (values x '() '())) ((if? x) @@ -1021,14 +1096,13 @@ (concatenate (map second body*))) (append (concatenate (map third bindings*)) (concatenate (map third body*)))))) - ((primcall? x) + ((tagged-primcall? x) (let ((operands (map (lambda (operand) (let-values (((operand* labels initializers) (iter operand))) (list operand* labels initializers))) - (cdr x)))) - (values (cons (primcall-op x) - (map first operands)) + (tagged-primcall-operands x)))) + (values `(primcall ,(tagged-primcall-op x) ,@(map first operands)) (concatenate (map second operands)) (concatenate (map third operands))))) ;; Convert procedure calls to 'funcall' form that refers to a @@ -1064,6 +1138,7 @@ ((immediate? x) x) ((variable? x) x) ((closure? x) x) + ((quote? x) x) ((if? x) `(if ,(mark-tail-calls (test x)) ,(maybe-mark (consequent x)) @@ -1074,9 +1149,9 @@ (mark-tail-calls (rhs binding)))) (let-bindings x)) ,@(map maybe-mark (let-body x)))) - ((primcall? x) - (cons (primcall-op x) - (map mark-tail-calls (cdr x)))) + ((tagged-primcall? x) + `(primcall ,(tagged-primcall-op x) + ,@(map mark-tail-calls (tagged-primcall-operands x)))) ((funcall? x) `(funcall ,@(map mark-tail-calls (cdr x)))) ((code? x) @@ -1090,6 +1165,216 @@ (label-bindings x)) ,@(map mark-tail-calls (drop x 2)))))) +;; Transform expressions like: +;; (define (foo x) x) +;; Into: +;; (define foo (lambda (x) x)) +(define (expand-definition x) + (if (pair? (second x)) ; + `(define ,(car (second x)) + (lambda ,(cdr (second x)) + ,@(cddr x))) + x)) + +;; Tranform a series of expressions with 'define' forms into a 'let' +;; expression that binds all the defined variables. +(define (hoist-definitions exprs) + (let loop ((orig-exprs exprs) + (defs '()) + (new-exprs '())) + (cond + ((and (null? orig-exprs) (null? defs)) + exprs) + ((null? orig-exprs) + `((let ,(reverse defs) + ,@(reverse new-exprs)))) + ((define? (car orig-exprs)) + (let ((x (expand-definition (car orig-exprs)))) + (loop (cdr orig-exprs) + (cons (list (second x) (third x)) defs) + new-exprs))) + (else + (loop (cdr orig-exprs) + defs + (cons (car orig-exprs) new-exprs)))))) + +;; Expand a fixed set of macros (let*, letrec, letrec*, cond, case, +;; when, unless, or, and, and define) in a hygienic manner and +;; explicitly tag primitive calls and function calls. +(define (macro-expand x) + (let loop ((x x) + (vars '())) + (cond + ;; Immediates and quoted values are left as-is. + ((immediate? x) x) + ((quote? x) x) + ;; Variable references are replaced with their unique, + ;; alpha-converted name. This prevents lexical variable names + ;; from clashing with primitive call names. + ((symbol? x) + (or (assq-ref vars x) + (error "unbound variable in macro expansion" x))) + ((if? x) + `(if ,(loop (test x) vars) + ,(loop (consequent x) vars) + ,(loop (alternate x) vars))) + ((set? x) + `(set! ,(loop (second x) vars) ,(loop (third x) vars))) + ;; Convert extended forms to primitive forms. For example, a + ;; let* expression is rewritten as a nested series of let + ;; expression. + ((let*? x) + (loop (car + (let expand-let* ((bindings (let-bindings x))) + (if (null? bindings) + (let-body x) + (let ((binding (car bindings))) + `((let (,binding) + ,@(expand-let* (cdr bindings)))))))) + vars)) + ;; Simple implementation of letrec: Bind variables to empty + ;; values. Bind temporary variables to the real values. Mutate + ;; original variables to have the values of the temporaries. + ((letrec? x) + ;; TODO: Use special unspecified value instead of #f. + (loop (let ((temps (map (lambda (binding) + (cons (lhs binding) (unique-variable))) + (let-bindings x)))) + `(let ,(map (lambda (binding) + (list (lhs binding) #f)) + (let-bindings x)) + (let ,(map (lambda (binding temp) + (list (cdr temp) (rhs binding))) + (let-bindings x) + temps) + ,@(map (lambda (binding) + (let ((var (lhs binding))) + `(set! ,var ,(assq-ref temps var)))) + (let-bindings x)) + ,@(let-body x)))) + vars)) + ;; letrec* is simpler than letrec because binding happens + ;; sequentially so we don't need to use temporaries. + ((letrec*? x) + ;; TODO: Use special unspecified value instead of #f. + (loop `(let ,(map (lambda (binding) + (list (lhs binding) #f)) + (let-bindings x)) + ,@(map (lambda (binding) + `(set! ,(lhs binding) ,(rhs binding))) + (let-bindings x)) + ,@(let-body x)) + vars)) + ;; TODO: Handle multiple expressions in clause. + ;; TODO: Handle => syntax. + ((cond? x) ; 'cond' is just nested 'if's + (loop (let expand-cond ((clauses (cdr x))) + (if (null? clauses) + #f + (let ((clause (car clauses))) + (cond + ((and (eq? (first clause) 'else) + (null? (cdr clauses))) + (second clause)) + ((eq? (first clause) 'else) + (error "else must be the last clause of cond" x)) + (else + `(if ,(first clause) + ,(second clause) + ,(expand-cond (cdr clauses)))))))) + vars)) + ;; 'when' and 'unless' are, you guessed it, special forms of + ;; 'if'! + ;; + ;; TODO: Handle multiple expressions + ((when? x) + (loop `(if ,(second x) ,(third x) #f) vars)) + ((unless? x) + (loop `(if (eq? ,(second x) #f) ,(third x) #f) vars)) + ;; 'or' and 'and' are also defined in terms of 'if' + ((or? x) + (loop (let or-loop ((exprs (cdr x))) + (if (null? exprs) + #f + `(let ((v ,(car exprs))) + (if v v ,(or-loop (cdr exprs)))))) + vars)) + ((and? x) + (loop (let and-loop ((exprs (cdr x))) + (cond + ((null? exprs) #t) + ((null? (cdr exprs)) + `(let ((v ,(car exprs))) + (if v v #f))) + (else + `(let ((v ,(car exprs))) + (if v ,(and-loop (cdr exprs)) #f))))) + vars)) + ;; TODO: Handle => syntax + ;; TODO: Handle symbols once implemented + ;; TODO: Handle multiple expressions in a clause + ((case? x) + (loop `(let ((key ,(second x))) + ,(let expand-cond ((clauses (cddr x))) + (if (null? clauses) + #f + (let ((clause (car clauses))) + (cond + ((and (eq? (first clause) 'else) + (null? (cdr clauses))) + (second clause)) + ((eq? (first clause) 'else) + (error "else must be the last clause of case" x)) + (else + `(if (or ,@(map (lambda (datum) + `(= key ,datum)) + (first clause))) + ,(second clause) + ,(expand-cond (cdr clauses))))))))) + vars)) + ;; Convert lexical variables to unique identifiers through + ;; alpha-conversion for 'let' and 'lambda' forms. + ((let? x) + (let ((new-vars (append (map (lambda (binding) + (cons (lhs binding) (unique-variable))) + (let-bindings x)) + vars))) + `(let ,(map (lambda (binding) + (list (assq-ref new-vars (lhs binding)) + (loop (rhs binding) vars))) + (let-bindings x)) + ,@(map (lambda (y) (loop y new-vars)) + (hoist-definitions (let-body x)))))) + ((lambda? x) + (let ((new-vars (append (map (lambda (arg) + (cons arg (unique-variable))) + (lambda-args x)) + vars))) + `(lambda ,(map (lambda (arg) + (assq-ref new-vars arg)) + (lambda-args x)) + ,@(map (lambda (y) + (loop y new-vars)) + (hoist-definitions (lambda-body x)))))) + ;; Function calls. The 'or' expression first tests for a valid + ;; variable reference in the operator position. Failing that, it + ;; for a sub-expression in the operator position. We need to do + ;; this kind of analysis because primitive calls are currently + ;; *not* part of the lexical environment, so if we tried to + ;; lookup the variable '+', for example, it would fail. + ((and (pair? x) + (or (and (symbol? (car x)) (assq-ref vars (car x))) + (pair? (car x)))) + `(funcall ,@(map (lambda (y) (loop y vars)) x))) + ;; If the function call operator is not a variable reference or a + ;; more complex expression, then it might be a primitive call. + ((primcall? x) + `(primcall ,(primcall-op x) + ,@(map (lambda (y) (loop y vars)) + (primcall-operands x)))) + (else ; oh no + (error "unknown form" x))))) + ;; Apply all compiler passes to transform the input program into a ;; form that it is suitable for compilation to native assembly code. (define (transform x) @@ -1097,7 +1382,8 @@ (mark-tail-calls (convert-closures-and-constants (annotate-free-variables - (box-mutable-variables x)))))) + (box-mutable-variables + (macro-expand x))))))) ;;; @@ -1220,7 +1506,7 @@ "789") ;; complex constants (test-case '(let ((f (lambda () (quote (1 . "H"))))) - (eq? (f) (f))) + (eq? (f) (f))) "#t") ;; mutable variables (test-case '(let ((f (lambda (c) @@ -1229,4 +1515,73 @@ (let ((p (f 0))) ((car p) 12) ((cdr p)))) - "12")) + "12") + ;; overriding primcalls + (test-case '(let ((+ (lambda (a b) (* a b)))) + (+ 3 3)) + "9") + ;; let* + (test-case '(let* ((x 1) + (y (+ x 2))) + (+ x y)) + "4") + ;; letrec + (test-case '(letrec ((f (lambda (x) (+ (g x) 1))) + (g (lambda (x) (+ x 2)))) + (f 1)) + "4") + ;; letrec* + (test-case '(letrec* ((f (lambda (x) (+ (g x) 1))) + (g (lambda (x) (+ x 2)))) + (f 1)) + "4") + ;; when + (test-case '(let ((x 0)) + (when (= x 0) + 123)) + "123") + (test-case '(let ((x 1)) + (when (= x 0) + 123)) + "#f") + ;; unless + (test-case '(let ((x 1)) + (unless (= x 0) + 123)) + "123") + (test-case '(let ((x 1)) + (unless (= x 1) + 123)) + "#f") + ;; cond + (test-case '(let ((x 3)) + (cond + ((= x 0) 10) + ((= x 1) 11) + ((= x 2) 12) + (else 13))) + "13") + ;; or + (test-case '(or) "#f") + (test-case '(or #f 666) "666") + (test-case '(or #f #f) "#f") + ;; and + (test-case '(and) "#t") + (test-case '(and 1 2 3) "3") + (test-case '(and 1 2 #f) "#f") + ;; case + (test-case '(case 666 + ((111 222 333) + 444) + ((555 666) + 777) + (else 888)) + "777") + ;; internal define + (test-case '(let () + (define x 1) + (define y 2) + (define (f a b) + (+ a b)) + (f x y)) + "3")) |