summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Thompson <dthompson@vistahigherlearning.com>2022-11-29 21:03:00 -0500
committerDavid Thompson <dthompson@vistahigherlearning.com>2022-11-29 21:03:00 -0500
commit0ca9cc9f1adc07a1831a8711be77844d853f84c1 (patch)
treeda8827a3e58f70b90af1949f72e1e95b51c3f172
parent628f86e51a7500de4e174e73e33606b8aceca86b (diff)
Step 13: Extending the syntax.
-rw-r--r--compiler.scm571
1 files changed, 463 insertions, 108 deletions
diff --git a/compiler.scm b/compiler.scm
index 3a147a2..bcf8813 100644
--- a/compiler.scm
+++ b/compiler.scm
@@ -36,9 +36,39 @@
(define (if? x)
(op-eq? x 'if))
+(define (cond? x)
+ (op-eq? x 'cond))
+
+(define (case? x)
+ (op-eq? x 'case))
+
+(define (when? x)
+ (op-eq? x 'when))
+
+(define (unless? x)
+ (op-eq? x 'unless))
+
+(define (or? x)
+ (op-eq? x 'or))
+
+(define (and? x)
+ (op-eq? x 'and))
+
+(define (define? x)
+ (op-eq? x 'define))
+
(define (let? x)
(op-eq? x 'let))
+(define (let*? x)
+ (op-eq? x 'let*))
+
+(define (letrec? x)
+ (op-eq? x 'letrec))
+
+(define (letrec*? x)
+ (op-eq? x 'letrec*))
+
(define (primcall? x)
(and (pair? x)
(memq (first x)
@@ -53,6 +83,9 @@
make-string string string-length string-ref string-set!
constant-init constant-ref))))
+(define (tagged-primcall? x)
+ (op-eq? x 'primcall))
+
(define (labels? x)
(op-eq? x 'labels))
@@ -86,15 +119,21 @@
(define (primcall-operands x)
(cdr x))
-(define (primcall-operand1 x)
+(define (tagged-primcall-op x)
(second x))
-(define (primcall-operand2 x)
+(define (tagged-primcall-operands x)
+ (cddr x))
+
+(define (tagged-primcall-operand1 x)
(third x))
-(define (primcall-operand3 x)
+(define (tagged-primcall-operand2 x)
(fourth x))
+(define (tagged-primcall-operand3 x)
+ (fifth x))
+
(define (lookup name env)
(or (assq-ref env name)
(error "unbound variable:" name env)))
@@ -139,7 +178,7 @@
(second x))
(define (lambda-body x)
- (third x))
+ (cddr x))
(define (funcall-proc x)
(second x))
@@ -317,22 +356,22 @@
(emit-or (immediate boolean-tag) rax))
(define (emit-primitive-call x si env)
- (case (primcall-op x)
+ (case (tagged-primcall-op x)
((add1)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
(emit-add (immediate (immediate-rep 1)) rax))
((sub1)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
(emit-sub (immediate (immediate-rep 1)) rax))
((integer->char)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
(emit-sal (immediate (- char-shift fixnum-shift)) rax)
(emit-or (immediate char-tag) rax))
((char->integer)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
(emit-shr (immediate (- char-shift fixnum-shift)) rax))
((zero?)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
;; Since the tag of fixnums is 0, we can skip an 'andl'
;; instruction that would apply the mask to the immediate
;; value.
@@ -342,55 +381,69 @@
(emit-sal (immediate boolean-shift) rax)
(emit-or (immediate boolean-tag) rax))
((null?)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
(emit-cmp (immediate empty-list) rax)
(emit-mov (immediate 0) rax)
(emit-sete al)
(emit-sal (immediate boolean-shift) rax)
(emit-or (immediate boolean-tag) rax))
((integer?)
- (emit-tag-check (primcall-operand1 x) fixnum-mask fixnum-tag si env))
+ (emit-tag-check (tagged-primcall-operand1 x) fixnum-mask fixnum-tag si env))
((char?)
- (emit-tag-check (primcall-operand1 x) char-mask char-tag si env))
+ (emit-tag-check (tagged-primcall-operand1 x) char-mask char-tag si env))
((boolean?)
- (emit-tag-check (primcall-operand1 x) boolean-mask boolean-tag si env))
+ (emit-tag-check (tagged-primcall-operand1 x) boolean-mask boolean-tag si env))
((+)
- (emit-expr (primcall-operand2 x) si env)
+ (emit-expr (tagged-primcall-operand2 x) si env)
(emit-mov rax (offset si rsp))
- (emit-expr (primcall-operand1 x) (- si wordsize) env)
+ (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env)
(emit-add (offset si rsp) rax))
((-)
- (emit-expr (primcall-operand2 x) si env)
+ (emit-expr (tagged-primcall-operand2 x) si env)
(emit-mov rax (offset si rsp))
- (emit-expr (primcall-operand1 x) (- si wordsize) env)
+ (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env)
(emit-sub (offset si rsp) rax))
((*)
- (emit-expr (primcall-operand2 x) si env)
+ (emit-expr (tagged-primcall-operand2 x) si env)
(emit-mov rax (offset si rsp))
- (emit-expr (primcall-operand1 x) (- si wordsize) env)
+ (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env)
(emit-imul (offset si rsp) rax)
;; When two fixnums (which have 2 tag bits) are multiplied, the
;; relevant bits for the result are now 4 bytes to the left, so
;; we have to shift back 2 bytes.
(emit-shr (immediate fixnum-shift) rax))
((=)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-sete si env))
((<)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setl si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-setl si env))
((<=)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setle si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-setle si env))
((>)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setg si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-setg si env))
((>=)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-setge si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-setge si env))
((eq?)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-sete si env))
((char=?)
- (emit-comparison (primcall-operand1 x) (primcall-operand2 x) emit-sete si env))
+ (emit-comparison (tagged-primcall-operand1 x)
+ (tagged-primcall-operand2 x)
+ emit-sete si env))
((cons)
- (emit-expr (primcall-operand2 x) si env) ; eval cdr
+ (emit-expr (tagged-primcall-operand2 x) si env) ; eval cdr
(emit-mov rax (offset si rsp)) ; save car to stack
- (emit-expr (primcall-operand1 x) (- si wordsize) env) ; eval car
+ (emit-expr (tagged-primcall-operand1 x) (- si wordsize) env) ; eval car
(emit-mov rax (offset 0 rsi)) ; move car onto heap
(emit-mov (offset si rsp) rax) ; copy cdr from the stack
(emit-mov rax (offset wordsize rsi)) ; move cdr onto heap
@@ -398,17 +451,17 @@
(emit-or (immediate pair-tag) rax) ; set tag
(emit-add (immediate (* wordsize 2)) rsi)) ; bump heap pointer
((car)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
;; We have to untag the pair to get the pointer to the 'car'.
;; The pair tag is 1 so simply subtracting 1 gets us the pointer.
(emit-mov (offset -1 rax) rax))
((cdr)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
;; Again, the value is the pointer incremented by 1, so to get to
;; the cdr we need to jump ahead one word minus 1 byte.
(emit-mov (offset (- wordsize 1) rax) rax))
((make-vector)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
;; Wouldn't it be better to save the length untagged so that
;; vector-ref and vector-set! don't have untag it and
;; vector-length just needs to retag it?
@@ -426,7 +479,7 @@
(emit-and (immediate (- (* 2 wordsize))) rbx)
(emit-add rbx rsi)) ; bump heap pointer by length of vector
((vector)
- (let ((items (cdr x)))
+ (let ((items (tagged-primcall-operands x)))
;; Eval all vector items and save them to stack locations.
;; It's important that we eval all items first, and not copy to
;; the heap as we go, because any sub-expression that also does
@@ -456,31 +509,31 @@
(- (* 2 wordsize))))
rsi)))
((vector-length)
- (emit-expr (primcall-operand1 x) si env) ; get vector pointer
+ (emit-expr (tagged-primcall-operand1 x) si env) ; get vector pointer
(emit-sub (immediate vector-tag) rax) ; untag vector
(emit-mov (offset 0 rax) rax)) ; the first word contains the length
((vector-ref)
- (emit-expr (primcall-operand2 x) si env) ; get index arg
+ (emit-expr (tagged-primcall-operand2 x) si env) ; get index arg
(emit-shr (immediate fixnum-shift) rax) ; untag it
(emit-add (immediate 1) rax) ; first word is the length so skip over it
(emit-imul (immediate wordsize) rax) ; scale by word size
(emit-mov rax rbx) ; save index to another register
- (emit-expr (primcall-operand1 x) si env) ; get vector pointer
+ (emit-expr (tagged-primcall-operand1 x) si env) ; get vector pointer
(emit-sub (immediate vector-tag) rax) ; untag vector
(emit-mov (register-offset rbx rax) rax)) ; get element
((vector-set!)
- (emit-expr (primcall-operand1 x) si env) ; get vector pointer
+ (emit-expr (tagged-primcall-operand1 x) si env) ; get vector pointer
(emit-sub (immediate vector-tag) rax) ; untag vector
(emit-mov rax rbx) ; save vector
- (emit-expr (primcall-operand2 x) si env) ; get index
+ (emit-expr (tagged-primcall-operand2 x) si env) ; get index
(emit-shr (immediate fixnum-shift) rax) ; untag it
(emit-add (immediate 1) rax) ; first word is the length so skip over it
(emit-imul (immediate wordsize) rax) ; scale by word size
(emit-add rax rbx) ; advance pointer to element being set
- (emit-expr (primcall-operand3 x) si env) ; get value
+ (emit-expr (tagged-primcall-operand3 x) si env) ; get value
(emit-mov rax (offset 0 rbx)))
((make-string)
- (emit-expr (primcall-operand1 x) si env)
+ (emit-expr (tagged-primcall-operand1 x) si env)
(emit-mov rax (offset 0 rsi)) ; save length onto heap
(emit-mov rax rbx) ; save length in another register
(emit-mov rsi rax) ; write to heap
@@ -494,7 +547,7 @@
(emit-and (immediate (- (* 2 wordsize))) rbx)
(emit-add rbx rsi)) ; bump heap pointer by length of string
((string)
- (let ((chars (cdr x)))
+ (let ((chars (tagged-primcall-operands x)))
;; Save length onto heap (tagged as immediate int)
(emit-mov (immediate (ash (length chars) 2)) rax)
(emit-mov rax (offset 0 rsi))
@@ -515,11 +568,11 @@
(- (* 2 wordsize))))
rsi)))
((string-length)
- (emit-expr (primcall-operand1 x) si env) ; get string pointer
+ (emit-expr (tagged-primcall-operand1 x) si env) ; get string pointer
(emit-sub (immediate string-tag) rax) ; untag string
(emit-mov (offset 0 rax) rax)) ; the first word contains the length
((string-ref)
- (emit-expr (primcall-operand2 x) si env) ; get index arg
+ (emit-expr (tagged-primcall-operand2 x) si env) ; get index arg
(emit-shr (immediate fixnum-shift) rax) ; untag it
;; The first word of a string contains the length, however we
;; don't want to advance the pointer by a word because each
@@ -530,31 +583,31 @@
;; everything but the LSB to isolate the character.
(emit-add (immediate 1) rax)
(emit-mov rax rbx) ; save index to another register
- (emit-expr (primcall-operand1 x) si env) ; get string pointer
+ (emit-expr (tagged-primcall-operand1 x) si env) ; get string pointer
(emit-sub (immediate string-tag) rax) ; untag string
(emit-mov (register-offset rbx rax) rax) ; get char into LSB position
(emit-and (immediate 255) rax) ; clear out everything but the LSB
(emit-sal (immediate char-shift) rax) ; tag char
(emit-or (immediate char-tag) rax))
((string-set!)
- (emit-expr (primcall-operand1 x) si env) ; get string pointer
+ (emit-expr (tagged-primcall-operand1 x) si env) ; get string pointer
(emit-sub (immediate string-tag) rax) ; untag string
(emit-mov rax rbx) ; save string pointer
- (emit-expr (primcall-operand2 x) si env) ; get index arg
+ (emit-expr (tagged-primcall-operand2 x) si env) ; get index arg
(emit-shr (immediate fixnum-shift) rax) ; untag it
(emit-add (immediate 1) rax)
(emit-add rax rbx) ; get char into LSB position
- (emit-expr (primcall-operand3 x) si env) ; get value
+ (emit-expr (tagged-primcall-operand3 x) si env) ; get value
(emit-shr (immediate char-shift) rax) ; untag char
(emit-andq (immediate -255) (offset 0 rbx)) ; clear LSB
(emit-or rax (offset 0 rbx))) ; copy char
((constant-init)
- (let ((label (lookup (primcall-operand1 x) env)))
- (emit-expr (primcall-operand2 x) si env)
+ (let ((label (lookup (tagged-primcall-operand1 x) env)))
+ (emit-expr (tagged-primcall-operand2 x) si env)
(emit-lea (offset label rip) rbx)
(emit-mov rax (offset 0 rbx))))
((constant-ref)
- (let ((label (lookup (primcall-operand1 x) env)))
+ (let ((label (lookup (tagged-primcall-operand1 x) env)))
(emit-lea (offset label rip) rax)
(emit-mov (offset 0 rax) rax)))
(else
@@ -704,7 +757,7 @@
(emit-if (test x) (consequent x) (alternate x) si env))
((let? x)
(emit-let (let-bindings x) (let-body x) si env))
- ((primcall? x)
+ ((tagged-primcall? x)
(emit-primitive-call x si env))
((labels? x)
(emit-labels (label-bindings x) (label-body x) si env))
@@ -715,7 +768,7 @@
((tail-call? x)
(emit-tail-call (tail-call-proc x) (tail-call-arguments x) si env))
(else
- (error "unknown expression" x))))
+ (error "unknown expression in native code generation" x))))
(define (emit-labels lvars body-exps si env)
(let* ((lvars* (map (lambda (lvar)
@@ -776,12 +829,18 @@
(append-map (lambda (y)
(loop (add-variables vars (map lhs (let-bindings x))) y))
(let-body x)))
- ((primcall? x)
+ ((tagged-primcall? x)
(append-map (lambda (operand)
(loop vars operand))
- (cdr x)))
+ (tagged-primcall-operands x)))
((lambda? x)
- (loop (add-variables vars (lambda-args x)) (lambda-body x)))
+ (append-map (lambda (y)
+ (loop (add-variables vars (lambda-args x)) y))
+ (lambda-body x)))
+ ((funcall? x)
+ (append-map (lambda (operand)
+ (loop vars operand))
+ (cdr x)))
((pair? x)
(append-map (lambda (arg)
(loop vars arg))
@@ -805,15 +864,17 @@
(annotate-free-variables (rhs binding))))
(let-bindings x))
,@(map annotate-free-variables (let-body x))))
- ((primcall? x)
- (cons (primcall-op x)
- (map annotate-free-variables (cdr x))))
+ ((tagged-primcall? x)
+ `(primcall ,(tagged-primcall-op x)
+ ,@(map annotate-free-variables (tagged-primcall-operands x))))
+ ((funcall? x)
+ `(funcall ,@(map annotate-free-variables (cdr x))))
((lambda? x)
`(lambda ,(lambda-args x)
,(free-variables x)
- ,(annotate-free-variables (lambda-body x))))
- ((pair? x)
- `(funcall ,@(map annotate-free-variables x)))))
+ ,@(map annotate-free-variables (lambda-body x))))
+ (else
+ (error "unknown form in free variable annotation" x))))
;; Determine if var is mutated (via set!) within an expression.
(define (mutable? var x)
@@ -841,17 +902,19 @@
(any (lambda (y)
(mutable? var y))
(let-body x))))))
- ((primcall? x)
+ ((tagged-primcall? x)
(any (lambda (arg)
(mutable? var arg))
- (cdr x)))
+ (tagged-primcall-operands x)))
((lambda? x)
(let ((shadowed? (any (lambda (arg)
(eq? arg var))
(lambda-args x))))
(if shadowed?
#f
- (mutable? var (lambda-body x)))))
+ (any (lambda (y)
+ (mutable? var y))
+ (lambda-body x)))))
((set? x)
(eq? (second x) var))
((pair? x)
@@ -864,7 +927,7 @@
((variable? x)
;; Mutable variable references must be unboxed.
(if (memq x mutable-vars)
- `(vector-ref ,x 0)
+ `(primcall vector-ref ,x 0)
x))
((if? x)
`(if ,(box-mutable-variables (test x) mutable-vars)
@@ -874,19 +937,21 @@
(let* ((bindings (let-bindings x))
(mutable-bindings
;; Find all mutable bindings.
- (filter (lambda (binding)
- (any (lambda (y)
- (mutable? (lhs binding) y))
- (let-body x)))
- bindings)))
+ (filter-map (lambda (binding)
+ (and (any (lambda (y)
+ (mutable? (lhs binding) y))
+ (let-body x))
+ (lhs binding)))
+ bindings)))
`(let ,(map (lambda (binding)
(let ((var (lhs binding)))
(list var
;; Use a 1 element vector to box mutable
;; variables.
(if (memq var mutable-bindings)
- `(vector ,(box-mutable-variables (rhs binding)
- mutable-vars))
+ `(primcall vector
+ ,(box-mutable-variables (rhs binding)
+ mutable-vars))
(box-mutable-variables (rhs binding)
mutable-vars)))))
bindings)
@@ -905,7 +970,7 @@
memo)
(delq var memo)))
mutable-vars
- bindings)))
+ mutable-bindings)))
(map (lambda (y)
(box-mutable-variables y mutable-vars*))
(let-body x))))))
@@ -929,45 +994,52 @@
;; element vectors bound to the original names.
(let ,(filter-map (lambda (arg)
(and (not (eq? (car arg) (cdr arg)))
- (list (car arg) `(vector ,(cdr arg)))))
+ (list (car arg)
+ `(primcall vector ,(cdr arg)))))
args*)
- ,(box-mutable-variables (lambda-body x)
- (fold (lambda (arg memo)
- (if (eq? (car arg) (cdr arg))
- memo
- (cons (car arg) memo)))
- mutable-vars*
- args*))))))
- ((primcall? x)
- (cons (primcall-op x)
- (map (lambda (arg)
- (box-mutable-variables arg mutable-vars))
- (primcall-operands x))))
+ ,@(let ((mutable-vars*
+ (fold (lambda (arg memo)
+ (if (eq? (car arg) (cdr arg))
+ memo
+ (cons (car arg) memo)))
+ mutable-vars*
+ args*)))
+ (map (lambda (y)
+ (box-mutable-variables y mutable-vars*))
+ (lambda-body x)))))))
+ ((tagged-primcall? x)
+ `(primcall ,(tagged-primcall-op x)
+ ,@(map (lambda (arg)
+ (box-mutable-variables arg mutable-vars))
+ (tagged-primcall-operands x))))
+ ((funcall? x)
+ `(funcall ,@(map (lambda (y)
+ (box-mutable-variables y mutable-vars))
+ (cdr x))))
((set? x)
;; Calls to set! are simply transformed to calls to vector-set! to
;; modify the item inside the box.
- `(vector-set! ,(second x) 0 ,(third x)))
- ((pair? x)
- (map (lambda (y)
- (box-mutable-variables y mutable-vars))
- x))))
+ `(primcall vector-set! ,(second x) 0
+ ,(box-mutable-variables (third x) mutable-vars)))
+ (else
+ (error "unknown form in mutable variable boxing pass" x))))
;; Transforms a quote form into code.
(define (expand-quote x)
(cond
((immediate? x) x)
((string? x)
- `(string ,@(string->list x)))
+ `(primcall string ,@(string->list x)))
((vector? x)
- `(vector ,@(map expand-quote (vector->list x))))
+ `(primcall vector ,@(map expand-quote (vector->list x))))
((pair? x)
- `(cons ,(expand-quote (car x))
- ,(expand-quote (cdr x))))
+ `(primcall cons ,(expand-quote (car x))
+ ,(expand-quote (cdr x))))
((null? x)
'())
;; The compiler doesn't support symbols yet...
(else
- (error "unknown quoted thing" x))))
+ (error "unknown quoted expression" x))))
;; Extracts certain forms to global program labels. Lambdas are
;; converted to closures and given a label. Quoted expressions are
@@ -989,9 +1061,12 @@
(if (immediate? quoted)
(values quoted '() '())
(let ((const (unique-variable)))
- (values `(constant-ref ,const)
+ (values `(primcall constant-ref ,const)
(list (list const '(datum)))
- (list `(constant-init ,const ,(expand-quote (quote-data x)))))))))
+ (list `(primcall constant-init
+ ,const
+ ,(expand-quote
+ (quote-data x)))))))))
((variable? x)
(values x '() '()))
((if? x)
@@ -1021,14 +1096,13 @@
(concatenate (map second body*)))
(append (concatenate (map third bindings*))
(concatenate (map third body*))))))
- ((primcall? x)
+ ((tagged-primcall? x)
(let ((operands (map (lambda (operand)
(let-values (((operand* labels initializers)
(iter operand)))
(list operand* labels initializers)))
- (cdr x))))
- (values (cons (primcall-op x)
- (map first operands))
+ (tagged-primcall-operands x))))
+ (values `(primcall ,(tagged-primcall-op x) ,@(map first operands))
(concatenate (map second operands))
(concatenate (map third operands)))))
;; Convert procedure calls to 'funcall' form that refers to a
@@ -1064,6 +1138,7 @@
((immediate? x) x)
((variable? x) x)
((closure? x) x)
+ ((quote? x) x)
((if? x)
`(if ,(mark-tail-calls (test x))
,(maybe-mark (consequent x))
@@ -1074,9 +1149,9 @@
(mark-tail-calls (rhs binding))))
(let-bindings x))
,@(map maybe-mark (let-body x))))
- ((primcall? x)
- (cons (primcall-op x)
- (map mark-tail-calls (cdr x))))
+ ((tagged-primcall? x)
+ `(primcall ,(tagged-primcall-op x)
+ ,@(map mark-tail-calls (tagged-primcall-operands x))))
((funcall? x)
`(funcall ,@(map mark-tail-calls (cdr x))))
((code? x)
@@ -1090,6 +1165,216 @@
(label-bindings x))
,@(map mark-tail-calls (drop x 2))))))
+;; Transform expressions like:
+;; (define (foo x) x)
+;; Into:
+;; (define foo (lambda (x) x))
+(define (expand-definition x)
+ (if (pair? (second x)) ;
+ `(define ,(car (second x))
+ (lambda ,(cdr (second x))
+ ,@(cddr x)))
+ x))
+
+;; Tranform a series of expressions with 'define' forms into a 'let'
+;; expression that binds all the defined variables.
+(define (hoist-definitions exprs)
+ (let loop ((orig-exprs exprs)
+ (defs '())
+ (new-exprs '()))
+ (cond
+ ((and (null? orig-exprs) (null? defs))
+ exprs)
+ ((null? orig-exprs)
+ `((let ,(reverse defs)
+ ,@(reverse new-exprs))))
+ ((define? (car orig-exprs))
+ (let ((x (expand-definition (car orig-exprs))))
+ (loop (cdr orig-exprs)
+ (cons (list (second x) (third x)) defs)
+ new-exprs)))
+ (else
+ (loop (cdr orig-exprs)
+ defs
+ (cons (car orig-exprs) new-exprs))))))
+
+;; Expand a fixed set of macros (let*, letrec, letrec*, cond, case,
+;; when, unless, or, and, and define) in a hygienic manner and
+;; explicitly tag primitive calls and function calls.
+(define (macro-expand x)
+ (let loop ((x x)
+ (vars '()))
+ (cond
+ ;; Immediates and quoted values are left as-is.
+ ((immediate? x) x)
+ ((quote? x) x)
+ ;; Variable references are replaced with their unique,
+ ;; alpha-converted name. This prevents lexical variable names
+ ;; from clashing with primitive call names.
+ ((symbol? x)
+ (or (assq-ref vars x)
+ (error "unbound variable in macro expansion" x)))
+ ((if? x)
+ `(if ,(loop (test x) vars)
+ ,(loop (consequent x) vars)
+ ,(loop (alternate x) vars)))
+ ((set? x)
+ `(set! ,(loop (second x) vars) ,(loop (third x) vars)))
+ ;; Convert extended forms to primitive forms. For example, a
+ ;; let* expression is rewritten as a nested series of let
+ ;; expression.
+ ((let*? x)
+ (loop (car
+ (let expand-let* ((bindings (let-bindings x)))
+ (if (null? bindings)
+ (let-body x)
+ (let ((binding (car bindings)))
+ `((let (,binding)
+ ,@(expand-let* (cdr bindings))))))))
+ vars))
+ ;; Simple implementation of letrec: Bind variables to empty
+ ;; values. Bind temporary variables to the real values. Mutate
+ ;; original variables to have the values of the temporaries.
+ ((letrec? x)
+ ;; TODO: Use special unspecified value instead of #f.
+ (loop (let ((temps (map (lambda (binding)
+ (cons (lhs binding) (unique-variable)))
+ (let-bindings x))))
+ `(let ,(map (lambda (binding)
+ (list (lhs binding) #f))
+ (let-bindings x))
+ (let ,(map (lambda (binding temp)
+ (list (cdr temp) (rhs binding)))
+ (let-bindings x)
+ temps)
+ ,@(map (lambda (binding)
+ (let ((var (lhs binding)))
+ `(set! ,var ,(assq-ref temps var))))
+ (let-bindings x))
+ ,@(let-body x))))
+ vars))
+ ;; letrec* is simpler than letrec because binding happens
+ ;; sequentially so we don't need to use temporaries.
+ ((letrec*? x)
+ ;; TODO: Use special unspecified value instead of #f.
+ (loop `(let ,(map (lambda (binding)
+ (list (lhs binding) #f))
+ (let-bindings x))
+ ,@(map (lambda (binding)
+ `(set! ,(lhs binding) ,(rhs binding)))
+ (let-bindings x))
+ ,@(let-body x))
+ vars))
+ ;; TODO: Handle multiple expressions in clause.
+ ;; TODO: Handle => syntax.
+ ((cond? x) ; 'cond' is just nested 'if's
+ (loop (let expand-cond ((clauses (cdr x)))
+ (if (null? clauses)
+ #f
+ (let ((clause (car clauses)))
+ (cond
+ ((and (eq? (first clause) 'else)
+ (null? (cdr clauses)))
+ (second clause))
+ ((eq? (first clause) 'else)
+ (error "else must be the last clause of cond" x))
+ (else
+ `(if ,(first clause)
+ ,(second clause)
+ ,(expand-cond (cdr clauses))))))))
+ vars))
+ ;; 'when' and 'unless' are, you guessed it, special forms of
+ ;; 'if'!
+ ;;
+ ;; TODO: Handle multiple expressions
+ ((when? x)
+ (loop `(if ,(second x) ,(third x) #f) vars))
+ ((unless? x)
+ (loop `(if (eq? ,(second x) #f) ,(third x) #f) vars))
+ ;; 'or' and 'and' are also defined in terms of 'if'
+ ((or? x)
+ (loop (let or-loop ((exprs (cdr x)))
+ (if (null? exprs)
+ #f
+ `(let ((v ,(car exprs)))
+ (if v v ,(or-loop (cdr exprs))))))
+ vars))
+ ((and? x)
+ (loop (let and-loop ((exprs (cdr x)))
+ (cond
+ ((null? exprs) #t)
+ ((null? (cdr exprs))
+ `(let ((v ,(car exprs)))
+ (if v v #f)))
+ (else
+ `(let ((v ,(car exprs)))
+ (if v ,(and-loop (cdr exprs)) #f)))))
+ vars))
+ ;; TODO: Handle => syntax
+ ;; TODO: Handle symbols once implemented
+ ;; TODO: Handle multiple expressions in a clause
+ ((case? x)
+ (loop `(let ((key ,(second x)))
+ ,(let expand-cond ((clauses (cddr x)))
+ (if (null? clauses)
+ #f
+ (let ((clause (car clauses)))
+ (cond
+ ((and (eq? (first clause) 'else)
+ (null? (cdr clauses)))
+ (second clause))
+ ((eq? (first clause) 'else)
+ (error "else must be the last clause of case" x))
+ (else
+ `(if (or ,@(map (lambda (datum)
+ `(= key ,datum))
+ (first clause)))
+ ,(second clause)
+ ,(expand-cond (cdr clauses)))))))))
+ vars))
+ ;; Convert lexical variables to unique identifiers through
+ ;; alpha-conversion for 'let' and 'lambda' forms.
+ ((let? x)
+ (let ((new-vars (append (map (lambda (binding)
+ (cons (lhs binding) (unique-variable)))
+ (let-bindings x))
+ vars)))
+ `(let ,(map (lambda (binding)
+ (list (assq-ref new-vars (lhs binding))
+ (loop (rhs binding) vars)))
+ (let-bindings x))
+ ,@(map (lambda (y) (loop y new-vars))
+ (hoist-definitions (let-body x))))))
+ ((lambda? x)
+ (let ((new-vars (append (map (lambda (arg)
+ (cons arg (unique-variable)))
+ (lambda-args x))
+ vars)))
+ `(lambda ,(map (lambda (arg)
+ (assq-ref new-vars arg))
+ (lambda-args x))
+ ,@(map (lambda (y)
+ (loop y new-vars))
+ (hoist-definitions (lambda-body x))))))
+ ;; Function calls. The 'or' expression first tests for a valid
+ ;; variable reference in the operator position. Failing that, it
+ ;; for a sub-expression in the operator position. We need to do
+ ;; this kind of analysis because primitive calls are currently
+ ;; *not* part of the lexical environment, so if we tried to
+ ;; lookup the variable '+', for example, it would fail.
+ ((and (pair? x)
+ (or (and (symbol? (car x)) (assq-ref vars (car x)))
+ (pair? (car x))))
+ `(funcall ,@(map (lambda (y) (loop y vars)) x)))
+ ;; If the function call operator is not a variable reference or a
+ ;; more complex expression, then it might be a primitive call.
+ ((primcall? x)
+ `(primcall ,(primcall-op x)
+ ,@(map (lambda (y) (loop y vars))
+ (primcall-operands x))))
+ (else ; oh no
+ (error "unknown form" x)))))
+
;; Apply all compiler passes to transform the input program into a
;; form that it is suitable for compilation to native assembly code.
(define (transform x)
@@ -1097,7 +1382,8 @@
(mark-tail-calls
(convert-closures-and-constants
(annotate-free-variables
- (box-mutable-variables x))))))
+ (box-mutable-variables
+ (macro-expand x)))))))
;;;
@@ -1220,7 +1506,7 @@
"789")
;; complex constants
(test-case '(let ((f (lambda () (quote (1 . "H")))))
- (eq? (f) (f)))
+ (eq? (f) (f)))
"#t")
;; mutable variables
(test-case '(let ((f (lambda (c)
@@ -1229,4 +1515,73 @@
(let ((p (f 0)))
((car p) 12)
((cdr p))))
- "12"))
+ "12")
+ ;; overriding primcalls
+ (test-case '(let ((+ (lambda (a b) (* a b))))
+ (+ 3 3))
+ "9")
+ ;; let*
+ (test-case '(let* ((x 1)
+ (y (+ x 2)))
+ (+ x y))
+ "4")
+ ;; letrec
+ (test-case '(letrec ((f (lambda (x) (+ (g x) 1)))
+ (g (lambda (x) (+ x 2))))
+ (f 1))
+ "4")
+ ;; letrec*
+ (test-case '(letrec* ((f (lambda (x) (+ (g x) 1)))
+ (g (lambda (x) (+ x 2))))
+ (f 1))
+ "4")
+ ;; when
+ (test-case '(let ((x 0))
+ (when (= x 0)
+ 123))
+ "123")
+ (test-case '(let ((x 1))
+ (when (= x 0)
+ 123))
+ "#f")
+ ;; unless
+ (test-case '(let ((x 1))
+ (unless (= x 0)
+ 123))
+ "123")
+ (test-case '(let ((x 1))
+ (unless (= x 1)
+ 123))
+ "#f")
+ ;; cond
+ (test-case '(let ((x 3))
+ (cond
+ ((= x 0) 10)
+ ((= x 1) 11)
+ ((= x 2) 12)
+ (else 13)))
+ "13")
+ ;; or
+ (test-case '(or) "#f")
+ (test-case '(or #f 666) "666")
+ (test-case '(or #f #f) "#f")
+ ;; and
+ (test-case '(and) "#t")
+ (test-case '(and 1 2 3) "3")
+ (test-case '(and 1 2 #f) "#f")
+ ;; case
+ (test-case '(case 666
+ ((111 222 333)
+ 444)
+ ((555 666)
+ 777)
+ (else 888))
+ "777")
+ ;; internal define
+ (test-case '(let ()
+ (define x 1)
+ (define y 2)
+ (define (f a b)
+ (+ a b))
+ (f x y))
+ "3"))